I'm trying to implement a pagerank algorithm with 1 million nodes as input.
However, I've got this error. I'm quite sure it's because an open recursive call problem, from generateParamList().
I've tried to make this method a different way not using recursive, but failed again and again. If you let me know how to modify the "generateParamList()", I will appreciate that!!!
Exception in thread "main" java.lang.StackOverflowError
at Ranking.getInboundLinks(Ranking.java:200)
at Ranking.generateParamList(Ranking.java:172)
import Jama.Matrix;
public class Ranking {
private final double DAMPING_FACTOR = 0.85;
private List params = new ArrayList();
private static Map<String, String[]> outlinkMap = new HashMap();
private static Map<String, String[]> inlinkMap = new HashMap();
public static void main(String[] args) throws IOException {
Ranking ranking = new Ranking();
FileInputStream in = new FileInputStream("linkgraph_test.txt");
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String strLine;
String[] tempStringArray = null;
int iTemp = 0;
while ((strLine = br.readLine()) != null) {
tempStringArray = strLine.split(" ");
String[] tempValueArray = new String[tempStringArray.length-1];
for(iTemp=1;iTemp<tempStringArray.length;iTemp++){
tempValueArray[iTemp-1] = tempStringArray[iTemp];
}
outlinkMap.put(tempStringArray[0], tempValueArray);
}
in.close();
setInboundLinks();
for(int j=1;j<=outlinkMap.size();j++){
String keytoString = Integer.toString(j);
System.out.println(keytoString+" "+ranking.rank(keytoString));
}
}
/*
*
* Solve the equation of ax=b, which : a is the generated matrix based on the parameter constants. x is the page ranks matrix. b is a n*1 matrix which all the values are equal to the damping factor.
*
*/
public double rank(String pageId) {
generateParamList(pageId);
Matrix a = new Matrix(generateMatrix());
double[][] arrB = new double[params.size()][1];
for (int i = 0; i < params.size(); i++) {
arrB[i][0] = 1 - DAMPING_FACTOR;
}
Matrix b = new Matrix(arrB);
// Solve the equation and get the page ranks
Matrix x = a.solve(b);
int ind = 0;
int cnt = 0;
for (Iterator it = params.iterator(); it.hasNext();) {
String curPage = (String) it.next();
if (curPage.equals(pageId))
ind = cnt;
cnt++;
}
return x.getArray()[ind][0];
}
/*
*
* This method generates the matrix of the linear equations. The generated
*
* matrix is a n*n matrix where n is number of the related pages.
*
*/
private double[][] generateMatrix() {
double[][] arr = new double[params.size()][params.size()];
for (int i = 0; i < params.size(); i++) {
for (int j = 0; j < params.size(); j++) {
arr[i][j] = getMultiFactor((String) params.get(i),(String) params.get(j));
}
}
return arr;
}
/*
*
* This method returns the constant of the given variable in the linear equation.
*
*/
private double getMultiFactor(String sourceId, String linkId) {
if (sourceId.equals(linkId))
return 1;
if(!sourceId.equals(linkId) && getInboundLinks(sourceId)!=null ) {
//System.out.println("source"+sourceId+"\tlinkInd"+linkId);
String[] inc = getInboundLinks(sourceId);
for (int i = 0; i < inc.length; i++) {
if (inc[i].equals(linkId)) {
return -1*(DAMPING_FACTOR / getOutboundLinks(linkId).length);
}
}
}
return 0;
}
/*
*
* This method returns list of the related pages. This list is also the parameters in the linear equation.
*
*/
private void generateParamList(String pageId) {
System.out.println("start"+pageId);
if(getInboundLinks(pageId)!=null){
// Add the starting page.
if (!params.contains(pageId)) params.add(pageId);
// Get list of the inbound pages
String[] inc = getInboundLinks(pageId);
// Add the inbound links to the params list and do same for inbound links
for (int i = 0; i < inc.length; i++) {
if (!params.contains(inc[i])){
generateParamList(inc[i]);
}
}
}
}
/*
*
* Return list of the inbound links to a given page.
*
*/
private String[] getInboundLinks(String pageId) {
return (String[]) inlinkMap.get(pageId);
}
private static void setInboundLinks(){
int valueLength;
String keytoString;
String[] outlinkValueArray, inlinkValueArrayTemp, tempCopyArray, resultArray;
for(int i=1; i<=outlinkMap.size();i++){
keytoString = Integer.toString(i);
outlinkValueArray= outlinkMap.get(keytoString);
if(outlinkValueArray!=null){
valueLength = outlinkValueArray.length;
for(int j=0; j<valueLength;j++){
String[] tempValueArray = new String[1];
if(!inlinkMap.containsKey(outlinkValueArray[j])){
tempValueArray[0] = keytoString;
inlinkMap.put(outlinkValueArray[j],tempValueArray);
}
else{
tempCopyArray = inlinkMap.get(outlinkValueArray[j]);
tempValueArray[0] = keytoString;
resultArray = mergeArray(tempCopyArray, tempValueArray);
inlinkMap.put(outlinkValueArray[j],resultArray);
}
}
}
}
}
public static String[] mergeArray(String[] ar1, String[] ar2){
String[] ar3 = Arrays.copyOf(ar1, ar1.length+ar2.length);
System.arraycopy(ar2, 0, ar3, ar1.length, ar2.length);
return ar3;
}
/*
*
* Returns list of the outbound links from a page.
*
*/
private String[] getOutboundLinks(String pageId) {
// This simulates a simple page collection
return (String[]) outlinkMap.get(pageId);
}
}
input format: 1 126645 320349 144068 635538 37354 141327 53842 121180 4786 31658 285466 526963 93938 9097 32341 ............. 2 ......
Change you function generateParamList for this, it is a non recursive version. I think it do the same job, please test. I put some comments to help you understand the concept.
private void generateParamList(String pageId) {
// Don't process duplicates
if (params.contains(pageId)) return;
// Store links not processed
Stack<String> links = new Stack<String>();
// Store current pageId to be processed
links.push(pageId);
// Loop, processed the Stack link, not a recursive call
while (!links.empty()) {
pageId = links.pop();
// Link already processed
if (params.contains(pageId)) continue;
params.add(pageId);
// Get list of the inbound pages
String[] inc = getInboundLinks(pageId);
for (int i = 0; i < inc.length; i++) {
// Store link to be processed by the loop
links.push(inc[i]);
}
}
}