in src/main/java/com/twitter/sbf/graph/BipartiteGraph.java [248:375]
public Graph projectRight(double threshold) {
/*
* Maintain a hash table T where the keys are (i,j) for 1 <= i < j <= numRightVertices,
* and the values are integer counts whose final value will represent
* the size of the intersection of the adjacency lists of vertices i and j on the right side,
* i.e. |N(i) intersection N(j)|.
* To populate the hash table go through every vertex on the left side, and
* for every such vertex, look at all the pairs of vertices (i,j), i<j, formed
* using vertices i and j in its adjacency list.
* For every pair (i,j), update the hash table as follows: T[(i,j)] = T[(i,j)] + 1, if
* (i,j) is already present in the table, otherwise set T[(i,j)] = 1.
* At the end of the process, if N(i) and N(j) don't intersect, then (i,j) is not in T,
* otherwise T[(i,j)] is exactly |N(i) intersection N(j)|.
* We can now compute the adjacency lists and edge-weight lists for every vertex
* in the projected graph using the table T[(i,j)]. We also perform thresholding at this stage
* and delete edges whose weights are below the threshold.
* Since the Graph class constructor requires that we supply adjacency and weight lists
* in sorted order, we incur some additional cost in generating sorting lists. This turns out
* to be an additional multiplicative factor equaling log(numRightVertices)<<log(10^8)~30.
* Also, we implement the hash table T as a nested two-level HashTable for efficiency reasons.
*/
//Define and populate the hash table T (we refer to it as pairsOnTheRightThatShare in the code)
//Iterate through the vertices on the left side
HashMap<Integer, HashMap<Integer, Integer>> pairsOnTheRightThatShare = new HashMap<>();
for (int leftVertex = 0; leftVertex < this.numLeftVertices; leftVertex++) {
//Get the adjacency list of the current left vertex
int[] curAdjList = this.neighborsForLeft[leftVertex];
//Iterate over all pairs i<j in the adjacency list of the current left vertex we are on
for (int it1 = 0; it1 < curAdjList.length; it1++) {
for (int it2 = it1 + 1; it2 < curAdjList.length; it2++) {
//Define i and j as above.
//Note that since the adjacency lists in the bipartite graph are sorted,
//we have that i < j.
int i = curAdjList[it1];
int j = curAdjList[it2];
HashMap<Integer, Integer> mapOfI
= pairsOnTheRightThatShare.getOrDefault(i, new HashMap<>());
Integer curValOfIAndJ = mapOfI.getOrDefault(j, 0);
mapOfI.put(j, curValOfIAndJ + 1);
pairsOnTheRightThatShare.put(i, mapOfI);
//For convenience sake, we also maintain the same count at T[(j,i)].
//This symmetry allows us to construct the adjacency lists of the projected graph
//with ease in the next step.
HashMap<Integer, Integer> mapOfJ =
pairsOnTheRightThatShare.getOrDefault(j, new HashMap<>());
mapOfJ.put(i, curValOfIAndJ + 1);
pairsOnTheRightThatShare.put(j, mapOfJ);
}
}
}
//Use the hash table T to create adjacency lists and weight lists for every vertex
//in the projected graph. Use the user specified threshold along with a tolerance of 1e-5 to
//delete edges with weight below the threshold.
//We also maintain the number of edges in the projected graph.
long numEdgesProjected = 0L;
int[][] adjListsProjected = new int[this.numRightVertices][];
float[][] wtListsProjected = new float[this.numRightVertices][];
//We also maintain a HashTable to keep track of edge weights that have been
//already computed to avoid recomputing twice for the same edge.
HashMap<Integer, HashMap<Integer, Float>> computedEdgeWeights = new HashMap<>();
for (int i = 0; i < this.numRightVertices; i++) {
HashMap<Integer, Integer> mapOfI = pairsOnTheRightThatShare.getOrDefault(i, new HashMap<>());
//Define temporary ArrayLists to store the adjacency list and weights for vertex i
//in the projected graph after thresholding.
ArrayList<Integer> thresholdedAdjacencyListForI = new ArrayList<Integer>(0);
ArrayList<Float> thresholdedWtListForI = new ArrayList<Float>(0);
//The number of neighbors of vertex i in the projected graph after thresholding
int thresholdedNumOfNeighborsOfI = 0;
//If the list of neighbors of i is empty, then there is nothing more to do.
if (mapOfI.size() > 0) {
//Convert the set of keys of mapOfI into an array. These are neighbors of i
//in the projected graph pre-thresholding.
Integer[] keysOfI = mapOfI.keySet().toArray(new Integer[mapOfI.size()]);
//Sort the array of neighbors of i by their index/id.
Arrays.sort(keysOfI);
//Iterate through the neighbors in sorted list and set the weight and adjacency for each
for (int nId = 0; nId < mapOfI.size(); nId++) {
//Get the label of the neighbor
int j = keysOfI[nId];
//float variable to hold the weight of the edge between i and j
float weightOfIJEdge;
//If j < i then the weight for the edge between i and j in the projected graph
// would have been computed during the iteration through
// the neighbors of j and so we can just look it up
//from the hash table.
if (j < i) {
weightOfIJEdge = computedEdgeWeights.get(j).get(i);
} else {
//Recall that T[(i,j)] (and T[(j,i)]) now contains |N(i) intersection N(j)|
int intersectionSize = mapOfI.get(j);
//To compute weight of the edge between i and j, we must normalize
//the size of the intersection by Sqrt(degreeOfI * degreeOfJ),
//where degreeOfI and degreeOfJ are the degrees of i and j in the bipartite graph.
int degreeOfI = this.getNeighborsForRightById(i).length;
int degreeOfJ = this.getNeighborsForRightById(j).length;
weightOfIJEdge = intersectionSize / (float) Math.sqrt(degreeOfI * degreeOfJ);
//Add the computed weight to the hash table
HashMap<Integer, Float> tempWeightMap
= computedEdgeWeights.getOrDefault(i, new HashMap<Integer, Float>());
tempWeightMap.put(j, weightOfIJEdge);
computedEdgeWeights.put(i, tempWeightMap);
}
//Update the adjacency and weight list of vertex i IF the weight of the edge
//between i and j is greater than the threshold (using tolerance 1e-5)
if (weightOfIJEdge - threshold > 1e-5) {
thresholdedNumOfNeighborsOfI++;
thresholdedAdjacencyListForI.add(j);
thresholdedWtListForI.add(weightOfIJEdge);
}
}
}
//Update number of edges. Note this is double counting. We will divide by two later.
numEdgesProjected += thresholdedNumOfNeighborsOfI;
//Initialize the adjacency and weight list of vertex i in the projected graph
adjListsProjected[i] = new int[thresholdedNumOfNeighborsOfI];
wtListsProjected[i] = new float[thresholdedNumOfNeighborsOfI];
//Convert the ArrayLists to primitive arrays
for (int index = 0; index < thresholdedNumOfNeighborsOfI; index++) {
adjListsProjected[i][index] = thresholdedAdjacencyListForI.get(index);
wtListsProjected[i][index] = thresholdedWtListForI.get(index);
}
}
//Every edge has been counted twice, so we divide by two
//Instantiate a new instance of Graph and return
return new Graph(
this.numRightVertices, numEdgesProjected / 2, adjListsProjected, wtListsProjected
);
}