public Graph projectRight()

in src/main/java/com/twitter/sbf/graph/BipartiteGraph.java [248:375]


  public Graph projectRight(double threshold) {
    /*
     * Maintain a hash table T where the keys are (i,j) for 1 <= i < j <= numRightVertices,
     * and the values are integer counts whose final value will represent
     * the size of the intersection of the adjacency lists of vertices i and j on the right side,
     * i.e. |N(i) intersection N(j)|.
     * To populate the hash table go through every vertex on the left side, and
     * for every such vertex, look at all the pairs of vertices (i,j), i<j, formed
     * using vertices i and j in its adjacency list.
     * For every pair (i,j), update the hash table as follows: T[(i,j)] = T[(i,j)] + 1, if
     * (i,j) is already present in the table, otherwise set T[(i,j)] = 1.
     * At the end of the process, if N(i) and N(j) don't intersect, then (i,j) is not in T,
     * otherwise T[(i,j)] is exactly |N(i) intersection N(j)|.
     * We can now compute the adjacency lists and edge-weight lists for every vertex
     * in the projected graph using the table T[(i,j)]. We also perform thresholding at this stage
     * and delete edges whose weights are below the threshold.
     * Since the Graph class constructor requires that we supply adjacency and weight lists
     * in sorted order, we incur some additional cost in generating sorting lists. This turns out
     * to be an additional multiplicative factor equaling log(numRightVertices)<<log(10^8)~30.
     * Also, we implement the hash table T as a nested two-level HashTable for efficiency reasons.
     */
    //Define and populate the hash table T (we refer to it as pairsOnTheRightThatShare in the code)
    //Iterate through the vertices on the left side
    HashMap<Integer, HashMap<Integer, Integer>> pairsOnTheRightThatShare = new HashMap<>();
    for (int leftVertex = 0; leftVertex < this.numLeftVertices; leftVertex++) {
      //Get the adjacency list of the current left vertex
      int[] curAdjList = this.neighborsForLeft[leftVertex];
      //Iterate over all pairs i<j in the adjacency list of the current left vertex we are on
      for (int it1 = 0; it1 < curAdjList.length; it1++) {
        for (int it2 = it1 + 1; it2 < curAdjList.length; it2++) {
          //Define i and j as above.
          //Note that since the adjacency lists in the bipartite graph are sorted,
          //we have that i < j.
          int i = curAdjList[it1];
          int j = curAdjList[it2];
          HashMap<Integer, Integer> mapOfI
              = pairsOnTheRightThatShare.getOrDefault(i, new HashMap<>());
          Integer curValOfIAndJ = mapOfI.getOrDefault(j, 0);
          mapOfI.put(j, curValOfIAndJ + 1);
          pairsOnTheRightThatShare.put(i, mapOfI);
          //For convenience sake, we also maintain the same count at T[(j,i)].
          //This symmetry allows us to construct the adjacency lists of the projected graph
          //with ease in the next step.
          HashMap<Integer, Integer> mapOfJ =
              pairsOnTheRightThatShare.getOrDefault(j, new HashMap<>());
          mapOfJ.put(i, curValOfIAndJ + 1);
          pairsOnTheRightThatShare.put(j, mapOfJ);
        }
      }
    }
    //Use the hash table T to create adjacency lists and weight lists for every vertex
    //in the projected graph. Use the user specified threshold along with a tolerance of 1e-5 to
    //delete edges with weight below the threshold.
    //We also maintain the number of edges in the projected graph.
    long numEdgesProjected = 0L;
    int[][] adjListsProjected = new int[this.numRightVertices][];
    float[][] wtListsProjected = new float[this.numRightVertices][];
    //We also maintain a HashTable to keep track of edge weights that have been
    //already computed to avoid recomputing twice for the same edge.
    HashMap<Integer, HashMap<Integer, Float>> computedEdgeWeights = new HashMap<>();
    for (int i = 0; i < this.numRightVertices; i++) {
      HashMap<Integer, Integer> mapOfI = pairsOnTheRightThatShare.getOrDefault(i, new HashMap<>());
      //Define temporary ArrayLists to store the adjacency list and weights for vertex i
      //in the projected graph after thresholding.
      ArrayList<Integer> thresholdedAdjacencyListForI = new ArrayList<Integer>(0);
      ArrayList<Float> thresholdedWtListForI = new ArrayList<Float>(0);
      //The number of neighbors of vertex i in the projected graph after thresholding
      int thresholdedNumOfNeighborsOfI = 0;
      //If the list of neighbors of i is empty, then there is nothing more to do.
      if (mapOfI.size() > 0) {
        //Convert the set of keys of mapOfI into an array. These are neighbors of i
        //in the projected graph pre-thresholding.
        Integer[] keysOfI = mapOfI.keySet().toArray(new Integer[mapOfI.size()]);
        //Sort the array of neighbors of i by their index/id.
        Arrays.sort(keysOfI);
        //Iterate through the neighbors in sorted list and set the weight and adjacency for each
        for (int nId = 0; nId < mapOfI.size(); nId++) {
          //Get the label of the neighbor
          int j = keysOfI[nId];
          //float variable to hold the weight of the edge between i and j
          float weightOfIJEdge;
          //If j < i then the weight for the edge between i and j in the projected graph
          // would have been computed during the iteration through
          // the neighbors of j and so we can just look it up
          //from the hash table.
          if (j < i) {
            weightOfIJEdge = computedEdgeWeights.get(j).get(i);
          } else {
            //Recall that T[(i,j)] (and T[(j,i)]) now contains |N(i) intersection N(j)|
            int intersectionSize = mapOfI.get(j);
            //To compute weight of the edge between i and j, we must normalize
            //the size of the intersection by Sqrt(degreeOfI * degreeOfJ),
            //where degreeOfI and degreeOfJ are the degrees of i and j in the bipartite graph.
            int degreeOfI = this.getNeighborsForRightById(i).length;
            int degreeOfJ = this.getNeighborsForRightById(j).length;
            weightOfIJEdge = intersectionSize / (float) Math.sqrt(degreeOfI * degreeOfJ);
            //Add the computed weight to the hash table
            HashMap<Integer, Float> tempWeightMap
                = computedEdgeWeights.getOrDefault(i, new HashMap<Integer, Float>());
            tempWeightMap.put(j, weightOfIJEdge);
            computedEdgeWeights.put(i, tempWeightMap);
          }
          //Update the adjacency and weight list of vertex i IF the weight of the edge
          //between i and j is greater than the threshold (using tolerance 1e-5)
          if (weightOfIJEdge - threshold > 1e-5) {
            thresholdedNumOfNeighborsOfI++;
            thresholdedAdjacencyListForI.add(j);
            thresholdedWtListForI.add(weightOfIJEdge);
          }
        }
      }
      //Update number of edges. Note this is double counting. We will divide by two later.
      numEdgesProjected += thresholdedNumOfNeighborsOfI;
      //Initialize the adjacency and weight list of vertex i in the projected graph
      adjListsProjected[i] = new int[thresholdedNumOfNeighborsOfI];
      wtListsProjected[i] = new float[thresholdedNumOfNeighborsOfI];
      //Convert the ArrayLists to primitive arrays
      for (int index = 0; index < thresholdedNumOfNeighborsOfI; index++) {
        adjListsProjected[i][index] = thresholdedAdjacencyListForI.get(index);
        wtListsProjected[i][index] = thresholdedWtListForI.get(index);
      }
    }
    //Every edge has been counted twice, so we divide by two
    //Instantiate a new instance of Graph and return
    return new Graph(
        this.numRightVertices, numEdgesProjected / 2, adjListsProjected, wtListsProjected
    );
  }