private SparseRealMatrix getRepresentationsForRight()

in src/main/java/com/twitter/sbf/core/SimClustersInMemory.java [205:315]


  private SparseRealMatrix getRepresentationsForRight() {
    //Project the bipartite graph using BipartiteGraph.projectRight() to get
    //an undirected weighted graph on n (number of right vertices) vertices.
    //This is step 1 of SimClusters.
    System.out.println("Computing similarity graph (projection)...");
    Graph projectedGraph = this.g.projectRight(this.thresholdForStepOne);
    //Print statistics of the graph obtained after projection
    System.out.println(
        String.format(
           "Obtained similarity graph (projection) with %d edges and %d vertices",
            projectedGraph.getNumEdges(), projectedGraph.getNumVertices()
        )
    );
    System.out.println(
        String.format("The density of the graph is %f", projectedGraph.getDensity())
    );
    System.out.println(
        "Average edge weight (similarity score) in similarity graph: "
            + projectedGraph.getGlobalAvgWeight()
            + " with standard deviation " + projectedGraph.getWeightStandardDeviation()

    );
    //If squaring weights is specified, then perform it
    if (this.squareWeights) {
      System.out.println("Squaring edge weights in projected graph...");
      projectedGraph.squareWeights();
      System.out.println(
          String.format("The density after squaring is %f", projectedGraph.getDensity())
      );
      System.out.println(
          "Average edge weight of graph after squaring: "
              + projectedGraph.getGlobalAvgWeight()
              + " with standard deviation " + projectedGraph.getWeightStandardDeviation()
      );
    } else {
      System.out.println("No squaring will be performed..");
    }
    //If top-k retention is required then perform it
    if (this.retainTopK) {
      System.out.println("Retaining only top-k edges for every vertex...");
      projectedGraph = projectedGraph.retainTopKEdgesInGraph(this.topKParameter);
      System.out.println(
        String.format(
           "Number of edges after top-k retention: %d edges", projectedGraph.getNumEdges())
      );
      System.out.println(
          String.format("The density after top-k retention is %f", projectedGraph.getDensity())
      );
      System.out.println(
          "Average edge weight after top-k retention: "
              + projectedGraph.getGlobalAvgWeight()
              + " with standard deviation " + projectedGraph.getWeightStandardDeviation()

    );
    } else {
      System.out.println("Retaining all edges in projected graph..");
    }
    int n = projectedGraph.getNumVertices();
    SparseBinaryMatrix z = new SparseBinaryMatrix(n, this.mhParameters.k);
    PrintWriter err = new PrintWriter(System.out);
    // Compute right projection with appropriate threshold
    // If retainTopK is enabled, then retain only top-k edges for every vertex
    if ("randomNeighborhoods".equals(this.initType)) {
      System.out.println("Initializing matrix for step two from random neighborhoods");
      // set allowOverlap = false
      z.initFromBestNeighborhoods(
          projectedGraph, (gp, i) -> this.mhParameters.rng.nextDouble(), false, err
      );
    } else if ("bestNeighborhoods".equals(this.initType)) {
      System.out.println("Initializing matrix from best sub-neighborhoods in terms of conductance");
      z.initFromColSets(
          MHAlgorithm.getRandomBestConductanceSubNeighborhoods(
              projectedGraph, z.getNumCols(), this.mhParameters.rng
          )
      );
    } else if ("nonoverlappingNeighborhoods".equals(this.initType)) {
      System.out.println(
          "Initializing from best non-overlapping sub-neighborhoods in terms of conductance");
      z.initFromColSets(
          MHAlgorithm.getNonOverlappingBestSubNeighborhoods(
              projectedGraph, z.getNumCols(), this.mhParameters.rng
          )
      );
    } else if ("test".equals(this.initType)) {
      System.out.println("Unit test initialization will be used...");
      z.initFromBestNeighborhoods(projectedGraph, Graph::getNeighborhoodConductance,
          false, err);
    } else {
      System.out.println("Initializing factors randomly with 1 nnz/vertex");
      z.initEmptyRowsRandomly(this.mhParameters.rng);
      System.out.println("Random initialization: done");
    }

    //Perform step 2 of the SimClusters algorithm. This involves creating an MHAlgorithm object
    //and calling the optimize() method which runs the Metropolis-Hastings sampling based
    //community detection algorithm on projectedGraph using z as the initialization.
    MHAlgorithm stepTwo =
        new MHAlgorithm(this.mhParameters, projectedGraph, z, this.diagnosticsWriter);
    //Get the cluster assignment
    System.out.println("Running MHAlgorithm on the similarity graph..");
    SparseBinaryMatrix resultOfStepTwo = stepTwo.optimize();
    //Convert and return cluster assignment to a vector containing cluster affiliation scores.
    SparseRealMatrix representationsForRight =
        MHAlgorithm.heuristicallyScoreClusterAssignments(projectedGraph, resultOfStepTwo);
    //Perform column-wise normalization on representationsForRight if specified
    if (this.normalizeColumnsRight) {
      System.out.println("Normalizing columns of right representations..");
      representationsForRight.normalizeToUnitColumn();
    }
    return representationsForRight;
  }