in src/main/java/com/twitter/sbf/core/SimClustersInMemory.java [205:315]
private SparseRealMatrix getRepresentationsForRight() {
//Project the bipartite graph using BipartiteGraph.projectRight() to get
//an undirected weighted graph on n (number of right vertices) vertices.
//This is step 1 of SimClusters.
System.out.println("Computing similarity graph (projection)...");
Graph projectedGraph = this.g.projectRight(this.thresholdForStepOne);
//Print statistics of the graph obtained after projection
System.out.println(
String.format(
"Obtained similarity graph (projection) with %d edges and %d vertices",
projectedGraph.getNumEdges(), projectedGraph.getNumVertices()
)
);
System.out.println(
String.format("The density of the graph is %f", projectedGraph.getDensity())
);
System.out.println(
"Average edge weight (similarity score) in similarity graph: "
+ projectedGraph.getGlobalAvgWeight()
+ " with standard deviation " + projectedGraph.getWeightStandardDeviation()
);
//If squaring weights is specified, then perform it
if (this.squareWeights) {
System.out.println("Squaring edge weights in projected graph...");
projectedGraph.squareWeights();
System.out.println(
String.format("The density after squaring is %f", projectedGraph.getDensity())
);
System.out.println(
"Average edge weight of graph after squaring: "
+ projectedGraph.getGlobalAvgWeight()
+ " with standard deviation " + projectedGraph.getWeightStandardDeviation()
);
} else {
System.out.println("No squaring will be performed..");
}
//If top-k retention is required then perform it
if (this.retainTopK) {
System.out.println("Retaining only top-k edges for every vertex...");
projectedGraph = projectedGraph.retainTopKEdgesInGraph(this.topKParameter);
System.out.println(
String.format(
"Number of edges after top-k retention: %d edges", projectedGraph.getNumEdges())
);
System.out.println(
String.format("The density after top-k retention is %f", projectedGraph.getDensity())
);
System.out.println(
"Average edge weight after top-k retention: "
+ projectedGraph.getGlobalAvgWeight()
+ " with standard deviation " + projectedGraph.getWeightStandardDeviation()
);
} else {
System.out.println("Retaining all edges in projected graph..");
}
int n = projectedGraph.getNumVertices();
SparseBinaryMatrix z = new SparseBinaryMatrix(n, this.mhParameters.k);
PrintWriter err = new PrintWriter(System.out);
// Compute right projection with appropriate threshold
// If retainTopK is enabled, then retain only top-k edges for every vertex
if ("randomNeighborhoods".equals(this.initType)) {
System.out.println("Initializing matrix for step two from random neighborhoods");
// set allowOverlap = false
z.initFromBestNeighborhoods(
projectedGraph, (gp, i) -> this.mhParameters.rng.nextDouble(), false, err
);
} else if ("bestNeighborhoods".equals(this.initType)) {
System.out.println("Initializing matrix from best sub-neighborhoods in terms of conductance");
z.initFromColSets(
MHAlgorithm.getRandomBestConductanceSubNeighborhoods(
projectedGraph, z.getNumCols(), this.mhParameters.rng
)
);
} else if ("nonoverlappingNeighborhoods".equals(this.initType)) {
System.out.println(
"Initializing from best non-overlapping sub-neighborhoods in terms of conductance");
z.initFromColSets(
MHAlgorithm.getNonOverlappingBestSubNeighborhoods(
projectedGraph, z.getNumCols(), this.mhParameters.rng
)
);
} else if ("test".equals(this.initType)) {
System.out.println("Unit test initialization will be used...");
z.initFromBestNeighborhoods(projectedGraph, Graph::getNeighborhoodConductance,
false, err);
} else {
System.out.println("Initializing factors randomly with 1 nnz/vertex");
z.initEmptyRowsRandomly(this.mhParameters.rng);
System.out.println("Random initialization: done");
}
//Perform step 2 of the SimClusters algorithm. This involves creating an MHAlgorithm object
//and calling the optimize() method which runs the Metropolis-Hastings sampling based
//community detection algorithm on projectedGraph using z as the initialization.
MHAlgorithm stepTwo =
new MHAlgorithm(this.mhParameters, projectedGraph, z, this.diagnosticsWriter);
//Get the cluster assignment
System.out.println("Running MHAlgorithm on the similarity graph..");
SparseBinaryMatrix resultOfStepTwo = stepTwo.optimize();
//Convert and return cluster assignment to a vector containing cluster affiliation scores.
SparseRealMatrix representationsForRight =
MHAlgorithm.heuristicallyScoreClusterAssignments(projectedGraph, resultOfStepTwo);
//Perform column-wise normalization on representationsForRight if specified
if (this.normalizeColumnsRight) {
System.out.println("Normalizing columns of right representations..");
representationsForRight.normalizeToUnitColumn();
}
return representationsForRight;
}