in src/main/java/com/twitter/sbf/core/MHAlgorithm.java [169:236]
private SparseBinaryMatrix divideIntoConnectedComponents(SparseBinaryMatrix z) {
diagnosticsWriter.print("Going to divide matrix into connected components");
if (config.removeWeakLinksForConnectedComponents) {
diagnosticsWriter.println(" (removing weak links)");
} else {
diagnosticsWriter.println();
}
diagnosticsWriter.flush();
ImmutableList.Builder<IntSet> newCols = new ImmutableList.Builder<>();
long tic = System.nanoTime();
int numOriginalSingletons = 0;
int numNewSingletons = 0;
int numEmpty = 0;
long numWeakLinks = 0;
for (int i = 0; i < z.getNumCols(); i++) {
if (z.getColumn(i).size() > 1) {
Set<IntSet> comps;
if (config.removeWeakLinksForConnectedComponents) {
SubGraphWithoutWeakLinks sg = new SubGraphWithoutWeakLinks(g, z.getColumn(i));
comps = ConnectedComponents.connectedComponents(sg);
numWeakLinks += sg.getNumWeakLinks();
} else {
comps = ConnectedComponents.subGraphConnectedComponents(g, z.getColumn(i));
}
for (IntSet c : comps) {
newCols.add(c);
if (c.size() == 1) {
numNewSingletons += 1;
}
}
} else if (z.getColumn(i).size() == 1) {
numOriginalSingletons++;
} else {
numEmpty++;
}
}
ImmutableList<IntSet> newColsList = newCols.build();
IntSet[] newColsArray =
Arrays.copyOf(newColsList.toArray(), newColsList.size(), IntSet[].class);
SparseBinaryMatrix dividedZ = new SparseBinaryMatrix(g.getNumVertices(), newColsArray.length);
dividedZ.initFromColSets(newColsArray);
long toc = System.nanoTime();
double updateTime = (toc - tic) * 1e-9;
if (config.removeWeakLinksForConnectedComponents) {
diagnosticsWriter.println("Number of weak links found: " + numWeakLinks);
}
diagnosticsWriter.println(
getMetricsLine(dividedZ, 0, updateTime, 0, 0,
true)
);
int originalEmptyRows = (int) Math.round(z.emptyRowProportion() * z.getNumRows());
int newEmptyRows = (int) Math.round(dividedZ.emptyRowProportion() * dividedZ.getNumRows());
diagnosticsWriter.format(
"Found %d original singletons, %d new singletons, %d empty columns\n",
numOriginalSingletons, numNewSingletons, numEmpty);
diagnosticsWriter.format("Dividing into connected components and removing singletons "
+ "and empties changes the number of empty rows (unassigned nodes) from %d to %d\n",
originalEmptyRows, newEmptyRows);
diagnosticsWriter.format(
"Dividing into connected components and removing singletons "
+ "and empties changes K from %d to %d\n",
z.getNumCols(), dividedZ.getNumCols());
diagnosticsWriter.flush();
return dividedZ;
}