in src/main/java/com/twitter/sbf/app/Main.java [54:174]
/**
 * Runs the end-to-end pipeline driven by the static {@code config}: loads the graph from
 * {@code config.metisFile}, initializes the sparse binary factor matrix Z, optimizes it with
 * {@link MHAlgorithm}, and writes whichever outputs are configured.
 *
 * <p>The initialization strategy is the first one that matches, in this order:
 * <ol>
 *   <li>{@code initFromRowsFile} (non-empty): initialize Z from a rows file;</li>
 *   <li>{@code initFromColsFile} (non-empty): initialize Z from column sets read from a file;
 *       if the file holds a different number of columns than the configured K, Z is
 *       re-allocated with the file's column count;</li>
 *   <li>{@code initFromRandomNeighborhoods}: neighborhoods scored by a random number, without
 *       overlap;</li>
 *   <li>{@code initFromBestNeighborhood}: best-conductance sub-neighborhoods;</li>
 *   <li>{@code initFromNonoverlappingNeighborhood}: non-overlapping best sub-neighborhoods;</li>
 *   <li>otherwise: {@code initEmptyRowsRandomly}.</li>
 * </ol>
 *
 * @throws IOException if an underlying read/write fails, or if writing the rows-with-scores
 *     file reports an error
 * @throws IllegalStateException if {@code config.clusterPrecisionFile} is set and the file
 *     already exists
 */
private static void run() throws IOException {
  // Sanity check: fail before doing any work if the precision file is already there.
  if (!config.clusterPrecisionFile.isEmpty() && Util.fileExists(config.clusterPrecisionFile)) {
    throw new IllegalStateException("clusterPrecisionFile already exists!");
  }

  // Load graph from file
  System.out.println("Loading graph");
  Graph graph = Graph.fromFile(config.metisFile);
  System.out.println("Load graph: done");
  graph.print();

  // Allocate empty Z
  int n = graph.getNumVertices();
  SparseBinaryMatrix z = new SparseBinaryMatrix(n, config.getAlgoConfig().k);

  // This writer wraps System.err without auto-flush, so it is flushed explicitly in the
  // finally block below; otherwise buffered diagnostics could be lost.
  PrintWriter err = new PrintWriter(System.err);
  try {
    long tic = System.currentTimeMillis();

    // Initialize Z
    if (!config.initFromRowsFile.isEmpty()) {
      System.out.println("Initializing factors from rows");
      z.initFromRows(config.initFromRowsFile);
      System.out.println("Initialization from rows: done");
    } else if (!config.initFromColsFile.isEmpty()) {
      System.out.println("Initializing factors from columns");
      IntSet[] initCols = readColumnsFromFile(config.initFromColsFile);
      if (initCols.length != config.getAlgoConfig().k) {
        System.out.format(
            "Number of columns in %d different from K specified in config %d\n",
            initCols.length, config.getAlgoConfig().k
        );
        System.out.println("Will use the number of columns in the file as the new K");
        // NOTE(review): only Z is resized here; config.getAlgoConfig().k keeps its configured
        // value. Confirm MHAlgorithm takes K from Z rather than from the config.
        z = new SparseBinaryMatrix(n, initCols.length);
      }
      z.initFromColSets(initCols);
      System.out.println("Initialization from columns: done");
    } else if (config.initFromRandomNeighborhoods) {
      System.out.println("Initializing from random neighborhoods");
      // set allowOverlap = false
      z.initFromBestNeighborhoods(
          graph, (g, i) -> config.getAlgoConfig().rng.nextDouble(), false, err
      );
      // Report the precision of the first two clusters as a quick spot check. The loop is
      // bounded by the number of columns so that a Z with fewer than two clusters is not
      // asked about a cluster it does not have.
      for (int c = 0; c < 2 && c < z.getNumCols(); c++) {
        PredictionStat prec =
            MHAlgorithm.clusterPrecision(graph, z, c, 1000, config.getAlgoConfig().rng);
        System.out.println("Precision of cluster " + c + ":" + prec.precision());
      }
      System.out.println(
          "Fraction of empty rows after initializing from random neighborhoods: "
              + z.emptyRowProportion()
      );
    } else if (config.initFromBestNeighborhood) {
      System.out.println("Initializing from best sub-neighborhoods in terms of conductance");
      z.initFromColSets(
          MHAlgorithm.getRandomBestConductanceSubNeighborhoods(
              graph, z.getNumCols(), config.getAlgoConfig().rng
          )
      );
      System.out.println(
          "Fraction of empty rows after initializing using best sub-neighborhoods: "
              + z.emptyRowProportion()
      );
    } else if (config.initFromNonoverlappingNeighborhood) {
      System.out.println(
          "Initializing from best non-overlapping sub-neighborhoods in terms of conductance");
      z.initFromColSets(
          MHAlgorithm.getNonOverlappingBestSubNeighborhoods(
              graph, z.getNumCols(), config.getAlgoConfig().rng
          )
      );
      System.out.println(
          "Fraction of empty rows after initializing using best non-overlapping sub-neighborhoods: "
              + z.emptyRowProportion()
      );
    } else {
      System.out.println("Initializing factors randomly with 1 nnz/vertex");
      z.initEmptyRowsRandomly(config.getAlgoConfig().rng);
      System.out.println("Random initialization: done");
    }
    long toc = System.currentTimeMillis();
    System.out.println(
        String.format("Time to initialize: %.2f seconds\n", (toc - tic) / 1000.0));

    // Optimize Z
    MHAlgorithm algo = new MHAlgorithm(config.getAlgoConfig(), graph, z, err);
    SparseBinaryMatrix optimizedZ = algo.optimize();
    long toc2 = System.currentTimeMillis();
    System.out.println(
        String.format("Time to optimize: %.2f seconds\n", (toc2 - toc) / 1000.0));
    System.out.println(String.format("Time to initialize & optimize: %.2f seconds\n",
        (toc2 - tic) / 1000.0));

    // Write output; each target is optional and skipped when its path is empty.
    if (!config.outputByRowsFile.isEmpty()) {
      System.out.println("Writing rows of Z to " + config.outputByRowsFile);
      optimizedZ.outputByRows(config.outputByRowsFile);
      System.out.println("Output by rows: done");
    }
    if (!config.outputByColsFile.isEmpty()) {
      System.out.println("Writing columns of Z to " + config.outputByColsFile);
      optimizedZ.outputByCols(config.outputByColsFile);
      System.out.println("Output by columns: done");
    }
    if (!config.clusterPrecisionFile.isEmpty()) {
      MHAlgorithm.evalClusterPrecision(graph, optimizedZ,
          config.clusterPrecisionFile, config.getAlgoConfig().cpu);
    }
    if (!config.outputRowsWithScoresFile.isEmpty()) {
      System.out.println("Writing rows of Z with scores to " + config.outputRowsWithScoresFile);
      SparseRealMatrix srm =
          MHAlgorithm.heuristicallyScoreClusterAssignments(graph, optimizedZ);
      srm.normalizeToUnitColumn();
      // try-with-resources guarantees the file is flushed and closed even if writing fails.
      try (PrintWriter w = new PrintWriter(config.outputRowsWithScoresFile)) {
        writeRowsWithScores(srm, w);
        // PrintWriter never throws on write failure; surface it explicitly instead of
        // reporting success over a truncated file.
        if (w.checkError()) {
          throw new IOException(
              "Failed writing rows with scores to " + config.outputRowsWithScoresFile);
        }
      }
      System.out.println("Output rows with scores: done");
    }
  } finally {
    // Flush only (never close): closing this writer would close System.err itself.
    err.flush();
  }
}