in src/main/java/com/twitter/sbf/core/MHAlgorithm.java [629:688]
/**
 * Estimates precision, recall and orphan statistics for {@code matrix} against {@code graph}
 * by random sampling, and returns the three accumulators in a map.
 *
 * <p>The three estimates are computed in this order (all draw from the same {@code rng}):
 * <ol>
 *   <li><b>recall</b>: {@code recallSamples} times, a vertex is drawn from
 *       {@code graph.getDegreeDistribution(rng)} and, if it has at least one neighbor, one of
 *       its neighbors is picked uniformly. The resulting edge is recorded as an actual positive
 *       (count and weight) and additionally as a true positive when
 *       {@code Util.hasCommonElement} reports that the matrix rows of its two endpoints share
 *       an element.</li>
 *   <li><b>precision</b>: every column {@code i} of the matrix gets a share of
 *       {@code precisionSamples} proportional to {@code size * (size - 1)}, where {@code size}
 *       is the number of entries in the column; the share is rounded up and handed to
 *       {@code clusterPrecision}, whose results are added together. Because each share is
 *       rounded up, the total number of samples requested can exceed
 *       {@code precisionSamples}.</li>
 *   <li><b>orphans</b>: {@code verticesToSample} vertex ids are drawn uniformly from
 *       {@code [0, graph.getNumVertices())}; each one with a non-empty matrix row is counted as
 *       evaluated, and also as having zero true positives when {@code isOrphan} returns
 *       true.</li>
 * </ol>
 *
 * @param graph graph supplying degrees, neighbors, edge weights and the vertex count
 * @param matrix sparse binary matrix whose rows are indexed by vertex id (presumably rows are
 *     vertices and columns are clusters; confirm against {@code SparseBinaryMatrix})
 * @param rng source of randomness for all sampling done here
 * @param recallSamples number of edge draws used for the recall estimate
 * @param precisionSamples sample budget distributed over the columns for the precision estimate
 * @param verticesToSample number of vertex draws used for the orphan estimate
 * @return immutable map with the keys {@code "precision"}, {@code "recall"} and
 *     {@code "orphans"}
 */
private static Map<String, PredictionStat> getPredictionStatEdgeSampling(
    Graph graph,
    SparseBinaryMatrix matrix,
    RandomAdaptor rng,
    int recallSamples,
    int precisionSamples,
    int verticesToSample) {
  // Recall: the number of edge draws is governed by recallSamples (this loop was previously
  // bounded by precisionSamples, i.e. the two budgets were swapped).
  PredictionStat recallStat = new PredictionStat();
  for (int i = 0; i < recallSamples; i++) {
    // sample first vertex according to degree distribution
    // NOTE(review): getDegreeDistribution is called on every iteration; if it rebuilds the
    // distribution each time, consider obtaining it once before the loop.
    int v1 = graph.getDegreeDistribution(rng).sample();
    int degree = graph.getDegree(v1);
    if (degree > 0) {
      // second endpoint: a uniformly chosen neighbor of v1
      int v2 = graph.getNeighbors(v1)[rng.nextInt(degree)];
      float edgeWeight = graph.getWeightOfEdge(v1, v2);
      boolean predictEdge = Util.hasCommonElement(matrix.getRow(v1), matrix.getRow(v2));
      recallStat.incActualPositive();
      recallStat.incWeightActualPositive(edgeWeight);
      if (predictEdge) {
        recallStat.incTruePositive();
        recallStat.incWeightTruePositive(edgeWeight);
      }
    }
  }

  // Precision: weight every column by size * (size - 1). The product is computed in long
  // because an int product overflows once a column holds more than 46341 entries.
  int numCols = matrix.getNumCols();
  double[] colWeights = new double[numCols];
  double totalWeight = 0;
  for (int i = 0; i < numCols; i++) {
    long size = matrix.getColumn(i).size();
    colWeights[i] = (double) (size * (size - 1));
    totalWeight += colWeights[i];
  }
  PredictionStat precisionStat = new PredictionStat();
  for (int i = 0; i < numCols; i++) {
    // When no column has two or more entries the total weight is 0; request 0 samples
    // explicitly rather than relying on the NaN-to-int cast producing 0.
    int samplesForColumn = totalWeight > 0
        ? (int) Math.ceil(colWeights[i] / totalWeight * precisionSamples)
        : 0;
    precisionStat.add(clusterPrecision(graph, matrix, i, samplesForColumn, rng));
  }

  // Orphans: only sampled vertices with a non-empty matrix row are evaluated.
  PredictionStat orphansStat = new PredictionStat();
  for (int i = 0; i < verticesToSample; i++) {
    int vId = rng.nextInt(graph.getNumVertices());
    int[] row = matrix.getRow(vId);
    if (row.length > 0) {
      orphansStat.incEvalVertices();
      if (isOrphan(graph, matrix, vId, row)) {
        orphansStat.incEvalVerticesWithZeroTruePos();
      }
    }
  }

  return ImmutableMap.of(
      "precision", precisionStat,
      "recall", recallStat,
      "orphans", orphansStat
  );
}