in src/main/java/com/twitter/sbf/generator/GraphGenerator.java [358:492]
public GraphAndGroundTruth generateWithDisjointClusters() {
int[] firstIdsInEachCluster = new int[numClusters];
int[] lastIdsInEachCluster = new int[numClusters];
IntSet[] groundTruthClusters = new IntSet[numClusters];
Random r = this.rng;
int numNodes = 0;
for (int i = 0; i < numClusters; i++) {
int sampledClusterSize = minClusterSize + r.nextInt(maxClusterSize - minClusterSize);
firstIdsInEachCluster[i] = numNodes;
lastIdsInEachCluster[i] = numNodes + sampledClusterSize - 1;
numNodes += sampledClusterSize;
}
int[] vertexToClusterId = new int[numNodes];
groundTruthClusters[0] = new IntOpenHashSet();
for (int i = 0, clusterId = 0; i < numNodes; i++) {
if (i > lastIdsInEachCluster[clusterId]) {
clusterId++;
groundTruthClusters[clusterId] = new IntOpenHashSet();
}
vertexToClusterId[i] = clusterId;
groundTruthClusters[clusterId].add(i);
if (i == numNodes - 1) {
assert clusterId == numClusters - 1;
}
}
ArrayList<HashSet<Integer>> adjLists = new ArrayList<>(numNodes);
for (int i = 0; i < numNodes; i++) {
adjLists.add(new HashSet<>());
}
int numEdges = 0;
int numIntraClusterEdges = 0;
for (int clusterId = 0; clusterId < numClusters; clusterId++) {
int sizeOfThisCluster =
lastIdsInEachCluster[clusterId] - firstIdsInEachCluster[clusterId] + 1;
double probForThisCluster = getProbabilityInsideCluster(sizeOfThisCluster);
assert probForThisCluster > 0 && probForThisCluster < 1;
GeometricDistribution gd = new GeometricDistribution(rng, probForThisCluster);
int numEdgesInsideThisCluster = 0;
for (int nodeId = firstIdsInEachCluster[clusterId];
nodeId <= lastIdsInEachCluster[clusterId]; nodeId++) {
// Starting from nodeId after current node, repeat
for (int neighborNodePosition = nodeId + 1;
neighborNodePosition <= lastIdsInEachCluster[clusterId];) {
int numNodesToSkip = gd.sample();
neighborNodePosition += numNodesToSkip;
if (neighborNodePosition <= lastIdsInEachCluster[clusterId]) {
assert !adjLists.get(nodeId).contains(neighborNodePosition);
assert !adjLists.get(neighborNodePosition).contains(nodeId);
adjLists.get(nodeId).add(neighborNodePosition);
adjLists.get(neighborNodePosition).add(nodeId);
neighborNodePosition++;
numEdges++;
numEdgesInsideThisCluster++;
numIntraClusterEdges++;
}
}
}
float actualProbInsideCluster =
numEdgesInsideThisCluster * 1.0f / ((sizeOfThisCluster * (sizeOfThisCluster - 1)) / 2);
assert Math.abs(actualProbInsideCluster - probForThisCluster) < 0.1
: String.format("cluster %d: size %d, assigned prob. %f, actual prob. %f\n",
clusterId + 1, sizeOfThisCluster, probForThisCluster, actualProbInsideCluster);
}
int numGlobalEdges = 0;
GeometricDistribution gd = new GeometricDistribution(rng, getGlobalProbability());
for (int nodeId = 0; nodeId < numNodes; nodeId++) {
for (int neighborNodePosition = nodeId + 1; neighborNodePosition < numNodes;) {
int numNodesToSkip = gd.sample();
neighborNodePosition += numNodesToSkip;
if (neighborNodePosition < numNodes) {
if (!adjLists.get(nodeId).contains(neighborNodePosition)) {
adjLists.get(nodeId).add(neighborNodePosition);
adjLists.get(neighborNodePosition).add(nodeId);
neighborNodePosition++;
numEdges++;
numGlobalEdges++;
}
}
}
}
int[][] nbrs = new int[numNodes][];
int numEdges2 = 0;
for (int i = 0; i < numNodes; i++) {
nbrs[i] = new int[adjLists.get(i).size()];
int j = 0;
HashSet<Integer> s = adjLists.get(i);
for (int neighbor : s) {
nbrs[i][j] = neighbor;
j++;
numEdges2++;
}
Arrays.sort(nbrs[i]);
}
float[][] wts = null;
if (this.isWeighted) {
wts = new float[numNodes][];
for (int i = 0; i < numNodes; i++) {
wts[i] = new float[nbrs[i].length];
for (int j = 0; j < wts[i].length; j++) {
// Consider only cells in the lower-triangular part of the adjacency matrix, and then
// replicate the weight to the corresponding cell in the upper-triangular part.
if (nbrs[i][j] < i) {
// for inter-cluster edges, set their weight to be low
float newWt = 0;
if (vertexToClusterId[i] != vertexToClusterId[nbrs[i][j]]) {
newWt = (float) (lowerWeightMode - 1.5 * lowerWeightDist.getStandardDeviation());
} else {
newWt = (float) sampleEdgeWeight();
}
wts[i][j] = newWt;
int indexOfIInJ = Arrays.binarySearch(nbrs[nbrs[i][j]], i);
wts[nbrs[i][j]][indexOfIInJ] = newWt;
}
}
}
}
System.err.format("Num intra-cluster edges: %d, num global edges: %d, "
+ "fraction global edges: %f\n",
numIntraClusterEdges, numGlobalEdges, numGlobalEdges * 1.0f / numEdges);
assert numEdges * 2 == numEdges2
: String.format("numEdges %d, numEdges2 %d", numEdges, numEdges2);
return new GraphAndGroundTruth(new Graph(numNodes, numEdges, nbrs, wts), groundTruthClusters);
}