private SparseBinaryMatrix divideIntoConnectedComponents()

in src/main/java/com/twitter/sbf/core/MHAlgorithm.java [169:236]


  private SparseBinaryMatrix divideIntoConnectedComponents(SparseBinaryMatrix z) {
    diagnosticsWriter.print("Going to divide matrix into connected components");
    if (config.removeWeakLinksForConnectedComponents) {
      diagnosticsWriter.println(" (removing weak links)");
    } else {
      diagnosticsWriter.println();
    }
    diagnosticsWriter.flush();

    ImmutableList.Builder<IntSet> newCols = new ImmutableList.Builder<>();
    long tic = System.nanoTime();
    int numOriginalSingletons = 0;
    int numNewSingletons = 0;
    int numEmpty = 0;
    long numWeakLinks = 0;
    for (int i = 0; i < z.getNumCols(); i++) {
      if (z.getColumn(i).size() > 1) {
        Set<IntSet> comps;
        if (config.removeWeakLinksForConnectedComponents) {
          SubGraphWithoutWeakLinks sg = new SubGraphWithoutWeakLinks(g, z.getColumn(i));
          comps = ConnectedComponents.connectedComponents(sg);
          numWeakLinks += sg.getNumWeakLinks();
        } else {
          comps = ConnectedComponents.subGraphConnectedComponents(g, z.getColumn(i));
        }

        for (IntSet c : comps) {
          newCols.add(c);
          if (c.size() == 1) {
            numNewSingletons += 1;
          }
        }
      } else if (z.getColumn(i).size() == 1) {
        numOriginalSingletons++;
      } else {
        numEmpty++;
      }
    }
    ImmutableList<IntSet> newColsList = newCols.build();
    IntSet[] newColsArray =
        Arrays.copyOf(newColsList.toArray(), newColsList.size(), IntSet[].class);
    SparseBinaryMatrix dividedZ = new SparseBinaryMatrix(g.getNumVertices(), newColsArray.length);
    dividedZ.initFromColSets(newColsArray);
    long toc = System.nanoTime();
    double updateTime = (toc - tic) * 1e-9;
    if (config.removeWeakLinksForConnectedComponents) {
      diagnosticsWriter.println("Number of weak links found: " + numWeakLinks);
    }
    diagnosticsWriter.println(
        getMetricsLine(dividedZ, 0, updateTime, 0, 0,
            true)
    );

    int originalEmptyRows = (int) Math.round(z.emptyRowProportion() * z.getNumRows());
    int newEmptyRows = (int) Math.round(dividedZ.emptyRowProportion() * dividedZ.getNumRows());
    diagnosticsWriter.format(
        "Found %d original singletons, %d new singletons, %d empty columns\n",
        numOriginalSingletons, numNewSingletons, numEmpty);
    diagnosticsWriter.format("Dividing into connected components and removing singletons "
        + "and empties changes the number of empty rows (unassigned nodes) from %d to %d\n",
        originalEmptyRows, newEmptyRows);
    diagnosticsWriter.format(
        "Dividing into connected components and removing singletons "
            + "and empties changes K from %d to %d\n",
        z.getNumCols(), dividedZ.getNumCols());
    diagnosticsWriter.flush();
    return dividedZ;
  }