private static void run()

in src/main/java/com/twitter/sbf/app/Main.java [54:174]


  /**
   * Runs the end-to-end pipeline: loads the graph, initializes the factor matrix Z, optimizes it
   * with {@code MHAlgorithm}, and writes every output that is configured.
   *
   * <p>The initialization strategy is the first one that is enabled in {@code config}, checked in
   * this order: rows file, columns file, random neighborhoods, best sub-neighborhoods,
   * non-overlapping best sub-neighborhoods, and finally random initialization of empty rows.
   *
   * <p>Progress is printed to {@code System.out}; diagnostics go to a writer over
   * {@code System.err}, which is flushed before this method returns or throws.
   *
   * @throws IOException if reading an input or opening/writing an output fails
   * @throws IllegalStateException if {@code config.clusterPrecisionFile} is set and already exists
   */
  private static void run() throws IOException {
    // Sanity check: fail before doing any work rather than after the optimization has finished.
    if (!config.clusterPrecisionFile.isEmpty() && Util.fileExists(config.clusterPrecisionFile)) {
      throw new IllegalStateException("clusterPrecisionFile already exists!");
    }

    // Load graph from file
    System.out.println("Loading graph");
    Graph graph = Graph.fromFile(config.metisFile);
    System.out.println("Load graph: done");
    graph.print();

    // Allocate empty Z
    int n = graph.getNumVertices();
    SparseBinaryMatrix z = new SparseBinaryMatrix(n, config.getAlgoConfig().k);

    // Buffered, non-autoflushing writer over System.err. It must never be closed (that would close
    // System.err itself), so it is flushed in the finally block below instead.
    PrintWriter err = new PrintWriter(System.err);
    try {
      long tic = System.currentTimeMillis();
      // Initialize Z
      if (!config.initFromRowsFile.isEmpty()) {
        System.out.println("Initializing factors from rows");
        z.initFromRows(config.initFromRowsFile);
        System.out.println("Initialization from rows: done");
      } else if (!config.initFromColsFile.isEmpty()) {
        System.out.println("Initializing factors from columns");
        IntSet[] initCols = readColumnsFromFile(config.initFromColsFile);
        if (initCols.length != config.getAlgoConfig().k) {
          System.out.format(
              "Number of columns in %d different from K specified in config %d\n",
              initCols.length, config.getAlgoConfig().k
          );
          System.out.println("Will use the number of columns in the file as the new K");
          // The file wins over the configured K: re-allocate Z with the file's column count.
          z = new SparseBinaryMatrix(n, initCols.length);
        }
        z.initFromColSets(initCols);
        System.out.println("Initialization from columns: done");
      } else if (config.initFromRandomNeighborhoods) {
        System.out.println("Initializing from random neighborhoods");
        // set allowOverlap = false
        z.initFromBestNeighborhoods(
            graph, (g, i) -> config.getAlgoConfig().rng.nextDouble(), false, err
        );
        // Report the precision of the first two clusters as a quick look at the initialization.
        PredictionStat prec0 =
            MHAlgorithm.clusterPrecision(
                graph, z, 0, 1000, config.getAlgoConfig().rng
            );
        System.out.println("Precision of cluster 0:" + prec0.precision());
        PredictionStat prec1 =
            MHAlgorithm.clusterPrecision(
                graph, z, 1, 1000, config.getAlgoConfig().rng
            );
        System.out.println("Precision of cluster 1:" + prec1.precision());
        System.out.println(
            "Fraction of empty rows after initializing from random neighborhoods: "
                + z.emptyRowProportion()
        );
      } else if (config.initFromBestNeighborhood) {
        System.out.println("Initializing from best sub-neighborhoods in terms of conductance");
        z.initFromColSets(
            MHAlgorithm.getRandomBestConductanceSubNeighborhoods(
                graph, z.getNumCols(), config.getAlgoConfig().rng
            )
        );
        // Rows left empty by this strategy are not filled in here; only their share is reported.
        System.out.println(
            "Fraction of empty rows after initializing using best sub-neighborhoods: "
                + z.emptyRowProportion()
        );
      } else if (config.initFromNonoverlappingNeighborhood) {
        System.out.println(
            "Initializing from best non-overlapping sub-neighborhoods in terms of conductance");
        z.initFromColSets(
            MHAlgorithm.getNonOverlappingBestSubNeighborhoods(
                graph, z.getNumCols(), config.getAlgoConfig().rng
            )
        );
        System.out.println(
            "Fraction of empty rows after initializing using best non-overlapping "
                + "sub-neighborhoods: "
                + z.emptyRowProportion()
        );
      } else {
        System.out.println("Initializing factors randomly with 1 nnz/vertex");
        z.initEmptyRowsRandomly(config.getAlgoConfig().rng);
        System.out.println("Random initialization: done");
      }

      long toc = System.currentTimeMillis();
      System.out.println(
          String.format("Time to initialize: %.2f seconds\n", (toc - tic) / 1000.0));
      MHAlgorithm algo = new MHAlgorithm(config.getAlgoConfig(), graph, z, err);
      SparseBinaryMatrix optimizedZ = algo.optimize();
      long toc2 = System.currentTimeMillis();
      System.out.println(
          String.format("Time to optimize: %.2f seconds\n", (toc2 - toc) / 1000.0));
      System.out.println(String.format("Time to initialize & optimize: %.2f seconds\n",
          (toc2 - tic) / 1000.0));

      // Write output: each target is independent and only written when its path is configured.
      if (!config.outputByRowsFile.isEmpty()) {
        System.out.println("Writing rows of Z to " + config.outputByRowsFile);
        optimizedZ.outputByRows(config.outputByRowsFile);
        System.out.println("Output by rows: done");
      }
      if (!config.outputByColsFile.isEmpty()) {
        System.out.println("Writing columns of Z to " + config.outputByColsFile);
        optimizedZ.outputByCols(config.outputByColsFile);
        System.out.println("Output by columns: done");
      }
      if (!config.clusterPrecisionFile.isEmpty()) {
        MHAlgorithm.evalClusterPrecision(graph, optimizedZ,
            config.clusterPrecisionFile, config.getAlgoConfig().cpu);
      }
      if (!config.outputRowsWithScoresFile.isEmpty()) {
        System.out.println("Writing rows of Z with scores to " + config.outputRowsWithScoresFile);
        SparseRealMatrix srm =
            MHAlgorithm.heuristicallyScoreClusterAssignments(graph, optimizedZ);
        srm.normalizeToUnitColumn();
        // try-with-resources guarantees the file is flushed and closed even if writing throws;
        // closing a PrintWriter twice is a no-op, so this is safe if the helper closes it too.
        try (PrintWriter w = new PrintWriter(config.outputRowsWithScoresFile)) {
          writeRowsWithScores(srm, w);
        }
        System.out.println("Output rows with scores: done");
      }
    } finally {
      // Push out any buffered diagnostics, including when the run is aborted by an exception.
      err.flush();
    }
  }