public SimilarityResponse getSimilarNodes()

in graphjet-core/src/main/java/com/twitter/graphjet/algorithms/intersection/IntersectionSimilarity.java [78:188]


  public SimilarityResponse getSimilarNodes(IntersectionSimilarityRequest request, Random random,
                                            RelatedTweetFilterChain filterChain) {
    long queryNode = request.getQueryNode();
    LongSet seedSet = request.getSeedSet();
    int queryNodeDegree = bipartiteGraph.getRightNodeDegree(queryNode);
    if (queryNodeDegree == 0) {
      queryNodeDegreeIsZeroCounter.incr();
      return null;
    }
    // first, get all neighbors of the query node and the seedset -- we sample if there are too many
    seedSet.add(queryNode);
    Long2IntMap queryNodeRightNeighbors = new Long2IntOpenHashMap(1024);
    LongSet neighborSet = new LongOpenHashSet(1024);
    for (long node : seedSet) {
      EdgeIterator currentIterator;
      if (bipartiteGraph.getRightNodeDegree(node) <= request.getMaxNumNeighbors()) {
        lessThanMaxNumNeighborsCounter.incr();
        currentIterator = bipartiteGraph.getRightNodeEdges(node);
      } else {
        moreThanMaxNumNeighborsCounter.incr();
        currentIterator =
            bipartiteGraph.getRandomRightNodeEdges(node, request.getMaxNumNeighbors(), random);
      }
      neighborSet.clear();
      int repeatedLHSNeighbors = 0;
      if (currentIterator != null) {
        while (currentIterator.hasNext()) {
          long neighbor = currentIterator.nextLong();
          if (neighborSet.contains(neighbor)) {  // check a set for neighbors to avoid double counts
            repeatedLHSNeighbors++;
            continue;
          }
          queryNodeRightNeighbors.put(neighbor, queryNodeRightNeighbors.get(neighbor) + 1);
          neighborSet.add(neighbor);
        }
      } else {
        queryNodeNullIteratorCounter.incr();
      }
    }

    // now, for each neighbor, get their neighbors (from left side) and update a map of counts
    Long2DoubleMap nodeToWeightedCooccurenceCountsMap = new Long2DoubleOpenHashMap(1024);
    Long2IntMap nodeToCooccurrenceCountsMap = new Long2IntOpenHashMap(1024);
    int repeatedRHSNeighbors = 0;
    for (long leftNeighbor : queryNodeRightNeighbors.keySet()) {
      int neighborDegree = bipartiteGraph.getLeftNodeDegree(leftNeighbor);
      if (neighborDegree < request.getMinNeighborDegree()) {
        continue;
      }
      int neighborWeight = queryNodeRightNeighbors.get(leftNeighbor);
      EdgeIterator rightNeighborsForLeftNeighbor;
      if (neighborDegree < request.getMaxNumSamplesPerNeighbor()) {
        rightNeighborsForLeftNeighbor = bipartiteGraph.getLeftNodeEdges(leftNeighbor);
      } else {
        rightNeighborsForLeftNeighbor = bipartiteGraph.getRandomLeftNodeEdges(
            leftNeighbor, request.getMaxNumSamplesPerNeighbor(), random);
      }
      neighborSet.clear();
      if (rightNeighborsForLeftNeighbor != null) { // redundant check if degree > 0
        while (rightNeighborsForLeftNeighbor.hasNext()) {
          long similarNode = rightNeighborsForLeftNeighbor.nextLong();
          if (neighborSet.contains(similarNode)) { // check a neighbors set to avoid double counts
            repeatedRHSNeighbors++;
            continue;
          }
          double currentCooccurrenceCount = nodeToWeightedCooccurenceCountsMap.get(similarNode);
          int currentRawCooccurrenceCount = nodeToCooccurrenceCountsMap.get(similarNode);
          double leftContrib = relatedTweetUpdateNormalization.computeLeftNeighborContribution(
              neighborDegree);
          leftContrib = Double.isInfinite(leftContrib) ? 0 : leftContrib;
          nodeToWeightedCooccurenceCountsMap.put(similarNode, currentCooccurrenceCount
              + neighborWeight * leftContrib);
          nodeToCooccurrenceCountsMap.put(similarNode,
            currentRawCooccurrenceCount + neighborWeight);
          neighborSet.add(similarNode);
        }
      }
    }
    // finally, normalize and select top neighbors
    int maxNumResults = Math.min(
      request.getMaxNumResults(),
      RecommendationRequest.MAX_RECOMMENDATION_RESULTS
    );
    PriorityQueue<SimilarityInfo> topResults = new PriorityQueue<SimilarityInfo>(maxNumResults);
    for (long similarNode : nodeToWeightedCooccurenceCountsMap.keySet()) {
      double cooccurrence = nodeToWeightedCooccurenceCountsMap.get(similarNode);
      int rawCooccurrence = nodeToCooccurrenceCountsMap.get(similarNode);
      int similarNodeDegree = bipartiteGraph.getRightNodeDegree(similarNode);
      if (rawCooccurrence < request.getMinCooccurrence()
          || filterChain.filter(similarNode)) {
        continue;
      }
      double normWeight = relatedTweetUpdateNormalization.computeScoreNormalization(
          cooccurrence, similarNodeDegree, queryNodeDegree);
      normWeight = Double.isInfinite(normWeight) ? 0 : normWeight;
      double score = cooccurrence * normWeight;
      if (topResults.size() < maxNumResults) {
        topResults.add(new SimilarityInfo(similarNode, score, rawCooccurrence, similarNodeDegree));
      } else if (score > topResults.peek().getWeight()) {
        topResults.poll();
        topResults.add(new SimilarityInfo(similarNode, score, rawCooccurrence, similarNodeDegree));
      }
    }
    List<SimilarityInfo> outputResults = Lists.newArrayListWithCapacity(topResults.size());
    while (!topResults.isEmpty()) {
      outputResults.add(topResults.poll());
    }
    Collections.reverse(outputResults);

    return new SimilarityResponse(outputResults, queryNodeDegree);
  }