in cassovary-core/src/main/scala/com/twitter/cassovary/algorithms/similarity/Similarity.scala [45:75]
// Declared without a body in this view, i.e. the concrete similarity measure is supplied elsewhere.
// getTopKSimilarNodes invokes it once per candidate node (passing `u`, the candidate id and `dir`)
// and keeps only candidates whose returned score is greater than 0.0.
def calculateSimilarity(u: Int, v: Int, dir: GraphDir = GraphDir.OutDir): Double
/**
* Calculate the similarity score between node `u` and each candidate node, and return the best `k`.
* If the graph stores edges in both the in and out directions, the candidates are the nodes reached
* by following `dir` from `u` and then the reverse of `dir` from each of those neighbors; otherwise
* every node in the graph is a candidate. Node `u` itself is never scored.
* @param u current node ID
* @param k limit for similar nodes
* @param dir direction of edges in Directed Graph
* @return Seq of top `k` similar node ids and their similarity score with node `u`.
* Only nodes with a score greater than zero are added, so the length of the Seq can be less than `k`.
*/
def getTopKSimilarNodes(u: Int, k: Int, dir: GraphDir = GraphDir.OutDir): Seq[(Int, Double)] = {
  // Bounded queue that retains at most `k` scored candidates.
  val topK = new SmallBoundedPriorityQueue[SimilarNodes](k)

  // Ids of the nodes that will be scored against `u`.
  val candidateIds = {
    if (graph.storedGraphDir != StoredGraphDir.BothInOut) {
      // Only one edge direction is stored: fall back to scanning every node in the graph.
      graph.map(_.id)
    } else {
      // Both directions are stored: walk from `u` along `dir`, then back along the reverse
      // direction from each of those neighbors. A failed lookup (None) contributes nothing.
      getNeighbors(u, dir) match {
        case None => Seq.empty[Int]
        case Some(firstHop) =>
          firstHop.flatMap { via =>
            getNeighbors(via, GraphDir.reverse(dir)).getOrElse(Seq.empty)
          }
      }
    }
  }

  candidateIds.foreach {
    case candidate if candidate != u =>
      val score = calculateSimilarity(u, candidate, dir)
      // Candidates with a score of zero (or below) are not reported.
      if (score > 0.0) topK += SimilarNodes(candidate, score)
    case _ => () // the query node itself is skipped
  }

  topK.top(k).map(entry => (entry.nodeId, entry.score))
}