in src/main/scala/com/spotify/bdrc/pipeline/TopItems.scala [101:112]
/**
 * Top items by total score, computed with Spark and Algebird's `sortedReverseTake` aggregator.
 *
 * Scores are first summed per item; the resulting (item, total score) pairs are then
 * aggregated with a `sortedReverseTake` of size `topK`, ranked by the score component.
 *
 * @param input ratings to aggregate
 * @return the (item, total score) pairs produced by the aggregator
 */
def sparkWithAlgebird(input: RDD[Rating]): Seq[(String, Double)] = {
  import com.twitter.algebird.Aggregator.sortedReverseTake
  import com.twitter.algebird.spark._

  // Rank (item, total score) pairs by their score component only
  val byScore = Ordering.by[(String, Double), Double](_._2)
  val topKByScore = sortedReverseTake[(String, Double)](topK)(byScore)

  // Sum values with addition to get one total per item
  val totals = input
    .map(rating => (rating.item, rating.score))
    .reduceByKey(_ + _)

  // `aggregate` is an action and collects data back to the driver node
  totals.algebird.aggregate(topKByScore)
}