def sparkWithMllib()

in src/main/scala/com/spotify/bdrc/pipeline/TopItemsPerUser.scala [81:89]


  /**
   * Select the top `topK` ratings of every user, ranked by score, using the
   * `topByKey` extension that `spark-mllib` adds to pair RDDs.
   *
   * @param input ratings to select from
   * @return the ratings retained for each user, flattened into a single RDD
   */
  def sparkWithMllib(input: RDD[Rating]): RDD[Rating] = {
    import org.apache.spark.mllib.rdd.MLPairRDDFunctions._

    // Ratings are ranked by their score
    val byScore: Ordering[Rating] = Ordering.by(_.score)

    // Key every rating by its user so the selection happens per user
    val ratingsByUser = input.keyBy(_.user)

    // `topByKey` comes from `spark-mllib`: top K per key, computed with a priority queue
    val bestPerUser = ratingsByUser.topByKey(topK)(byScore)

    // Discard the user key and flatten each group back into individual ratings
    bestPerUser.flatMap { case (_, best) => best }
  }