def spark()

in src/main/scala/com/spotify/bdrc/pipeline/TopItemsPerUser.scala [56:64]


  /**
   * Select the highest-scoring ratings of every user using plain RDD operations.
   *
   * Ratings are grouped by their `user` field, each group is sorted by descending
   * `score`, and at most `topK` ratings (a value defined outside this method) are
   * kept per group.
   *
   * @param input ratings to rank
   * @return the retained ratings of all users, flattened into a single RDD
   */
  def spark(input: RDD[Rating]): RDD[Rating] = {
    // `groupBy` shuffles all data across the cluster, inefficient
    val ratingsByUser = input.groupBy(rating => rating.user)

    // The user key is not needed in the output, so it is discarded here
    ratingsByUser.flatMap { case (_, userRatings) =>
      // The whole group is converted to a `List[Rating]` and sorted on a single node, inefficient
      userRatings.toList
        .sortBy(rating => -rating.score)
        .take(topK)
    }
  }