in src/main/scala/com/spotify/bdrc/pipeline/TopItemsPerUser.scala [56:64]
/**
 * Select, for every user, the `topK` ratings with the highest score.
 *
 * This is the straightforward formulation: all ratings are grouped by user and each
 * group is then ranked in memory. `topK` is defined outside this method.
 *
 * @param input ratings to rank
 * @return the retained ratings of all users, flattened into a single `RDD`
 */
def spark(input: RDD[Rating]): RDD[Rating] = {
  input
    // Inefficient: `groupBy` shuffles every rating across the cluster
    .groupBy(rating => rating.user)
    // The user key is not needed past this point
    .values
    .flatMap { userRatings =>
      // Inefficient: a user's whole group is materialized as a `List[Rating]` and sorted
      // on a single node; negating the score yields descending order
      val ranked = userRatings.toList.sortBy(rating => -rating.score)
      ranked.take(topK)
    }
}