def spark()

in src/main/scala/com/spotify/bdrc/pipeline/TopItemsPerUser.scala [56:64]

6 lines of code
1 McCabe index (conditional complexity)


  /**
   * Naive Spark implementation: keep the `topK` highest-scored ratings of each user.
   *
   * Ratings are grouped by user, each group is sorted by descending score, and the first
   * `topK` entries of every group are emitted as one flat collection.
   *
   * @param input ratings to rank
   * @return at most `topK` ratings per user, highest score first within each user
   */
  def spark(input: RDD[Rating]): RDD[Rating] = {
    // `groupBy` shuffles every rating across the cluster, which is inefficient
    val ratingsByUser = input.groupBy(rating => rating.user)

    ratingsByUser.flatMap { case (_, userRatings) =>
      // The user key is dropped; only the grouped ratings are needed from here on.
      // Each group is turned into a `List[Rating]` and sorted on a single node, which is
      // inefficient as well. Negating the score yields descending order.
      val highestFirst = userRatings.toList.sortBy(rating => -rating.score)
      highestFirst.take(topK)
    }
  }