in core/src/main/scala/com/spotify/featran/transformers/mdl/ThresholdFinder.scala [145:169]
/**
 * Evaluates a run of candidate thresholds and returns the one to select, if any.
 *
 * For each candidate, the per-label frequencies seen up to and including that candidate
 * ("left") and the remainder of the overall totals ("right") are computed, and the resulting
 * tuples are handed to `bestThreshold` for the actual acceptance decision.
 *
 * @param candidates   candidate thresholds, each paired with its per-label frequency counts
 * @param lastSelected forwarded unchanged to `bestThreshold` (presumably the most recently
 *                     selected threshold -- confirm against `bestThreshold`)
 * @param nLabels      number of labels, i.e. the length of the per-label accumulators
 * @return the second component of the minimum entry returned by `bestThreshold`, or `None`
 *         when `bestThreshold` accepts no candidate
 */
private def evalThresholds(
  candidates: Seq[(Float, Array[Long])],
  lastSelected: Option[Float],
  nLabels: Int
): Option[Float] = {
  // Overall frequency of each label across every candidate, accumulated in place.
  val totals = Array.fill(nLabels)(0L)
  candidates.foreach { case (_, freq) => MDLUtil.plusI(totals, freq) }

  // Walk the candidates once, carrying the running left-hand frequencies and prepending one
  // (threshold, own freq, left freq, right freq) tuple per candidate. Because of the
  // prepending, the resulting list is in reverse candidate order.
  val (_, entropyFreqs) = candidates.foldLeft(
    (Array.fill(nLabels)(0L), List.empty[(Float, Array[Long], Array[Long], Array[Long])])
  ) { case ((leftSoFar, collected), (cand, freq)) =>
    val left = MDLUtil.plus(leftSoFar, freq)
    val right = MDLUtil.minus(totals, left)
    (left, (cand, freq, left, right) :: collected)
  }

  // Let bestThreshold decide which candidates are acceptable.
  val accepted = bestThreshold(entropyFreqs, lastSelected, totals)

  // Of the accepted entries take the minimum one (per the original note, the one with the
  // minimum weightedHs) and return its second component, the threshold.
  if (accepted.isEmpty) None else Some(accepted.min._2)
}