in core/src/main/scala/com/spotify/featran/transformers/mdl/ThresholdFinder.scala [105:131]
/**
 * Screens candidate thresholds and keeps those that pass the acceptance test.
 *
 * Every element of `entropyFreqs` is read as `(candidate, _, leftFreqs, rightFreqs)`; the second
 * component is ignored by this method. A candidate equal to `lastSelected` is discarded without
 * calling `calcCriterionValue`. For every other candidate, `calcCriterionValue` is invoked and
 * the candidate is kept only when its criterion value exceeds `stoppingCriterion` and both the
 * left and right sums exceed `minBinWeight`.
 *
 * Despite the name, this returns every accepted candidate rather than a single one.
 * NOTE(review): presumably the caller picks the best entry from the result; confirm at call site.
 *
 * @param entropyFreqs candidate thresholds together with their frequency arrays
 * @param lastSelected previously selected threshold, if any; an equal candidate is skipped
 * @param totals       counts wrapped into the `BucketInfo` handed to `calcCriterionValue`
 * @return `(weightedHs, candidate)` for each accepted candidate, in the order of `entropyFreqs`
 */
def bestThreshold(
  entropyFreqs: Seq[(Float, Array[Long], Array[Long], Array[Long])],
  lastSelected: Option[Float],
  totals: Array[Long]
): Seq[(Double, Float)] = {
  val totalsInfo = new BucketInfo(ArraySeq.unsafeWrapArray(totals))

  entropyFreqs.flatMap {
    // Same value as `lastSelected`: dropped before calcCriterionValue is ever called.
    case (threshold, _, _, _) if lastSelected.exists(_ == threshold) =>
      None
    case (threshold, _, left, right) =>
      val (criterion, weightedH, leftTotal, rightTotal) = calcCriterionValue(
        totalsInfo,
        ArraySeq.unsafeWrapArray(left),
        ArraySeq.unsafeWrapArray(right)
      )
      // Keep the candidate only when the criterion and both side sums clear their limits.
      val accepted =
        criterion > stoppingCriterion && leftTotal > minBinWeight && rightTotal > minBinWeight
      if (accepted) Some((weightedH, threshold)) else None
  }
}