in core/src/main/scala/com/spotify/featran/transformers/mdl/ThresholdFinder.scala [53:69]
/**
 * Scores a candidate split of a bucket into a left and a right partition.
 *
 * For each side this gathers the number of non-zero frequencies, the total frequency (taken as 0
 * when every entry is zero) and the value returned by `entropy` for that side. Those are then
 * combined with the bucket-wide figures carried by `bucketInfo` (`s`, `k`, `hs`).
 *
 * NOTE(review): the formula looks like the Fayyad-Irani MDLP acceptance criterion
 * (gain - (log2(s - 1) + delta) / s) -- confirm against the caller's expectations.
 *
 * @param bucketInfo aggregate figures for the bucket being split; only `s`, `k` and `hs` are read
 * @param leftFreqs  frequencies falling on the left side of the candidate split
 * @param rightFreqs frequencies falling on the right side of the candidate split
 * @return a tuple of (criterion value, size-weighted entropy of the two sides, left total,
 *         right total)
 */
def calcCriterionValue(
  bucketInfo: BucketInfo,
  leftFreqs: Seq[Long],
  rightFreqs: Seq[Long]
): (Double, Double, Long, Long) = {
  // Per-side summary: (count of non-zero entries, total frequency, entropy of the side).
  def sideStats(freqs: Seq[Long]) = {
    val nonZero = freqs.count(f => f != 0)
    val total = if (nonZero > 0) freqs.sum else 0L
    (nonZero, total, entropy(freqs, total))
  }

  val (leftK, leftTotal, leftHs) = sideStats(leftFreqs)
  val (rightK, rightTotal, rightHs) = sideStats(rightFreqs)

  // Entropy of the two sides, each weighted by its share of the bucket total.
  val weightedHs = (leftTotal * leftHs + rightTotal * rightHs) / bucketInfo.s
  val gain = bucketInfo.hs - weightedHs

  val entropyDiff = bucketInfo.k * bucketInfo.hs - leftK * leftHs - rightK * rightHs
  val delta = log2(math.pow(3, bucketInfo.k.toDouble) - 2) - entropyDiff

  // Term subtracted from the gain to obtain the criterion value.
  val penalty = (log2((bucketInfo.s - 1).toDouble) + delta) / bucketInfo.s

  (gain - penalty, weightedHs, leftTotal, rightTotal)
}