def calcCriterionValue()

in core/src/main/scala/com/spotify/featran/transformers/mdl/ThresholdFinder.scala [53:69]


  /**
   * Scores a candidate split of a bucket from the class frequencies on either side of it.
   *
   * For each side this derives the number of non-zero frequency entries, the total of the
   * entries (taken as 0 when every entry is zero) and the value returned by `entropy`. Those
   * are combined with `bucketInfo.s`, `bucketInfo.hs` and `bucketInfo.k` into
   * `gain - (log2(bucketInfo.s - 1) + delta) / bucketInfo.s`.
   *
   * NOTE(review): the expression has the shape of the Fayyad-Irani MDLP acceptance criterion;
   * that reading assumes `bucketInfo.s`, `hs` and `k` are the bucket's total count, entropy
   * and non-zero class count -- confirm against `BucketInfo`.
   * NOTE(review): nothing here guards against `bucketInfo.s` being 0 or 1 (division by `s`,
   * `log2(s - 1)`); presumably callers never pass such buckets -- verify.
   *
   * @param bucketInfo statistics of the bucket being split; only `s`, `hs` and `k` are read
   * @param leftFreqs  frequencies for the left side of the candidate split
   * @param rightFreqs frequencies for the right side of the candidate split
   * @return `(criterionValue, weightedHs, leftTotal, rightTotal)`, where `weightedHs` is the
   *         total-weighted mean of the two sides' `entropy` values divided by `bucketInfo.s`
   */
  def calcCriterionValue(
    bucketInfo: BucketInfo,
    leftFreqs: Seq[Long],
    rightFreqs: Seq[Long]
  ): (Double, Double, Long, Long) = {
    // Per-side summary: (non-zero entry count, total, entropy of the side).
    def summarise(freqs: Seq[Long]) = {
      val nonZero = freqs.count(_ != 0)
      val total: Long = if (nonZero > 0) freqs.sum else 0L
      (nonZero, total, entropy(freqs, total))
    }

    val (leftK, leftTotal, leftHs) = summarise(leftFreqs)
    val (rightK, rightTotal, rightHs) = summarise(rightFreqs)

    val weightedHs = (leftTotal * leftHs + rightTotal * rightHs) / bucketInfo.s
    val gain = bucketInfo.hs - weightedHs

    val diff = bucketInfo.k * bucketInfo.hs - leftK * leftHs - rightK * rightHs
    val delta = log2(math.pow(3, bucketInfo.k.toDouble) - 2) - diff
    // Cost term subtracted from the gain to obtain the criterion value.
    val penalty = (log2((bucketInfo.s - 1).toDouble) + delta) / bucketInfo.s

    (gain - penalty, weightedHs, leftTotal, rightTotal)
  }