def bestThreshold()

in core/src/main/scala/com/spotify/featran/transformers/mdl/ThresholdFinder.scala [105:131]


  /**
   * Evaluates every candidate threshold and keeps the ones that pass the acceptance test.
   *
   * A candidate is accepted when the criterion value returned by `calcCriterionValue` is
   * greater than `stoppingCriterion` and both the left and right sums are greater than
   * `minBinWeight`.
   *
   * @param entropyFreqs
   *   tuples of (candidate, unused here, left frequencies, right frequencies); the second
   *   element is ignored by this method
   * @param lastSelected
   *   when defined, any candidate equal to this value is dropped without evaluating it
   * @param totals
   *   frequencies wrapped into the `BucketInfo` handed to `calcCriterionValue`
   * @return
   *   one `(weightedHs, candidate)` pair per accepted candidate, in input order
   */
  def bestThreshold(
    entropyFreqs: Seq[(Float, Array[Long], Array[Long], Array[Long])],
    lastSelected: Option[Float],
    totals: Array[Long]
  ): Seq[(Double, Float)] = {
    val bucketInfo = new BucketInfo(ArraySeq.unsafeWrapArray(totals))

    entropyFreqs.flatMap {
      // Same value as the previously selected one: skip it before doing any entropy work.
      case (candidate, _, _, _) if lastSelected.exists(_ == candidate) =>
        None

      case (candidate, _, left, right) =>
        val (criterionValue, weightedHs, leftSum, rightSum) =
          calcCriterionValue(
            bucketInfo,
            ArraySeq.unsafeWrapArray(left),
            ArraySeq.unsafeWrapArray(right)
          )
        // All three strict comparisons must hold for the candidate to be kept.
        val accepted =
          criterionValue > stoppingCriterion &&
            leftSum > minBinWeight &&
            rightSum > minBinWeight

        if (accepted) {
          Some((weightedHs, candidate))
        } else {
          None
        }
    }
  }