private def midpoint()

in core/src/main/scala/com/spotify/featran/transformers/mdl/MDLPDiscretizer.scala [54:95]


  /**
   * Arithmetic mean of two floats.
   *
   * The straightforward `(x1 + x2) / 2` is used whenever the sum is representable. If the sum
   * overflows to an infinity, each operand is halved before adding so that two large finite
   * inputs still yield a finite midpoint. Infinite or NaN inputs give the same result as the
   * straightforward formula.
   *
   * @param x1 first value
   * @param x2 second value
   * @return the value halfway between `x1` and `x2`
   */
  private def midpoint(x1: Float, x2: Float): Float = {
    val sum = x1 + x2
    if (java.lang.Float.isInfinite(sum)) {
      // Either an operand is already infinite (result stays infinite) or the addition
      // overflowed; halving first avoids the overflow in the latter case.
      x1 / 2.0f + x2 / 2.0f
    } else {
      sum / 2.0f
    }
  }

  /**
   * Computes discretization thresholds for the feature held in `data`.
   *
   * Steps:
   *  1. Build a sorted histogram: distinct feature value (as `Float`) -> per-label counts.
   *  2. Walk the distinct values in ascending order and, wherever `isBoundary` reports a
   *     boundary between consecutive values, emit the midpoint of the two values together with
   *     the label counts accumulated since the previous emitted point. The last distinct value
   *     is always emitted with whatever counts remain.
   *  3. Hand the candidates, sorted by value, to a `ThresholdFinder` and convert its output
   *     to `Double`.
   *
   * @param maxBins passed through to `ThresholdFinder`
   * @return the thresholds returned by `ThresholdFinder.findThresholds`, as doubles
   */
  def discretize(maxBins: Int = MDLPDiscretizer.DefaultMaxBins): Seq[Double] = {
    // Sorted map from distinct feature value to an array of counts indexed by label index.
    val histogram = new java.util.TreeMap[Float, Array[Long]]()
    data.foreach { case (label, value) =>
      val key = value.toFloat
      val labelIdx = labels(label)
      Option(histogram.get(key)) match {
        case Some(counts) =>
          counts(labelIdx) += 1L
        case None =>
          val counts = new Array[Long](labels.size)
          counts(labelIdx) = 1L
          histogram.put(key, counts)
      }
    }

    val candidates: List[(Float, Array[Long])] =
      if (histogram.isEmpty) {
        Nil
      } else {
        val entries = histogram.asScala.iterator
        val (firstX, firstFreqs) = entries.next()
        var prevX = firstX
        var prevFreqs = firstFreqs
        // Starts out as the first value's own count array and is accumulated into in place.
        var running = firstFreqs
        var emitted = List.empty[(Float, Array[Long])]
        entries.foreach { case (currentX, currentFreqs) =>
          if (isBoundary(currentFreqs, prevFreqs)) {
            emitted = (midpoint(currentX, prevX), running) :: emitted
            // Begin a fresh accumulator for the counts following this candidate.
            running = new Array[Long](labels.size)
          }
          prevX = currentX
          prevFreqs = currentFreqs
          MDLUtil.plusI(running, currentFreqs)
        }
        // The largest value always closes the final group.
        (prevX, running) :: emitted
      }

    val minBinWeight: Long = (minBinPercentage * data.length / 100.0).toLong
    val sortedCandidates = candidates.sortBy { case (threshold, _) => threshold }
    new ThresholdFinder(labels.size, stoppingCriterion, maxBins, minBinWeight)
      .findThresholds(sortedCandidates)
      .map(_.toDouble)
  }