static int TrellisQuantizeBlock()

in Extended/libwebp/src/enc/quant_enc.c [588:745]


static int TrellisQuantizeBlock(const VP8Encoder* const enc,
                                int16_t in[16], int16_t out[16],
                                int ctx0, int coeff_type,
                                const VP8Matrix* const mtx,
                                int lambda) {
  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
  CostArrayPtr const costs =
      (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
  const int first = (coeff_type == 0) ? 1 : 0;
  Node nodes[16][NUM_NODES];
  ScoreState score_states[2][NUM_NODES];
  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
  ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
  score_t best_score;
  int n, m, p, last;

  {
    score_t cost;
    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
    const int last_proba = probas[VP8EncBands[first]][ctx0][0];

    // compute the position of the last interesting coefficient
    last = first - 1;
    for (n = 15; n >= first; --n) {
      const int j = kZigzag[n];
      const int err = in[j] * in[j];
      if (err > thresh) {
        last = n;
        break;
      }
    }
    // we don't need to go inspect up to n = 16 coeffs. We can just go up
    // to last + 1 (inclusive) without losing much.
    if (last < 15) ++last;

    // compute 'skip' score. This is the max score one can do.
    cost = VP8BitCost(0, last_proba);
    best_score = RDScoreTrellis(lambda, cost, 0);

    // initialize source node.
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
      const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
      ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
      ss_cur[m].costs = costs[first][ctx0];
    }
  }

  // traverse trellis.
  for (n = first; n <= last; ++n) {
    const int j = kZigzag[n];
    const uint32_t Q  = mtx->q_[j];
    const uint32_t iQ = mtx->iq_[j];
    const uint32_t B = BIAS(0x00);     // neutral bias
    // note: it's important to take sign of the _original_ coeff,
    // so we don't have to consider level < 0 afterward.
    const int sign = (in[j] < 0);
    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
    int level0 = QUANTDIV(coeff0, iQ, B);
    int thresh_level = QUANTDIV(coeff0, iQ, BIAS(0x80));
    if (thresh_level > MAX_LEVEL) thresh_level = MAX_LEVEL;
    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;

    {   // Swap current and previous score states
      ScoreState* const tmp = ss_cur;
      ss_cur = ss_prev;
      ss_prev = tmp;
    }

    // test all alternate level values around level0.
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
      Node* const cur = &NODE(n, m);
      int level = level0 + m;
      const int ctx = (level > 2) ? 2 : level;
      const int band = VP8EncBands[n + 1];
      score_t base_score;
      score_t best_cur_score = MAX_COST;
      int best_prev = 0;   // default, in case

      ss_cur[m].score = MAX_COST;
      ss_cur[m].costs = costs[n + 1][ctx];
      if (level < 0 || level > thresh_level) {
        // Node is dead.
        continue;
      }

      {
        // Compute delta_error = how much coding this level will
        // subtract to max_error as distortion.
        // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
        const int new_error = coeff0 - level * Q;
        const int delta_error =
            kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
        base_score = RDScoreTrellis(lambda, 0, delta_error);
      }

      // Inspect all possible non-dead predecessors. Retain only the best one.
      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
        // eliminated since their score can't be better than the current best.
        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
        // Examine node assuming it's a non-terminal one.
        const score_t score =
            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
        if (score < best_cur_score) {
          best_cur_score = score;
          best_prev = p;
        }
      }
      // Store best finding in current node.
      cur->sign = sign;
      cur->level = level;
      cur->prev = best_prev;
      ss_cur[m].score = best_cur_score;

      // Now, record best terminal node (and thus best entry in the graph).
      if (level != 0) {
        const score_t last_pos_cost =
            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
        const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
        const score_t score = best_cur_score + last_pos_score;
        if (score < best_score) {
          best_score = score;
          best_path[0] = n;                     // best eob position
          best_path[1] = m;                     // best node index
          best_path[2] = best_prev;             // best predecessor
        }
      }
    }
  }

  // Fresh start
  memset(in + first, 0, (16 - first) * sizeof(*in));
  memset(out + first, 0, (16 - first) * sizeof(*out));
  if (best_path[0] == -1) {
    return 0;   // skip!
  }

  {
    // Unwind the best path.
    // Note: best-prev on terminal node is not necessarily equal to the
    // best_prev for non-terminal. So we patch best_path[2] in.
    int nz = 0;
    int best_node = best_path[1];
    n = best_path[0];
    NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal

    for (; n >= first; --n) {
      const Node* const node = &NODE(n, best_node);
      const int j = kZigzag[n];
      out[n] = node->sign ? -node->level : node->level;
      nz |= node->level;
      in[j] = out[n] * mtx->q_[j];
      best_node = node->prev;
    }
    return (nz != 0);
  }
}