def lolly_nrce()

in twml/twml/metrics.py [0:0]
170 lines of code
6 McCabe index (conditional complexity)

def lolly_nrce(labels, predictions,
               weights=None,
               metrics_collections=None,
               updates_collections=None,
               name=None):
  """
  Compute the Lolly NRCE (normalized relative cross entropy) as a streaming metric.

  Note: As this NRCE calculation uses Taylor expansion, it becomes inaccurate when the ctr is large,
  especially when the adjusted ctr goes above 1.0.

  Calculation:

  ::

    NRCE: lolly NRCE
    BCE: baseline cross entropy
    NCE: normalized cross entropy
    CE: cross entropy
    y_i: label of example i
    p_i: prediction of example i
    y: ctr
    p: average prediction
    a: normalizer

    Assumes any p_i and a * p_i is within [0, 1)
    NRCE = (1 - NCE / BCE) * 100
    BCE = - sum_i(y_i * log(y) + (1 - y_i) * log(1 - y))
        = - (y * log(y) + (1 - y) * log(1 - y))
    a = y / p
    CE = - sum_i(y_i * log(p_i) + (1 - y_i) * log(1 - p_i))
    NCE = - sum_i(y_i * log(a * p_i) + (1 - y_i) * log(1 - a * p_i))
        = - sum_i(y_i * log(p_i) + (1 - y_i) * log(1 - p_i))
          - sum_i(y_i * log(a))
          + sum_i((1 - y_i) * log(1 - p_i))
          - sum_i((1 - y_i) * log(1 - a * p_i))
        ~= CE - sum_i(y_i) * log(a)
          + sum_i((1 - y_i) * (- sum_{j=1~5}(p_i^j / j)))
          - sum_i((1 - y_i) * (- sum_{j=1~5}(a^j * p_i^j / j)))
          # Takes 5 items from the Taylor expansion, can be increased if needed
          # Error for each example is O(p_i^6)
        = CE - sum_i(y_i) * log(a)
          - sum_{j=1~5}(sum_i((1 - y_i) * p_i^j) / j)
          + sum_{j=1~5}(sum_i((1 - y_i) * p_i^j) * a^j / j)
        = CE - sum_i(y_i) * log(a)
          + sum_{j=1~5}(sum_i((1 - y_i) * p_i^j) * (a^j - 1) / j)

  Thus we keep track of CE, sum_i(y_i), sum_i((1 - y_i) * p_i^j) for j=1~5.
  We also keep track of p and y by sum_i(y_i), sum_i(p_i), sum_i(1) so that
  we can get a at the end, which leads to this NRCE.
  All of these sums are weighted by ``weights`` in the implementation.

  NRCE uses ctr and average pctr to normalize the pctrs.
  It removes the impact of prediction error from RCE.
  Usually NRCE is higher as the prediction error impact on RCE is negative.
  Removing prediction error in our model can make RCE closer to NRCE and thus improve RCE.

  In Lolly NRCE we use ctr and average pctr of the whole dataset.
  We thus remove the dataset level error in NRCE calculation.
  In this case, when we want to improve RCE to the level of NRCE,
  it is achievable as dataset level prediction error is easy to remove by calibration.
  Lolly NRCE is thus a good estimate about the potential gain by adding calibration.

  In DBv2 NRCE, we use per-batch ctr and average pctr. We remove the batch level error.
  This error is difficult to remove by modeling improvement,
  at least not by simple calibration.
  It thus cannot indicate the same opportunity as the Lolly NRCE does.

  Args:
    labels:
      the ground truth values.
    predictions:
      the predicted values, whose shape must match labels.
    weights:
      optional weights, whose shape must match labels. Weight is 1 if not set.
    metrics_collections:
      optional list of collections to add this metric into.
    updates_collections:
      optional list of collections to add the associated update_op into.
    name:
      an optional variable_scope name.

  Return:
    nrce_value:
      A ``Tensor`` representing the NRCE computed from the accumulated totals.
    update_op:
      An update operation used to accumulate data into this metric. It evaluates to
      the NRCE computed from the totals after the current batch has been added.

  Note: Must have at least 1 positive and 1 negative sample accumulated,
        or NRCE will come out as NaN.
  """
  # Name fragments of the order-1..5 Taylor terms. They are spliced into the op and
  # variable names below so that every explicitly named graph node keeps its name.
  moment_tags = ("", "squared_", "cubed_", "quartic_", "quintic_")

  with tf.variable_scope(name, "lolly_nrce", (labels, predictions, weights)):
    labels = tf.to_float(labels, name="label_to_float")
    predictions = tf.to_float(predictions, name="predictions_to_float")

    if weights is None:
      weights = tf.ones(shape=tf.shape(labels), dtype=tf.float32, name="default_weight")
    else:
      weights = tf.to_float(weights, name="weight_to_float")

    positive_weights = tf.multiply(labels, weights, name="positive_weights")

    # clamp predictions to keep log(p) stable
    clip_predictions = tf.clip_by_value(
      predictions,
      CLAMP_EPSILON,
      1.0 - CLAMP_EPSILON,
      name="clip_predictions")
    weighted_predictions = tf.multiply(
      predictions, weights,
      name="weighted_predictions")

    logloss = _binary_cross_entropy(pred=clip_predictions, target=labels, name="logloss")
    weighted_logloss = tf.multiply(logloss, weights, name="weighted_logloss")

    negatives = tf.subtract(
      tf.ones(shape=tf.shape(labels), dtype=tf.float32),
      labels,
      name="negatives")

    # Per-example (1 - y_i) * p_i^j * w_i for j = 1..5.
    # The prediction is raised to the power first and the negative mask applied once,
    # so the term matches the (1 - y_i) * p_i^j of the derivation even when labels are
    # not strictly 0/1. For 0/1 labels this equals ((1 - y_i) * p_i)^j exactly.
    weighted_negative_moments = []
    prediction_power = predictions
    for order, tag in enumerate(moment_tags, start=1):
      if order > 1:
        prediction_power = tf.multiply(prediction_power, predictions)
      negative_power = tf.multiply(
        prediction_power,
        negatives,
        name="negative_{}predictions".format(tag))
      weighted_negative_moments.append(tf.multiply(
        negative_power, weights,
        name="weighted_negative_{}predictions".format(tag)))

    # Tracked stats
    total_positive = _metric_variable(name="total_positive", shape=[], dtype=tf.float32)
    total_weight = _metric_variable(name="total_weight", shape=[], dtype=tf.float32)

    total_prediction = _metric_variable(name="total_prediction", shape=[], dtype=tf.float32)

    total_negative_moments = [
      _metric_variable(
        name="total_negative_{}prediction".format(tag),
        shape=[], dtype=tf.float32)
      for tag in moment_tags]

    total_loss = _metric_variable(name="total_loss", shape=[], dtype=tf.float32)

    # Update tracked stats
    update_total_positive = tf.assign_add(
      total_positive, tf.reduce_sum(positive_weights), name="total_positive_update")
    update_total_weight = tf.assign_add(
      total_weight, tf.reduce_sum(weights), name="total_weight_update")
    update_total_prediction = tf.assign_add(
      total_prediction, tf.reduce_sum(weighted_predictions), name="total_prediction_update")
    update_total_negative_moments = [
      tf.assign_add(
        total_moment,
        tf.reduce_sum(batch_moment),
        name="total_negative_{}prediction_update".format(tag))
      for tag, total_moment, batch_moment in zip(
        moment_tags, total_negative_moments, weighted_negative_moments)]
    update_total_loss = tf.assign_add(
      total_loss, tf.reduce_sum(weighted_logloss), name="total_loss_update")

    # metric value retrieval subgraph: reads the accumulators without updating them
    nrce_t = _lolly_nrce_from_totals(
      total_positive, total_weight, total_prediction, total_loss,
      total_negative_moments,
      name_prefix="", result_name="lolly_nrce")

    # metric update subgraph: built on the assign_add outputs, so evaluating it
    # accumulates the current batch and yields the NRCE of the updated totals
    update_op = _lolly_nrce_from_totals(
      update_total_positive, update_total_weight, update_total_prediction, update_total_loss,
      update_total_negative_moments,
      name_prefix="update_", result_name="update_op")

    if metrics_collections:
      tf.add_to_collections(metrics_collections, nrce_t)

    if updates_collections:
      tf.add_to_collections(updates_collections, update_op)

    return nrce_t, update_op


def _lolly_nrce_from_totals(total_positive, total_weight, total_prediction, total_loss,
                            total_negative_moments, name_prefix, result_name):
  """
  Build the Lolly NRCE tensor from accumulated totals.

  Shared by the value and the update subgraphs of ``lolly_nrce`` so that both
  evaluate exactly the same formula.

  Args:
    total_positive:
      scalar, weighted sum of labels: sum_i(w_i * y_i).
    total_weight:
      scalar, sum of weights: sum_i(w_i).
    total_prediction:
      scalar, weighted sum of predictions: sum_i(w_i * p_i).
    total_loss:
      scalar, weighted sum of per-example cross entropy (CE).
    total_negative_moments:
      sequence of scalars; the j-th entry (1-based) is sum_i(w_i * (1 - y_i) * p_i^j).
    name_prefix:
      string prepended to the names of the intermediate ops.
    result_name:
      name of the returned op.

  Return:
    A scalar ``Tensor``: (1 - NCE / BCE) * 100, with NCE and BCE averaged over total_weight.
  """
  # ctr over everything accumulated so far (not only the current batch)
  positive_rate = tf.truediv(total_positive, total_weight, name=name_prefix + "positive_rate")
  # Note: we don't have to keep running averages for computing baseline CE. Because the prediction
  # is constant for every sample, we can simplify it to the formula below.
  baseline_loss = _binary_cross_entropy(
    pred=positive_rate,
    target=positive_rate,
    name=name_prefix + "baseline_loss")

  # normalizing ratio for nrce: a = ctr / average pctr = sum(w * y) / sum(w * p)
  # calculated using total ctr and pctr so the last batch has the dataset ctr and pctr
  normalizer = tf.truediv(total_positive, total_prediction, name=name_prefix + "normalizer")

  # Taylor expansion to calculate nl = - sum(y * log(p * a) + (1 - y) * log(1 - p * a))
  # log(1 - p * a) = -sum_{j=1~+inf}(a^j * p^j / j)
  # log(1 - p) = -sum_{j=1~+inf}(p^j / j)
  # => nl ~= CE - sum(y) * log(a) + sum_j(sum((1 - y) * p^j) * (a^j - 1) / j)
  normalized_loss = total_loss - total_positive * tf.log(normalizer)
  normalizer_power = normalizer
  for order, negative_moment in enumerate(total_negative_moments, start=1):
    if order > 1:
      normalizer_power = normalizer_power * normalizer
    normalized_loss = normalized_loss + negative_moment * (normalizer_power - 1) / order

  # average normalized loss
  avg_loss = tf.truediv(normalized_loss, total_weight, name=name_prefix + "avg_loss")

  return tf.multiply(
    1.0 - tf.truediv(avg_loss, baseline_loss),
    100,
    name=result_name)