in twml/twml/metrics.py [0:0]
def lolly_nrce(labels, predictions,
               weights=None,
               metrics_collections=None,
               updates_collections=None,
               name=None):
    """
    Streaming Lolly NRCE metric.

    Note: this NRCE calculation relies on a Taylor expansion, so it becomes inaccurate when
    the ctr is large, especially when the adjusted ctr goes above 1.0.

    Calculation:

    ::

      NRCE: lolly NRCE
      BCE: baseline cross entropy
      NCE: normalized cross entropy
      CE: cross entropy
      y_i: label of example i
      p_i: prediction of example i
      y: ctr
      p: average prediction
      a: normalizer

      Assumes any p_i and a * p_i is within [0, 1)

      NRCE = (1 - NCE / BCE) * 100
      BCE = - sum_i(y_i * log(y) + (1 - y_i) * log(1 - y))
          = - (y * log(y) + (1 - y) * log(1 - y))
      a = y / p
      CE = - sum_i(y_i * log(p_i) + (1 - y_i) * log(1 - p_i))
      NCE = - sum_i(y_i * log(a * p_i) + (1 - y_i) * log(1 - a * p_i))
          = - sum_i(y_i * log(p_i) + (1 - y_i) * log(1 - p_i))
            - sum_i(y_i * log(a))
            + sum_i((1 - y_i) * log(1 - p_i))
            - sum_i((1 - y_i) * log(1 - a * p_i))
          ~= CE - sum_i(y_i) * log(a)
            + sum_i((1 - y_i) * (- sum_{j=1~5}(p_i^j / j)))
            - sum_i((1 - y_i) * (- sum_{j=1~5}(a^j * p_i^j / j)))
            # Takes 5 items from the Taylor expansion, can be increased if needed
            # Error for each example is O(p_i^6)
          = CE - sum_i(y_i) * log(a)
            - sum_{j=1~5}(sum_i((1 - y_i) * p_i^j) / j)
            + sum_{j=1~5}(sum_i((1 - y_i) * p_i^j) * a^j / j)
          = CE - sum_i(y_i) * log(a)
            + sum_{j=1~5}(sum_i((1 - y_i) * p_i^j) * (a^j - 1) / j)

    Thus we keep track of CE, sum_i(y_i), sum_i((1 - y_i) * p_i^j) for j=1~5.
    We also keep track of p and y by sum_i(y_i), sum_i(p_i), sum_i(1) so that
    we can get a at the end, which leads to this NRCE.

    NRCE uses ctr and average pctr to normalize the pctrs.
    It removes the impact of prediction error from RCE.
    Usually NRCE is higher as the prediction error impact on RCE is negative.
    Removing prediction error in our model can make RCE closer to NRCE and thus improve RCE.

    In Lolly NRCE we use ctr and average pctr of the whole dataset.
    We thus remove the dataset level error in NRCE calculation.
    In this case, when we want to improve RCE to the level of NRCE,
    it is achievable as dataset level prediction error is easy to remove by calibration.
    Lolly NRCE is thus a good estimate about the potential gain by adding calibration.

    In DBv2 NRCE, we use per-batch ctr and average pctr. We remove the batch level error.
    This error is difficult to remove by modeling improvement,
    at least not by simple calibration.
    It thus cannot indicate the same opportunity as the Lolly NRCE does.

    Args:
      labels:
        the ground truth value.
      predictions:
        the predicted values, whose shape must match labels.
      weights:
        optional weights, whose shape must match labels. Weight is 1 if not set.
      metrics_collections:
        optional list of collections to add this metric into.
      updates_collections:
        optional list of collections to add the associated update_op into.
      name:
        an optional variable_scope name.

    Returns:
      nrce_value:
        A ``Tensor`` representing the NRCE computed from the accumulated statistics.
      update_op:
        An update operation used to accumulate data into this metric. It evaluates to the
        NRCE computed from the freshly updated statistics.

    Note: Must have at least 1 positive and 1 negative sample accumulated,
    or NRCE will come out as NaN.
    """
    with tf.variable_scope(name, "lolly_nrce", (labels, predictions, weights)):
        labels = tf.to_float(labels, name="label_to_float")
        predictions = tf.to_float(predictions, name="predictions_to_float")

        if weights is not None:
            weights = tf.to_float(weights, name="weight_to_float")
        else:
            # Unweighted case: every example counts once.
            weights = tf.ones(shape=tf.shape(labels), dtype=tf.float32, name="default_weight")

        positive_weights = tf.multiply(labels, weights, name="positive_weights")

        # Only the log loss sees the clamped predictions (keeps log(p) stable); every
        # other statistic below is built from the raw predictions.
        clip_predictions = tf.clip_by_value(
            predictions, CLAMP_EPSILON, 1.0 - CLAMP_EPSILON, name="clip_predictions")
        weighted_predictions = tf.multiply(predictions, weights, name="weighted_predictions")
        logloss = _binary_cross_entropy(pred=clip_predictions, target=labels, name="logloss")
        weighted_logloss = tf.multiply(logloss, weights, name="weighted_logloss")

        # (1 - y_i): selects the negative examples.
        negatives = tf.subtract(
            tf.ones(shape=tf.shape(labels), dtype=tf.float32), labels, name="negatives")

        # Per-example weighted (1 - y_i) * p_i^j for j = 1..5, each paired with the name of
        # the accumulator that will hold its running sum.
        masked_predictions = tf.multiply(predictions, negatives, name="negative_predictions")
        moment_stats = [(
            "total_negative_prediction",
            tf.multiply(masked_predictions, weights, name="weighted_negative_predictions"),
        )]
        masked_power = masked_predictions
        for degree in ("squared", "cubed", "quartic", "quintic"):
            masked_power = tf.multiply(
                masked_power, masked_predictions,
                name="negative_%s_predictions" % degree)
            moment_stats.append((
                "total_negative_%s_prediction" % degree,
                tf.multiply(
                    masked_power, weights,
                    name="weighted_negative_%s_predictions" % degree),
            ))

        def _track(stat_name, batch_values):
            """Create the scalar accumulator `stat_name` and the op adding this batch's sum to it."""
            accumulator = _metric_variable(name=stat_name, shape=[], dtype=tf.float32)
            accumulate = tf.assign_add(
                accumulator, tf.reduce_sum(batch_values), name=stat_name + "_update")
            return accumulator, accumulate

        def _nrce(positive, weight, prediction, negative_moments, loss, prefix, result_name):
            """Assemble the NRCE subgraph from the five kinds of tracked statistics."""
            # accumulated ctr
            positive_rate = tf.truediv(positive, weight, name=prefix + "positive_rate")
            # Note: we don't have to keep running averages for computing baseline CE. Because
            # the prediction is constant for every sample, the baseline reduces to the cross
            # entropy of the ctr against itself.
            baseline_loss = _binary_cross_entropy(
                pred=positive_rate, target=positive_rate, name=prefix + "baseline_loss")
            # normalizing ratio a = ctr / average pctr, taken over everything accumulated so
            # far, so after the last batch it holds the dataset-level ratio
            normalizer = tf.truediv(positive, prediction, name=prefix + "normalizer")
            # Taylor expansion of nl = - sum(y * log(p * a) + (1 - y) * log(1 - p * a)) using
            #   log(1 - p * a) = -sum_{j=1~+inf}(a^j * p^j / j)
            #   log(1 - p)     = -sum_{j=1~+inf}(p^j / j)
            # truncated after the fifth term.
            normalized_loss = loss - positive * tf.log(normalizer)
            normalizer_power = normalizer
            for order, moment in enumerate(negative_moments, 1):
                if order > 1:
                    normalizer_power = normalizer_power * normalizer
                correction = moment * (normalizer_power - 1)
                if order > 1:
                    correction = correction / order
                normalized_loss = normalized_loss + correction
            # average normalized loss
            avg_loss = tf.truediv(normalized_loss, weight, name=prefix + "avg_loss")
            return tf.multiply(
                1.0 - tf.truediv(avg_loss, baseline_loss), 100, name=result_name)

        # Tracked stats and their update ops
        total_positive, update_total_positive = _track("total_positive", positive_weights)
        total_weight, update_total_weight = _track("total_weight", weights)
        total_prediction, update_total_prediction = _track(
            "total_prediction", weighted_predictions)
        total_moments = []
        update_total_moments = []
        for stat_name, weighted_moment in moment_stats:
            total_moment, update_total_moment = _track(stat_name, weighted_moment)
            total_moments.append(total_moment)
            update_total_moments.append(update_total_moment)
        total_loss, update_total_loss = _track("total_loss", weighted_logloss)

        # metric value retrieval subgraph: reads the accumulators as they currently are
        nrce_t = _nrce(
            total_positive, total_weight, total_prediction, total_moments, total_loss,
            prefix="", result_name="lolly_nrce")

        # metric update subgraph: built on the assign_add outputs, so evaluating it
        # accumulates the current batch before the NRCE is computed
        update_op = _nrce(
            update_total_positive, update_total_weight, update_total_prediction,
            update_total_moments, update_total_loss,
            prefix="update_", result_name="update_op")

        if metrics_collections:
            tf.add_to_collections(metrics_collections, nrce_t)

        if updates_collections:
            tf.add_to_collections(updates_collections, update_op)

        return nrce_t, update_op