in spotify_confidence/analysis/bayesian/bayesian_models.py [0:0]
def _multiple_difference_joint_base(self, level_name, level_df, remaining_groups, groupby, level, absolute):
    """Compare ``level`` against the best of all other levels, draw by draw.

    ``level_df`` is grouped by ``remaining_groups`` and one vector of
    posterior samples is drawn per group via ``self._sample_posterior``.
    For every draw, the sample of ``level`` is compared with the largest
    sample among the other groups.

    Args:
        level_name: Forwarded unchanged to ``self._add_group_by_columns``.
        level_df: DataFrame holding the rows of every group to compare.
        remaining_groups: Column(s) used to split ``level_df`` into groups.
        groupby: Forwarded unchanged to ``self._add_group_by_columns``.
        level: Group key of the level under comparison; must be one of the
            keys produced by grouping on ``remaining_groups``.
        absolute: If truthy the difference is ``level - best_other``,
            otherwise the relative difference ``level / best_other - 1``.

    Returns:
        A tuple ``(difference_df, difference_posterior)``:
        ``difference_df`` is a one-row DataFrame with the mean difference,
        its lower/upper quantiles for ``self._interval_size``, the share of
        draws where ``level`` is >= every group, and the mean difference
        over the draws where it is not / is the best;
        ``difference_posterior`` is the array of per-draw differences.

    Raises:
        ValueError: From ``tuple.index`` when ``level`` is not a group key.
    """
    groups = level_df.groupby(remaining_groups)
    group_keys = tuple(groups.groups.keys())

    self._validate_levels(level_df, remaining_groups, level)

    # One vector of posterior draws per group, sampled in group-key order.
    sampled = [self._sample_posterior(groups.get_group(key)) for key in group_keys]

    target_row = group_keys.index(level)
    rival_rows = [row for row, key in enumerate(group_keys) if key != level]

    # Rows are groups, columns are draws.
    samples = np.vstack(sampled)
    target_samples = samples[target_row]

    # A draw counts as "best" when the target is >= every group in that draw
    # (the comparison with its own row is trivially true).
    is_best = (target_samples >= samples).all(axis=0)
    prob_ge_all = is_best.mean()

    # Per-draw maximum over all groups except the target.
    best_rival = samples[rival_rows].max(axis=0)

    difference_posterior = target_samples - best_rival if absolute else target_samples / best_rival - 1

    # Mean difference to the best other level over draws where the target is NOT the best.
    not_best = ~is_best
    potential_loss = difference_posterior[not_best].mean() if not_best.any() else 0

    # Mean difference to the best other level over draws where the target IS the best.
    potential_gain = difference_posterior[is_best].mean() if is_best.any() else 0

    mean_difference = difference_posterior.mean()

    # Central credible interval: equal tail mass on both sides.
    lower_q = (1.0 - self._interval_size) / 2
    upper_q = lower_q + self._interval_size
    difference_series = pd.Series(difference_posterior)
    ci_lower = difference_series.quantile(lower_q)
    ci_upper = difference_series.quantile(upper_q)

    columns = OrderedDict()
    columns["level"] = [str(level)]  # the only list-valued column; it fixes the frame at one row
    columns["absolute_difference"] = absolute
    columns["difference"] = mean_difference
    columns["ci_lower"] = ci_lower
    columns["ci_upper"] = ci_upper
    columns["P({} >= all)".format(level)] = prob_ge_all
    columns["{} potential loss".format(level)] = potential_loss
    columns["{} potential gain".format(level)] = potential_gain
    difference_df = pd.DataFrame(columns)

    self._add_group_by_columns(difference_df, groupby, level_name)

    return (difference_df, difference_posterior)