def _multiple_difference_joint_base()

in spotify_confidence/analysis/bayesian/bayesian_models.py [0:0]


    def _multiple_difference_joint_base(self, level_name, level_df, remaining_groups, groupby, level, absolute):
        """Compare one level against the per-draw best of all other levels.

        ``level_df`` is grouped by ``remaining_groups``; a posterior sample is
        drawn for every group via ``self._sample_posterior`` and the samples
        are stacked row-wise. The row belonging to ``level`` is then compared,
        draw by draw, with the element-wise maximum of the remaining rows.

        Args:
            level_name: Forwarded unchanged to ``self._add_group_by_columns``.
            level_df: Frame to group; must support ``groupby(remaining_groups)``.
            remaining_groups: Grouping key(s) used to split ``level_df``.
            groupby: Forwarded unchanged to ``self._add_group_by_columns``.
            level: Group key of the level under comparison. It must be one of
                the keys produced by the grouping, otherwise the index lookup
                raises ``ValueError``.
            absolute: If truthy the difference is ``level - best_other``;
                otherwise it is the relative change ``level / best_other - 1``.

        Returns:
            A ``(difference_df, difference_posterior)`` tuple: a one-row
            summary frame (mean difference, interval bounds at
            ``self._interval_size``, probability of being >= every group, and
            the conditional means labelled "potential loss"/"potential gain"),
            and the per-draw difference array.

        NOTE(review): assumes ``self._sample_posterior`` returns equally long
        1-D sample arrays for every group - confirm against its definition.
        """
        groups = level_df.groupby(remaining_groups)
        group_keys = tuple(groups.groups.keys())

        self._validate_levels(level_df, remaining_groups, level)

        # Sample in group-key order so row i of the stacked matrix belongs to
        # group_keys[i].
        draws_per_group = []
        for key in group_keys:
            draws_per_group.append(self._sample_posterior(groups.get_group(key)))

        target_row = group_keys.index(level)
        rival_rows = [row for row, key in enumerate(group_keys) if key != level]

        draws = np.vstack(draws_per_group)
        target_draws = draws[target_row]

        # A draw counts as "best" when the target is >= every group's value in
        # that same draw (the comparison with itself is trivially true).
        is_best = (target_draws >= draws).all(axis=0)
        p_ge_all = is_best.mean()

        # Per-draw maximum over all groups other than the target.
        best_rival_draws = draws[rival_rows].max(axis=0)

        difference_posterior = (
            target_draws - best_rival_draws if absolute else target_draws / best_rival_draws - 1
        )

        # Mean difference over the draws where the target is NOT the best;
        # falls back to 0 when there are no such draws.
        not_best = ~is_best
        expected_loss = difference_posterior[not_best].mean() if not_best.any() else 0

        # Mean difference over the draws where the target IS the best;
        # falls back to 0 when there are no such draws.
        expected_gain = difference_posterior[is_best].mean() if is_best.any() else 0

        expectation = difference_posterior.mean()

        # Central interval: cut an equal tail off each side of the distribution.
        lower_q = (1.0 - self._interval_size) / 2
        difference_series = pd.Series(difference_posterior)
        ci_l_expectation = difference_series.quantile(lower_q)
        ci_u_expectation = difference_series.quantile(lower_q + self._interval_size)

        summary = OrderedDict()
        summary["level"] = [str(level)]
        summary["absolute_difference"] = absolute
        summary["difference"] = expectation
        summary["ci_lower"] = ci_l_expectation
        summary["ci_upper"] = ci_u_expectation
        summary["P({} >= all)".format(level)] = p_ge_all
        summary["{} potential loss".format(level)] = expected_loss
        summary["{} potential gain".format(level)] = expected_gain

        difference_df = pd.DataFrame(summary)
        self._add_group_by_columns(difference_df, groupby, level_name)

        return (difference_df, difference_posterior)