def _categorical_summary_plot()

in spotify_confidence/analysis/bayesian/bayesian_models.py [0:0]


    def _categorical_summary_plot(self, level_name, level_df, remaining_groups, groupby):
        """Build an area chart of the per-group density curves for one level.

        ``level_df`` is split by ``remaining_groups`` (falling back to
        ``groupby`` when ``remaining_groups`` is empty). For every group the
        curve returned by ``self._beta_pdf`` is drawn as an unstacked area,
        and a dashed vertical callout plus a text label marks the mean
        ``alpha / (alpha + beta)`` of the parameters returned by
        ``self._posterior_parameters``.

        Args:
            level_name: Value shown in the subtitle next to ``groupby``.
            level_df: DataFrame holding the rows for this level.
            remaining_groups: Column name(s) used to split ``level_df``.
                When falsy, ``groupby`` is used instead.
            groupby: Column name(s) shown in the subtitle; also the fallback
                grouping. When falsy the subtitle is left empty.

        Returns:
            The configured ``chartify.Chart``.
        """
        if not remaining_groups:
            remaining_groups = groupby

        grouped_df = level_df.groupby(de_list_if_length_one(remaining_groups))

        # Collect the per-group frames and concatenate once; calling
        # pd.concat inside the loop re-copies all accumulated rows each time.
        # The labels are recorded in iteration order because the callouts
        # further down pull palette colors in that same order.
        group_labels = []
        frames = []
        for group_name, group_df in grouped_df:
            label = str(group_name)
            beta_dist = self._beta_pdf(group_df)
            beta_dist["group"] = label
            group_labels.append(label)
            frames.append(beta_dist)
        distributions = pd.concat(frames, axis=0) if frames else pd.DataFrame()

        # Filter out the long tails of the distributions: keep an x value
        # only if at least one group's density there exceeds 0.01.
        peak_density = distributions.groupby("x")["y"].max()
        visible_xs = peak_density[peak_density > 0.01].index
        distributions = distributions[distributions["x"].isin(visible_xs)]

        # Remove legend if only one color
        color_column = "group" if len(group_labels) > 1 else None

        ratio_label = f"{self._numerator_column} / {self._denominator_column}"

        ch = chartify.Chart()
        ch.plot.area(
            distributions,
            "x",
            "y",
            color_column=color_column,
            stacked=False,
            color_order=group_labels,
        )
        ch.set_title(f"Estimate of {ratio_label}")

        if groupby:
            ch.set_subtitle(f"{groupby}: {level_name}")
        else:
            ch.set_subtitle("")
        ch.axes.set_xaxis_label(ratio_label)
        ch.axes.set_yaxis_label("Probability Density")
        ch.set_source_label("")
        ch.axes.set_yaxis_range(0)
        axis_format = axis_format_precision(distributions["x"].min(), distributions["x"].max(), absolute=True)
        ch.axes.set_xaxis_tick_format(axis_format)

        # Restart the palette so the callout lines below pick up the same
        # colors, in the same order, as the areas plotted above.
        ch.style.color_palette.reset_palette_order()

        # Plot callouts for the means
        for _, group_df in grouped_df:
            posterior_alpha, posterior_beta = self._posterior_parameters(group_df)
            posterior_mean = posterior_alpha / (posterior_alpha + posterior_beta)
            # Place the text label at the height of the curve at the mean.
            density = beta.pdf(posterior_mean, posterior_alpha, posterior_beta)
            ch.callout.line(
                posterior_mean,
                orientation="height",
                line_color=ch.style.color_palette.next_color(),
                line_dash="dashed",
            )
            ch.callout.text(
                f"{posterior_mean:{format_str_precision(posterior_mean, absolute=False)}}", posterior_mean, density
            )

        ch.axes.hide_yaxis()
        if color_column:
            ch.set_legend_location("outside_bottom")
        return ch