in spotify_confidence/analysis/bayesian/bayesian_models.py [0:0]
def _categorical_summary_plot(self, level_name, level_df, remaining_groups, groupby):
    """Plot one unstacked area curve per group, with a callout at each posterior mean.

    ``level_df`` is grouped by ``remaining_groups`` (or by ``groupby`` when
    ``remaining_groups`` is empty). For every group the curve returned by
    ``self._beta_pdf`` is drawn as an area, and a dashed vertical line plus a
    text label mark the posterior mean ``alpha / (alpha + beta)``.

    Args:
        level_name: Value shown in the subtitle next to ``groupby``.
        level_df: DataFrame holding the rows of the level being plotted.
        remaining_groups: Column name(s) used to split ``level_df``. When
            falsy, ``groupby`` is used instead.
        groupby: Fallback grouping column name(s). When truthy it is also
            rendered in the chart subtitle together with ``level_name``.

    Returns:
        The populated ``chartify.Chart``.
    """
    if not remaining_groups:
        remaining_groups = groupby
    grouped_df = level_df.groupby(de_list_if_length_one(remaining_groups))

    # Collect the per-group curves and concatenate once. Growing a DataFrame
    # with pd.concat inside the loop re-copies every previous row on each
    # iteration.
    group_curves = []
    for group_name, group_df in grouped_df:
        # NOTE(review): _beta_pdf is expected to return a frame with "x" and
        # "y" columns (both are read below) -- confirm against its definition.
        beta_dist = self._beta_pdf(group_df)
        beta_dist["group"] = str(group_name)
        group_curves.append(beta_dist)
    # pd.concat raises on an empty list; fall back to an empty frame so the
    # no-group case behaves exactly as it did before.
    distributions = pd.concat(group_curves, axis=0) if group_curves else pd.DataFrame()

    # Filter out the long tails of the distributions: keep only the x values
    # where at least one group's y exceeds 0.01.
    filtered_xs = distributions.groupby("x")["y"].max().reset_index().loc[lambda x: x["y"] > 0.01]
    distributions = distributions[distributions["x"].isin(filtered_xs["x"])]

    # Remove legend if only one color
    color_column = "group" if len(grouped_df) > 1 else None

    ch = chartify.Chart()
    ch.plot.area(
        distributions,
        "x",
        "y",
        color_column=color_column,
        stacked=False,
        # Same string form as the "group" column values assigned above.
        color_order=[str(key) for key in grouped_df.groups],
    )
    ch.set_title("Estimate of {} / {}".format(self._numerator_column, self._denominator_column))
    if groupby:
        ch.set_subtitle("{}: {}".format(groupby, level_name))
    else:
        ch.set_subtitle("")
    ch.axes.set_xaxis_label("{} / {}".format(self._numerator_column, self._denominator_column))
    ch.axes.set_yaxis_label("Probability Density")
    ch.set_source_label("")
    ch.axes.set_yaxis_range(0)
    axis_format = axis_format_precision(distributions["x"].min(), distributions["x"].max(), absolute=True)
    ch.axes.set_xaxis_tick_format(axis_format)

    # Restart the palette before drawing the callouts; presumably this makes
    # each mean line reuse the colour of its group's area -- verify against
    # chartify's palette behaviour.
    ch.style.color_palette.reset_palette_order()

    # Plot callouts for the means
    for group_name, group_df in grouped_df:
        posterior_alpha, posterior_beta = self._posterior_parameters(group_df)
        posterior_mean = posterior_alpha / (posterior_alpha + posterior_beta)
        # Density at the mean is used as the vertical position of the label.
        density = beta.pdf(posterior_mean, posterior_alpha, posterior_beta)
        ch.callout.line(
            posterior_mean,
            orientation="height",
            line_color=ch.style.color_palette.next_color(),
            line_dash="dashed",
        )
        ch.callout.text(
            f"{posterior_mean:{format_str_precision(posterior_mean, absolute=False)}}", posterior_mean, density
        )

    ch.axes.hide_yaxis()
    if color_column:
        ch.set_legend_location("outside_bottom")
    return ch