in spotify_confidence/examples.py [0:0]
def example_data_binomial():
    """Return a randomly generated example dataframe of binomial outcomes.

    The frame has one row per (country, variation, date) combination with
    the columns:

    * ``date`` -- ordinal feature, every day from 2018-01-01 through
      2018-02-01 (inclusive).
    * ``country`` -- categorical feature, ``"ca"`` or ``"us"``.
    * ``variation_name`` -- categorical feature, ``"test"``, ``"control"``
      or ``"test2"``.
    * ``total`` -- number of observations, drawn from a Poisson
      distribution whose mean depends on the combination.
    * ``success`` -- number of successes, drawn from a binomial
      distribution with ``total`` trials and a combination-specific rate.

    Rows are ordered by country, then variation, then date. The index is
    not reset after concatenation, so every combination's rows carry the
    labels ``0 .. len(dates) - 1`` and index labels repeat.

    Values come from NumPy's global random state; call ``np.random.seed``
    beforehand to get reproducible output.
    """
    countries = ["ca", "us"]
    dates = pd.date_range("2018-01-01", "2018-02-01")
    variation_names = ["test", "control", "test2"]

    # Parameters are listed variation-major:
    # test ca, test us, control ca, control us, test2 ca, test2 us
    success_rates = [0.3, 0.32, 0.24, 0.22, 0.25, 0.42]
    n_observations = [50, 80, 30, 50, 40, 50]

    # Key the parameters by (variation, country) so the lookup below cannot
    # drift out of sync with the order in which the rows are generated.
    params = dict(
        zip(
            product(variation_names, countries),
            zip(success_rates, n_observations),
        )
    )

    frames = []
    for country, variation in product(countries, variation_names):
        rate, mean_total = params[(variation, country)]
        totals = np.random.poisson(mean_total, size=len(dates))
        # One vectorized draw: each day's success count uses that day's
        # total as the number of trials.
        successes = np.random.binomial(totals, rate)
        frames.append(
            pd.DataFrame(
                {
                    "date": dates,
                    "country": country,
                    "variation_name": variation,
                    "total": totals,
                    "success": successes,
                }
            )
        )

    # Concatenate once instead of growing a frame inside the loop, which
    # would re-copy all previously accumulated rows on every iteration.
    return pd.concat(frames, axis=0)