def example_data_binomial()

in spotify_confidence/examples.py [0:0]


def example_data_binomial():
    """
    Returns an output dataframe with categorical
    features (country and test variation), and ordinal features (date),
    as well as number of successes and total observations for each combination.

    One row is produced per (country, variation, date). ``total`` is drawn
    from a Poisson distribution around the group's expected daily volume and
    ``success`` from a binomial distribution with the group's success rate.
    Draws use the global ``np.random`` state, so seed it beforehand for
    reproducible output.

    Returns:
        pd.DataFrame: columns ``date``, ``country``, ``variation_name``,
        ``total`` and ``success``. Rows are ordered by country first, then
        variation, and each group keeps its own 0-based index.
    """
    countries = ["ca", "us"]
    dates = pd.date_range("2018-01-01", "2018-02-01")
    variation_names = ["test", "control", "test2"]

    # (success rate, expected daily observations) keyed by (variation, country).
    # Keying by name rather than by loop position keeps each group tied to its
    # intended parameters regardless of the order the groups are iterated in.
    group_params = {
        ("test", "ca"): (0.3, 50),
        ("test", "us"): (0.32, 80),
        ("control", "ca"): (0.24, 30),
        ("control", "us"): (0.22, 50),
        ("test2", "ca"): (0.25, 40),
        ("test2", "us"): (0.42, 50),
    }

    frames = []
    for country, variation in product(countries, variation_names):
        success_rate, n_observations = group_params[(variation, country)]
        totals = np.random.poisson(n_observations, size=len(dates))
        frames.append(
            pd.DataFrame(
                {
                    "date": dates,
                    "country": country,
                    "variation_name": variation,
                    "total": totals,
                    # One binomial draw per date, using that date's total.
                    "success": np.random.binomial(totals, success_rate),
                }
            )
        )

    # Single concat instead of growing a frame inside the loop.
    return pd.concat(frames, axis=0)