def example_data()

in chartify/examples.py [0:0]


def example_data():
    """Build the synthetic fruit-sales data set used in Chartify examples.

    NumPy's global random state is reseeded with a fixed seed on every call,
    so repeated calls return identical data. Note that this also resets the
    global ``np.random`` stream for the caller.

    Returns:
        pandas.DataFrame: 1000 rows with the columns

        - ``date``: a day in 2017, sampled with per-day weights drawn from a
          Dirichlet distribution.
        - ``country``: one of ``US``, ``GB``, ``CA``, ``JP``, ``BR``.
        - ``fruit``: one of ``Orange``, ``Apple``, ``Banana``, ``Grape``,
          sampled with country-specific probabilities.
        - ``unit_price``: the fruit's base price scaled by
          ``1 + N(0, 0.1)`` noise.
        - ``quantity``: ``1 + Poisson(max(3 - 1.25 * unit_price, 0))``.
        - ``total_price``: ``unit_price * quantity``.
    """
    import numpy as np
    import pandas as pd

    # NOTE: the order of the np.random calls below determines the generated
    # values; reordering them changes the data set.
    np.random.seed(1)
    n_samples = 1000

    countries = ["US", "GB", "CA", "JP", "BR"]
    country_p = [0.35, 0.17, 0.23, 0.15, 0.1]
    fruits = ["Orange", "Apple", "Banana", "Grape"]
    base_prices = [0.5, 1.0, 0.25, 2.0]
    price_by_fruit = dict(zip(fruits, base_prices))

    data = pd.DataFrame()

    # One weight per day of 2017 (365 days, concentration parameters 1..365).
    days = pd.date_range("2017-01-01", "2017-12-31")
    day_p = np.random.dirichlet(list(range(1, 366)))
    data["date"] = np.random.choice(days, p=day_p, size=n_samples)

    # Each country gets its own fruit preference distribution.
    fruit_p_by_country = {
        country: np.random.dirichlet([len(fruits)] * len(fruits))
        for country in countries
    }
    data["country"] = np.random.choice(countries, p=country_p, size=n_samples)

    def pick_fruit(country):
        return np.random.choice(fruits, p=fruit_p_by_country[country])

    data["fruit"] = data["country"].apply(pick_fruit)

    noise = 1.0 + np.random.normal(0, 0.1, size=n_samples)
    data["unit_price"] = data["fruit"].map(price_by_fruit) * noise

    def pick_quantity(unit_price):
        # The Poisson rate shrinks as the price rises and is clamped at zero;
        # the +1 guarantees at least one unit, so no further clamp is needed.
        rate = max(3.0 - unit_price * 1.25, 0)
        return np.random.poisson(rate) + 1

    data["quantity"] = data["unit_price"].apply(pick_quantity)
    data["total_price"] = data["unit_price"] * data["quantity"]
    return data