spotify_confidence/analysis/frequentist/confidence_computers/bootstrap_computer.py (40 lines of code) (raw):

from typing import Tuple, Dict import numpy as np from pandas import DataFrame, Series from spotify_confidence.analysis.constants import CI_LOWER, CI_UPPER, SFX1, SFX2, BOOTSTRAPS, INTERVAL_SIZE def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float: bootstrap_samples = kwargs[BOOTSTRAPS] return df[bootstrap_samples].map(lambda a: a.mean()) def variance(df: Series, **kwargs: Dict[str, str]) -> float: bootstrap_samples = kwargs[BOOTSTRAPS] variance = df[bootstrap_samples].map(lambda a: a.var()) if (variance < 0).any(): raise ValueError("Computed variance is negative. " "Please check your inputs.") return variance def std_err(row: Series, **kwargs: Dict[str, str]) -> float: return None def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series: bootstrap_samples = kwargs[BOOTSTRAPS] interval_size = kwargs[INTERVAL_SIZE] df[CI_LOWER] = df[bootstrap_samples].map(lambda a: np.percentile(a, 100 * (1 - interval_size) / 2)) df[CI_UPPER] = df[bootstrap_samples].map(lambda a: np.percentile(a, 100 * (1 - (1 - interval_size) / 2))) return df def p_value(row, **kwargs: Dict[str, str]) -> float: return -1 def ci(df, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Series, Series]: bootstrap_samples = kwargs[BOOTSTRAPS] lower = df.apply( lambda row: np.percentile( row[bootstrap_samples + SFX2] - row[bootstrap_samples + SFX1], 100 * row[alpha_column] / 2 ), axis=1, ) upper = df.apply( lambda row: np.percentile( row[bootstrap_samples + SFX2] - row[bootstrap_samples + SFX1], 100 * (1 - row[alpha_column] / 2) ), axis=1, ) return lower, upper def achieved_power(df: DataFrame, mde: float, alpha: float) -> DataFrame: return None