in spotify_confidence/analysis/frequentist/confidence_computers/sample_size_computer.py [0:0]
def _sample_size_from_summary_df(df: DataFrame, **kwargs: Dict) -> DataFrame:
    """Write the required sample size into ``df[REQUIRED_SAMPLE_SIZE_METRIC]``.

    When any ``ADJUSTED_POWER`` value in ``df`` is missing, the column is set
    to ``None``. Otherwise a sample size is requested from the ``ZTEST``
    computer once per treatment weight, and the largest value seen across all
    rows and all treatment groups is stored. If no sample size was computed
    (no treatment weights, or the hypothesis columns are absent/incomplete),
    the column is set to ``None`` as well.

    Args:
        df: Summary frame; it is modified in place. Scalar settings (binary
            flag, adjusted alpha, adjusted power, preference, NIM) are read
            from the first row only.
        **kwargs: Must contain ``TREATMENT_WEIGHTS`` (a sequence whose first
            entry is the control weight, followed by one weight per treatment
            group) and ``IS_BINARY`` (the name of the column in ``df`` holding
            the binary flag).

    Returns:
        The same ``df`` object, with ``REQUIRED_SAMPLE_SIZE_METRIC`` populated.
    """
    # Without a usable power value there is nothing to compute.
    if (df[ADJUSTED_POWER].isna()).any():
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
        return df

    weights = kwargs[TREATMENT_WEIGHTS]
    control_weight = weights[0]
    other_weights = weights[1:]

    is_binary = df[kwargs[IS_BINARY]].values[0]

    # A two-sided test splits alpha over both tails; a one-sided test does not.
    alpha = df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0]
    if df[PREFERENCE_TEST].values[0] == TWO_SIDED:
        tail_count = 2
    else:
        tail_count = 1
    z_alpha = st.norm.ppf(1 - alpha / tail_count)
    z_power = st.norm.ppf(df[ADJUSTED_POWER].values[0])

    non_inferiority = is_non_inferiority(df[NIM].values[0])

    # 0 doubles as the "nothing computed" sentinel checked after the loop.
    largest = 0
    for weight in other_weights:
        control_to_treatment = control_weight / weight
        share_of_total = (control_weight + weight) / sum(weights)

        hypotheses_available = (
            ALTERNATIVE_HYPOTHESIS in df and NULL_HYPOTHESIS in df and (df[ALTERNATIVE_HYPOTHESIS].notna()).all()
        )
        if not hypotheses_available:
            continue

        candidate = confidence_computers[ZTEST].required_sample_size(
            proportion_of_total=share_of_total,
            z_alpha=z_alpha,
            z_power=z_power,
            binary=is_binary,
            non_inferiority=non_inferiority,
            hypothetical_effect=df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS],
            control_avg=df[POINT_ESTIMATE],
            control_var=df[VARIANCE],
            kappa=control_to_treatment,
        )
        # Keep the argument order (candidate first) so NaN handling by the
        # built-in max() stays exactly as before.
        largest = max(candidate.max(), largest)

    if largest == 0:
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
    else:
        df[REQUIRED_SAMPLE_SIZE_METRIC] = largest
    return df