in spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py [0:0]
def _powered_effect_and_required_sample_size_from_difference_df(df: DataFrame, **kwargs: Dict) -> DataFrame:
    """Add powered-effect and required-sample-size columns to a difference frame.

    The columns ``POWERED_EFFECT``, ``REQUIRED_SAMPLE_SIZE`` and
    ``REQUIRED_SAMPLE_SIZE_METRIC`` are written onto ``df`` in place and the
    same frame is returned. The method, alpha, power, preference and NIM
    settings are read from the first row only.

    Args:
        df: Difference frame. Must contain the column named by
            ``kwargs[METHOD]`` and, for the Z-test methods, ``ADJUSTED_POWER``,
            ``ADJUSTED_ALPHA_POWER_SAMPLE_SIZE``, ``PREFERENCE_TEST``, ``NIM``,
            the ``SFX1``/``SFX2`` suffixed denominator columns, the ``SFX1``
            suffixed numerator and sum-of-squares columns, and
            ``current_total_<denominator>``.
        **kwargs: Column-name settings. ``METHOD``, ``MDE``, ``DENOMINATOR``,
            ``NUMERATOR`` and ``NUMERATOR_SUM_OF_SQUARES`` are read here.

    Returns:
        ``df`` with the three result columns set. They are all ``None`` when
        the method is not a Z-test variant or any adjusted power is missing.
        The two sample-size columns are ``None`` when the hypothesis columns
        are absent or any alternative hypothesis is missing.

    Raises:
        ValueError: If the method is not ``ZTEST``/``ZTESTLINREG`` but the
            column named by ``kwargs[MDE]`` is present in ``df``.
    """
    method = df[kwargs[METHOD]].values[0]
    is_z_test = method in [ZTEST, ZTESTLINREG]

    if not is_z_test and kwargs[MDE] in df:
        raise ValueError("Minimum detectable effects only supported for ZTest.")

    if not is_z_test or df[ADJUSTED_POWER].isna().any():
        # Nothing can be computed: fill the output columns with placeholders.
        df[POWERED_EFFECT] = None
        df[REQUIRED_SAMPLE_SIZE] = None
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
        return df

    computer = confidence_computers[method]
    denominator = kwargs[DENOMINATOR]
    current_total = df[f"current_total_{denominator}"]

    n1, n2 = df[denominator + SFX1], df[denominator + SFX2]
    kappa = n1 / n2
    # Passed on as `binary`: true when the sum of squares equals the plain sum
    # in every row of the first group (as is the case for 0/1 observations).
    binary = (df[kwargs[NUMERATOR_SUM_OF_SQUARES] + SFX1] == df[kwargs[NUMERATOR] + SFX1]).all()
    proportion_of_total = (n1 + n2) / current_total

    # A two-sided test splits alpha over both tails; otherwise alpha is used whole.
    tails = 2 if df[PREFERENCE_TEST].values[0] == TWO_SIDED else 1
    z_alpha = st.norm.ppf(1 - df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0] / tails)
    z_power = st.norm.ppf(df[ADJUSTED_POWER].values[0])

    # A non-missing NIM in the first row marks a non-inferiority test. Using
    # `notna` covers None/NaN as before and also defines the flag for values
    # that are not Python floats (e.g. integer NIMs), which previously left
    # `non_inferiority` unbound.
    non_inferiority = bool(df[NIM].notna().values[0])

    df[POWERED_EFFECT] = computer.powered_effect(
        df=df.assign(kappa=kappa)
        .assign(current_number_of_units=current_total)
        .assign(proportion_of_total=proportion_of_total),
        z_alpha=z_alpha,
        z_power=z_power,
        binary=binary,
        non_inferiority=non_inferiority,
        avg_column=ORIGINAL_POINT_ESTIMATE + SFX1,
        var_column=VARIANCE + SFX1,
    )

    has_hypotheses = (
        ALTERNATIVE_HYPOTHESIS in df and NULL_HYPOTHESIS in df and df[ALTERNATIVE_HYPOTHESIS].notna().all()
    )
    if not has_hypotheses:
        df[REQUIRED_SAMPLE_SIZE] = None
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
        return df

    # Same computation twice: once with a proportion of 1 and once with the
    # share of the current total that the two compared groups make up.
    for target_column, proportion in (
        (REQUIRED_SAMPLE_SIZE, 1),
        (REQUIRED_SAMPLE_SIZE_METRIC, proportion_of_total),
    ):
        df[target_column] = computer.required_sample_size(
            proportion_of_total=proportion,
            z_alpha=z_alpha,
            z_power=z_power,
            binary=binary,
            non_inferiority=non_inferiority,
            hypothetical_effect=df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS],
            control_avg=df[ORIGINAL_POINT_ESTIMATE + SFX1],
            control_var=df[VARIANCE + SFX1],
            kappa=kappa,
        )
    return df