def _sample_size_from_summary

def _sample_size_from_summary_df()

in spotify_confidence/analysis/frequentist/confidence_computers/sample_size_computer.py [0:0]

32 lines of code
8 McCabe index (conditional complexity)


def _sample_size_from_summary_df(df: DataFrame, **kwargs: Dict) -> DataFrame:
    """Compute the required sample size for a summary frame and store it in ``df``.

    The result is written to the ``REQUIRED_SAMPLE_SIZE_METRIC`` column. It is
    the largest value returned by the Z-test ``required_sample_size``
    computation over every control/treatment pairing derived from
    ``kwargs[TREATMENT_WEIGHTS]``.

    The column is set to ``None`` when:
      * any ``ADJUSTED_POWER`` value is missing,
      * the ``ALTERNATIVE_HYPOTHESIS`` or ``NULL_HYPOTHESIS`` column is absent,
        or ``ALTERNATIVE_HYPOTHESIS`` contains a missing value,
      * there are no treatment weights, or the largest computed size equals 0.

    Args:
        df: Summary data. Alpha, power, test preference, NIM and the binary
            flag are read from the first row only. The frame is modified in
            place.
        **kwargs: Must contain ``TREATMENT_WEIGHTS`` (control weight first,
            followed by one weight per treatment group) and ``IS_BINARY``
            (the name of the column holding the binary flag).

    Returns:
        The same ``df`` object, with ``REQUIRED_SAMPLE_SIZE_METRIC`` assigned.
    """
    if df[ADJUSTED_POWER].isna().any():
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
        return df

    all_weights = kwargs[TREATMENT_WEIGHTS]
    control_weight, treatment_weights = all_weights[0], all_weights[1:]

    binary = df[kwargs[IS_BINARY]].values[0]
    # For a two-sided test the adjusted alpha is halved before taking the quantile.
    alpha_divisor = 2 if df[PREFERENCE_TEST].values[0] == TWO_SIDED else 1
    z_alpha = st.norm.ppf(1 - df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0] / alpha_divisor)
    z_power = st.norm.ppf(df[ADJUSTED_POWER].values[0])
    non_inferiority = is_non_inferiority(df[NIM].values[0])

    # Whether the hypotheses are usable does not depend on the treatment group,
    # so evaluate it once instead of rescanning the column on every iteration.
    has_hypotheses = (
        ALTERNATIVE_HYPOTHESIS in df
        and NULL_HYPOTHESIS in df
        and df[ALTERNATIVE_HYPOTHESIS].notna().all()
    )

    max_sample_size = 0
    if has_hypotheses:
        # Loop-invariant inputs shared by every control/treatment pairing.
        hypothetical_effect = df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS]
        total_weight = sum(all_weights)

        for treatment_weight in treatment_weights:
            this_sample_size = confidence_computers[ZTEST].required_sample_size(
                proportion_of_total=(control_weight + treatment_weight) / total_weight,
                z_alpha=z_alpha,
                z_power=z_power,
                binary=binary,
                non_inferiority=non_inferiority,
                hypothetical_effect=hypothetical_effect,
                control_avg=df[POINT_ESTIMATE],
                control_var=df[VARIANCE],
                kappa=control_weight / treatment_weight,
            )
            # Keep the largest requirement seen across all rows and treatments.
            max_sample_size = max(this_sample_size.max(), max_sample_size)

    # A running maximum of 0 means nothing was computed (or nothing positive).
    df[REQUIRED_SAMPLE_SIZE_METRIC] = None if max_sample_size == 0 else max_sample_size
    return df