in spotify_confidence/analysis/frequentist/multiple_comparison.py [0:0]
def add_adjusted_p_and_is_significant(df: DataFrame, **kwargs: Dict) -> DataFrame:
    """Add multiple-comparison corrected columns to ``df``.

    Which correction is applied depends on ``kwargs[CORRECTION_METHOD]`` and on
    whether ``kwargs[FINAL_EXPECTED_SAMPLE_SIZE]`` is set:

    * Final expected sample size set (sequential test): only the
      Bonferroni-family methods are accepted. ``df`` is grouped and every
      group is passed to ``compute_sequential_adjusted_alpha``; the combined
      result returned by ``groupbyApplyParallel`` replaces ``df``.
    * Step-wise / FDR methods (optionally carrying a "spot-" prefix): the
      p-values are handed to ``multipletests`` and its adjusted p-values and
      rejection flags are stored.
    * Bonferroni-family methods: p-values are multiplied by the number of
      comparisons (capped at 1) and compared against ``alpha / n``.

    In the two non-sequential cases the columns ``ADJUSTED_ALPHA``,
    ``ADJUSTED_P`` and ``IS_SIGNIFICANT`` are written onto the passed-in
    ``df`` itself.

    Args:
        df: Frame holding at least the ``ALPHA`` and ``P_VALUE`` columns for
            the non-sequential paths. The sequential path additionally groups
            on ``kwargs[METHOD]``, "level_1" and "level_2".
        **kwargs: Must contain ``NUMBER_OF_COMPARISONS``,
            ``FINAL_EXPECTED_SAMPLE_SIZE`` and ``CORRECTION_METHOD``.
            ``ORDINAL_GROUP_COLUMN`` and ``METHOD`` are read on the sequential
            path, ``INTERVAL_SIZE`` on the ``multipletests`` path. All kwargs
            are forwarded to ``compute_sequential_adjusted_alpha``.

    Returns:
        The frame with the correction results.

    Raises:
        ValueError: If the correction method is not supported for sequential
            tests, or is not recognised at all.
    """
    n_comparisons = kwargs[NUMBER_OF_COMPARISONS]
    correction_method = kwargs[CORRECTION_METHOD]

    # Methods handled with a plain alpha / n adjustment; these are also the
    # only ones allowed for sequential tests.
    bonferroni_family = (
        BONFERRONI,
        BONFERRONI_ONLY_COUNT_TWOSIDED,
        BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
        SPOT_1,
    )
    # Methods delegated to `multipletests`.
    multipletests_family = (
        HOLM,
        HOMMEL,
        SIMES_HOCHBERG,
        SIDAK,
        HOLM_SIDAK,
        FDR_BH,
        FDR_BY,
        FDR_TSBH,
        FDR_TSBKY,
        SPOT_1_HOLM,
        SPOT_1_HOMMEL,
        SPOT_1_SIMES_HOCHBERG,
        SPOT_1_SIDAK,
        SPOT_1_HOLM_SIDAK,
        SPOT_1_FDR_BH,
        SPOT_1_FDR_BY,
        SPOT_1_FDR_TSBH,
        SPOT_1_FDR_TSBKY,
    )

    if kwargs[FINAL_EXPECTED_SAMPLE_SIZE] is not None:
        if correction_method not in bonferroni_family:
            raise ValueError(
                f"{correction_method} not supported for sequential tests. Use one of "
                f"{BONFERRONI}, {BONFERRONI_ONLY_COUNT_TWOSIDED}, "
                f"{BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY}, {SPOT_1}"
            )
        ordinal_group_column = kwargs[ORDINAL_GROUP_COLUMN]
        # Group on every named index level except the ordinal one. When no
        # ordinal column is configured the list is deliberately left empty.
        groups_except_ordinal = [
            column
            for column in df.index.names
            if ordinal_group_column is not None and column is not None and column != ordinal_group_column
        ]
        df = groupbyApplyParallel(
            df.groupby(groups_except_ordinal + [kwargs[METHOD], "level_1", "level_2"], as_index=False, sort=False),
            lambda df: compute_sequential_adjusted_alpha(df, **kwargs),
        )
    elif correction_method in multipletests_family:
        if correction_method.startswith("spot-"):
            # Drop the first seven characters (presumably the "spot-1-"
            # prefix - confirm against the constant definitions) so that only
            # the method name understood by `multipletests` remains.
            multipletests_method = correction_method[7:]
        else:
            multipletests_method = correction_method
        df[ADJUSTED_ALPHA] = df[ALPHA] / n_comparisons
        is_significant, adjusted_p, _, _ = multipletests(
            pvals=df[P_VALUE], alpha=1 - kwargs[INTERVAL_SIZE], method=multipletests_method
        )
        df[ADJUSTED_P] = adjusted_p
        df[IS_SIGNIFICANT] = is_significant
    elif correction_method in bonferroni_family:
        df[ADJUSTED_ALPHA] = df[ALPHA] / n_comparisons
        # Bonferroni-adjusted p-value, capped so it never exceeds 1.
        df[ADJUSTED_P] = df[P_VALUE].map(lambda p: min(p * n_comparisons, 1))
        df[IS_SIGNIFICANT] = df[P_VALUE] < df[ADJUSTED_ALPHA]
    else:
        raise ValueError("Can't figure out which correction method to use :(")

    return df