spotify_confidence/analysis/frequentist/nims_and_mdes.py (83 lines of code) (raw):

from numpy import isnan
from pandas import DataFrame

from spotify_confidence.analysis.confidence_utils import listify
from spotify_confidence.analysis.constants import (
    NIM_TYPE,
    NIM_COLUMN_DEFAULT,
    PREFERRED_DIRECTION_COLUMN_DEFAULT,
    ORIGINAL_POINT_ESTIMATE,
    TWO_SIDED,
    INCREASE_PREFFERED,
    DECREASE_PREFFERED,
    NIM,
    PREFERENCE,
    NULL_HYPOTHESIS,
    ALTERNATIVE_HYPOTHESIS,
)


def add_nim_input_columns_from_tuple_or_dict(df, nims: NIM_TYPE, mde_column: str) -> DataFrame:
    """Add the default NIM and preferred-direction input columns to ``df``.

    The shape of ``nims`` selects how the two columns are filled:

    * tuple ``(value, direction)``: both entries are broadcast to every row.
    * dict ``{index_value: (value, direction)}``: each row looks up its own
      index value; rows whose index value is not a key get a missing value.
    * ``None`` or any other falsy value: the NIM column is set to ``None``.
      An existing preferred-direction column is kept only when it is already
      present in ``df`` and ``mde_column`` is not ``None``; otherwise it is
      set to ``None``.
    * anything else: ``df`` is returned unchanged.

    Args:
        df: Data frame to extend. It is not modified in place; ``assign``
            returns a new frame.
        nims: Tuple, dict, or falsy value as described above.
        mde_column: Name of the MDE column, or ``None`` when there is none.

    Returns:
        A data frame carrying ``NIM_COLUMN_DEFAULT`` and
        ``PREFERRED_DIRECTION_COLUMN_DEFAULT``, or ``df`` itself when ``nims``
        has an unrecognised, truthy type.
    """
    # isinstance (rather than an exact type comparison) so that tuple/dict
    # subclasses such as namedtuple or OrderedDict are honoured instead of
    # silently falling through to the "return df unchanged" branch.
    if isinstance(nims, tuple):
        return df.assign(**{NIM_COLUMN_DEFAULT: nims[0]}).assign(**{PREFERRED_DIRECTION_COLUMN_DEFAULT: nims[1]})

    if isinstance(nims, dict):
        nim_values = {key: value[0] for key, value in nims.items()}
        nim_preferences = {key: value[1] for key, value in nims.items()}
        return df.assign(**{NIM_COLUMN_DEFAULT: lambda frame: frame.index.to_series().map(nim_values)}).assign(
            **{PREFERRED_DIRECTION_COLUMN_DEFAULT: lambda frame: frame.index.to_series().map(nim_preferences)}
        )

    if not nims:
        keep_existing_direction = PREFERRED_DIRECTION_COLUMN_DEFAULT in df and mde_column is not None
        return df.assign(**{NIM_COLUMN_DEFAULT: None}).assign(
            **{
                PREFERRED_DIRECTION_COLUMN_DEFAULT: (
                    df[PREFERRED_DIRECTION_COLUMN_DEFAULT] if keep_existing_direction else None
                )
            }
        )

    return df


def add_nims_and_mdes(
    df: DataFrame,
    mde_column: str,
    nim_column: str,
    preferred_direction_column: str,
) -> DataFrame:
    """Derive the NIM, preference and hypothesis columns for every row of ``df``.

    Rows are grouped by ``(nim_column, preferred_direction_column[, mde_column])``
    and each group gets four columns assigned: ``NIM``, ``PREFERENCE``,
    ``NULL_HYPOTHESIS`` and ``ALTERNATIVE_HYPOTHESIS``.

    Args:
        df: Input frame; must contain ``nim_column``,
            ``preferred_direction_column``, ``ORIGINAL_POINT_ESTIMATE`` and,
            when given, ``mde_column``.
        mde_column: Name of the MDE column, or ``None`` when there is none.
        nim_column: Name of the column holding the NIM values.
        preferred_direction_column: Name of the column holding the preferred
            direction (``INCREASE_PREFFERED``, ``DECREASE_PREFFERED``,
            ``None`` or NaN).

    Returns:
        A frame with the four derived columns added. Named index levels of
        ``df`` are restored as the index; otherwise a fresh default index is
        used.

    Raises:
        ValueError: If a group's preferred direction is a string other than
            ``INCREASE_PREFFERED`` / ``DECREASE_PREFFERED`` (case-insensitive).
    """

    def _set_nims_and_mdes(grp: DataFrame) -> DataFrame:
        """Compute the derived columns for one group of rows."""
        nim = grp[nim_column].astype(float)
        # The preference is a grouping key, so the first value stands for the
        # whole group.
        input_preference = grp[preferred_direction_column].values[0]
        mde = None if mde_column is None else grp[mde_column]

        nim_is_na = nim.isna().all()
        mde_is_na = True if mde is None else mde.isna().all()

        # isinstance also matches numpy.float64 (a subclass of float), which
        # is what `.values[0]` yields when the preference column is an all-NaN
        # float64 column. An exact `type(...) is float` test misses that case
        # and then crashes on `.lower()` below.
        preference_is_missing = input_preference is None or (
            isinstance(input_preference, float) and isnan(input_preference)
        )

        if preference_is_missing:
            signed_nim = 0.0 if nim_is_na else nim * grp[ORIGINAL_POINT_ESTIMATE]
            preference = TWO_SIDED
            signed_mde = None if mde_is_na else mde * grp[ORIGINAL_POINT_ESTIMATE]
        elif input_preference.lower() == INCREASE_PREFFERED:
            # Increase preferred: the NIM is negated relative to the point estimate.
            signed_nim = 0.0 if nim_is_na else -nim * grp[ORIGINAL_POINT_ESTIMATE]
            preference = "larger"
            signed_mde = None if mde_is_na else mde * grp[ORIGINAL_POINT_ESTIMATE]
        elif input_preference.lower() == DECREASE_PREFFERED:
            # Decrease preferred: the MDE is negated relative to the point estimate.
            signed_nim = 0.0 if nim_is_na else nim * grp[ORIGINAL_POINT_ESTIMATE]
            preference = "smaller"
            signed_mde = None if mde_is_na else -mde * grp[ORIGINAL_POINT_ESTIMATE]
        else:
            raise ValueError(f"{input_preference.lower()} not in " f"{[INCREASE_PREFFERED, DECREASE_PREFFERED]}")

        return (
            grp.assign(**{NIM: nim})
            .assign(**{PREFERENCE: preference})
            .assign(**{NULL_HYPOTHESIS: signed_nim})
            # The signed MDE is only used as the alternative when no NIM is set.
            .assign(**{ALTERNATIVE_HYPOTHESIS: signed_mde if nim_is_na else 0.0})
        )

    index_names = [name for name in df.index.names if name is not None]
    return (
        df.groupby(
            [nim_column, preferred_direction_column] + listify(mde_column),
            # Keep groups whose keys are NaN/None (e.g. no NIM or no preference).
            dropna=False,
            as_index=False,
            sort=False,
            group_keys=True,
        )
        .apply(_set_nims_and_mdes)
        # Move the original named index levels out to columns, drop the
        # remaining index (group keys), then restore the original index.
        .pipe(lambda df: df.reset_index(index_names))
        .reset_index(drop=True)
        .pipe(lambda df: df if index_names == [] else df.set_index(index_names))
    )