in spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py [0:0]
def _sufficient_statistics(self) -> DataFrame:
    """Return the per-group statistics frame, building it on first access.

    The result is cached in ``self._sufficient``; subsequent calls return the
    cached frame without recomputing anything.

    On the first call ``self._df`` is grouped by the method and metric
    columns (whichever of the two are not ``None``). Each group is extended
    with the ``POINT_ESTIMATE``, ``ORIGINAL_POINT_ESTIMATE``, ``VARIANCE`` and
    ``ORIGINAL_VARIANCE`` columns, in that order, and is then handed to the
    ``add_point_estimate_ci`` function of the group's confidence computer.
    The combined result is passed through ``reset_named_indices``.

    The computer for a group is looked up in ``confidence_computers`` using
    the first value of the group's method column. For the two ``ORIGINAL_*``
    columns, groups whose method is ``ZTESTLINREG`` use the ``ZTEST``
    computer instead.
    """
    if self._sufficient is not None:
        return self._sufficient

    # Column-name arguments forwarded unchanged to every computer call.
    column_args = {
        NUMERATOR: self._numerator,
        NUMERATOR_SUM_OF_SQUARES: self._numerator_sumsq,
        DENOMINATOR: self._denominator,
        BOOTSTRAPS: self._bootstrap_samples_column,
        INTERVAL_SIZE: self._interval_size,
        FEATURE: self._feature,
        FEATURE_SUMSQ: self._feature_ssq,
        FEATURE_CROSS: self._feature_cross,
    }

    def computer_for(frame, original=False):
        # The method is read from the first row of the frame's method column.
        method = frame[self._method_column].values[0]
        if original and method == ZTESTLINREG:
            return confidence_computers[ZTEST]
        return confidence_computers[method]

    def point_estimate(frame):
        return computer_for(frame).point_estimate(frame, **column_args)

    def original_point_estimate(frame):
        return computer_for(frame, original=True).point_estimate(frame, **column_args)

    def variance(frame):
        return computer_for(frame).variance(frame, **column_args)

    def original_variance(frame):
        return computer_for(frame, original=True).variance(frame, **column_args)

    # Order matters: each callable receives the frame produced by the
    # previous assign, so later steps can see the earlier columns.
    derived_columns = (
        (POINT_ESTIMATE, point_estimate),
        (ORIGINAL_POINT_ESTIMATE, original_point_estimate),
        (VARIANCE, variance),
        (ORIGINAL_VARIANCE, original_variance),
    )

    def enrich(group):
        enriched = group
        for column_name, compute in derived_columns:
            # Pass the callable itself so assign evaluates it on its own
            # working copy of the frame.
            enriched = enriched.assign(**{column_name: compute})
        return computer_for(enriched).add_point_estimate_ci(enriched, **column_args)

    group_columns = [
        column
        for column in (self._method_column, self._metric_column)
        if column is not None
    ]
    grouped = self._df.groupby(group_columns, sort=False, group_keys=True)
    self._sufficient = reset_named_indices(grouped.apply(enrich))
    return self._sufficient