in spotify_confidence/analysis/frequentist/confidence_computers/z_test_computer.py [0:0]
def variance(df: DataFrame, **kwargs: Dict[str, str]) -> float:
    """Estimate the variance from pre-aggregated sums stored in ``df``.

    The names of the columns to read are taken from ``kwargs`` under the
    ``NUMERATOR``, ``DENOMINATOR`` and ``NUMERATOR_SUM_OF_SQUARES`` keys.

    Two estimators are used:

    * When the sum-of-squares column equals the numerator column for every
      row (the data is treated as binary), the variance is
      ``sum_of_squares / denominator - original_point_estimate ** 2``.
    * Otherwise the sample variance
      ``(sum_of_squares - numerator ** 2 / denominator) / (denominator - 1)``
      is used.

    Args:
        df: Frame holding the aggregated columns and the
            ``ORIGINAL_POINT_ESTIMATE`` column (the latter is only read in
            the binary case).
        **kwargs: Mapping from the keys listed above to column names.

    Returns:
        The element-wise result of the column arithmetic.
        NOTE(review): ``.any()`` is called on this value, so it is
        array-like rather than a scalar despite the ``float`` annotation;
        confirm and adjust the annotation.

    Raises:
        KeyError: If one of the required keys is missing from ``kwargs``.
        ValueError: If any computed variance is negative.
    """
    sum_col = kwargs[NUMERATOR]
    count_col = kwargs[DENOMINATOR]
    sumsq_col = kwargs[NUMERATOR_SUM_OF_SQUARES]

    sums_of_squares = df[sumsq_col]
    sums = df[sum_col]
    # sum(x^2) == sum(x) on every row is what this function treats as binary data.
    looks_binary = (sums_of_squares == sums).all()
    counts = df[count_col]

    if looks_binary:
        # For binary data this is p * (1 - p), with p the original point
        # estimate; per the original author it also acts as a robust
        # fallback when the data is not strictly binary.
        estimate = sums_of_squares / counts - df[ORIGINAL_POINT_ESTIMATE] ** 2
    else:
        # Sample variance with the (n - 1) correction, from the raw sums.
        centered_sumsq = sums_of_squares - np.power(sums, 2) / counts
        estimate = centered_sumsq / (counts - 1)

    has_negative = (estimate < 0).any()
    if has_negative:
        raise ValueError("Computed variance is negative. Please check your inputs.")
    return estimate