in spotify_confidence/analysis/frequentist/confidence_computers/z_test_linreg_computer.py [0:0]
def estimate_slope(df, **kwargs: Dict) -> DataFrame:
    """Compute slope coefficient(s) from summed columns and store them on ``df``.

    The column names are looked up in ``kwargs`` under the keys ``FEATURE``,
    ``FEATURE_SUMSQ``, ``FEATURE_CROSS``, ``NUMERATOR`` and ``DENOMINATOR``.
    Each of those columns is summed over all rows, and the sums are arranged
    into a ``(k + 1) x (k + 1)`` matrix and a ``(k + 1) x 1`` vector, where
    ``k`` is derived from the first entry of the ``FEATURE_SUMSQ`` column.
    NOTE(review): this looks like the normal equations of a least-squares fit
    with an intercept in position 0 -- confirm against the caller.

    Args:
        df: Frame holding the columns named in ``kwargs``. It is modified in
            place: the ``REGRESSION_PARAM`` column is added or overwritten.
        **kwargs: Mapping from the constants listed above to column names.

    Returns:
        ``df`` itself. It is returned untouched when ``kwargs[FEATURE]`` is
        not in ``df``; otherwise every row of ``REGRESSION_PARAM`` holds the
        same value: a scalar when there is a single coefficient, else the
        ``(k, 1)`` array of coefficients. The coefficients are all zero when
        the matrix inversion raises ``np.linalg.LinAlgError``.
    """
    feature_col = kwargs[FEATURE]
    if feature_col not in df:
        return df

    def _fold_add(values):
        # Pairwise ``+`` so that array-valued cells are summed elementwise.
        return reduce(lambda acc, item: acc + item, values)

    def _n_features(cell):
        if isinstance(cell, np.ndarray) and cell.size > 1:
            return cell.shape[0]
        return 1

    sumsq_col = df[kwargs[FEATURE_SUMSQ]]
    k = sumsq_col.apply(_n_features).iloc[0]
    size = k + 1

    # Left-hand matrix: entry [0, 0] is the denominator total, the first
    # row/column hold the feature totals, the remaining block the sum of squares.
    lhs = np.zeros((size, size))
    lhs[1:, 1:] = _fold_add(df[kwargs[FEATURE_SUMSQ]])
    lhs[0, 0] = _fold_add(df[kwargs[DENOMINATOR]])
    feature_total = _fold_add(df[feature_col])
    lhs[0, 1:] = feature_total
    lhs[1:, 0] = feature_total

    # Right-hand vector: numerator total on top, cross terms below.
    rhs = np.zeros((size, 1))
    rhs[0] = _fold_add(df[kwargs[NUMERATOR]])
    cross_total = _fold_add(df[kwargs[FEATURE_CROSS]])
    rhs[1:] = np.atleast_2d(cross_total).reshape(-1, 1)

    try:
        lhs_inverse = np.linalg.inv(lhs)
    except np.linalg.LinAlgError:
        # Inversion failed: fall back to all-zero coefficients.
        coefficients = np.zeros((size, 1))
    else:
        coefficients = np.matmul(lhs_inverse, rhs)

    # Drop entry 0 and keep only the k remaining coefficients.
    slope = coefficients[1:]
    if slope.size == 1:
        slope = slope.item()

    # Broadcast the same value onto every row of the frame.
    placeholder = Series(index=df.index, dtype=df[feature_col].dtype)
    df[REGRESSION_PARAM] = placeholder.apply(lambda _: slope)
    return df