in sourcecode/scoring/pflip_model.py [0:0]
def _get_bucket_count_totals(self, notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
  """Count each note's ratings, bucketed by rater viewpoint and helpfulness level.

  Ratings whose rater factor is missing are ignored.  Every remaining rating is
  flagged as negative, neutral or positive (cut points -0.3 and 0.3 on the rater
  factor, neutral inclusive on both ends) and crossed with its helpfulness level,
  which yields 3 x 3 viewpoint/helpfulness buckets that are summed per note.

  Args:
    notes: pd.DataFrame used to specify the universe of all notes to include.
    ratings: pd.DataFrame containing all ratings for feature extraction.

  Returns:
    pd.DataFrame with one row per row of `notes`, containing the note ID column
    followed by the _BUCKET_COUNT_COLS count columns (presumably the nine buckets
    built here).  Notes with no usable ratings get 0 in every bucket, and the
    whole frame is cast to int64.
  """
  viewpoints = [_NEGATIVE, _NEUTRAL, _POSITIVE]
  levels = [c.helpfulValueTsv, c.somewhatHelpfulValueTsv, c.notHelpfulValueTsv]

  # Work on a private copy restricted to the columns needed, dropping ratings
  # from raters that have no factor assigned.
  per_rating = ratings[[c.noteIdKey, _RATER_FACTOR, c.helpfulnessLevelKey]].copy()
  per_rating = per_rating[per_rating[_RATER_FACTOR].notna()]

  # Boolean flag per viewpoint; exactly one is True for any non-missing factor.
  per_rating[_NEGATIVE] = per_rating[_RATER_FACTOR] < -0.3
  per_rating[_NEUTRAL] = per_rating[_RATER_FACTOR].between(-0.3, 0.3)
  per_rating[_POSITIVE] = per_rating[_RATER_FACTOR] > 0.3

  # Boolean flag per helpfulness level.
  for level in levels:
    per_rating[level] = per_rating[c.helpfulnessLevelKey] == level

  # A rating falls into a bucket only when both of the bucket's flags are set.
  for viewpoint in viewpoints:
    for level in levels:
      per_rating[f"{viewpoint}_{level}"] = per_rating[viewpoint] * per_rating[level]

  # Summing the flags per note turns them into counts.
  bucket_counts = per_rating[[c.noteIdKey] + _BUCKET_COUNT_COLS]
  bucket_counts = bucket_counts.groupby(c.noteIdKey).sum().reset_index()
  bucket_counts = bucket_counts.astype({col: np.float64 for col in _BUCKET_COUNT_COLS})

  # The left join keeps every note; unrated notes come back as NaN and are zeroed
  # before the final integer cast.
  merged = notes[[c.noteIdKey]].merge(bucket_counts, on=c.noteIdKey, how="left")
  return merged.fillna(0.0).astype(np.int64)