in sourcecode/scoring/pflip_model.py [0:0]
def _get_helpful_rating_stats(self, notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
    """Summarize the rater factors behind each note's Helpful ratings.

    Only ratings marked Helpful whose rater factor is present contribute. Factors are
    split by sign (strictly positive vs. zero-or-negative) and aggregated per note; the
    negative-side aggregates are reported as absolute values.

    Args:
      notes: pd.DataFrame whose note ID column defines which notes appear in the output.
      ratings: pd.DataFrame of ratings carrying note ID, rater factor and helpfulness level.

    Returns:
      pd.DataFrame holding the note ID column of `notes` plus 7 statistic columns: max and
      mean of the positive factors, absolute min and absolute mean of the non-positive
      factors, the standard deviation over all retained factors, and the two "diff"
      columns (positive stat plus the corresponding absolute negative stat).  Statistics
      are NaN for notes that lack the ratings needed to compute them.
    """
    # Restrict to Helpful ratings from raters with a factor, keeping just note ID + factor.
    hasFactor = ratings[_RATER_FACTOR].notna()
    isHelpful = ratings[c.helpfulnessLevelKey] == c.helpfulValueTsv
    helpfulFactors = ratings.loc[hasFactor & isHelpful, [c.noteIdKey, _RATER_FACTOR]]

    # Group once per sign so each grouping can serve both of its aggregates.
    posByNote = helpfulFactors[helpfulFactors[_RATER_FACTOR] > 0].groupby(c.noteIdKey)
    negByNote = helpfulFactors[helpfulFactors[_RATER_FACTOR] <= 0].groupby(c.noteIdKey)
    allByNote = helpfulFactors.groupby(c.noteIdKey)

    def _toFeature(aggregated: pd.DataFrame, featureName: str) -> pd.DataFrame:
        # Move the note ID back into a column and label the factor column as the feature.
        return aggregated.reset_index().rename(columns={_RATER_FACTOR: featureName})

    # Order matters: it fixes the column order of the merged result.
    perNoteStats = [
        _toFeature(posByNote.max(), _MAX_POS_HELPFUL),
        _toFeature(negByNote.min().abs(), _MAX_NEG_HELPFUL),
        _toFeature(posByNote.mean(), _MEAN_POS_HELPFUL),
        _toFeature(negByNote.mean().abs(), _MEAN_NEG_HELPFUL),
        _toFeature(allByNote.std(), _STD_HELPFUL),
    ]

    # Left-join every statistic onto the note universe so all requested notes are kept.
    features = notes[[c.noteIdKey]]
    for stat in perNoteStats:
        features = features.merge(stat, on=c.noteIdKey, how="left")

    # Negative-side stats are already absolute values, so adding yields the spread.
    features[_MAX_DIFF] = features[_MAX_POS_HELPFUL] + features[_MAX_NEG_HELPFUL]
    features[_MEAN_DIFF] = features[_MEAN_POS_HELPFUL] + features[_MEAN_NEG_HELPFUL]
    return features