def _get_helpful_rating_stats()

in sourcecode/scoring/pflip_model.py [0:0]


  def _get_helpful_rating_stats(self, notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
    """Summarize the rater factors behind each note's Helpful ratings.

    Only ratings whose helpfulness level equals c.helpfulValueTsv and whose rater factor
    is present are considered.  Those ratings are split by factor sign (strictly positive
    vs. zero-or-negative) and reduced per note.

    Args:
      notes: pd.DataFrame whose note ID column defines which notes appear in the output.
      ratings: pd.DataFrame of ratings; must carry the note ID, rater factor and
        helpfulness level columns.

    Returns:
      pd.DataFrame with one row per row of `notes`, holding the note ID plus 7 statistic
        columns: max and mean of the positive factors, absolute value of the min and of
        the mean of the non-positive factors, standard deviation over all retained
        factors, and the max / mean "diff" columns (positive stat plus the absolute
        non-positive stat).  Statistics are NaN for notes lacking the relevant ratings.
    """
    # Restrict to (noteId, factor) pairs for Helpful ratings from raters that have a factor.
    helpful = ratings[[c.noteIdKey, _RATER_FACTOR, c.helpfulnessLevelKey]].copy()
    helpful = helpful[helpful[_RATER_FACTOR].notna()]
    helpful = helpful[helpful[c.helpfulnessLevelKey] == c.helpfulValueTsv]
    helpful = helpful.drop(columns=c.helpfulnessLevelKey)

    # Split once by factor sign; each side feeds both a max-style and a mean statistic.
    posByNote = helpful[helpful[_RATER_FACTOR] > 0].groupby(c.noteIdKey)
    nonPosByNote = helpful[helpful[_RATER_FACTOR] <= 0].groupby(c.noteIdKey)
    allByNote = helpful.groupby(c.noteIdKey)

    # Per-note statistics, listed in the order their columns appear in the output.
    # The non-positive side is reported as a magnitude, hence the abs().
    perNoteStats = [
      posByNote.max().rename(columns={_RATER_FACTOR: _MAX_POS_HELPFUL}),
      nonPosByNote.min().abs().rename(columns={_RATER_FACTOR: _MAX_NEG_HELPFUL}),
      posByNote.mean().rename(columns={_RATER_FACTOR: _MEAN_POS_HELPFUL}),
      nonPosByNote.mean().abs().rename(columns={_RATER_FACTOR: _MEAN_NEG_HELPFUL}),
      allByNote.std().rename(columns={_RATER_FACTOR: _STD_HELPFUL}),
    ]

    # Left-join every statistic onto the requested notes so no note is dropped.
    features = notes[[c.noteIdKey]]
    for stat in perNoteStats:
      features = features.merge(stat.reset_index(), on=c.noteIdKey, how="left")

    # Both operands are magnitudes, so the sum is the spread between the two sides.
    features[_MAX_DIFF] = features[_MAX_POS_HELPFUL] + features[_MAX_NEG_HELPFUL]
    features[_MEAN_DIFF] = features[_MEAN_POS_HELPFUL] + features[_MEAN_NEG_HELPFUL]
    return features