def _get_bucket_count_totals()

in sourcecode/scoring/pflip_model.py [0:0]


  def _get_bucket_count_totals(self, notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
    """Count ratings per note, bucketed by rater viewpoint and helpfulness level.

    Each rating with a non-null rater factor is assigned to one viewpoint bucket
    (negative: factor < -0.3, neutral: -0.3 <= factor <= 0.3, positive: factor > 0.3)
    and crossed with its helpfulness level (helpful, somewhat helpful, not helpful).
    The crossed indicators are then summed per note.

    Args:
      notes: pd.DataFrame specifying the universe of notes to include in the output.
      ratings: pd.DataFrame containing all ratings for feature extraction.  Ratings
        whose rater factor is missing are ignored.

    Returns:
      pd.DataFrame with one row per row of notes, containing the note ID column and
      the columns listed in _BUCKET_COUNT_COLS.  Notes with no usable ratings get 0
      in every bucket column.  All columns are cast to np.int64.
    """
    # Ratings without a rater factor cannot be placed in a viewpoint bucket.
    hasFactor = ratings[_RATER_FACTOR].notna()
    scored = ratings.loc[hasFactor, [c.noteIdKey, _RATER_FACTOR, c.helpfulnessLevelKey]]
    raterFactor = scored[_RATER_FACTOR]
    helpfulnessLevel = scored[c.helpfulnessLevelKey]

    # Boolean indicator per rating for each viewpoint and each helpfulness level.
    viewpointMasks = {
      _NEGATIVE: raterFactor < -0.3,
      _NEUTRAL: (raterFactor >= -0.3) & (raterFactor <= 0.3),
      _POSITIVE: raterFactor > 0.3,
    }
    levelMasks = {
      level: helpfulnessLevel == level
      for level in [c.helpfulValueTsv, c.somewhatHelpfulValueTsv, c.notHelpfulValueTsv]
    }

    # Cross every viewpoint with every helpfulness level; the product of two
    # indicators is set only when the rating falls in both.
    buckets = scored[[c.noteIdKey]].copy()
    for viewpointName, viewpointMask in viewpointMasks.items():
      for levelName, levelMask in levelMasks.items():
        buckets[f"{viewpointName}_{levelName}"] = viewpointMask * levelMask
    buckets = buckets[[c.noteIdKey] + _BUCKET_COUNT_COLS]

    # Summing the indicators within each note yields the per-bucket rating counts.
    totals = buckets.groupby(c.noteIdKey).sum().reset_index()
    totals[_BUCKET_COUNT_COLS] = totals[_BUCKET_COUNT_COLS].astype(np.float64)

    # The left join keeps every row of notes; notes absent from totals come back
    # as NaN and are filled with zero before the final integer cast.
    merged = notes[[c.noteIdKey]].merge(totals, on=c.noteIdKey, how="left")
    return merged.fillna(0.0).astype(np.int64)