def get_user_incorrect_ratio()

in sourcecode/scoring/incorrect_filter.py [0:0]


def get_user_incorrect_ratio(ratings: pd.DataFrame) -> pd.DataFrame:
  """Tallies, per rater, the counts behind p(incorrect | not helpful tags assigned).

  Only ratings with at least one not-helpful tag set are considered.  No division
  is performed here: the numerator and denominator of the empirical ratio are
  returned as two separate columns.

  Called during prescoring only, since it uses entire rating history.

  Args:
    ratings: DF containing ratings.  Must provide the rater ID column, the note ID
      column, the "incorrect" tag column and every column named in
      c.notHelpfulTagsTSVOrder.

  Returns:
    pd.DataFrame containing one row per user who assigned not helpful tags, with
    columns:
      c.raterParticipantIdKey: the rater.
      c.incorrectTagRatingsMadeByRaterKey: sum of the "incorrect" tag over that
        rater's ratings carrying not-helpful tags.
      c.totalRatingsMadeByRaterKey: number of those ratings (non-null note IDs).
  """
  # A rating qualifies when its not-helpful tag columns sum to more than zero.
  hasNhTag = ratings[c.notHelpfulTagsTSVOrder].sum(axis=1) > 0
  taggedRatings = ratings.loc[hasNhTag]

  # One grouping serves both tallies below.
  perRater = taggedRatings.groupby(c.raterParticipantIdKey)

  # Numerator: how many times each rater applied the "incorrect" tag.
  incorrectCounts = perRater[c.notHelpfulIncorrectTagKey].sum().reset_index(
    name=c.incorrectTagRatingsMadeByRaterKey
  )

  # Denominator: how many ratings with not-helpful tags each rater made.
  nhRatingCounts = perRater[c.noteIdKey].count().reset_index(
    name=c.totalRatingsMadeByRaterKey
  )

  # Both frames are keyed by the same set of raters, so the inner join keeps all of them.
  return incorrectCounts.merge(nhRatingCounts, on=c.raterParticipantIdKey)