def _create_dataset_with_extreme_rating_on_each_note()

in sourcecode/scoring/matrix_factorization/pseudo_raters.py [0:0]


  def _create_dataset_with_extreme_rating_on_each_note(self, ratingToAddWithoutNoteId):
    """Build a ratings dataset in which the given rating has been added to every note.

    Args:
      ratingToAddWithoutNoteId: rating record that does not yet carry note identifiers.  It
        is read by key and duplicated with .copy().  Its c.internalRaterInterceptKey and
        c.internalRaterFactor1Key entries are removed from the rows that get added.

    Returns:
      self.ratingFeaturesAndLabels itself when the record's c.helpfulNumKey value is None.
      Otherwise, the concatenation of self.ratingFeaturesAndLabels with one copy of the
      record for each distinct (c.noteIdKey, mf_c.noteIndexKey) pair present in it.
    """
    if ratingToAddWithoutNoteId[c.helpfulNumKey] is None:
      # No rating value to inject, so the existing dataset is handed back unchanged.
      return self.ratingFeaturesAndLabels

    def _ratingForNote(noteTuple):
      # Clone the template rating and stamp it with this note's identifiers.
      stamped = ratingToAddWithoutNoteId.copy()
      stamped[c.noteIdKey] = getattr(noteTuple, c.noteIdKey)
      stamped[mf_c.noteIndexKey] = getattr(noteTuple, mf_c.noteIndexKey)
      return stamped

    # One entry per distinct note, so the rating is added exactly once to each note.
    uniqueNotes = self.ratingFeaturesAndLabels[[c.noteIdKey, mf_c.noteIndexKey]].drop_duplicates()
    perNoteRatings = [_ratingForNote(noteTuple) for noteTuple in uniqueNotes.itertuples()]

    extremeRatingsToAdd = pd.DataFrame(perNoteRatings).drop(
      columns=[c.internalRaterInterceptKey, c.internalRaterFactor1Key]
    )
    noteIdsAsInt = extremeRatingsToAdd[c.noteIdKey].astype(np.int64)
    extremeRatingsToAdd[c.noteIdKey] = noteIdsAsInt

    existingRaterIdDtype = self.ratingFeaturesAndLabels[c.raterParticipantIdKey].dtype
    if isinstance(existingRaterIdDtype, pd.Int64Dtype):
      # Only convert the ID type from string to Int64 when that is needed to match the existing
      # IDs (expected when running in prod, but not always in unit tests or public data).
      raterIds = extremeRatingsToAdd[c.raterParticipantIdKey]
      extremeRatingsToAdd[c.raterParticipantIdKey] = raterIds.astype(pd.Int64Dtype())

    return pd.concat([self.ratingFeaturesAndLabels, extremeRatingsToAdd])