in sourcecode/scoring/matrix_factorization/pseudo_raters.py [0:0]
def _create_dataset_with_extreme_rating_on_each_note(self, ratingToAddWithoutNoteId):
  """Return the ratings dataset with the given rating replicated onto every note.

  One copy of `ratingToAddWithoutNoteId` is created for each distinct
  (c.noteIdKey, mf_c.noteIndexKey) pair found in self.ratingFeaturesAndLabels, and the
  copies are appended to the existing ratings.

  Args:
    ratingToAddWithoutNoteId: mapping-like rating template supporting `copy()` and item
      assignment. It is indexed by c.helpfulNumKey, and the c.internalRaterInterceptKey and
      c.internalRaterFactor1Key entries are dropped from the generated rows (so they must be
      present). c.raterParticipantIdKey is read only when the existing rater ID column uses
      the pandas Int64 extension dtype.

  Returns:
    A new DataFrame holding the existing ratings followed by the generated ratings. When the
    template's c.helpfulNumKey value is None, or there are no notes to attach ratings to,
    self.ratingFeaturesAndLabels itself is returned (not a copy).
  """
  # A template without a helpfulness value contributes no ratings.
  if ratingToAddWithoutNoteId[c.helpfulNumKey] is None:
    return self.ratingFeaturesAndLabels

  notePairs = self.ratingFeaturesAndLabels[[c.noteIdKey, mf_c.noteIndexKey]].drop_duplicates()
  # With no notes there is nothing to add; returning early also avoids building a column-less
  # DataFrame below, on which the column drop would raise KeyError.
  if notePairs.empty:
    return self.ratingFeaturesAndLabels

  ## for each rating (ided by raterParticipantId and raterIndex)
  ratingsWithNoteIds = []
  # Plain tuples (name=None) avoid attribute lookup by column name, which only works when the
  # column names are valid Python identifiers; index=False skips the unused index value.
  for noteId, noteIndex in notePairs.itertuples(index=False, name=None):
    ratingToAdd = ratingToAddWithoutNoteId.copy()
    ratingToAdd[c.noteIdKey] = noteId
    ratingToAdd[mf_c.noteIndexKey] = noteIndex
    ratingsWithNoteIds.append(ratingToAdd)

  # The rater parameter entries of the template are removed from the generated rating rows.
  extremeRatingsToAdd = pd.DataFrame(ratingsWithNoteIds).drop(
    [c.internalRaterInterceptKey, c.internalRaterFactor1Key], axis=1
  )
  extremeRatingsToAdd[c.noteIdKey] = extremeRatingsToAdd[c.noteIdKey].astype(np.int64)
  if isinstance(self.ratingFeaturesAndLabels[c.raterParticipantIdKey].dtype, pd.Int64Dtype):
    # Only convert ID type from string to Int64 if is necessary to match existing IDs (which is
    # expected when running in prod, but not always in unit tests or public data.)
    extremeRatingsToAdd[c.raterParticipantIdKey] = extremeRatingsToAdd[
      c.raterParticipantIdKey
    ].astype(pd.Int64Dtype())

  return pd.concat([self.ratingFeaturesAndLabels, extremeRatingsToAdd])