in sourcecode/scoring/matrix_factorization/pseudo_raters.py [0:0]
def _aggregate_note_params(self, noteParamsList, joinOrig=False):
    """Summarize per-note parameters across a list of refit results.

    The frames in ``noteParamsList`` are concatenated and, for every note id,
    the min / median / max of the note intercept and note factor are
    computed. Rows whose ``Constants.extraRaterInterceptKey`` is NaN supply
    the ``Constants.refitOriginalKey`` values and form the left side of every
    subsequent join.
    NOTE(review): those NaN rows presumably are the refit performed without
    any extra rater added -- confirm against the caller.

    Args:
        noteParamsList: DataFrames to concatenate. Each is expected to carry
            ``mf_c.noteIndexKey`` (dropped here), ``c.noteIdKey``,
            ``c.internalNoteInterceptKey``, ``c.internalNoteFactor1Key`` and
            ``Constants.extraRaterInterceptKey``.
        joinOrig: when True, also join the intercept and factor stored in
            ``self.noteParams`` under the ``Constants.originalKey`` suffix.

    Returns:
        DataFrame indexed by ``c.noteIdKey``. Column names are flattened to
        ``"<first level>_<second level>"`` strings and sorted. It contains the
        refit-original values, the min/median/max aggregates, optionally the
        original values, and per-note rating counts (all rows, rows from
        raters with negative factor, rows from raters with positive factor).
    """
    noteParamCols = [c.noteIdKey, c.internalNoteInterceptKey, c.internalNoteFactor1Key]

    # NOTE(review): `unsafeAllowed` is not an argument of stock pandas.concat;
    # presumably `pd` is the project's patched pandas -- confirm.
    rawRescoredNotesWithEachExtraRater = pd.concat(
        noteParamsList,
        unsafeAllowed={
            Constants.extraRaterInterceptKey,
            Constants.extraRaterFactor1Key,
            Constants.extraRatingHelpfulNumKey,
        },
    )
    # The concat result is a fresh frame, so dropping in place does not touch
    # the caller's inputs.
    rawRescoredNotesWithEachExtraRater.drop(mf_c.noteIndexKey, axis=1, inplace=True)
    rawRescoredNotesWithEachExtraRater = rawRescoredNotesWithEachExtraRater.sort_values(
        by=[c.noteIdKey, Constants.extraRaterInterceptKey]
    )

    # Spread of the note parameters across all refits, one row per note.
    # A list (not a set) keeps the aggregate column order deterministic.
    rawRescoredNotesWithEachExtraRaterAgg = (
        rawRescoredNotesWithEachExtraRater[noteParamCols]
        .groupby(c.noteIdKey)
        .agg(["min", "median", "max"])
    )

    # Rows with no extra-rater intercept provide the "refit original" values.
    hasNoExtraRater = pd.isna(rawRescoredNotesWithEachExtraRater[Constants.extraRaterInterceptKey])
    refitSameRatings = rawRescoredNotesWithEachExtraRater[hasNoExtraRater][
        noteParamCols
    ].set_index(c.noteIdKey)
    # Give the columns a second level so they line up with the two-level
    # columns produced by the aggregation above.
    refitSameRatings.columns = pd.MultiIndex.from_product(
        [refitSameRatings.columns, [Constants.refitOriginalKey]]
    )
    notesWithConfidenceBounds = refitSameRatings.join(rawRescoredNotesWithEachExtraRaterAgg)

    if joinOrig:
        orig = self.noteParams[noteParamCols].set_index(c.noteIdKey)
        orig.columns = pd.MultiIndex.from_product([orig.columns, [Constants.originalKey]])
        notesWithConfidenceBounds = notesWithConfidenceBounds.join(orig)

    # Per-note counts of rating rows: all, and split by the sign of the
    # rater factor (summing a boolean column counts its True entries).
    raterFacs = self.ratingFeaturesAndLabels.merge(self.raterParams, on=c.raterParticipantIdKey)
    raterFacs[Constants.allKey] = 1
    raterFacs[Constants.negFacKey] = raterFacs[c.internalRaterFactor1Key] < 0
    raterFacs[Constants.posFacKey] = raterFacs[c.internalRaterFactor1Key] > 0
    ratingCounts = raterFacs.groupby(c.noteIdKey)[
        [Constants.allKey, Constants.negFacKey, Constants.posFacKey]
    ].sum()
    ratingCounts.columns = pd.MultiIndex.from_product([[c.ratingCountKey], ratingCounts.columns])
    notesWithConfidenceBounds = notesWithConfidenceBounds.join(ratingCounts)

    def flatten_column_names(col):
        # Two-level column labels arrive as tuples; anything else passes through.
        if isinstance(col, tuple):
            return f"{col[0]}_{col[1]}"
        return col

    notesWithConfidenceBounds.columns = [
        flatten_column_names(col) for col in notesWithConfidenceBounds.columns
    ]
    # Sort columns by name so the output layout is stable.
    notesWithConfidenceBounds = notesWithConfidenceBounds[
        notesWithConfidenceBounds.columns.sort_values()
    ]
    return notesWithConfidenceBounds