def _aggregate_note_params()

in sourcecode/scoring/matrix_factorization/pseudo_raters.py [0:0]


  def _aggregate_note_params(self, noteParamsList, joinOrig=False):
    """Summarize note intercepts/factors across a list of refit results.

    The frames in noteParamsList are concatenated, and for every note id the
    min / median / max of the internal note intercept and factor are computed.
    Those statistics are joined onto the rows whose extra-rater intercept is
    NaN (labelled with Constants.refitOriginalKey), optionally onto the values
    in self.noteParams, and onto per-note rating counts.

    Args:
      noteParamsList: sequence of pandas DataFrames handed to pd.concat. Each
        must provide the note id, internal note intercept, internal note
        factor, extra-rater intercept and mf_c.noteIndexKey columns used below.
      joinOrig: when True, also join the intercept/factor columns of
        self.noteParams, labelled with Constants.originalKey.

    Returns:
      pandas DataFrame indexed by note id. The two-level column labels are
      flattened to "<first>_<second>" strings and the columns are sorted by
      name.
    """
    noteIdAndParamCols = [c.noteIdKey, c.internalNoteInterceptKey, c.internalNoteFactor1Key]

    # NOTE(review): `unsafeAllowed` is not a stock pandas.concat argument; it is
    # presumably supplied by a project-level pandas wrapper -- confirm.
    rawRescoredNotesWithEachExtraRater = pd.concat(
      noteParamsList,
      unsafeAllowed={
        Constants.extraRaterInterceptKey,
        Constants.extraRaterFactor1Key,
        Constants.extraRatingHelpfulNumKey,
      },
    )
    rawRescoredNotesWithEachExtraRater = rawRescoredNotesWithEachExtraRater.drop(
      columns=mf_c.noteIndexKey
    ).sort_values(by=[c.noteIdKey, Constants.extraRaterInterceptKey])

    # An ordered list (rather than a set) keeps the aggregation order
    # deterministic; the final column order is fixed by the sort at the end.
    rawRescoredNotesWithEachExtraRaterAgg = (
      rawRescoredNotesWithEachExtraRater[noteIdAndParamCols]
      .groupby(c.noteIdKey)
      .agg(["min", "median", "max"])
    )

    # Rows with no extra-rater intercept -- presumably the refit on the
    # unmodified ratings; verify against the caller that builds noteParamsList.
    hasNoExtraRater = rawRescoredNotesWithEachExtraRater[Constants.extraRaterInterceptKey].isna()
    refitSameRatings = rawRescoredNotesWithEachExtraRater.loc[
      hasNoExtraRater, noteIdAndParamCols
    ].set_index(c.noteIdKey)
    # Give these columns the same two-level shape as the aggregated frame so
    # that the join below yields uniform (parameter, statistic) labels.
    refitSameRatings.columns = pd.MultiIndex.from_product(
      [refitSameRatings.columns, [Constants.refitOriginalKey]]
    )
    notesWithConfidenceBounds = refitSameRatings.join(rawRescoredNotesWithEachExtraRaterAgg)

    if joinOrig:
      orig = self.noteParams[noteIdAndParamCols].set_index(c.noteIdKey)
      orig.columns = pd.MultiIndex.from_product([orig.columns, [Constants.originalKey]])
      notesWithConfidenceBounds = notesWithConfidenceBounds.join(orig)

    # Per-note rating counts: all ratings, and ratings from raters whose
    # factor is strictly negative / strictly positive.
    raterFacs = self.ratingFeaturesAndLabels.merge(self.raterParams, on=c.raterParticipantIdKey)
    raterFacs[Constants.allKey] = 1
    raterFacs[Constants.negFacKey] = raterFacs[c.internalRaterFactor1Key] < 0
    raterFacs[Constants.posFacKey] = raterFacs[c.internalRaterFactor1Key] > 0
    ratingCounts = raterFacs.groupby(c.noteIdKey)[
      [Constants.allKey, Constants.negFacKey, Constants.posFacKey]
    ].sum()
    ratingCounts.columns = pd.MultiIndex.from_product([[c.ratingCountKey], ratingCounts.columns])
    notesWithConfidenceBounds = notesWithConfidenceBounds.join(ratingCounts)

    def flatten_column_names(column):
      # Two-level labels arrive as tuples; anything else is passed through.
      if isinstance(column, tuple):
        return f"{column[0]}_{column[1]}"
      return column

    notesWithConfidenceBounds.columns = [
      flatten_column_names(column) for column in notesWithConfidenceBounds.columns
    ]
    notesWithConfidenceBounds = notesWithConfidenceBounds[
      notesWithConfidenceBounds.columns.sort_values()
    ]

    return notesWithConfidenceBounds