def compute_note

def compute_note_stats()

in sourcecode/scoring/note_ratings.py [0:0]
45 lines of code
2 McCabe index (conditional complexity)

def compute_note_stats(ratings: pd.DataFrame, noteStatusHistory: pd.DataFrame) -> pd.DataFrame:
  """Compute aggregate note statistics from ratings and attach noteStatusHistory fields.

  Ratings are summed per note (total rating count, count of recent ratings, and one count per
  helpful / not-helpful tag) and the result is outer-merged with a fixed set of
  noteStatusHistory columns.  Notes that appear in noteStatusHistory but have no ratings come
  out of the merge with NaN aggregates, which are replaced by zero before the aggregate
  columns are cast to int64.  Every note observed in ratings is expected to also appear in
  noteStatusHistory; that expectation is checked with an assert on the merged row count.

  The content of ratings and noteStatusHistory may differ between callsites.  For example:
  * Scoring models working on a subset of notes and ratings may pre-filter both inputs so
    that only in-scope notes/ratings are present.
  * Meta scoring may call compute_note_stats with the full set of ratings and notes to
    compute note stats supporting contributor helpfulness aggregates.

  Args:
    ratings (pd.DataFrame): all ratings
    noteStatusHistory (pd.DataFrame): history of note statuses
  Returns:
    pd.DataFrame containing stats about each note
  """
  # Columns produced by the per-note aggregation; reused for the merge allow-list, the NaN
  # fill and the integer cast below.
  tagColumns = c.helpfulTagsTSVOrder + c.notHelpfulTagsTSVOrder
  aggregateColumns = [c.numRatingsKey, c.numRatingsLast28DaysKey] + tagColumns

  # Cutoff (epoch millis) for a rating to count as recent: c.emergingWriterDays days before
  # c.epochMillis.
  referenceTime = datetime.fromtimestamp(c.epochMillis / 1000, tz=timezone.utc)
  recentCutoffMillis = 1000 * (referenceTime - timedelta(days=c.emergingWriterDays)).timestamp()

  # One row per rating: the note id, the tag columns, a constant 1 and a recency flag, so that
  # a per-note sum yields the counts.
  perRating = pd.DataFrame(ratings[[c.noteIdKey] + tagColumns])
  perRating.loc[:, c.numRatingsKey] = 1
  perRating.loc[:, c.numRatingsLast28DaysKey] = False
  isRecent = ratings[c.createdAtMillisKey] > recentCutoffMillis
  perRating.loc[isRecent, c.numRatingsLast28DaysKey] = True
  noteStats = perRating.groupby(c.noteIdKey).sum()

  # Bring in the status-history fields.  The outer join keeps notes that have no ratings.
  historyFields = [
    c.noteIdKey,
    c.createdAtMillisKey,
    c.noteAuthorParticipantIdKey,
    c.classificationKey,
    c.currentLabelKey,
    c.lockedStatusKey,
  ]
  noteStats = noteStats.merge(
    noteStatusHistory[historyFields],
    on=c.noteIdKey,
    how="outer",
    unsafeAllowed=set(aggregateColumns),
  )

  # Notes without ratings have NaN aggregates after the outer merge; they were absent from the
  # aggregation, so treat them as zero and restore integer dtypes.
  zeroFill = {column: 0 for column in aggregateColumns}
  noteStats = noteStats.fillna(zeroFill)
  noteStats[aggregateColumns] = noteStats[aggregateColumns].astype(np.int64)

  # Validate that notes in ratings were a subset of noteStatusHistory.
  assert len(noteStats) == len(noteStatusHistory), "noteStatusHistory should contain all notes"
  return noteStats