def _compute_rating_cutoff()

in sourcecode/scoring/pflip_model.py [0:0]


  def _compute_rating_cutoff(self, noteStatusHistory: pd.DataFrame) -> pd.DataFrame:
    """Derive, per note, the timestamp after which ratings should be ignored.

    The model runs before a note has received a status and tries to predict the
    final locked status, so only ratings made before the note's earliest status
    event are relevant.  The cutoff for a note is the smaller of two timestamps
    read from noteStatusHistory:
      * c.timestampMillisOfFirstNmrDueToMinStableCrhTimeKey (values equal to -1
        are treated as missing), and
      * c.timestampMillisOfNoteFirstNonNMRLabelKey.

    Rows for which no cutoff can be computed (both timestamps missing) are
    dropped from the result, as are rows with a missing note ID.
    NOTE(review): callers presumably interpret a note's absence from the result
    as "no time limit, use all ratings" -- confirm against the call site.

    Args:
      noteStatusHistory: pd.DataFrame containing the note ID column and the two
        timestamp columns listed above.

    Returns:
      pd.DataFrame with the note ID column and the _STATUS_MTS column, holding
      one row per input row that has a cutoff.
    """
    stableCrhNmrCol = c.timestampMillisOfFirstNmrDueToMinStableCrhTimeKey
    firstNonNmrCol = c.timestampMillisOfNoteFirstNonNMRLabelKey
    # Work on a copy so the caller's frame is never modified.
    cutoffs = noteStatusHistory[[c.noteIdKey, stableCrhNmrCol, firstNonNmrCol]].copy()
    # Blank out -1 entries so they cannot win the row-wise minimum below.
    # NOTE(review): -1 presumably marks "event never happened" -- confirm.
    isUnset = cutoffs[stableCrhNmrCol] == -1
    cutoffs.loc[isUnset, stableCrhNmrCol] = np.nan
    # min(axis=1) skips NaN, so a row is NaN only when both timestamps are missing.
    cutoffs[_STATUS_MTS] = cutoffs[[stableCrhNmrCol, firstNonNmrCol]].min(axis=1)
    return cutoffs[[c.noteIdKey, _STATUS_MTS]].dropna()