in sourcecode/scoring/pflip_model.py [0:0]
def _compute_rating_cutoff(self, noteStatusHistory: pd.DataFrame) -> pd.DataFrame:
    """Derive, per note, the timestamp after which ratings should be ignored.

    The model runs before a note has a status and tries to predict the final
    locked status, so only ratings made before the note's first status event
    are meant to be used.  The cutoff for a note is the earlier of two
    timestamps taken from noteStatusHistory:
      * c.timestampMillisOfFirstNmrDueToMinStableCrhTimeKey (a value of -1 is
        treated as missing), and
      * c.timestampMillisOfNoteFirstNonNMRLabelKey.
    A note for which neither timestamp is available gets no row in the result.
    NOTE(review): callers presumably interpret a missing row as "no cutoff,
    keep every rating" -- confirm against the call site.

    Args:
      noteStatusHistory: pd.DataFrame holding the note ID column and the two
        timestamp columns listed above.

    Returns:
      pd.DataFrame with the note ID column and a _STATUS_MTS column, limited to
      rows where both values are non-null.
    """
    minStableCol = c.timestampMillisOfFirstNmrDueToMinStableCrhTimeKey
    firstNonNmrCol = c.timestampMillisOfNoteFirstNonNMRLabelKey
    timestampCols = [minStableCol, firstNonNmrCol]

    # Work on a copy so the caller's frame is never modified.
    cutoffs = noteStatusHistory[[c.noteIdKey] + timestampCols].copy()

    # Blank out the -1 sentinel so it cannot win the row-wise minimum below.
    isUnset = cutoffs[minStableCol] == -1
    cutoffs.loc[isUnset, minStableCol] = np.nan

    # Row-wise minimum skips NaN, so a row ends up NaN only when both
    # timestamps are missing.
    cutoffs[_STATUS_MTS] = cutoffs[timestampCols].min(axis=1)

    # Drop rows without a usable cutoff (or without a note ID).
    return cutoffs[[c.noteIdKey, _STATUS_MTS]].dropna()