def _set_note_sign_weights()

in sourcecode/scoring/matrix_factorization/normalized_loss.py [0:0]


  def _set_note_sign_weights(self, ratings, alpha):
    """Rebalance each note's rating weights across the sign of the rater factor.

    For every note that has ratings on both sides (rater factor > 0 and rater
    factor <= 0), the weights of its positive-factor ratings are multiplied by
    (alpha + noteNegTotal) / (alpha + notePosTotal), where the totals are the
    note's summed weights on each side.  Notes with ratings on only one side keep
    a multiplier of 1.  Afterwards every note's weights are rescaled so that the
    note's summed weight equals what it was on entry.

    Args:
      ratings: DataFrame containing at least the columns c.noteIdKey,
        c.raterParticipantIdKey, c.internalRaterFactor1Key and "weights".
      alpha: value added to both the numerator and the denominator of the
        multiplier.  Must not be None.

    Returns:
      DataFrame with columns c.noteIdKey, c.raterParticipantIdKey,
      c.internalRaterFactor1Key and "weights", where "weights" holds the
      rebalanced values.

    Raises:
      AssertionError: if alpha is None, or if the overall weight total differs
        from the original total by 10**-5 or more.
    """
    assert alpha is not None

    def _sumWeightsByNote(frame, totalName):
      # Per-note sum of the "weights" column, returned under the name `totalName`.
      return (
        frame[[c.noteIdKey, "weights"]]
        .groupby(c.noteIdKey)
        .sum()
        .reset_index(drop=False)
        .rename(columns={"weights": totalName})
      )

    # Snapshot the starting per-note totals and the grand total; both are needed
    # later to restore the per-note mass and to validate the result.
    origPerNote = _sumWeightsByNote(ratings, "origNoteTotal")
    origGrandTotal = origPerNote["origNoteTotal"].values.sum()

    # Weight mass on each side of the factor sign, per note.
    posPerNote = _sumWeightsByNote(
      ratings[ratings[c.internalRaterFactor1Key] > 0], "notePosTotal"
    )
    negPerNote = _sumWeightsByNote(
      ratings[ratings[c.internalRaterFactor1Key] <= 0], "noteNegTotal"
    )

    # Inner join on purpose: a note with ratings on only one side drops out here
    # and therefore receives no adjustment.
    mixed = posPerNote.merge(negPerNote)
    mixed["multiplier"] = (alpha + mixed["noteNegTotal"]) / (alpha + mixed["notePosTotal"])

    # Attach the multiplier to every rating, defaulting to 1 for notes that
    # dropped out above, then neutralize it for non-positive-factor ratings so
    # that only the positive side is scaled.
    # NOTE(review): a null factor satisfies neither `> 0` nor `<= 0`, so such a
    # row is excluded from both totals yet keeps its note's multiplier - confirm
    # that the factor column is never null when this is called.
    reweighted = ratings.merge(mixed[[c.noteIdKey, "multiplier"]], how="left").fillna(
      {"multiplier": 1}
    )
    nonPositive = reweighted[c.internalRaterFactor1Key] <= 0
    reweighted.loc[nonPositive, "multiplier"] = 1
    reweighted["weights"] = reweighted["weights"] * reweighted["multiplier"]

    # Rescale so each note ends up with exactly the total weight it started with:
    # first normalize by the note's new total, then multiply by the original one.
    newPerNote = _sumWeightsByNote(reweighted, "newNoteTotal")
    reweighted = reweighted.merge(newPerNote).merge(origPerNote)
    reweighted["weights"] = reweighted["weights"] * (reweighted["newNoteTotal"] ** -1)
    reweighted["weights"] = reweighted["weights"] * reweighted["origNoteTotal"]

    # Keep only the output columns and check that the grand total is preserved.
    outputColumns = [c.noteIdKey, c.raterParticipantIdKey, c.internalRaterFactor1Key, "weights"]
    result = reweighted[outputColumns]
    newGrandTotal = result["weights"].sum()
    assert abs(origGrandTotal - newGrandTotal) < 10**-5, f"{origGrandTotal} vs {newGrandTotal}"
    return result