in sourcecode/scoring/matrix_factorization/normalized_loss.py [0:0]
def _set_note_sign_weights(self, ratings, alpha):
    """Rebalance each note's rating weights between rater-factor signs.

    For every note that has ratings both from raters whose
    ``c.internalRaterFactor1Key`` is positive and from raters whose factor is
    non-positive, the weights of the positive-factor ratings are multiplied by
    ``(alpha + noteNegTotal) / (alpha + notePosTotal)``, where the two totals
    are the note's summed weights on each side.  Afterwards every note's
    weights are rescaled so the note's summed weight equals its summed weight
    on input.

    Args:
      ratings: DataFrame with (at least) the columns ``c.noteIdKey``,
        ``c.raterParticipantIdKey``, ``c.internalRaterFactor1Key`` and
        ``"weights"``.
      alpha: value added to both the numerator and denominator totals of the
        multiplier; must not be None.

    Returns:
      DataFrame restricted to the columns ``c.noteIdKey``,
      ``c.raterParticipantIdKey``, ``c.internalRaterFactor1Key`` and
      ``"weights"``, carrying the adjusted weights.

    Raises:
      AssertionError: if ``alpha`` is None, or if the overall summed weight
        after adjustment is not within ``10**-5`` of the original.
    """
    assert alpha is not None

    def noteWeightTotals(frame, totalColumn):
        # Sum "weights" per note and expose the sum under `totalColumn`.
        perNote = frame[[c.noteIdKey, "weights"]].groupby(c.noteIdKey).sum()
        return perNote.reset_index(drop=False).rename(columns={"weights": totalColumn})

    # Baseline per-note and overall totals, kept for the final renormalization
    # and the sanity check.
    origNoteTotalWeight = noteWeightTotals(ratings, "origNoteTotal")
    origTotalWeight = origNoteTotalWeight["origNoteTotal"].values.sum()

    # Per-note totals on each side of the factor sign.
    posRatings = ratings[ratings[c.internalRaterFactor1Key] > 0]
    nonPosRatings = ratings[ratings[c.internalRaterFactor1Key] <= 0]
    # Inner merge: notes rated from only one side are dropped here on purpose
    # and therefore receive no update (their multiplier defaults to 1 below).
    signTotals = noteWeightTotals(posRatings, "notePosTotal").merge(
        noteWeightTotals(nonPosRatings, "noteNegTotal")
    )
    signTotals["multiplier"] = (alpha + signTotals["noteNegTotal"]) / (
        alpha + signTotals["notePosTotal"]
    )

    # Attach the multiplier to each rating; only positive-factor ratings keep it.
    reweighted = ratings.merge(signTotals[[c.noteIdKey, "multiplier"]], how="left")
    reweighted = reweighted.fillna({"multiplier": 1})
    nonPositive = reweighted[c.internalRaterFactor1Key] <= 0
    # NOTE(review): a NaN factor is neither > 0 nor <= 0, so such a row would
    # keep the note's multiplier here -- confirm whether NaN factors can occur.
    reweighted.loc[nonPositive, "multiplier"] = 1
    reweighted["weights"] = reweighted["weights"] * reweighted["multiplier"]

    # Renormalize so each note ends up with its original total weight:
    # divide by the note's new total, then scale by the note's original total.
    newNoteTotals = noteWeightTotals(reweighted, "newNoteTotal")
    rescaled = reweighted.merge(newNoteTotals).merge(origNoteTotalWeight)
    rescaled["weights"] = (
        rescaled["weights"] * (rescaled["newNoteTotal"] ** -1) * rescaled["origNoteTotal"]
    )

    # Re-form ratings, validate that the overall weight was conserved, and return.
    ratings = rescaled[
        [c.noteIdKey, c.raterParticipantIdKey, c.internalRaterFactor1Key, "weights"]
    ]
    assert (
        abs(origTotalWeight - ratings["weights"].sum()) < 10**-5
    ), f"{origTotalWeight} vs {ratings['weights'].sum()}"
    return ratings