def _get_pair_tuples()

in sourcecode/scoring/post_selection_similarity_old.py [0:0]


def _get_pair_tuples(ratings, windowMillis):
  """Collect rater pairs whose ratings on the same note are close together in time.

  The ratings are ordered by note and then by creation time, so all ratings on a
  given note are contiguous and chronologically ascending.  Each rating is then
  paired with every later rating on the same note whose timestamp is at most
  windowMillis after it.

  Args:
    ratings: table supporting sort_values and column selection (presumably a
      pandas DataFrame -- confirm against the caller) that holds the columns
      named by c.noteIdKey, c.createdAtMillisKey, c.raterParticipantIdKey and
      c.tweetIdKey.
    windowMillis: largest timestamp gap, inclusive, for two ratings on the same
      note to be paired.

  Returns:
    A list with one (leftRater, rightRater, tweet) tuple per qualifying pair of
    ratings, where the two rater ids are in ascending order.  The list is not
    de-duplicated, so the same tuple may appear more than once.

  Raises:
    AssertionError: if the sorted rows are out of order, if two ratings on the
      same note carry different tweet ids, or if they share the same rater.
  """
  columns = [c.noteIdKey, c.createdAtMillisKey, c.raterParticipantIdKey, c.tweetIdKey]
  ordered = ratings.sort_values([c.noteIdKey, c.createdAtMillisKey])
  rows = ordered[columns].values
  numRows = len(rows)
  print(numRows)
  pairs = []
  for i, (priorNote, priorTs, priorRater, priorTweet) in enumerate(rows):
    # Sparse progress output: the first row, two early checkpoints, then every 5M rows.
    if i in (0, 1000, 100000) or i % 5000000 == 0:
      print(f"i={i}  len(tuples)={len(pairs)}")
    # Only look forward; pairs involving earlier rows were emitted when those
    # rows were the "prior" side.
    for j in range(i + 1, numRows):
      nextNote, nextTs, nextRater, nextTweet = rows[j]
      assert priorNote <= nextNote, (priorNote, nextNote)
      if priorNote != nextNote:
        # Rows are grouped by note, so nothing further can match this note.
        break
      assert priorTweet == nextTweet, (priorTweet, nextTweet)  # one tweet per note
      assert priorRater != nextRater, (priorRater, nextRater)  # one rating per rater per note
      assert priorTs <= nextTs, (priorTs, nextTs)
      if nextTs > (priorTs + windowMillis):
        # Timestamps ascend within a note, so every later row is also too late.
        break
      # Canonical ordering makes (a, b) and (b, a) the same pair.
      leftRater, rightRater = sorted((priorRater, nextRater))
      pairs.append((leftRater, rightRater, priorTweet))
  return pairs