in sourcecode/scoring/post_selection_similarity_old.py [0:0]
def aggregate_into_cliques(graphDf):
  """Group rater ids into cliques by walking the pairs listed in graphDf.

  Every row is read as a link between row["leftRaterId"] and
  row["rightRaterId"]. Linked raters are placed in the same clique, and when a
  row links two raters that already sit in different cliques those cliques are
  merged, so membership is transitive across rows. Clique ids are integers
  handed out sequentially starting at 1. On a merge the left rater's clique id
  survives and the right rater's clique id is removed.

  Args:
    graphDf: object providing iterrows(), whose rows can be indexed by
      "leftRaterId" and "rightRaterId" (presumably a pandas DataFrame of rater
      pairs -- confirm against the caller).

  Returns:
    Tuple (cliqueToUserMap, userToCliqueMap):
      cliqueToUserMap: dict mapping clique id -> list of rater ids in it.
      userToCliqueMap: dict mapping rater id -> id of the clique it belongs to.
  """
  with c.time_block("Aggregate into cliques by post selection similarity"):
    userToCliqueMap = {}
    cliqueToUserMap = {}
    nextNewCliqueId = 1  # clique ids are 1-based

    for _, edge in graphDf.iterrows():
      leftId = edge["leftRaterId"]
      rightId = edge["rightRaterId"]

      # Clique ids are never None, so None reliably means "not seen yet".
      leftClique = userToCliqueMap.get(leftId)
      rightClique = userToCliqueMap.get(rightId)

      if leftClique is None and rightClique is None:
        # Neither rater is known: open a brand-new clique holding the pair.
        cliqueToUserMap[nextNewCliqueId] = [leftId, rightId]
        userToCliqueMap[leftId] = nextNewCliqueId
        userToCliqueMap[rightId] = nextNewCliqueId
        nextNewCliqueId += 1
        continue

      if leftClique is None:
        # Only the right rater is known: the left rater joins its clique.
        userToCliqueMap[leftId] = rightClique
        cliqueToUserMap[rightClique].append(leftId)
        continue

      if rightClique is None:
        # Only the left rater is known: the right rater joins its clique.
        userToCliqueMap[rightId] = leftClique
        cliqueToUserMap[leftClique].append(rightId)
        continue

      if leftClique == rightClique:
        # Both raters already share a clique; nothing to do for this row.
        continue

      # Both raters are known but in different cliques: fold the right rater's
      # clique into the left rater's. cliqueToUserMap acts as the reverse index
      # so only the absorbed clique's members need to be re-pointed.
      survivors = cliqueToUserMap[leftClique]
      for memberId in cliqueToUserMap[rightClique]:
        survivors.append(memberId)
        userToCliqueMap[memberId] = leftClique
      del cliqueToUserMap[rightClique]
      gc.collect()

  return cliqueToUserMap, userToCliqueMap