in cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUnifiedSimilarityEngine.scala [70:390]
/**
 * Collects candidates for a tweet source from every enabled similarity engine and merges
 * them into a single list.
 *
 * When `query.sourceInfo.internalId` is an `InternalId.TweetId`, the work is wrapped in
 * `StatsUtil.trackOptionItemsStats(fetchCandidatesStat)` and proceeds as follows:
 *  1. one request is started per enabled engine (UTG, UVG, the SimClusters ANN variants,
 *     QIG, TwHIN); a disabled engine contributes `Future.None`;
 *  2. the results are joined and each source is passed through its own filter;
 *  3. every remaining tweet is wrapped with a `CandidateGenerationInfo` carrying the
 *     similarity-engine info of the engine that produced it;
 *  4. the per-engine lists are merged with `InterleaveUtil.interleave`;
 *  5. each merged candidate's generation info is rebuilt with `getTweetBasedUnifiedCGInfo`;
 *  6. at most `query.maxCandidateNumPerSourceKey` candidates are kept.
 *
 * For any other id type the `sourceId_is_not_tweetId_cnt` counter is incremented and
 * `Future.None` is returned.
 *
 * @param query engine switches, per-engine queries and filter settings for one source
 * @return `Some` of the truncated, interleaved candidates for a tweet source, `None` otherwise
 */
override def get(
  query: Query
): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {

  // Evaluates `fetch` only when the engine is switched on; otherwise yields an empty result.
  def whenEnabled[T](enabled: Boolean)(fetch: => Future[Option[T]]): Future[Option[T]] =
    if (enabled) fetch else Future.None

  // Turns a missing engine response into an empty candidate list.
  def orEmpty[T](maybeCandidates: Option[Seq[T]]): Seq[T] =
    maybeCandidates.toSeq.flatten

  val internalId = query.sourceInfo.internalId

  internalId match {
    case _: InternalId.TweetId =>
      StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
        val twhinEngineQuery = HnswANNEngineQuery(
          sourceId = internalId,
          modelId = query.twhinModelId,
          params = query.params)

        // Requests are issued here, in this order, before anything is awaited.
        val utgFut = whenEnabled(query.enableUtg)(
          tweetBasedUserTweetGraphSimilarityEngine.getCandidates(query.utgQuery))
        val uvgFut = whenEnabled(query.enableUvg)(
          tweetBasedUserVideoGraphSimilarityEngine.getCandidates(query.uvgQuery))
        val sannFut = whenEnabled(query.enableSimClustersANN)(
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANNQuery))
        val sann1Fut = whenEnabled(query.enableSimClustersANN1)(
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN1Query))
        val sann2Fut = whenEnabled(query.enableSimClustersANN2)(
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN2Query))
        val sann3Fut = whenEnabled(query.enableSimClustersANN3)(
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN3Query))
        val sann5Fut = whenEnabled(query.enableSimClustersANN5)(
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN5Query))
        val sann4Fut = whenEnabled(query.enableSimClustersANN4)(
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN4Query))
        val experimentalSannFut = whenEnabled(query.enableExperimentalSimClustersANN)(
          simClustersANNSimilarityEngine.getCandidates(query.experimentalSimClustersANNQuery))
        val qigFut = whenEnabled(query.enableQig)(
          tweetBasedQigSimilarTweetsSimilarityEngine.getCandidates(query.qigQuery))
        val twhinFut = whenEnabled(query.enableTwHIN)(
          tweetBasedTwHINANNSimilarityEngine.getCandidates(twhinEngineQuery))

        Future
          .join(
            utgFut,
            sannFut,
            sann1Fut,
            sann2Fut,
            sann3Fut,
            sann5Fut,
            sann4Fut,
            experimentalSannFut,
            qigFut,
            twhinFut,
            uvgFut
          ).map {
            case (
                  utgResult,
                  sannResult,
                  sann1Result,
                  sann2Result,
                  sann3Result,
                  sann5Result,
                  sann4Result,
                  experimentalSannResult,
                  qigResult,
                  twhinResult,
                  uvgResult) =>
              // ---- Step 1: per-source filtering ----
              val utgKept = userTweetGraphFilter(orEmpty(utgResult))
              val uvgKept = userVideoGraphFilter(orEmpty(uvgResult))

              val sannMinScore = query.simClustersMinScore
              val sannKept = simClustersCandidateMinScoreFilter(
                orEmpty(sannResult),
                sannMinScore,
                query.simClustersANNQuery.storeQuery.simClustersANNConfigId)
              val sann1Kept = simClustersCandidateMinScoreFilter(
                orEmpty(sann1Result),
                sannMinScore,
                query.simClustersANN1Query.storeQuery.simClustersANNConfigId)
              val sann2Kept = simClustersCandidateMinScoreFilter(
                orEmpty(sann2Result),
                sannMinScore,
                query.simClustersANN2Query.storeQuery.simClustersANNConfigId)
              val sann3Kept = simClustersCandidateMinScoreFilter(
                orEmpty(sann3Result),
                sannMinScore,
                query.simClustersANN3Query.storeQuery.simClustersANNConfigId)
              val sann4Kept = simClustersCandidateMinScoreFilter(
                orEmpty(sann4Result),
                sannMinScore,
                query.simClustersANN4Query.storeQuery.simClustersANNConfigId)
              val sann5Kept = simClustersCandidateMinScoreFilter(
                orEmpty(sann5Result),
                sannMinScore,
                query.simClustersANN5Query.storeQuery.simClustersANNConfigId)
              // NOTE(review): this is the only SANN source filtered with
              // simClustersVideoBasedMinScore instead of simClustersMinScore — confirm intended.
              val experimentalSannKept = simClustersCandidateMinScoreFilter(
                orEmpty(experimentalSannResult),
                query.simClustersVideoBasedMinScore,
                query.experimentalSimClustersANNQuery.storeQuery.simClustersANNConfigId)

              val qigKept = qigSimilarTweetsFilter(
                orEmpty(qigResult),
                query.qigMaxTweetAgeHours,
                query.qigMaxNumSimilarTweets
              )
              // TwHIN candidates are handed to their filter highest score first.
              val twhinKept = twhinFilter(
                orEmpty(twhinResult).sortBy(scored => -scored.score),
                query.twhinMaxTweetAgeHours,
                tweetBasedTwHINANNSimilarityEngine.getScopedStats
              )

              // ---- Step 2: attach the producing engine to every remaining tweet ----
              val sourceInfoOpt = Some(query.sourceInfo)

              val utgTagged = utgKept.map { scored =>
                val engineInfo =
                  TweetBasedUserTweetGraphSimilarityEngine.toSimilarityEngineInfo(scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val uvgTagged = uvgKept.map { scored =>
                val engineInfo =
                  TweetBasedUserVideoGraphSimilarityEngine.toSimilarityEngineInfo(scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val sannTagged = sannKept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.simClustersANNQuery, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val sann1Tagged = sann1Kept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.simClustersANN1Query, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val sann2Tagged = sann2Kept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.simClustersANN2Query, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val sann3Tagged = sann3Kept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.simClustersANN3Query, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val sann4Tagged = sann4Kept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.simClustersANN4Query, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val sann5Tagged = sann5Kept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.simClustersANN5Query, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val experimentalSannTagged = experimentalSannKept.map { scored =>
                val engineInfo = SimClustersANNSimilarityEngine
                  .toSimilarityEngineInfo(query.experimentalSimClustersANNQuery, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val qigTagged = qigKept.map { scored =>
                val engineInfo =
                  TweetBasedQigSimilarityEngine.toSimilarityEngineInfo(scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }
              val twhinTagged = twhinKept.map { scored =>
                val engineInfo = tweetBasedTwHINANNSimilarityEngine
                  .toSimilarityEngineInfo(twhinEngineQuery, scored.score)
                TweetWithCandidateGenerationInfo(
                  scored.tweetId,
                  CandidateGenerationInfo(sourceInfoOpt, engineInfo, Seq(engineInfo)))
              }

              // ---- Step 3: interleave; the list order below is the order handed to the interleaver ----
              val sourcesInInterleaveOrder =
                ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
                  sannTagged,
                  experimentalSannTagged,
                  sann1Tagged,
                  sann2Tagged,
                  sann3Tagged,
                  sann5Tagged,
                  sann4Tagged,
                  qigTagged,
                  uvgTagged,
                  utgTagged,
                  twhinTagged
                )
              val interleaved = InterleaveUtil.interleave(sourcesInInterleaveOrder)

              // ---- Step 4: replace each candidate's CGInfo with the unified one ----
              // Interleaving keeps only the engine that was picked for a tweet, so the
              // contributing-engine list passed on here holds that single engine rather than
              // every engine that may have produced the same tweet.
              val unified = interleaved.map { picked =>
                val pickedInfo = picked.candidateGenerationInfo
                TweetWithCandidateGenerationInfo(
                  tweetId = picked.tweetId,
                  candidateGenerationInfo = getTweetBasedUnifiedCGInfo(
                    pickedInfo.sourceInfoOpt,
                    // getSimilarityScore comes from either unifiedScore or single score
                    picked.getSimilarityScore,
                    pickedInfo.contributingSimilarityEngines
                  )
                )
              }
              stats.stat("unified_candidate_size").add(unified.size)

              // ---- Step 5: cap the result size ----
              val capped = unified.take(query.maxCandidateNumPerSourceKey)
              stats.stat("truncatedCandidates_size").add(capped.size)

              Some(capped)
          }
      }

    case _ =>
      // Only tweet sources are supported; count the miss and return nothing.
      stats.counter("sourceId_is_not_tweetId_cnt").incr()
      Future.None
  }
}