in cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUnifiedSimilarityEngine.scala [55:313]
/**
 * Fetches producer-based candidates for a user source from every enabled SimClustersANN (SANN)
 * variant and from the producer-based UserTweetGraph (UTG) engine, then combines them into a
 * single candidate list.
 *
 * The SANN sources are always interleaved with one another. UTG candidates are combined according
 * to `query.utgCombinationMethod`: interleaved together with the SANN sources (`Interleave`),
 * placed ahead of the interleaved SANN result (`Frontload`), or left out for any other value.
 *
 * @param query the unified query; a source is only fetched when its `enable*` flag is set
 * @return for an `InternalId.UserId` source, the stats-tracked future whose block yields `Some` of
 *         at most `query.maxCandidateNumPerSourceKey` candidates; `Future.None` for any other
 *         kind of source id
 */
override def get(
  query: Query
): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {
  // Evaluates `fetch` only when the source is enabled. The argument is by-name, so a disabled
  // source never invokes its engine and simply resolves to Future.None.
  def fetchIfEnabled[T](enabled: Boolean)(fetch: => Future[Option[T]]): Future[Option[T]] =
    if (enabled) fetch else Future.None

  query.sourceInfo.internalId match {
    case _: InternalId.UserId =>
      StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
        // All fetches are started up front, before the join below waits on them.
        val sannCandidatesFut = fetchIfEnabled(query.enableSimClustersANN) {
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANNQuery)
        }
        val sann1CandidatesFut = fetchIfEnabled(query.enableSimClustersANN1) {
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN1Query)
        }
        val sann2CandidatesFut = fetchIfEnabled(query.enableSimClustersANN2) {
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN2Query)
        }
        val sann3CandidatesFut = fetchIfEnabled(query.enableSimClustersANN3) {
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN3Query)
        }
        val sann4CandidatesFut = fetchIfEnabled(query.enableSimClustersANN4) {
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN4Query)
        }
        val sann5CandidatesFut = fetchIfEnabled(query.enableSimClustersANN5) {
          simClustersANNSimilarityEngine.getCandidates(query.simClustersANN5Query)
        }
        val experimentalSANNCandidatesFut =
          fetchIfEnabled(query.enableExperimentalSimClustersANN) {
            simClustersANNSimilarityEngine.getCandidates(query.experimentalSimClustersANNQuery)
          }
        val utgCandidatesFut = fetchIfEnabled(query.enableUtg) {
          producerBasedUserTweetGraphSimilarityEngine.getCandidates(query.utgQuery)
        }

        Future
          .join(
            sannCandidatesFut,
            sann1CandidatesFut,
            sann2CandidatesFut,
            sann3CandidatesFut,
            sann4CandidatesFut,
            sann5CandidatesFut,
            experimentalSANNCandidatesFut,
            utgCandidatesFut
          ).map {
            case (
                  simClustersAnnCandidates,
                  simClustersAnn1Candidates,
                  simClustersAnn2Candidates,
                  simClustersAnn3Candidates,
                  simClustersAnn4Candidates,
                  simClustersAnn5Candidates,
                  experimentalSANNCandidates,
                  userTweetGraphCandidates) =>
              // Each SANN result paired with the query that produced it. The order of this list
              // is the order in which the sources are handed to the interleaver below.
              val sannResults = Seq(
                (query.simClustersANNQuery, simClustersAnnCandidates),
                (query.simClustersANN1Query, simClustersAnn1Candidates),
                (query.simClustersANN2Query, simClustersAnn2Candidates),
                (query.simClustersANN3Query, simClustersAnn3Candidates),
                (query.simClustersANN4Query, simClustersAnn4Candidates),
                (query.simClustersANN5Query, simClustersAnn5Candidates),
                (query.experimentalSimClustersANNQuery, experimentalSANNCandidates)
              )

              // Per SANN source: apply the min-score filter (keyed by that source's config id),
              // then attach candidate generation info built from that same source's query.
              val sannTweetsWithCGInfo = sannResults.map {
                case (sannQuery, sannCandidates) =>
                  simClustersCandidateMinScoreFilter(
                    sannCandidates.toSeq.flatten,
                    query.simClustersMinScore,
                    sannQuery.storeQuery.simClustersANNConfigId
                  ).map { tweetWithScore =>
                    val similarityEngineInfo = SimClustersANNSimilarityEngine
                      .toSimilarityEngineInfo(sannQuery, tweetWithScore.score)
                    TweetWithCandidateGenerationInfo(
                      tweetWithScore.tweetId,
                      CandidateGenerationInfo(
                        Some(query.sourceInfo),
                        similarityEngineInfo,
                        Seq(similarityEngineInfo)
                      ))
                  }
              }

              val utgTweetsWithCGInfo =
                userTweetGraphFilter(userTweetGraphCandidates.toSeq.flatten).map {
                  tweetWithScore =>
                    val similarityEngineInfo =
                      ProducerBasedUserTweetGraphSimilarityEngine
                        .toSimilarityEngineInfo(tweetWithScore.score)
                    TweetWithCandidateGenerationInfo(
                      tweetWithScore.tweetId,
                      CandidateGenerationInfo(
                        Some(query.sourceInfo),
                        similarityEngineInfo,
                        Seq(similarityEngineInfo)
                      ))
                }

              // UTG joins the interleave only when the combination method is Interleave.
              val candidateSourcesToBeInterleaved =
                ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](sannTweetsWithCGInfo: _*)
              if (query.utgCombinationMethod == UnifiedSETweetCombinationMethod.Interleave) {
                candidateSourcesToBeInterleaved += utgTweetsWithCGInfo
              }
              val interleavedCandidates =
                InterleaveUtil.interleave(candidateSourcesToBeInterleaved)

              // With Frontload, UTG candidates are placed ahead of the interleaved SANN result.
              val candidateSourcesToBeOrdered =
                ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](interleavedCandidates)
              if (query.utgCombinationMethod == UnifiedSETweetCombinationMethod.Frontload) {
                candidateSourcesToBeOrdered.prepend(utgTweetsWithCGInfo)
              }
              val candidatesFromGivenOrderCombination =
                SimilaritySourceOrderingUtil.keepGivenOrder(candidateSourcesToBeOrdered)

              val unifiedCandidatesWithUnifiedCGInfo = candidatesFromGivenOrderCombination.map {
                candidate =>
                  /**
                   * Candidates produced by interleave/keepGivenOrder have their CGInfo replaced
                   * with the unified CGInfo from getProducerBasedUnifiedCGInfo().
                   *
                   * For interleave, the contributingSE list only contains the chosen SE. Adding
                   * the others is hard today and is planned for after the abstraction is improved.
                   */
                  TweetWithCandidateGenerationInfo(
                    tweetId = candidate.tweetId,
                    candidateGenerationInfo = getProducerBasedUnifiedCGInfo(
                      candidate.candidateGenerationInfo.sourceInfoOpt,
                      candidate.getSimilarityScore,
                      candidate.candidateGenerationInfo.contributingSimilarityEngines
                    ) // getSimilarityScore comes from either unifiedScore or single score
                  )
              }
              stats.stat("unified_candidate_size").add(unifiedCandidatesWithUnifiedCGInfo.size)

              val truncatedCandidates =
                unifiedCandidatesWithUnifiedCGInfo.take(query.maxCandidateNumPerSourceKey)
              stats.stat("truncatedCandidates_size").add(truncatedCandidates.size)

              Some(truncatedCandidates)
          }
      }

    case _ =>
      // Only user-id sources are supported; count anything else and return no candidates.
      stats.counter("sourceId_is_not_userId_cnt").incr()
      Future.None
  }
}