override def get()

in cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUnifiedSimilarityEngine.scala [70:390]


  override def get(
    query: Query
  ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {

    query.sourceInfo.internalId match {
      case _: InternalId.TweetId =>
        StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
          val twhinQuery =
            HnswANNEngineQuery(
              sourceId = query.sourceInfo.internalId,
              modelId = query.twhinModelId,
              params = query.params)
          val utgCandidatesFut =
            if (query.enableUtg)
              tweetBasedUserTweetGraphSimilarityEngine.getCandidates(query.utgQuery)
            else Future.None

          val uvgCandidatesFut =
            if (query.enableUvg)
              tweetBasedUserVideoGraphSimilarityEngine.getCandidates(query.uvgQuery)
            else Future.None

          val sannCandidatesFut = if (query.enableSimClustersANN) {
            simClustersANNSimilarityEngine.getCandidates(query.simClustersANNQuery)
          } else Future.None

          val sann1CandidatesFut =
            if (query.enableSimClustersANN1) {
              simClustersANNSimilarityEngine.getCandidates(query.simClustersANN1Query)
            } else Future.None

          val sann2CandidatesFut =
            if (query.enableSimClustersANN2) {
              simClustersANNSimilarityEngine.getCandidates(query.simClustersANN2Query)
            } else Future.None

          val sann3CandidatesFut =
            if (query.enableSimClustersANN3) {
              simClustersANNSimilarityEngine.getCandidates(query.simClustersANN3Query)
            } else Future.None

          val sann5CandidatesFut =
            if (query.enableSimClustersANN5) {
              simClustersANNSimilarityEngine.getCandidates(query.simClustersANN5Query)
            } else Future.None

          val sann4CandidatesFut =
            if (query.enableSimClustersANN4) {
              simClustersANNSimilarityEngine.getCandidates(query.simClustersANN4Query)
            } else Future.None

          val experimentalSANNCandidatesFut =
            if (query.enableExperimentalSimClustersANN) {
              simClustersANNSimilarityEngine.getCandidates(query.experimentalSimClustersANNQuery)
            } else Future.None

          val qigCandidatesFut =
            if (query.enableQig)
              tweetBasedQigSimilarTweetsSimilarityEngine.getCandidates(query.qigQuery)
            else Future.None

          val twHINCandidateFut = if (query.enableTwHIN) {
            tweetBasedTwHINANNSimilarityEngine.getCandidates(twhinQuery)
          } else Future.None

          Future
            .join(
              utgCandidatesFut,
              sannCandidatesFut,
              sann1CandidatesFut,
              sann2CandidatesFut,
              sann3CandidatesFut,
              sann5CandidatesFut,
              sann4CandidatesFut,
              experimentalSANNCandidatesFut,
              qigCandidatesFut,
              twHINCandidateFut,
              uvgCandidatesFut
            ).map {
              case (
                    userTweetGraphCandidates,
                    simClustersANNCandidates,
                    simClustersANN1Candidates,
                    simClustersANN2Candidates,
                    simClustersANN3Candidates,
                    simClustersANN5Candidates,
                    simClustersANN4Candidates,
                    experimentalSANNCandidates,
                    qigSimilarTweetsCandidates,
                    twhinCandidates,
                    userVideoGraphCandidates) =>
                val filteredUTGTweets =
                  userTweetGraphFilter(userTweetGraphCandidates.toSeq.flatten)
                val filteredUVGTweets =
                  userVideoGraphFilter(userVideoGraphCandidates.toSeq.flatten)
                val filteredSANNTweets = simClustersCandidateMinScoreFilter(
                  simClustersANNCandidates.toSeq.flatten,
                  query.simClustersMinScore,
                  query.simClustersANNQuery.storeQuery.simClustersANNConfigId)

                val filteredSANN1Tweets = simClustersCandidateMinScoreFilter(
                  simClustersANN1Candidates.toSeq.flatten,
                  query.simClustersMinScore,
                  query.simClustersANN1Query.storeQuery.simClustersANNConfigId)

                val filteredSANN2Tweets = simClustersCandidateMinScoreFilter(
                  simClustersANN2Candidates.toSeq.flatten,
                  query.simClustersMinScore,
                  query.simClustersANN2Query.storeQuery.simClustersANNConfigId)

                val filteredSANN3Tweets = simClustersCandidateMinScoreFilter(
                  simClustersANN3Candidates.toSeq.flatten,
                  query.simClustersMinScore,
                  query.simClustersANN3Query.storeQuery.simClustersANNConfigId)

                val filteredSANN4Tweets = simClustersCandidateMinScoreFilter(
                  simClustersANN4Candidates.toSeq.flatten,
                  query.simClustersMinScore,
                  query.simClustersANN4Query.storeQuery.simClustersANNConfigId)

                val filteredSANN5Tweets = simClustersCandidateMinScoreFilter(
                  simClustersANN5Candidates.toSeq.flatten,
                  query.simClustersMinScore,
                  query.simClustersANN5Query.storeQuery.simClustersANNConfigId)

                val filteredExperimentalSANNTweets = simClustersCandidateMinScoreFilter(
                  experimentalSANNCandidates.toSeq.flatten,
                  query.simClustersVideoBasedMinScore,
                  query.experimentalSimClustersANNQuery.storeQuery.simClustersANNConfigId)

                val filteredQigTweets = qigSimilarTweetsFilter(
                  qigSimilarTweetsCandidates.toSeq.flatten,
                  query.qigMaxTweetAgeHours,
                  query.qigMaxNumSimilarTweets
                )

                val filteredTwHINTweets = twhinFilter(
                  twhinCandidates.toSeq.flatten.sortBy(-_.score),
                  query.twhinMaxTweetAgeHours,
                  tweetBasedTwHINANNSimilarityEngine.getScopedStats
                )
                val utgTweetsWithCGInfo = filteredUTGTweets.map { tweetWithScore =>
                  val similarityEngineInfo = TweetBasedUserTweetGraphSimilarityEngine
                    .toSimilarityEngineInfo(tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }

                val uvgTweetsWithCGInfo = filteredUVGTweets.map { tweetWithScore =>
                  val similarityEngineInfo = TweetBasedUserVideoGraphSimilarityEngine
                    .toSimilarityEngineInfo(tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }
                val sannTweetsWithCGInfo = filteredSANNTweets.map { tweetWithScore =>
                  val similarityEngineInfo = SimClustersANNSimilarityEngine
                    .toSimilarityEngineInfo(query.simClustersANNQuery, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }
                val sann1TweetsWithCGInfo = filteredSANN1Tweets.map { tweetWithScore =>
                  val similarityEngineInfo = SimClustersANNSimilarityEngine
                    .toSimilarityEngineInfo(query.simClustersANN1Query, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }
                val sann2TweetsWithCGInfo = filteredSANN2Tweets.map { tweetWithScore =>
                  val similarityEngineInfo = SimClustersANNSimilarityEngine
                    .toSimilarityEngineInfo(query.simClustersANN2Query, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }
                val sann3TweetsWithCGInfo = filteredSANN3Tweets.map { tweetWithScore =>
                  val similarityEngineInfo = SimClustersANNSimilarityEngine
                    .toSimilarityEngineInfo(query.simClustersANN3Query, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }
                val sann4TweetsWithCGInfo = filteredSANN4Tweets.map { tweetWithScore =>
                  val similarityEngineInfo = SimClustersANNSimilarityEngine
                    .toSimilarityEngineInfo(query.simClustersANN4Query, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }
                val sann5TweetsWithCGInfo = filteredSANN5Tweets.map { tweetWithScore =>
                  val similarityEngineInfo = SimClustersANNSimilarityEngine
                    .toSimilarityEngineInfo(query.simClustersANN5Query, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }

                val experimentalSANNTweetsWithCGInfo = filteredExperimentalSANNTweets.map {
                  tweetWithScore =>
                    val similarityEngineInfo = SimClustersANNSimilarityEngine
                      .toSimilarityEngineInfo(
                        query.experimentalSimClustersANNQuery,
                        tweetWithScore.score)
                    TweetWithCandidateGenerationInfo(
                      tweetWithScore.tweetId,
                      CandidateGenerationInfo(
                        Some(query.sourceInfo),
                        similarityEngineInfo,
                        Seq(similarityEngineInfo)
                      ))
                }
                val qigTweetsWithCGInfo = filteredQigTweets.map { tweetWithScore =>
                  val similarityEngineInfo = TweetBasedQigSimilarityEngine
                    .toSimilarityEngineInfo(tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }

                val twHINTweetsWithCGInfo = filteredTwHINTweets.map { tweetWithScore =>
                  val similarityEngineInfo = tweetBasedTwHINANNSimilarityEngine
                    .toSimilarityEngineInfo(twhinQuery, tweetWithScore.score)
                  TweetWithCandidateGenerationInfo(
                    tweetWithScore.tweetId,
                    CandidateGenerationInfo(
                      Some(query.sourceInfo),
                      similarityEngineInfo,
                      Seq(similarityEngineInfo)
                    ))
                }

                val candidateSourcesToBeInterleaved =
                  ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
                    sannTweetsWithCGInfo,
                    experimentalSANNTweetsWithCGInfo,
                    sann1TweetsWithCGInfo,
                    sann2TweetsWithCGInfo,
                    sann3TweetsWithCGInfo,
                    sann5TweetsWithCGInfo,
                    sann4TweetsWithCGInfo,
                    qigTweetsWithCGInfo,
                    uvgTweetsWithCGInfo,
                    utgTweetsWithCGInfo,
                    twHINTweetsWithCGInfo
                  )

                val interleavedCandidates =
                  InterleaveUtil.interleave(candidateSourcesToBeInterleaved)

                val unifiedCandidatesWithUnifiedCGInfo =
                  interleavedCandidates.map { candidate =>
                    /***
                     * when a candidate was made by interleave/keepGivenOrder,
                     * then we apply getTweetBasedUnifiedCGInfo() to override with the unified CGInfo
                     *
                     * we'll not have ALL SEs that generated the tweet
                     * in contributingSE list for interleave. We only have the chosen SE available.
                     */
                    TweetWithCandidateGenerationInfo(
                      tweetId = candidate.tweetId,
                      candidateGenerationInfo = getTweetBasedUnifiedCGInfo(
                        candidate.candidateGenerationInfo.sourceInfoOpt,
                        candidate.getSimilarityScore,
                        candidate.candidateGenerationInfo.contributingSimilarityEngines
                      ) // getSimilarityScore comes from either unifiedScore or single score
                    )
                  }
                stats
                  .stat("unified_candidate_size").add(unifiedCandidatesWithUnifiedCGInfo.size)

                val truncatedCandidates =
                  unifiedCandidatesWithUnifiedCGInfo.take(query.maxCandidateNumPerSourceKey)
                stats.stat("truncatedCandidates_size").add(truncatedCandidates.size)

                Some(truncatedCandidates)
            }
        }

      case _ =>
        stats.counter("sourceId_is_not_tweetId_cnt").incr()
        Future.None
    }
  }