in src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.java [504:691]
/**
 * Adjusts a raw score with the boosts, dampings and demotions configured in {@code params}.
 *
 * <p>Nearly every adjustment is a multiplication, but the out-of-network reply penalty is
 * subtracted part-way through, so the order of the steps below affects the result. While
 * adjusting the score, this method also records on {@code data} which adjustments fired (the
 * various {@code *Applied} flags) and the multipliers it computed.
 *
 * @param data scoring data for the document being scored; read for its signals and updated
 *     with the applied-boost bookkeeping
 * @param score the score to start from
 * @param withHitAttribution whether to fetch hit attribution for the current document from
 *     {@code hitAttributeHelper} when the explanation path is not used
 * @param forExplanation whether this call is made to build an explanation; if so, and
 *     {@code fieldHitAttribution} is available, the hit fields are taken from it instead
 * @return the score after all applicable adjustments
 */
private double applyBoosts(
    LinearScoringData data,
    double score,
    boolean withHitAttribution,
    boolean forExplanation) {
  double adjusted = score;

  // Optionally use the normalized Lucene score as a multiplier.
  if (params.useLuceneScoreAsBoost) {
    data.normalizedLuceneScore = normalizeLuceneScore(
        (float) data.luceneScore, (float) params.maxLuceneScoreBoost);
    adjusted *= data.normalizedLuceneScore;
  }

  // Content / author quality dampings. The user-level ones are skipped when the configured
  // value is the NO_BOOST_VALUE sentinel.
  if (data.isOffensive) {
    adjusted *= params.offensiveDamping;
  }
  if (data.isUserSpam && params.spamUserDamping != LinearScoringData.NO_BOOST_VALUE) {
    data.spamUserDampApplied = true;
    adjusted *= params.spamUserDamping;
  }
  if (data.isUserNSFW && params.nsfwUserDamping != LinearScoringData.NO_BOOST_VALUE) {
    data.nsfwUserDampApplied = true;
    adjusted *= params.nsfwUserDamping;
  }
  if (data.isUserBot && params.botUserDamping != LinearScoringData.NO_BOOST_VALUE) {
    data.botUserDampApplied = true;
    adjusted *= params.botUserDamping;
  }

  // Card boost, looked up by the tweet's card type.
  if (data.hasCard && params.hasCardBoosts[data.cardType] != LinearScoringData.NO_BOOST_VALUE) {
    adjusted *= params.hasCardBoosts[data.cardType];
    data.hasCardBoostApplied = true;
  }

  // Trends: multiple hashtags/trends are damped; otherwise a single trend is boosted.
  if (data.hasMultipleHashtagsOrTrends) {
    adjusted *= params.multipleHashtagsOrTrendsDamping;
  } else if (data.hasTrend) {
    data.tweetHasTrendsBoostApplied = true;
    adjusted *= params.tweetHasTrendBoost;
  }

  // Media and news url boosts.
  if (data.hasImageUrl || data.hasVideoUrl) {
    data.hasMedialUrlBoostApplied = true;
    adjusted *= params.tweetHasMediaUrlBoost;
  }
  if (data.hasNewsUrl) {
    data.hasNewsUrlBoostApplied = true;
    adjusted *= params.tweetHasNewsUrlBoost;
  }

  // Verified-account boosts.
  if (data.isFromVerifiedAccount) {
    data.tweetFromVerifiedAccountBoostApplied = true;
    adjusted *= params.tweetFromVerifiedAccountBoost;
  }
  if (data.isFromBlueVerifiedAccount) {
    data.tweetFromBlueVerifiedAccountBoostApplied = true;
    adjusted *= params.tweetFromBlueVerifiedAccountBoost;
  }

  // Social graph adjustments; at most one of these three applies.
  if (data.isFollow) {
    // Direct follow: boosted for replies and non-replies alike.
    data.directFollowBoostApplied = true;
    adjusted *= params.directFollowBoost;
  } else if (data.isTrusted && !data.isReply) {
    // Non-reply from the trusted circle. A trusted reply gets no adjustment at all.
    data.trustedCircleBoostApplied = true;
    adjusted *= params.trustedCircleBoost;
  } else if (!data.isTrusted && data.isReply) {
    // Reply from outside the network: a subtractive penalty rather than a multiplier.
    data.outOfNetworkReplyPenaltyApplied = true;
    adjusted -= params.outOfNetworkReplyPenalty;
  }

  if (data.isSelfTweet) {
    data.selfTweetBoostApplied = true;
    data.selfTweetMult = params.selfTweetBoost;
    adjusted *= params.selfTweetBoost;
  }

  // User-language demotion. data.userLangMult is populated in scoreInternal(), which always
  // runs before boosts are applied.
  if (params.useUserLanguageInfo) {
    adjusted *= data.userLangMult;
  }

  // UI-language demotion: no demotion when the UI language is unknown or matches the tweet's
  // language; otherwise the multiplier depends on which side (if any) is English.
  if (params.uiLangId == ThriftLanguage.UNKNOWN.getValue()
      || params.uiLangId == data.tweetLangId) {
    data.uiLangMult = LinearScoringData.NO_BOOST_VALUE;
  } else if (data.tweetLangId == ThriftLanguage.ENGLISH.getValue()) {
    data.uiLangMult = params.langEnglishTweetDemote;
  } else if (params.uiLangId == ThriftLanguage.ENGLISH.getValue()) {
    data.uiLangMult = params.langEnglishUIDemote;
  } else {
    data.uiLangMult = params.langDefaultDemote;
  }
  adjusted *= data.uiLangMult;

  if (params.useAgeDecay) {
    // Shallow sigmoid with an inflection point at ageDecayHalflife.
    data.ageDecayMult = ageDecay.getAgeDecayMultiplier(data.tweetAgeInSeconds);
    adjusted *= data.ageDecayMult;
  }

  // Hit attribute demotion.
  // Scoring is currently based on the tokenized user name, text and url in the tweet; when hit
  // attribute collection is enabled, the score is demoted based on which of those were hit.
  if (hitAttributeHelper == null || !params.enableHitDemotion) {
    return adjusted;
  }

  Map<Integer, List<String>> attributedHits;
  if (forExplanation && fieldHitAttribution != null) {
    // Collectors are not called during the debug workflow, so for explanations the hit
    // attribution stored in the search result's metadata is used instead.
    attributedHits =
        Maps.transformValues(fieldHitAttribution.getHitMap(), FieldHitList::getHitFields);
  } else if (withHitAttribution) {
    attributedHits = hitAttributeHelper.getHitAttribution(getCurrentDocID());
  } else {
    attributedHits = Maps.newHashMap();
  }

  Set<String> hitFieldNames = ImmutableSet.copyOf(Iterables.concat(attributedHits.values()));
  data.hitFields.addAll(hitFieldNames);

  // Some field should always have been hit. If none was, this is assumed to be an 'explain'
  // call in debug mode, and the hit attribute data is left as is.
  if (hitFieldNames.isEmpty()) {
    return adjusted;
  }

  // Demotions based strictly on which single field was hit.
  if (hitFieldNames.size() == 1) {
    if (hitFieldNames.contains(
        EarlybirdFieldConstant.RESOLVED_LINKS_TEXT_FIELD.getFieldName())) {
      // Only the url was hit.
      data.hasUrlOnlyHitDemotionApplied = true;
      adjusted *= params.urlOnlyHitDemotion;
    } else if (hitFieldNames.contains(
        EarlybirdFieldConstant.TOKENIZED_FROM_USER_FIELD.getFieldName())) {
      // Only the name was hit.
      data.hasNameOnlyHitDemotionApplied = true;
      adjusted *= params.nameOnlyHitDemotion;
    }
    return adjusted;
  }

  // Several fields were hit: demote when neither the text nor any "special text" field
  // (mentions, hashtags, stocks) is among them.
  boolean textOrSpecialTextHit =
      hitFieldNames.contains(EarlybirdFieldConstant.TEXT_FIELD.getFieldName())
          || hitFieldNames.contains(EarlybirdFieldConstant.MENTIONS_FIELD.getFieldName())
          || hitFieldNames.contains(EarlybirdFieldConstant.HASHTAGS_FIELD.getFieldName())
          || hitFieldNames.contains(EarlybirdFieldConstant.STOCKS_FIELD.getFieldName());
  if (!textOrSpecialTextHit) {
    data.hasNoTextHitDemotionApplied = true;
    adjusted *= params.noTextHitDemotion;
    return adjusted;
  }

  // Combination demotions only apply when exactly two fields were hit (one of them being
  // text) and they were hit by separate terms.
  if (hitFieldNames.size() != 2) {
    return adjusted;
  }
  Set<String> sharedHitFields = QueryCommonFieldHitsVisitor.findIntersection(
      hitAttributeHelper.getNodeToRankMap(),
      attributedHits,
      query);
  if (!sharedHitFields.isEmpty()) {
    return adjusted;
  }

  if (hitFieldNames.contains(
      EarlybirdFieldConstant.TOKENIZED_FROM_USER_FIELD.getFieldName())) {
    // The name was hit but shares no hits with the text: part of the person's name and the
    // tweet text were matched separately.
    data.hasSeparateTextAndNameHitDemotionApplied = true;
    adjusted *= params.separateTextAndNameHitDemotion;
  } else if (hitFieldNames.contains(
      EarlybirdFieldConstant.RESOLVED_LINKS_TEXT_FIELD.getFieldName())) {
    // The url was hit but shares no hits with the text: a potential domain keyword and the
    // tweet text were matched separately.
    data.hasSeparateTextAndUrlHitDemotionApplied = true;
    adjusted *= params.separateTextAndUrlHitDemotion;
  }
  return adjusted;
}