private void fillTermMetadata()

in src/java/com/twitter/search/earlybird/index/EarlybirdSingleSegmentSearcher.java [312:408]


  /**
   * Fills {@code metadata} with details of the tweet that represents the given facet term in
   * this segment: tweet ID, author ID, creation time, language, a "possibly sensitive" flag and,
   * for twimg facets, the native photo URL.
   *
   * <p>The method leaves {@code metadata} untouched when no live (non-deleted) document can be
   * resolved for the term, or when {@code metadata} already carries a positive status ID that is
   * less than or equal to the one resolved here.
   *
   * @param term the facet term to describe
   * @param metadata the metadata object to populate; may already hold values from another call
   * @param photoAccessor accessor used to read the term payload for twimg facets; may be
   *     {@code null}, in which case no native photo URL is set
   * @param debugMode when greater than 3, a debug line is appended to the metadata explanation
   * @throws IOException if an underlying index lookup fails
   */
  private void fillTermMetadata(Term term, ThriftFacetCountMetadata metadata,
                                FacetLabelProvider.FacetLabelAccessor photoAccessor,
                                byte debugMode) throws IOException {
    final String fieldName = term.field();
    final boolean twimgFacet =
        fieldName.equals(EarlybirdFieldConstant.TWIMG_LINKS_FIELD.getFieldName());
    final boolean fromUserFacet =
        fieldName.equals(EarlybirdFieldConstant.FROM_USER_ID_CSF.getFieldName());

    long authorId = -1;
    long tweetId = -1;
    int docId = DocIDToTweetIDMapper.ID_NOT_FOUND;
    Term lookupTerm = term;

    if (fromUserFacet) {
      // from_user_id facet: the term text is the user ID; the lookup goes through the
      // FROM_USER_ID_FIELD term instead of the original facet term.
      authorId = Long.parseLong(term.text());
      lookupTerm = new Term(
          EarlybirdFieldConstant.FROM_USER_ID_FIELD.getFieldName(),
          LongTermAttributeImpl.copyIntoNewBytesRef(authorId));
    } else if (twimgFacet) {
      // twimg facet: the term text is the tweet ID, so try to map it straight to a doc ID.
      tweetId = Long.parseLong(term.text());
      docId = twitterReader.getSegmentData().getDocIDToTweetIDMapper().getDocID(tweetId);
    }

    if (docId == DocIDToTweetIDMapper.ID_NOT_FOUND) {
      // Regular path for non-twimg facets. It is also the fallback for a twimg whose original
      // tweet is not in this segment (or possibly not on this earlybird at all): such a term is
      // treated as a normal facet for now.
      docId = twitterReader.getOldestDocID(lookupTerm);
      tweetId = docId >= 0
          ? twitterReader.getSegmentData().getDocIDToTweetIDMapper().getTweetID(docId)
          : -1;
    }

    // Nothing to report if there is no document, or the tweet has been deleted.
    if (docId < 0) {
      return;
    }
    if (twitterReader.getDeletesView().isDeleted(docId)) {
      return;
    }

    // Keep metadata that was already filled in from an earlier tweet for this facet.
    if (metadata.isSetStatusId()) {
      final long knownStatusId = metadata.getStatusId();
      if (knownStatusId > 0 && knownStatusId <= tweetId) {
        return;
      }
    }

    // Tweet-level flags: offensive (e.g. antisocial, nsfw) and sensitive content.
    final EarlybirdDocumentFeatures features = new EarlybirdDocumentFeatures(twitterReader);
    features.advance(docId);
    final boolean tweetOffensive = features.isFlagSet(EarlybirdFieldConstant.IS_OFFENSIVE_FLAG);
    final boolean tweetSensitive =
        features.isFlagSet(EarlybirdFieldConstant.IS_SENSITIVE_CONTENT);

    // User-level flags: the author must not be marked antisocial, offensive or nsfw either.
    if (authorId < 0) {
      authorId = features.getFeatureValue(EarlybirdFieldConstant.FROM_USER_ID_CSF);
    }
    final int flaggedUserBits =
        UserTable.ANTISOCIAL_BIT | UserTable.OFFENSIVE_BIT | UserTable.NSFW_BIT;
    final boolean authorFlagged = userTable.isSet(authorId, flaggedUserBits);
    final boolean possiblySensitive = tweetOffensive || tweetSensitive || authorFlagged;

    metadata.setStatusId(tweetId);
    metadata.setTwitterUserId(authorId);
    metadata.setCreated_at(twitterReader.getSegmentData().getTimeMapper().getTime(docId));
    final Locale locale = ThriftLanguageUtil.getLocaleOf(
        ThriftLanguage.findByValue(
            (int) features.getFeatureValue(EarlybirdFieldConstant.LANGUAGE)));
    metadata.setStatusLanguage(ThriftLanguageUtil.getThriftLanguageOf(locale));
    metadata.setStatusPossiblySensitive(possiblySensitive);

    // For twimg facets the photo URL is read from the term payload, unless one is already set.
    if (twimgFacet && photoAccessor != null && !metadata.isSetNativePhotoUrl()) {
      final int termId = twitterReader.getTermID(term);
      final BytesRef payload = termId == EarlybirdIndexSegmentAtomicReader.TERM_NOT_FOUND
          ? null
          : photoAccessor.getTermPayload(termId);
      if (payload != null) {
        metadata.setNativePhotoUrl(payload.utf8ToString());
      }
    }

    if (debugMode <= 3) {
      return;
    }

    // Append one debug line to whatever explanation is already present.
    final String previousExplanation =
        metadata.isSetExplanation() ? metadata.getExplanation() : "";
    final String debugLine = String.format(
        "TweetId=%d (%s %s), UserId=%d (%s %s), Term=%s\n",
        tweetId,
        tweetOffensive ? "OFFENSIVE" : "",
        tweetSensitive ? "SENSITIVE" : "",
        authorId,
        userTable.isSet(authorId, UserTable.ANTISOCIAL_BIT) ? "ANTISOCIAL" : "",
        userTable.isSet(authorId, UserTable.NSFW_BIT) ? "NSFW" : "",
        term);
    metadata.setExplanation(previousExplanation + debugLine);
  }