public static ThriftSchema buildThriftSchema(EarlybirdCluster cluster)

in src/java/com/twitter/search/common/schema/earlybird/EarlybirdSchemaCreateTool.java [484:701]


  /**
   * Builds the Thrift schema for the given Earlybird cluster.
   *
   * <p>The schema is assembled on a single {@link EarlybirdSchemaBuilder} in this order: schema
   * version, indexed fields, photo URL facet and out-of-order settings, column stride fields
   * (CSFs), feature configurations, and facet configurations. Each step lives in its own private
   * helper, and the helpers are invoked in the same order in which the settings were declared
   * when this was a single method.
   *
   * @param cluster the cluster to build the schema for. It is handed to the schema builder,
   *     selects the default for the CSF load-into-RAM flag (false only for
   *     {@link EarlybirdCluster#FULL_ARCHIVE}), and filters which feature configurations are added.
   * @return the schema returned by the builder's {@code build()} call.
   */
  public static ThriftSchema buildThriftSchema(EarlybirdCluster cluster) {
    EarlybirdSchemaBuilder builder = new EarlybirdSchemaBuilder(
        new EarlybirdFieldConstants(), cluster, TokenStreamSerializer.Version.VERSION_2);

    builder.withSchemaVersion(
        FlushVersion.CURRENT_FLUSH_VERSION.getVersionNumber(),
        FlushVersion.CURRENT_FLUSH_VERSION.getMinorVersion(),
        FlushVersion.CURRENT_FLUSH_VERSION.getDescription(),
        FlushVersion.CURRENT_FLUSH_VERSION.isOfficial());

    declareIndexedFields(builder);
    declarePhotoUrlFacetAndOutOfOrderFields(builder);

    // Default for the CSF load-into-RAM flag: true for every cluster except FULL_ARCHIVE.
    boolean loadCSFIntoRAMDefault = cluster != EarlybirdCluster.FULL_ARCHIVE;
    declareColumnStrideFields(builder, loadCSFIntoRAMDefault);

    declareFeatureConfigurations(builder, cluster);
    declareFacetConfigs(builder);
    return builder.build();
  }

  /**
   * Declares the partition field and all indexed term/text fields on the given builder.
   *
   * @param builder the schema builder to add the field declarations to.
   */
  private static void declareIndexedFields(EarlybirdSchemaBuilder builder) {
    // ID field, used for partitioning
    builder.withPartitionFieldId(0)
        .withSortableLongTermField(EarlybirdFieldConstant.ID_FIELD.getFieldName())
        // Text Fields that are searched by default
        .withTextField(EarlybirdFieldConstant.RESOLVED_LINKS_TEXT_FIELD.getFieldName(), true)
        .withSearchFieldByDefault(
            EarlybirdFieldConstant.RESOLVED_LINKS_TEXT_FIELD.getFieldName(), 0.1f)
        .withPretokenizedTextField(EarlybirdFieldConstant.TEXT_FIELD.getFieldName(), true)
        .withSearchFieldByDefault(EarlybirdFieldConstant.TEXT_FIELD.getFieldName(), 1.0f);

    // NOTE(review): the chain is restarted on `builder` here, presumably because
    // withTweetSpecificNormalization() is not available on the type returned by the calls
    // above -- confirm before merging the two chains.
    builder.withTweetSpecificNormalization(EarlybirdFieldConstant.TEXT_FIELD.getFieldName())
        .withTextField(EarlybirdFieldConstant.TOKENIZED_FROM_USER_FIELD.getFieldName(), true)
        .withSearchFieldByDefault(
            EarlybirdFieldConstant.TOKENIZED_FROM_USER_FIELD.getFieldName(), 0.2f)

        // Text fields not searched by default
        .withTextField(EarlybirdFieldConstant.FROM_USER_FIELD.getFieldName(), false)
        .withTextField(EarlybirdFieldConstant.TO_USER_FIELD.getFieldName(), false)

        // cards are not searched by default, and have weight 0.
        .withPretokenizedTextField(EarlybirdFieldConstant.CARD_TITLE_FIELD.getFieldName(), false)
        .withPretokenizedTextField(
            EarlybirdFieldConstant.CARD_DESCRIPTION_FIELD.getFieldName(), false)
        .withTextField(EarlybirdFieldConstant.CARD_LANG.getFieldName(), false)

        // Out-of-order append fields
        .withLongTermField(EarlybirdFieldConstant.LIKED_BY_USER_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.RETWEETED_BY_USER_ID.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.REPLIED_TO_BY_USER_ID.getFieldName())

        // No Position fields, sorted alphabetically
        .withPretokenizedNoPositionField(EarlybirdFieldConstant.CARD_DOMAIN_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.CARD_NAME_FIELD.getFieldName())
        .withIntTermField(EarlybirdFieldConstant.CREATED_AT_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.ENTITY_ID_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.GEO_HASH_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.FROM_USER_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.IN_REPLY_TO_TWEET_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.IN_REPLY_TO_USER_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.RETWEET_SOURCE_TWEET_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.RETWEET_SOURCE_USER_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.CONVERSATION_ID_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.PLACE_ID_FIELD.getFieldName())
        .withTextField(EarlybirdFieldConstant.PLACE_FULL_NAME_FIELD.getFieldName(), false)
        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.PLACE_COUNTRY_CODE_FIELD.getFieldName())
        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.PROFILE_GEO_COUNTRY_CODE_FIELD.getFieldName())
        .withTextField(EarlybirdFieldConstant.PROFILE_GEO_REGION_FIELD.getFieldName(), false)
        .withTextField(EarlybirdFieldConstant.PROFILE_GEO_LOCALITY_FIELD.getFieldName(), false)
        // Term text lookup is enabled only after the two fields have been declared above.
        .withTermTextLookup(EarlybirdFieldConstant.FROM_USER_ID_FIELD.getFieldName())
        .withTermTextLookup(EarlybirdFieldConstant.IN_REPLY_TO_USER_ID_FIELD.getFieldName())
        .withPretokenizedNoPositionField(EarlybirdFieldConstant.HASHTAGS_FIELD.getFieldName())
        .withIndexedNotTokenizedField(ImmutableSchema.HF_PHRASE_PAIRS_FIELD)
        .withIndexedNotTokenizedField(ImmutableSchema.HF_TERM_PAIRS_FIELD)
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.IMAGE_LINKS_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.INTERNAL_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.ISO_LANGUAGE_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.LINKS_FIELD.getFieldName())
        .withIntTermField(EarlybirdFieldConstant.LINK_CATEGORY_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.MENTIONS_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.NEWS_LINKS_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.NORMALIZED_SOURCE_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.PLACE_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.SOURCE_FIELD.getFieldName())
        .withPretokenizedNoPositionField(EarlybirdFieldConstant.STOCKS_FIELD.getFieldName())
        .withIndexedNotTokenizedField(EarlybirdFieldConstant.VIDEO_LINKS_FIELD.getFieldName())
        .withIntTermField(NORMALIZED_FAVORITE_COUNT_GREATER_THAN_OR_EQUAL_TO_FIELD.getFieldName())
        .withIntTermField(NORMALIZED_REPLY_COUNT_GREATER_THAN_OR_EQUAL_TO_FIELD.getFieldName())
        .withIntTermField(NORMALIZED_RETWEET_COUNT_GREATER_THAN_OR_EQUAL_TO_FIELD.getFieldName())

        .withIntTermField(EarlybirdFieldConstant.COMPOSER_SOURCE.getFieldName())

        .withLongTermField(EarlybirdFieldConstant.QUOTED_TWEET_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.QUOTED_USER_ID_FIELD.getFieldName())
        .withLongTermField(EarlybirdFieldConstant.DIRECTED_AT_USER_ID_FIELD.getFieldName())

        // Named entity fields
        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.NAMED_ENTITY_FROM_URL_FIELD.getFieldName(), true)
        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.NAMED_ENTITY_FROM_TEXT_FIELD.getFieldName(), true)
        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.NAMED_ENTITY_WITH_TYPE_FROM_URL_FIELD.getFieldName(), true)
        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.NAMED_ENTITY_WITH_TYPE_FROM_TEXT_FIELD.getFieldName(), true)

        // camelCase-tokenized user handles and tokenized user names, not searchable by default
        .withPretokenizedTextField(
            EarlybirdFieldConstant.CAMELCASE_USER_HANDLE_FIELD.getFieldName(), false)
        .withPretokenizedTextField(
            EarlybirdFieldConstant.TOKENIZED_USER_NAME_FIELD.getFieldName(), false)

        .withIndexedNotTokenizedField(
            EarlybirdFieldConstant.SPACE_ID_FIELD.getFieldName())
        .withTextField(EarlybirdFieldConstant.SPACE_ADMIN_FIELD.getFieldName(), false)
        .withPretokenizedTextField(EarlybirdFieldConstant.SPACE_TITLE_FIELD.getFieldName(), false)
        .withTextField(EarlybirdFieldConstant.TOKENIZED_SPACE_ADMIN_FIELD.getFieldName(), true)
        .withPretokenizedTextField(
            EarlybirdFieldConstant.CAMELCASE_TOKENIZED_SPACE_ADMIN_FIELD.getFieldName(), false)
        .withPretokenizedTextField(
            EarlybirdFieldConstant.TOKENIZED_SPACE_ADMIN_DISPLAY_NAME_FIELD.getFieldName(), false)
        .withPretokenizedTextField(
            EarlybirdFieldConstant.URL_DESCRIPTION_FIELD.getFieldName(), false)
        .withPretokenizedTextField(
            EarlybirdFieldConstant.URL_TITLE_FIELD.getFieldName(), false);
  }

  /**
   * Declares the photo URL facet field and enables out-of-order handling for the three
   * "by user id" engagement fields (liked by, retweeted by, replied to by).
   *
   * @param builder the schema builder to add the settings to.
   */
  private static void declarePhotoUrlFacetAndOutOfOrderFields(EarlybirdSchemaBuilder builder) {
    builder
        .withPhotoUrlFacetField(EarlybirdFieldConstant.TWIMG_LINKS_FIELD.getFieldName())
        .withOutOfOrderEnabledForField(
            EarlybirdFieldConstant.LIKED_BY_USER_ID_FIELD.getFieldName())
        .withOutOfOrderEnabledForField(
            EarlybirdFieldConstant.RETWEETED_BY_USER_ID.getFieldName())
        .withOutOfOrderEnabledForField(
            EarlybirdFieldConstant.REPLIED_TO_BY_USER_ID.getFieldName());
  }

  /**
   * Declares all column stride fields (CSFs), including the encoded and extended encoded tweet
   * feature fields.
   *
   * @param builder the schema builder to add the CSF declarations to.
   * @param loadCSFIntoRAMDefault value passed as the last argument for every CSF that does not
   *     hard-code {@code true} for it.
   */
  private static void declareColumnStrideFields(
      EarlybirdSchemaBuilder builder, boolean loadCSFIntoRAMDefault) {
    builder
        .withColumnStrideField(EarlybirdFieldConstants.ENCODED_TWEET_FEATURES_FIELD_NAME,
            ThriftCSFType.INT, NUMBER_OF_INTEGERS_FOR_FEATURES,
            true, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.FROM_USER_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, /* the full archive loads this field into RAM */ true)
        .withColumnStrideField(EarlybirdFieldConstant.SHARED_STATUS_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.CARD_TYPE_CSF_FIELD.getFieldName(),
            ThriftCSFType.BYTE, 1, false, loadCSFIntoRAMDefault)
        // CSF Used by archive mappers
        .withColumnStrideField(EarlybirdFieldConstant.CREATED_AT_CSF_FIELD.getFieldName(),
            ThriftCSFType.INT, 1, false, /* the full archive loads this field into RAM */ true)
        .withColumnStrideField(EarlybirdFieldConstant.ID_CSF_FIELD.getFieldName(),
            ThriftCSFType.LONG, 1, false, /* the full archive loads this field into RAM */ true)
        .withColumnStrideField(EarlybirdFieldConstant.LAT_LON_CSF_FIELD.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.CONVERSATION_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.QUOTED_TWEET_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.QUOTED_USER_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.CARD_LANG_CSF.getFieldName(),
            ThriftCSFType.INT, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.CARD_URI_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.DIRECTED_AT_USER_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault)
        .withColumnStrideField(
            EarlybirdFieldConstant.EXCLUSIVE_CONVERSATION_AUTHOR_ID_CSF.getFieldName(),
            ThriftCSFType.LONG, 1, false, loadCSFIntoRAMDefault);

    builder.withColumnStrideField(
        EarlybirdFieldConstants.EXTENDED_ENCODED_TWEET_FEATURES_FIELD_NAME,
        ThriftCSFType.INT, NUMBER_OF_INTEGERS_FOR_EXTENDED_FEATURES,
        true, loadCSFIntoRAMDefault);
  }

  /**
   * Adds every entry of {@code FEATURE_CONFIGURATION_MAP} whose field constant is valid in the
   * given cluster.
   *
   * @param builder the schema builder to add the feature configurations to.
   * @param cluster the cluster used to filter the feature configurations.
   */
  private static void declareFeatureConfigurations(
      EarlybirdSchemaBuilder builder, EarlybirdCluster cluster) {
    for (Map.Entry<String, FeatureConfiguration> entry : FEATURE_CONFIGURATION_MAP.entrySet()) {
      String fullName = entry.getKey();
      // The base name is resolved before the cluster check, as in the original single method.
      String baseName = getBaseFieldName(fullName);
      EarlybirdFieldConstant fieldConstant = EarlybirdFieldConstants.getFieldConstant(fullName);
      if (fieldConstant.isValidFieldInCluster(cluster)) {
        builder.withFeatureConfiguration(baseName, fullName, entry.getValue());
      }
    }
  }

  /**
   * Adds the facet settings for all facet fields.
   *
   * @param builder the schema builder to add the facet settings to.
   */
  private static void declareFacetConfigs(EarlybirdSchemaBuilder builder) {
    // Add facet settings for facet fields
    // boolean args are respectively whether to use skiplist, whether offensive, whether to use CSF
    builder
        .withFacetConfigs(EarlybirdFieldConstant.MENTIONS_FIELD.getFieldName(),
            EarlybirdFieldConstant.MENTIONS_FACET, true, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.HASHTAGS_FIELD.getFieldName(),
            EarlybirdFieldConstant.HASHTAGS_FACET, true, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.STOCKS_FIELD.getFieldName(),
            EarlybirdFieldConstant.STOCKS_FACET, true, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.IMAGE_LINKS_FIELD.getFieldName(),
            EarlybirdFieldConstant.IMAGES_FACET, true, true, false)
        .withFacetConfigs(EarlybirdFieldConstant.VIDEO_LINKS_FIELD.getFieldName(),
            EarlybirdFieldConstant.VIDEOS_FACET, true, true, false)
        .withFacetConfigs(EarlybirdFieldConstant.NEWS_LINKS_FIELD.getFieldName(),
            EarlybirdFieldConstant.NEWS_FACET, true, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.ISO_LANGUAGE_FIELD.getFieldName(),
            EarlybirdFieldConstant.LANGUAGES_FACET, false, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.SOURCE_FIELD.getFieldName(),
            EarlybirdFieldConstant.SOURCES_FACET, false, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.TWIMG_LINKS_FIELD.getFieldName(),
            EarlybirdFieldConstant.TWIMG_FACET, true, true, false)
        .withFacetConfigs(EarlybirdFieldConstant.FROM_USER_ID_CSF.getFieldName(),
            EarlybirdFieldConstant.FROM_USER_ID_FACET, false, false, true /* facet on CSF */)
        .withFacetConfigs(EarlybirdFieldConstant.SHARED_STATUS_ID_CSF.getFieldName(),
            EarlybirdFieldConstant.RETWEETS_FACET, false, false, true /* facet on CSF */)
        .withFacetConfigs(EarlybirdFieldConstant.LINKS_FIELD.getFieldName(),
            EarlybirdFieldConstant.LINKS_FACET, true, false, false)
        // The next three calls pass no separate facet name, only the field name.
        .withFacetConfigs(
            EarlybirdFieldConstant.NAMED_ENTITY_WITH_TYPE_FROM_URL_FIELD.getFieldName(),
            true, false, false)
        .withFacetConfigs(
            EarlybirdFieldConstant.NAMED_ENTITY_WITH_TYPE_FROM_TEXT_FIELD.getFieldName(),
            true, false, false)
        .withFacetConfigs(
            EarlybirdFieldConstant.ENTITY_ID_FIELD.getFieldName(),
            true, false, false)
        .withFacetConfigs(EarlybirdFieldConstant.SPACE_ID_FIELD.getFieldName(),
            EarlybirdFieldConstant.SPACES_FACET, true, false, false);
  }