parse_tweet

in rb/lib/twitter-text/validation.rb [36:125]

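Computes the weighted length of text against the supplied configuration (or the library default) and returns a ParseResults object describing the weighted length, its permillage of the maximum, whether the text is a valid Tweet, and the display and valid codepoint ranges. A usage sketch follows the listing.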

      def parse_tweet(text, options = {})
        options = DEFAULT_TCO_URL_LENGTHS.merge(options)
        config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
        normalized_text = text.to_nfc
        unless normalized_text.length > 0
          # Nothing to weigh: return the empty result immediately.
          return ParseResults.empty()
        end

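        # Configuration weights are scaled integers (config.scale, 100 in the default
        # configuration), so the arithmetic below stays in the scaled domain until the
        # final division.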
        scale = config.scale
        max_weighted_tweet_length = config.max_weighted_tweet_length
        scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
        transformed_url_length = config.transformed_url_length * scale
        ranges = config.ranges

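        # Pre-extract URL entities (always) and emoji entities (only when the
        # configuration enables emoji parsing), each with codepoint indices.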
        url_entities = Twitter::TwitterText::Extractor.extract_urls_with_indices(normalized_text)
        emoji_entities = config.emoji_parsing_enabled ? Twitter::TwitterText::Extractor.extract_emoji_with_indices(normalized_text) : []

        has_invalid_chars = false
        weighted_count = 0
        offset = 0
        display_offset = 0
        valid_offset = 0

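        # Walk the normalized text one entity or codepoint at a time, accumulating
        # the scaled weight and tracking how far the display and valid ranges extend.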
        while offset < normalized_text.codepoint_length
          char_weight = config.default_weight
          entity_length = 0

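          # A URL starting at this offset is charged the fixed transformed URL
          # length, regardless of how long the URL actually is.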
          url_entities.each do |url_entity|
            if url_entity[:indices].first == offset
              entity_length = url_entity[:indices].last - url_entity[:indices].first
              weighted_count += transformed_url_length
              offset += entity_length
              display_offset += entity_length
              if weighted_count <= scaled_max_weighted_tweet_length
                valid_offset += entity_length
              end
              break
            end
          end

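          # An emoji sequence starting at this offset is charged a single default
          # weight, no matter how many codepoints it spans.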
          emoji_entities.each do |emoji_entity|
            if emoji_entity[:indices].first == offset
              entity_length = emoji_entity[:indices].last - emoji_entity[:indices].first
              weighted_count += char_weight 
              offset += entity_length
              display_offset += entity_length
              if weighted_count <= scaled_max_weighted_tweet_length
                valid_offset += entity_length
              end
              break
            end
          end

          next if entity_length > 0

          if offset < normalized_text.codepoint_length
            code_point = normalized_text[offset]

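            # Weigh the codepoint by the first configured range that contains it;
            # codepoints outside every range keep the default weight.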
            ranges.each do |range|
              if range.contains?(code_point.unpack("U").first)
                char_weight = range.weight
                break
              end
            end

            weighted_count += char_weight

            has_invalid_chars = contains_invalid?(code_point) unless has_invalid_chars
            codepoint_length = code_point.codepoint_length
            offset += codepoint_length
            display_offset += codepoint_length

            if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
              valid_offset += codepoint_length
            end
          end
        end

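        # Convert back out of the scaled domain, and report ranges against the
        # original text by adding back any length lost to NFC normalization.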
        normalized_text_offset = text.codepoint_length - normalized_text.codepoint_length
        scaled_weighted_length = weighted_count / scale
        is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
        permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length

        return ParseResults.new(
          weighted_length: scaled_weighted_length,
          permillage: permillage,
          valid: is_valid,
          display_range_start: 0,
          display_range_end: (display_offset + normalized_text_offset - 1),
          valid_range_start: 0,
          valid_range_end: (valid_offset + normalized_text_offset - 1)
        )
      end
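
A minimal usage sketch, assuming the twitter-text gem is installed and that Validation's methods are callable as module functions (as in the gem's README); the expected numbers assume the default configuration (280-character maximum, URLs weighted as 23).

  require 'twitter-text'

  # "café" weighs 4, the space 1, and the URL the fixed transformed length of 23.
  results = Twitter::TwitterText::Validation.parse_tweet(
    "café https://example.com/some/long/path"
  )

  # ParseResults is built with the keyword arguments seen above; inspect the whole
  # object rather than assuming a particular accessor style.
  puts results.inspect
  # Expected under the default configuration: weighted_length 28, permillage 100, valid true.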