in rb/lib/twitter-text/validation.rb [36:125]
# Parses +text+ as a tweet and reports its weighted length, validity and the
# display/valid codepoint ranges.
#
# The text is NFC-normalized, then walked one codepoint offset at a time:
# * a URL entity starting at the current offset costs the configured
#   +transformed_url_length+, regardless of how long the URL really is;
# * an emoji entity starting at the current offset costs the configured
#   +default_weight+ once for the whole sequence;
# * any other character costs the weight of the first configured range that
#   contains its codepoint, or +default_weight+ when no range matches.
#
# All weights are accumulated in "scaled" units (multiplied by +config.scale+)
# and divided back down (integer division) for the reported length.
#
# @param text [String] the raw tweet text
# @param options [Hash] merged over DEFAULT_TCO_URL_LENGTHS; +:config+ may
#   supply the configuration to use instead of the default configuration
# @return [ParseResults] +ParseResults.empty+ when the normalized text is
#   empty; otherwise a result carrying +weighted_length+, +permillage+,
#   +valid+ and the inclusive display/valid range bounds
def parse_tweet(text, options = {})
  options = DEFAULT_TCO_URL_LENGTHS.merge(options)
  config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
  normalized_text = text.to_nfc

  # The empty result must actually be returned; without the early exit the
  # code below would report range ends of -1 for empty input.
  return ParseResults.empty if normalized_text.empty?

  scale = config.scale
  max_weighted_tweet_length = config.max_weighted_tweet_length
  scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
  transformed_url_length = config.transformed_url_length * scale
  ranges = config.ranges

  url_entities = Twitter::TwitterText::Extractor.extract_urls_with_indices(normalized_text)
  emoji_entities =
    if config.emoji_parsing_enabled
      Twitter::TwitterText::Extractor.extract_emoji_with_indices(normalized_text)
    else
      []
    end

  # The normalized text is never mutated, so its length is loop-invariant.
  text_length = normalized_text.codepoint_length

  has_invalid_chars = false
  weighted_count = 0
  offset = 0
  display_offset = 0
  valid_offset = 0

  while offset < text_length
    char_weight = config.default_weight
    entity_length = 0

    # A URL starting here is consumed whole at the fixed transformed length.
    url_entity = url_entities.find { |entity| entity[:indices].first == offset }
    if url_entity
      entity_length = url_entity[:indices].last - url_entity[:indices].first
      weighted_count += transformed_url_length
      offset += entity_length
      display_offset += entity_length
      valid_offset += entity_length if weighted_count <= scaled_max_weighted_tweet_length
    end

    # An emoji sequence starting here (possibly right after a URL consumed
    # above) is consumed whole and weighted as one default-weight character.
    emoji_entity = emoji_entities.find { |entity| entity[:indices].first == offset }
    if emoji_entity
      entity_length = emoji_entity[:indices].last - emoji_entity[:indices].first
      weighted_count += char_weight
      offset += entity_length
      display_offset += entity_length
      valid_offset += entity_length if weighted_count <= scaled_max_weighted_tweet_length
    end

    next if entity_length > 0

    # Re-check the bound: a URL may have advanced the offset before a
    # zero-length entity reset entity_length.
    next unless offset < text_length

    code_point = normalized_text[offset]
    codepoint_value = code_point.unpack("U").first
    matching_range = ranges.find { |range| range.contains?(codepoint_value) }
    char_weight = matching_range.weight if matching_range
    weighted_count += char_weight

    # Once an invalid character is seen the flag stays set, which also
    # freezes valid_offset for the rest of the text.
    has_invalid_chars ||= contains_invalid?(code_point)

    codepoint_length = code_point.codepoint_length
    offset += codepoint_length
    display_offset += codepoint_length
    if !has_invalid_chars && weighted_count <= scaled_max_weighted_tweet_length
      valid_offset += codepoint_length
    end
  end

  # Shift the range ends by the codepoint-count difference between the
  # original and the normalized text.
  normalized_text_offset = text.codepoint_length - normalized_text.codepoint_length
  scaled_weighted_length = weighted_count / scale
  is_valid = !has_invalid_chars && scaled_weighted_length <= max_weighted_tweet_length
  permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length

  ParseResults.new(
    weighted_length: scaled_weighted_length,
    permillage: permillage,
    valid: is_valid,
    display_range_start: 0,
    display_range_end: display_offset + normalized_text_offset - 1,
    valid_range_start: 0,
    valid_range_end: valid_offset + normalized_text_offset - 1
  )
end