in rb/lib/twitter-text/extractor.rb [277:309]
# Extracts the hashtags contained in +text+ along with their positions.
#
# text    - the string to scan. When it is nil, or contains neither "#" nor
#           the fullwidth number sign (U+FF03), an empty array is returned
#           without running the hashtag regex.
# options - optional Hash. When options[:check_url_overlap] is truthy, URLs are
#           extracted from the same text and the combined entity list is passed
#           through remove_overlapping_entities before the URL entities are
#           dropped again. The default applies only when the argument is
#           omitted: a caller-supplied hash without :check_url_overlap
#           disables the check.
#
# Returns an Array of Hashes of the form
#   { :hashtag => "text", :indices => [start, end] }
# where +start+ is the offset of the hash character and +end+ is the offset
# just past the hashtag text, both as reported by MatchData#char_begin /
# MatchData#char_end (helpers defined outside this method).
#
# If a block is given, it is called once per returned hashtag with the hashtag
# text, the start offset and the end offset.
def extract_hashtags_with_indices(text, options = {:check_url_overlap => true})
  # Cheap pre-check: a hashtag must start with "#" or fullwidth "#" (U+FF03).
  return [] unless text =~ /[#\uFF03]/

  tags = []
  text.scan(Twitter::TwitterText::Regex[:valid_hashtag]) do |_before, _hash, hash_text|
    match_data = Regexp.last_match
    start_position = match_data.char_begin(2) # group 2 is the hash character
    end_position = match_data.char_end(3)     # group 3 is the hashtag text

    # Discard the candidate when what follows it matches :end_hashtag_match.
    next if match_data.post_match =~ Twitter::TwitterText::Regex[:end_hashtag_match]

    tags << {
      :hashtag => hash_text,
      :indices => [start_position, end_position]
    }
  end

  if options[:check_url_overlap]
    urls = extract_urls_with_indices(text)
    unless urls.empty?
      # Mix the URLs in so the overlap filter can weigh hashtags against them,
      # then keep only the hashtag entities that remain.
      tags.concat(urls)
      tags = remove_overlapping_entities(tags)
      tags.select! { |entity| entity[:hashtag] }
    end
  end

  tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
  tags
end