extract_hashtags_with_indices

in rb/lib/twitter-text/extractor.rb [277:309]


      # Extracts the hashtags found in +text+ along with their positions.
      #
      # Returns an Array of Hashes, one per hashtag, each of the form
      #   { :hashtag => "text_without_sigil", :indices => [start, end] }
      # where +start+ is <tt>char_begin</tt> of the hash sign and +end+ is
      # <tt>char_end</tt> of the hashtag text within the match. An empty
      # Array is returned when +text+ contains no hash sign at all.
      #
      # Options:
      # <tt>:check_url_overlap</tt>:: when truthy (the default when +options+
      #                               is omitted), URLs are extracted from
      #                               +text+ as well and the combined list is
      #                               passed through
      #                               +remove_overlapping_entities+ before the
      #                               URL entries are discarded again.
      #                               NOTE: passing an options Hash without
      #                               this key disables the check.
      #
      # If a block is given, it is called once per resulting hashtag with the
      # hashtag text, the start index and the end index. The Array is returned
      # in either case.
      def extract_hashtags_with_indices(text, options = {:check_url_overlap => true}) # :yields: hashtag_text, start, end
        # Cheap pre-check: without an ASCII or full-width hash sign there is
        # nothing to extract, so skip the more expensive scan below.
        return [] unless text =~ /[#＃]/

        tags = []
        text.scan(Twitter::TwitterText::Regex[:valid_hashtag]) do |_before, _hash, hash_text|
          match_data = Regexp.last_match

          # A candidate whose trailing text matches :end_hashtag_match is
          # not treated as a hashtag.
          next if match_data.post_match =~ Twitter::TwitterText::Regex[:end_hashtag_match]

          tags << {
            :hashtag => hash_text,
            # Group 2 is the hash sign, group 3 the hashtag text.
            :indices => [match_data.char_begin(2), match_data.char_end(3)]
          }
        end

        if options[:check_url_overlap]
          urls = extract_urls_with_indices(text)
          unless urls.empty?
            # Merge URLs in so overlap resolution can compare them against
            # the hashtags, then keep only the hashtag entities.
            tags.concat(urls)
            tags = remove_overlapping_entities(tags)
            tags.reject! { |entity| !entity[:hashtag] }
          end
        end

        tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
        tags
      end