tokenizer

in lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb [15:48]


      # Builds the tokenizer on first use and memoizes it in @tokenizer.
      #
      # Each TokenRecognizer pairs a token type with the regex that
      # identifies it. The sources of all those regexes are OR-ed together
      # inside a single capture group to form the splitter regex handed to
      # Tokenizer.new alongside the recognizers.
      #
      # @return [Tokenizer] the memoized tokenizer instance
      def tokenizer
        @tokenizer ||= begin
          recognizers = [
            # special descriptors: the literal forms "-x", "x.x", "0.x", "x.0"
            TokenRecognizer.new(:negative, /-x/),
            TokenRecognizer.new(:improper_fraction, /x\.x/),
            TokenRecognizer.new(:proper_fraction, /0\.x/),
            TokenRecognizer.new(:master, /x\.0/),

            # rule body elements
            TokenRecognizer.new(:equals, /=/),
            # "%name" or "%%name"; the name may contain word characters and "-"
            TokenRecognizer.new(:rule, /%%?[[:word:]-]+/),
            TokenRecognizer.new(:right_arrow, />/),
            TokenRecognizer.new(:left_arrow, /</),
            TokenRecognizer.new(:open_bracket, /\[/),
            TokenRecognizer.new(:close_bracket, /\]/),
            # NOTE(review): this pattern was truncated right after "[0" in the
            # reviewed source, apparently cut at the "#" inside the character
            # class. It is restored here as "a 0 or # followed by one or more
            # of 0 # , ." (e.g. "#,##0.00") -- confirm against upstream.
            TokenRecognizer.new(:decimal, /[0#][0#,\.]+/),
            # "$( ... )$"; note that ".*" is greedy
            TokenRecognizer.new(:plural, /\$\(.*\)\$/),

            # rule terminator
            TokenRecognizer.new(:semicolon, /;/),
          ]

          # The splitter is built before :plaintext is appended, so the empty
          # pattern below never becomes one of its alternatives (an empty
          # alternative would match at every position).
          splitter_source = recognizers.map { |r| r.regex.source }.join("|")
          splitter = Regexp.new("(#{splitter_source})")

          Tokenizer.new(
            recognizers + [
              # The empty regex matches any string; presumably the fallback
              # for text that no other recognizer claims -- verify in Tokenizer.
              TokenRecognizer.new(:plaintext, //)
            ], splitter
          )
        end
      end