divide_up_dictionary_range

in lib/twitter_cldr/segmentation/brahmic_break_engine.rb [55:125]


      # Scans the text from the cursor's current position up to end_pos and
      # yields one offset per loop iteration in which a non-empty span was
      # consumed. Each yielded value is the iteration's starting offset plus
      # the span length accumulated during that iteration.
      #
      # cursor  - text cursor; this method uses its position, position=,
      #           codepoint and advance.
      # end_pos - offset at which scanning stops (exclusive).
      #
      # Returns an Enumerator over the same offsets when called without a
      # block. Returns without yielding anything when fewer than
      # min_word_span positions lie between the cursor and end_pos.
      def divide_up_dictionary_range(cursor, end_pos)
        return to_enum(__method__, cursor, end_pos) unless block_given?

        remaining = end_pos - cursor.position
        return if remaining < min_word_span

        state = EngineState.new(
          cursor: cursor, end_pos: end_pos, words: PossibleWordList.new(lookahead)
        )

        while cursor.position < end_pos
          # Every pass starts a fresh span at the cursor's current offset.
          state.current = cursor.position
          state.word_length = 0

          # Ask the current slot of the word list how many dictionary
          # candidates begin at this position.
          candidates = state.words[state.words_found].candidates(
            cursor, dictionary, end_pos
          )

          if candidates >= 1
            # With several candidates, choose one before accepting; a single
            # candidate is accepted as is.
            mark_best_candidate(cursor, end_pos, state) if candidates > 1

            # NOTE(review): accept_marked presumably leaves the cursor at the
            # end of the accepted word and returns its length - confirm
            # against PossibleWord.
            state.word_length = state.words[state.words_found].accept_marked(cursor)
            state.words_found += 1
          end

          # Only a short (or missing) word with text still remaining triggers
          # a look at what follows.
          if cursor.position < end_pos && state.word_length < root_combine_threshold
            upcoming = state.words[state.words_found]
            following_candidates = upcoming.candidates(cursor, dictionary, end_pos)

            # Resynchronise when nothing from the dictionary follows and
            # either no word was found in this pass or the longest prefix
            # reported by the look-ahead is under the combine threshold.
            resync = following_candidates <= 0 &&
              (state.word_length.zero? || upcoming.longest_prefix < prefix_combine_threshold)

            if resync
              advance_to_plausible_word_boundary(cursor, end_pos, state)
            else
              # The look-ahead may have moved the cursor; put it back at the
              # end of the span consumed so far.
              cursor.position = state.current + state.word_length
            end
          end

          # Absorb any run of code points from mark_set into the span so a
          # boundary is never reported directly in front of one of them.
          while cursor.position < end_pos && mark_set.include?(cursor.codepoint)
            cursor.advance
            state.word_length += 1
          end

          # advance_past_suffix is a callable returning the number of extra
          # positions to add to the span. The compound assignment is kept
          # deliberately: it reads word_length before invoking the callable.
          state.word_length += advance_past_suffix.call(cursor, end_pos, state)

          # Report a boundary only when something was actually consumed.
          yield state.current + state.word_length if state.word_length > 0
        end
      end