private[this] def splitChunks()

in src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanChunker.scala [105:127]


  /**
    * Splits `text` into a list of [[ChunkMatch]]es by applying the patterns in
    * `POS_PATTERNS` in the order given by `CHUNKING_ORDER`.
    *
    * - Empty text yields an empty list.
    * - Text whose first character is a space character (per `Char.isSpaceChar`) is
    *   returned whole as a single `Space` chunk.
    * - Otherwise every pattern is scanned over the text; a match is accepted only if
    *   `disjoint` holds between it and every chunk accepted so far, so patterns that
    *   come earlier in `CHUNKING_ORDER` take precedence over later ones.
    *
    * @param text the text to chunk
    * @return the accepted chunks sorted by start offset, passed through
    *         `fillInUnmatched` with `Foreign` (presumably to cover spans that no
    *         pattern matched -- confirm against that helper)
    */
  private[this] def splitChunks(text: String): List[ChunkMatch] = {
    if (text.isEmpty) {
      // Nothing to chunk; this guard also keeps charAt(0) below from throwing
      // StringIndexOutOfBoundsException on empty input.
      Nil
    } else if (text.charAt(0).isSpaceChar) {
      List(ChunkMatch(0, text.length, text, Space))
    } else {
      val chunksBuf = new ListBuffer[ChunkMatch]()
      // Total length of accepted matches; once it reaches text.length the
      // remaining patterns are skipped.
      var matchedLen = 0
      CHUNKING_ORDER.foreach { pos =>
        if (matchedLen < text.length) {
          val m = POS_PATTERNS(pos).matcher(text)
          while (m.find()) {
            val cm = ChunkMatch(m.start, m.end, m.group(), pos)
            // Keep the candidate only if it is disjoint from every accepted chunk.
            if (chunksBuf.forall(cm.disjoint)) {
              chunksBuf += cm
              matchedLen += cm.end - cm.start
            }
          }
        }
      }

      // Chunks were collected pattern by pattern; restore positional order.
      val chunks = chunksBuf.sortBy(cm => cm.start).toList
      fillInUnmatched(text, chunks, Foreign)
    }
  }