def chunk()

in src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanChunker.scala [180:190]


  /**
   * Split input text into a sequence of KoreanTokens, each annotated with its
   * part-of-speech-like chunk type and its character offset in the input.
   *
   * @param input Input text.
   * @return Sequence of KoreanTokens in original text order.
   */
  def chunk(input: CharSequence): Seq[KoreanToken] = {
    val s = input.toString
    // Fold over the chunk matches, carrying (tokens-in-reverse, searchFrom):
    // searchFrom is the offset after the previous match, so indexOf locates
    // each chunk's true start even when identical text occurs multiple times.
    val (tokens, _) = splitBySpaceKeepingSpace(s)
      .flatMap(splitChunks)
      .foldLeft((List[KoreanToken](), 0)) {
        case ((acc, searchFrom), m: ChunkMatch) =>
          val segStart = s.indexOf(m.text, searchFrom)
          (KoreanToken(m.text, m.pos, segStart, m.text.length) :: acc, segStart + m.text.length)
      }
    // Tokens were prepended (O(1)); reverse once to restore input order.
    tokens.reverse
  }