in src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanChunker.scala [180:190]
/**
 * Split the input text into a sequence of KoreanTokens, each carrying its
 * chunk text, part-of-speech tag, character offset into the original input,
 * and length.
 *
 * The input is first split on spaces (spaces kept as their own segments),
 * each segment is further divided into chunks, and then the absolute offset
 * of every chunk is recovered by scanning forward through the original
 * string — the running search position guarantees repeated chunk texts
 * resolve to successive occurrences.
 *
 * @param input text to chunk
 * @return tokens in their original left-to-right order
 */
def chunk(input: CharSequence): Seq[KoreanToken] = {
  val text = input.toString

  // Space-preserving split, then per-segment chunking.
  val chunkMatches = splitBySpaceKeepingSpace(text).flatMap(segment => splitChunks(segment))

  // Fold left-to-right, tracking where to resume the offset search so that
  // duplicate chunk texts map to their correct (successive) positions.
  // Tokens are prepended (O(1)) and reversed once at the end.
  val (tokensReversed, _) = chunkMatches.foldLeft((List.empty[KoreanToken], 0)) {
    case ((acc, searchFrom), chunkMatch) =>
      val start = text.indexOf(chunkMatch.text, searchFrom)
      val token = KoreanToken(chunkMatch.text, chunkMatch.pos, start, chunkMatch.text.length)
      (token :: acc, start + chunkMatch.text.length)
  }

  tokensReversed.reverse
}