in src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanChunker.scala [137:158]
/**
 * Produces a gap-free sequence of chunk matches covering `text` from offset 0
 * to `text.length`.
 *
 * The given `chunks` are walked in order. Whenever a chunk starts after the end
 * of the previous one (or after offset 0 for the first chunk), a filler chunk
 * spanning the uncovered range is inserted in front of it, tagged with `pos`.
 * A final filler is appended if the last chunk ends before the end of `text`.
 *
 * @param text   the text the chunk offsets refer to
 * @param chunks chunk matches, expected in ascending, non-overlapping order
 * @param pos    the part-of-speech tag assigned to every filler chunk
 * @return the original chunks interleaved with filler chunks, in text order
 * @throws IllegalStateException if a chunk starts before the previous chunk ended
 */
private[this] def fillInUnmatched(text: String,
                                  chunks: Seq[ChunkMatch],
                                  pos: KoreanPos.Value): List[ChunkMatch] = {
  // Builds the filler chunk for the uncovered range [from, until) of the text.
  def filler(from: Int, until: Int): ChunkMatch =
    ChunkMatch(from, until, text.slice(from, until), pos)

  // Accumulate in reverse (cheap prepend) while tracking the offset up to
  // which the text has been covered so far.
  val (reversed, covered) = chunks.foldLeft((List.empty[ChunkMatch], 0)) {
    case ((acc, cursor), chunk) =>
      if (chunk.start == cursor) {
        // Chunk is contiguous with what came before: nothing to fill.
        (chunk :: acc, chunk.end)
      } else if (chunk.start > cursor) {
        // Gap before this chunk: the filler goes first, then the chunk itself.
        (chunk :: filler(cursor, chunk.start) :: acc, chunk.end)
      } else {
        // Chunk begins inside the already-covered region.
        throw new IllegalStateException("Non-disjoint chunk matches found.")
      }
  }

  // Cover whatever remains after the last chunk, then restore text order.
  val withTail =
    if (covered < text.length) filler(covered, text.length) :: reversed
    else reversed
  withTail.reverse
}