in src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanChunker.scala [66:85]
/**
 * Splits text into alternating runs of non-whitespace and whitespace,
 * keeping each whitespace run as a token of its own instead of discarding it.
 *
 * Whitespace is whatever the regex `\s+` matches. Every character of the input
 * ends up in exactly one token, so concatenating the result reproduces `s`.
 *
 * @param s text to split
 * @return tokens in input order; empty when `s` is empty
 */
def splitBySpaceKeepingSpace(s: CharSequence): Seq[String] = {
  val space = """\s+""".r.pattern
  val m = space.matcher(s)
  // The buffer is only mutated in place, never reassigned.
  val tokens = new ListBuffer[String]()
  // Offset just past the previous whitespace run (0 before the first match).
  var index = 0
  while (m.find()) {
    // Text between the previous whitespace run and this one, if any.
    if (index < m.start) {
      tokens += s.subSequence(index, m.start).toString
    }
    tokens += s.subSequence(m.start, m.end).toString
    index = m.end
  }
  // Trailing text after the last whitespace run (or the whole input when
  // it contains no whitespace at all).
  if (index < s.length()) {
    tokens += s.subSequence(index, s.length()).toString
  }
  tokens.toList
}