def splitBySpaceKeepingSpace()

in src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanChunker.scala [66:85]


  /**
   * Splits `s` into alternating runs of non-whitespace and whitespace,
   * keeping the whitespace runs as tokens of their own.
   *
   * A whitespace run is a maximal match of the regex `\s+`. Tokens are
   * returned in their original order and no character is dropped, so
   * concatenating the result reproduces the input. For example,
   * `"a  b "` yields `List("a", "  ", "b", " ")`.
   *
   * @param s the text to split
   * @return the tokens in input order; empty when `s` is empty
   */
  def splitBySpaceKeepingSpace(s: CharSequence): Seq[String] = {
    val whitespaceRun = """\s+""".r

    // Fold over the whitespace runs from left to right. The accumulator carries
    // the offset just past the previous run and the tokens collected so far,
    // newest first (prepending to a List is O(1); we reverse once at the end).
    val (tailStart, collected) =
      whitespaceRun.findAllMatchIn(s).foldLeft((0, List.empty[String])) {
        case ((cursor, acc), run) =>
          // Emit the text between the previous run and this one, if there is any.
          val withText =
            if (cursor < run.start) s.subSequence(cursor, run.start).toString :: acc
            else acc
          (run.end, run.matched :: withText)
      }

    // Whatever follows the last whitespace run is the final token.
    val complete =
      if (tailStart < s.length()) s.subSequence(tailStart, s.length()).toString :: collected
      else collected

    complete.reverse
  }