in src/main/scala/com/twitter/penguin/korean/phrase_extractor/KoreanPhraseExtractor.scala [74:98]
/**
 * Trims a phrase chunk at both ends.
 *
 * Leading phrases are dropped until one whose `pos` is in `PhraseHeadPoses` is reached, and
 * trailing phrases are dropped until one whose `pos` is in `PhrasTailPoses` is reached.
 * Afterwards `Space` tokens are stripped from the front of the first surviving phrase and
 * from the back of the last surviving phrase; phrases in between are returned untouched.
 *
 * @param phrases the phrase chunk to trim
 * @return the trimmed chunk, which is empty when no phrase survives the trimming
 */
private def trimPhraseChunk(phrases: KoreanPhraseChunk): KoreanPhraseChunk = {
  // True for a phrase that is not allowed to open the chunk.
  def cannotStartPhrase(candidate: KoreanPhrase): Boolean =
    !PhraseHeadPoses.contains(candidate.pos)

  // True for a phrase that is not allowed to close the chunk.
  def cannotEndPhrase(candidate: KoreanPhrase): Boolean =
    !PhrasTailPoses.contains(candidate.pos)

  // Rebuilds a phrase with Space tokens removed from the requested edge(s).
  def stripEdgeSpaces(phrase: KoreanPhrase, leading: Boolean, trailing: Boolean): KoreanPhrase = {
    val afterLeading =
      if (leading) phrase.tokens.dropWhile(_.pos == Space) else phrase.tokens
    val afterTrailing =
      if (trailing) afterLeading.reverse.dropWhile(_.pos == Space).reverse else afterLeading
    KoreanPhrase(afterTrailing, phrase.pos)
  }

  // Step 1: cut away phrases at either end that cannot start / finish the chunk.
  val edgeTrimmed: Seq[KoreanPhrase] =
    phrases.dropWhile(cannotStartPhrase).reverse.dropWhile(cannotEndPhrase).reverse

  // Step 2: only the outermost phrases lose their outward-facing spaces. A chunk holding a
  // single phrase is both first and last, so that phrase is stripped on both sides.
  val lastIndex = edgeTrimmed.length - 1
  val spaceTrimmed: Seq[KoreanPhrase] = edgeTrimmed.zipWithIndex.map { case (phrase, index) =>
    val isFirst = index == 0
    val isLast = index == lastIndex
    if (isFirst || isLast) stripEdgeSpaces(phrase, leading = isFirst, trailing = isLast)
    else phrase
  }
  spaceTrimmed
}