in src/main/scala/com/twitter/penguin/korean/phrase_extractor/KoreanPhraseExtractor.scala [307:329]
/**
  * Extracts phrases from a sequence of tokens.
  *
  * The tokens are run through `collapsePos`, `getCandidatePhraseChunks` and
  * `permutateCadidates`; each resulting chunk is trimmed with `trimPhraseChunk`
  * and its tokens are flattened into a single [[KoreanPhrase]]. Tokens tagged
  * `KoreanPos.Hashtag` or `KoreanPos.CashTag` are additionally wrapped as
  * single-token phrases carrying that same tag.
  *
  * @param tokens      tokens to extract phrases from
  * @param filterSpam  forwarded unchanged to `getCandidatePhraseChunks`
  * @param addHashtags when true, the hashtag/cashtag phrases are appended after
  *                    the chunk-derived phrases; when false they are left out
  * @return the chunk-derived phrases, followed by the tag phrases (in token
  *         order) if `addHashtags` is set
  */
def extractPhrases(tokens: Seq[KoreanToken],
                   filterSpam: Boolean = false,
                   addHashtags: Boolean = true): Seq[KoreanPhrase] = {
  // Each hashtag / cashtag token becomes a one-token phrase with the matching tag.
  val tagPhrases = tokens.collect {
    case tag: KoreanToken if tag.pos == KoreanPos.Hashtag =>
      KoreanPhrase(Seq(tag), KoreanPos.Hashtag)
    case tag: KoreanToken if tag.pos == KoreanPos.CashTag =>
      KoreanPhrase(Seq(tag), KoreanPos.CashTag)
  }

  // collapse -> candidate chunks -> permutations
  val expandedChunks =
    permutateCadidates(getCandidatePhraseChunks(collapsePos(tokens), filterSpam))

  // Trim every chunk, then merge the tokens of its parts into one phrase.
  val chunkPhrases = expandedChunks.map { (chunk: KoreanPhraseChunk) =>
    val trimmed = trimPhraseChunk(chunk)
    KoreanPhrase(trimmed.flatMap(_.tokens))
  }

  if (addHashtags) chunkPhrases ++ tagPhrases else chunkPhrases
}