in src/main/scala/com/twitter/penguin/korean/phrase_extractor/KoreanPhraseExtractor.scala [204:215]
/**
 * Drops every chunk whose text duplicates that of an earlier chunk.
 *
 * A chunk's text is the concatenation, with no separator, of the `text` of each token of each
 * element in the chunk. Chunks that differ only in how that text is split across tokens or
 * elements are therefore treated as duplicates.
 *
 * @param chunks candidate chunks, in the order they should be considered
 * @return the input chunks with only the first occurrence of each distinct text kept,
 *         in their original relative order
 */
private def distinctPhrases(chunks: Seq[KoreanPhraseChunk]): Seq[KoreanPhraseChunk] = {
  // Flatten a chunk down to the plain string that serves as its identity.
  def textOf(chunk: KoreanPhraseChunk): String =
    (for (phrase <- chunk; token <- phrase.tokens) yield token.text).mkString("")

  val emptyState = (List.empty[KoreanPhraseChunk], Set.empty[String])

  val (keptNewestFirst, _) = chunks.foldLeft(emptyState) { (state, chunk) =>
    val (kept, seenTexts) = state
    val text = textOf(chunk)
    if (!seenTexts(text)) (chunk :: kept, seenTexts + text) else state
  }

  // Chunks were prepended while folding, so flip back to input order.
  keptNewestFirst.reverse
}