in src/main/scala/com/twitter/penguin/korean/normalizer/KoreanNormalizer.scala [96:130]
/**
 * Expands a chunk whose second-to-last syllable carries a contracted coda 'ㄴ'
 * in front of one of the endings '데', '가' or '지'.
 *
 * The chunk is returned unchanged when any of the following holds:
 *  - it is shorter than two characters;
 *  - the whole chunk is found in the Noun, Conjunction or Adverb dictionary;
 *  - its last two characters are found in the Noun dictionary;
 *  - its second-to-last character lies outside the range '가'..'힣';
 *  - its second-to-last character is listed in CODA_N_EXCPETION;
 *  - the expansion conditions described below are not all met.
 *
 * Expansion: the second-to-last syllable is decomposed and recomposed from its
 * onset and vowel alone, and that syllable is appended to everything preceding
 * the last two characters to form a candidate stem. When the decomposed coda is
 * 'ㄴ', the last character is '데', '가' or '지', and the stem is found in the
 * Noun dictionary, the result is stem + ("은" if the vowel is 'ㅡ', otherwise
 * "인") + last character.
 *
 * Example (assuming "소리" is in the Noun dictionary and no exception applies):
 * "소린가" becomes "소리인가".
 *
 * @param chunk text to inspect
 * @return the expanded text as described above, or `chunk` itself when no
 *         expansion applies
 */
protected[korean] def normalizeCodaN(chunk: CharSequence): CharSequence = {
  val size = chunk.length()

  if (size < 2) {
    chunk
  } else {
    val tail = chunk.subSequence(size - 2, size)
    val ending = chunk.charAt(size - 1)
    val pivot = tail.charAt(0)

    // Exception cases: known words, a pivot outside '가'..'힣', or a pivot that
    // is explicitly listed as an exception.
    val isException =
      koreanDictionary(Noun).contains(chunk) ||
        koreanDictionary(Conjunction).contains(chunk) ||
        koreanDictionary(Adverb).contains(chunk) ||
        koreanDictionary(Noun).contains(tail) ||
        !(pivot >= '가' && pivot <= '힣') ||
        CODA_N_EXCPETION.contains(pivot)

    if (isException) {
      chunk
    } else {
      val parts = decomposeHangul(pivot)

      // Candidate stem: everything before the last two characters, followed by
      // the pivot syllable rebuilt from onset and vowel only.
      val stem = new StringBuilder()
      stem.append(chunk.subSequence(0, size - 2))
      stem.append(composeHangul(parts.onset, parts.vowel))

      val endingQualifies = ending match {
        case '데' | '가' | '지' => true
        case _ => false
      }

      if (parts.coda == 'ㄴ' && endingQualifies && koreanDictionary(Noun).contains(stem)) {
        val filler = if (parts.vowel == 'ㅡ') "은" else "인"
        s"$stem$filler$ending"
      } else {
        chunk
      }
    }
  }
}