protected[korean] def normalizeCodaN()

in src/main/scala/com/twitter/penguin/korean/normalizer/KoreanNormalizer.scala [96:130]

28 lines of code
14 McCabe index (conditional complexity)


  /**
   * Expands a trailing syllable pair in which the copula has been contracted
   * into a coda "ㄴ" on the second-to-last syllable.
   *
   * The chunk is rewritten only when all of the following hold:
   *  - it has at least two characters and its last character is '데', '가' or '지';
   *  - the second-to-last character is a precomposed Hangul syllable ('가'..'힣')
   *    that is not listed in `CODA_N_EXCPETION` and whose coda is 'ㄴ';
   *  - neither the whole chunk (as noun, conjunction or adverb) nor its last two
   *    characters (as noun) is a dictionary entry;
   *  - the chunk with that coda removed and the last character dropped is a
   *    noun dictionary entry.
   *
   * In that case "은" (when the syllable's vowel is 'ㅡ') or "인" (otherwise) is
   * inserted between the coda-less head and the last character. For example,
   * assuming 소리 is in the noun dictionary and none of the exceptions apply,
   * 소린가 becomes 소리인가.
   *
   * @param chunk text chunk to normalize
   * @return the expanded text as a new String, or `chunk` itself (same instance)
   *         when no rewrite applies
   */
  protected[korean] def normalizeCodaN(chunk: CharSequence): CharSequence = {
    val length = chunk.length()

    if (length < 2) {
      chunk
    } else {
      val last = chunk.charAt(length - 1)
      val lastTwoHead = chunk.charAt(length - 2)

      // Cheap character tests run first: they reject most chunks without any
      // dictionary lookup or allocation. The Hangul range test must stay ahead
      // of decomposeHangul, which is only ever called on '가'..'힣' here.
      val hasCandidateShape =
        (last == '데' || last == '가' || last == '지') &&
          lastTwoHead >= '가' && lastTwoHead <= '힣' &&
          !CODA_N_EXCPETION.contains(lastTwoHead)

      if (!hasCandidateShape) {
        chunk
      } else {
        val hc = decomposeHangul(lastTwoHead)

        // Exception cases: the chunk (or its two-character tail) is already a
        // dictionary word. Declared as a def so that the lookups are skipped
        // entirely when the coda test below fails.
        def isKnownWord: Boolean = {
          val lastTwo = chunk.subSequence(length - 2, length)
          koreanDictionary(Noun).contains(chunk) ||
            koreanDictionary(Conjunction).contains(chunk) ||
            koreanDictionary(Adverb).contains(chunk) ||
            koreanDictionary(Noun).contains(lastTwo)
        }

        if (hc.coda == 'ㄴ' && !isKnownWord) {
          // Everything before the last two characters, followed by the
          // second-to-last syllable recomposed without its coda.
          val newHead = new StringBuilder()
            .append(chunk.subSequence(0, length - 2))
            .append(composeHangul(hc.onset, hc.vowel))

          if (koreanDictionary(Noun).contains(newHead)) {
            val mid = if (hc.vowel == 'ㅡ') "은" else "인"
            newHead.toString + mid + last
          } else {
            chunk
          }
        } else {
          chunk
        }
      }
    }
  }