protected[korean] def isKoreanNameVariation()

in src/main/scala/com/twitter/penguin/korean/util/KoreanSubstantive.scala [73:95]


  /**
   * Checks whether a chunk is a name (as judged by `isName`) or a colloquial
   * spelling of a name followed by '이' in which the name's final consonant has
   * been carried over into the last syllable, e.g. 우혀니 for 우현이.
   *
   * The chunk is accepted immediately when `isName` accepts it as-is. Otherwise
   * only chunks of 3 to 5 characters are considered, and all of the following
   * must hold:
   *  - the last syllable has vowel 'ㅣ', no coda, and an onset other than 'ㅇ'
   *    that is a key of `Hangul.CODA_MAP` (presumably: a consonant that may
   *    also appear as a coda -- confirm against `Hangul`);
   *  - the second-to-last syllable has no coda.
   *
   * When they hold, the last onset is moved back to become the coda of the
   * second-to-last syllable and the last syllable is replaced by '이'
   * (우혀니 -> 우현이). The result is true if `isName` accepts either that
   * rebuilt string or the rebuilt string without its trailing '이' (우현).
   *
   * @param chunk candidate text; every character is passed to `decomposeHangul`
   *              once the length check has passed
   * @return true if the chunk is a name or a recoverable name + '이' variation
   */
  protected[korean] def isKoreanNameVariation(chunk: CharSequence): Boolean = {
    val s = chunk.toString

    if (isName(s)) true
    else if (s.length < 3 || s.length > 5) false
    else {
      val decomposed = s.map((c: Char) => decomposeHangul(c))
      val lastChar = decomposed.last
      // Safe: the length check above guarantees at least three syllables.
      val secondToLast = decomposed(decomposed.length - 2)

      // The final syllable must look like "<consonant> + ㅣ" with nothing after it.
      // An onset of 'ㅇ' is excluded: such a chunk already ends in a plain '이',
      // so there is no consonant to move back.
      val lastCarriesCoda =
        Hangul.CODA_MAP.contains(lastChar.onset) &&
          lastChar.onset != 'ㅇ' &&
          lastChar.vowel == 'ㅣ' &&
          lastChar.coda == ' '

      // The syllable receiving the consonant must have an empty coda slot.
      if (!lastCarriesCoda || secondToLast.coda != ' ') false
      else {
        // Recover the name by moving the last onset back as a coda and restoring
        // the trailing '이' (우혀니 -> 우현이).
        val leading = decomposed.dropRight(2).map(hc => composeHangul(hc)).mkString
        val closedSyllable =
          composeHangul(HangulChar(secondToLast.onset, secondToLast.vowel, lastChar.onset))
        val recovered = s"$leading${closedSyllable}이"

        // Accept either "name + 이" or the bare name.
        Seq(recovered, recovered.init).exists(isName)
      }
    }
  }