in src/main/scala/com/twitter/penguin/korean/util/KoreanConjugation.scala [84:252]
/**
 * Expands every word in `words` into a set of conjugated surface forms.
 *
 * For each word the last character is decomposed with `decomposeHangul` and matched
 * against the rules below (first matching rule wins). Each rule produces replacement
 * strings for the last character only; every replacement is then prefixed with the
 * unchanged remainder of the word (`word.init`).
 *
 * NOTE(review): the inputs appear to be predicate stems without the final 다 (the
 * example words in the comments are dictionary forms) -- confirm against callers.
 *
 * @param words       words to expand; each must be non-empty, because `init` and `last`
 *                    are taken of every word and throw on an empty string
 * @param isAdjective enables the adjective-only rules (extra 하 endings, ㅂ/ㅎ codas,
 *                    ㅆ codas) and disables the final removal of edge-case forms
 * @return the union of all expanded forms; when `isAdjective` is false the forms
 *         "아니", "입", "입니" and "나는" are removed from the result
 */
protected[korean] def conjugatePredicated(words: Set[String], isAdjective: Boolean): Set[String] = {
  // Plain `val`: the value is needed on both branches of the final `if`, so laziness buys nothing.
  val expanded = words.flatMap { word =>
    val init = word.init
    val lastChar = word.last
    val lastCharString = lastChar.toString
    val lastCharDecomposed = decomposeHangul(lastChar)

    val expandedLast: Seq[String] = lastCharDecomposed match {
      // ---- Cases without codas ----

      // 하다, special case
      case HangulChar('ㅎ', 'ㅏ', ' ') =>
        val endings = if (isAdjective) Seq("합", "해", "히", "하") else Seq("합", "해")
        addPreEomi(lastChar, PRE_EOMI_COMMON ++ PRE_EOMI_2 ++ PRE_EOMI_6 ++ PRE_EOMI_RESPECT) ++
          CODAS_COMMON.map {
            // With the ㅆ coda the vowel becomes ㅐ (했); every other coda keeps ㅏ.
            case c: Char if c == 'ㅆ' => composeHangul('ㅎ', 'ㅐ', c).toString
            case c: Char => composeHangul('ㅎ', 'ㅏ', c).toString
          } ++
          addPreEomi('하', PRE_EOMI_VOWEL ++ PRE_EOMI_1_5 ++ PRE_EOMI_6) ++
          addPreEomi('해', PRE_EOMI_1_1) ++
          endings

      // 쏘다
      case HangulChar(o: Char, 'ㅗ', ' ') =>
        addPreEomi(lastChar, PRE_EOMI_VOWEL ++ PRE_EOMI_2 ++ PRE_EOMI_1_3 ++ PRE_EOMI_6) ++
          CODAS_NO_PAST.map(composeHangul(o, 'ㅗ', _).toString) ++
          // Contracted forms: ㅗ -> ㅘ, with and without the ㅆ coda.
          Seq(
            composeHangul(o, 'ㅘ', ' ').toString,
            composeHangul(o, 'ㅘ', 'ㅆ').toString,
            lastCharString)

      // 맞추다, 겨누다, 재우다
      case HangulChar(o: Char, 'ㅜ', ' ') =>
        addPreEomi(lastChar, PRE_EOMI_VOWEL ++ PRE_EOMI_1_2 ++ PRE_EOMI_2 ++ PRE_EOMI_6) ++
          CODAS_NO_PAST.map(composeHangul(o, 'ㅜ', _).toString) ++
          // Contracted forms: ㅜ -> ㅝ, with and without the ㅆ coda.
          Seq(
            composeHangul(o, 'ㅝ').toString,
            composeHangul(o, 'ㅝ', 'ㅆ').toString,
            lastCharString)

      // 치르다, 구르다, 굴르다, 뜨다, 모으다, 고르다, 골르다
      case HangulChar(o: Char, 'ㅡ', ' ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          CODAS_NO_PAST.map(composeHangul(o, 'ㅡ', _).toString) ++
          // ㅡ is replaced by each of ㅝ / ㅓ / ㅏ, with and without the ㅆ coda.
          Seq(
            composeHangul(o, 'ㅝ').toString,
            composeHangul(o, 'ㅓ').toString,
            composeHangul(o, 'ㅏ').toString,
            composeHangul(o, 'ㅝ', 'ㅆ').toString,
            composeHangul(o, 'ㅓ', 'ㅆ').toString,
            composeHangul(o, 'ㅏ', 'ㅆ').toString,
            lastCharString)

      // 사귀다 -- must precede the generic ㅟ rule so that 귀 also yields 겨 / 겼
      case HangulChar('ㄱ', 'ㅟ', ' ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          CODAS_NO_PAST.map(composeHangul('ㄱ', 'ㅟ', _).toString) ++
          Seq(composeHangul('ㄱ', 'ㅕ', ' ').toString, composeHangul('ㄱ', 'ㅕ', 'ㅆ').toString) ++
          Seq(lastCharString)

      // 쥐다
      case HangulChar(o: Char, 'ㅟ', ' ') =>
        CODAS_NO_PAST.map(composeHangul(o, 'ㅟ', _).toString) ++
          addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          Seq(lastCharString)

      // 마시다, 엎드리다, 치다, 이다, 아니다
      case HangulChar(o: Char, 'ㅣ', ' ') =>
        CODAS_NO_PAST.map(composeHangul(o, 'ㅣ', _).toString) ++
          addPreEomi(lastChar, PRE_EOMI_1_2 ++ PRE_EOMI_2 ++ PRE_EOMI_6) ++
          Seq(
            // ㅂ coda followed by 니 (e.g. 입니); written with .toString like the other entries.
            composeHangul(o, 'ㅣ', 'ㅂ').toString + "니",
            composeHangul(o, 'ㅕ', ' ').toString,
            composeHangul(o, 'ㅕ', 'ㅆ').toString,
            lastCharString)

      // 꿰다, 꾀다
      case HangulChar(o: Char, v: Char, ' ') if v == 'ㅞ' || v == 'ㅚ' || v == 'ㅙ' =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          CODAS_COMMON.map(composeHangul(o, v, _).toString) ++
          Seq(lastCharString)

      // All other vowel endings: 둘러서다, 켜다, 세다, 캐다, 차다
      case HangulChar(o: Char, v: Char, ' ') =>
        CODAS_COMMON.map(composeHangul(o, v, _).toString) ++
          addPreEomi(lastChar, PRE_EOMI_VOWEL ++ PRE_EOMI_1_1 ++ PRE_EOMI_2 ++ PRE_EOMI_6) ++
          Seq(lastCharString)

      // ---- Cases with codas ----

      // 만들다, 알다, 풀다
      case HangulChar(o: Char, v: Char, 'ㄹ')
          if (o == 'ㅁ' && v == 'ㅓ') || v == 'ㅡ' || v == 'ㅏ' || v == 'ㅜ' =>
        addPreEomi(lastChar, PRE_EOMI_1_2 ++ PRE_EOMI_3) ++
          // These endings attach to the syllable with the ㄹ coda dropped.
          addPreEomi(composeHangul(o, v, ' '), PRE_EOMI_2 ++ PRE_EOMI_6 ++ PRE_EOMI_RESPECT) ++
          Seq(
            composeHangul(o, v, 'ㄻ').toString,
            composeHangul(o, v, 'ㄴ').toString,
            lastCharString)

      // 낫다, 빼앗다
      case HangulChar(o: Char, 'ㅏ', 'ㅅ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          // These endings attach to the syllable with the ㅅ coda dropped.
          addPreEomi(composeHangul(o, 'ㅏ'), PRE_EOMI_4 ++ PRE_EOMI_5) ++
          Seq(lastCharString)

      // 묻다 -- must precede the generic ㅜ+ㄷ rule below
      case HangulChar('ㅁ', 'ㅜ', 'ㄷ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          Seq(
            composeHangul('ㅁ', 'ㅜ', 'ㄹ').toString,
            lastCharString)

      // 붇다
      case HangulChar(o: Char, 'ㅜ', 'ㄷ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          addPreEomi(
            composeHangul(o, 'ㅜ', ' '),
            PRE_EOMI_1_2 ++ PRE_EOMI_1_4 ++ PRE_EOMI_4 ++ PRE_EOMI_5) ++
          Seq(
            composeHangul(o, 'ㅜ', 'ㄹ').toString,
            lastCharString)

      // 눕다
      case HangulChar(o: Char, 'ㅜ', 'ㅂ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          addPreEomi(composeHangul(o, 'ㅜ', ' '), PRE_EOMI_1_4 ++ PRE_EOMI_4 ++ PRE_EOMI_5) ++
          Seq(lastCharString)

      // 간지럽다, 갑작스럽다 -> 갑작스런 (adjectives only; additionally emits the ㄴ-coda form)
      case HangulChar(o: Char, 'ㅓ', 'ㅂ') if isAdjective =>
        addPreEomi(composeHangul(o, 'ㅓ', ' '), PRE_EOMI_1_4 ++ PRE_EOMI_7) ++
          Seq(
            composeHangul(o, 'ㅓ', ' ').toString,
            composeHangul(o, 'ㅓ', 'ㄴ').toString,
            lastCharString)

      // 아름답다, 가볍다, 덥다, 간지럽다 (adjectives only)
      case HangulChar(o: Char, v: Char, 'ㅂ') if isAdjective =>
        addPreEomi(composeHangul(o, v, ' '), PRE_EOMI_1_4 ++ PRE_EOMI_7) ++
          Seq(composeHangul(o, v, ' ').toString, lastCharString)

      // 놓다
      case HangulChar(o: Char, 'ㅗ', 'ㅎ') =>
        addPreEomi(lastChar, PRE_EOMI_2 ++ PRE_EOMI_6) ++
          CODAS_COMMON.map(composeHangul(o, 'ㅗ', _).toString) ++
          Seq(
            composeHangul(o, 'ㅘ', ' ').toString,
            composeHangul(o, 'ㅗ', ' ').toString,
            lastCharString)

      // 파랗다, 퍼렇다, 어떻다 (adjectives only)
      case HangulChar(o: Char, v: Char, 'ㅎ') if isAdjective =>
        CODAS_COMMON.map(composeHangul(o, v, _).toString) ++
          CODAS_FOR_CONTRACTION.map(composeHangul(o, 'ㅐ', _).toString) ++
          Seq(
            composeHangul(o, 'ㅐ', ' ').toString,
            composeHangul(o, v, ' ').toString,
            lastCharString)

      // Any one-character word not handled above, or an adjective ending in a ㅆ coda: 있다, 컸다.
      // A former follow-up case guarded by `word.length == 1 && isAdjective` (밝다) was removed:
      // this guard already accepts every one-character word, so that case could never match.
      case HangulChar(_, _, c: Char) if word.length == 1 || (isAdjective && c == 'ㅆ') =>
        addPreEomi(
          lastChar,
          PRE_EOMI_COMMON ++ PRE_EOMI_1_2 ++ PRE_EOMI_1_3 ++ PRE_EOMI_2 ++ PRE_EOMI_4 ++
            PRE_EOMI_5 ++ PRE_EOMI_6) ++
          Seq(lastCharString)

      // Everything else is left unexpanded: 부여잡다, 얻어맞다, 얻어먹다
      case _ =>
        Seq(lastCharString)
    }

    // Re-attach the untouched leading part of the word to every variant of the last character.
    expandedLast.map(init + _)
  }

  if (isAdjective) {
    expanded
  } else {
    // Edge cases: these are more likely to be a conjugation of an adjective than of a verb.
    expanded -- Set("아니", "입", "입니", "나는")
  }
}