Skip to content

Commit

Permalink
Dev (#5)
Browse files Browse the repository at this point in the history
* feat: requirements

* feat: modify docker

* feat: 꼬임

* feat: docker ignore

* feat: pronounce rule
  • Loading branch information
ssoxong authored Nov 13, 2024
1 parent e6d3fef commit 33ab1c9
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 3 deletions.
2 changes: 1 addition & 1 deletion crud/difficulty.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ def decomposition(korean_word: str):
res.append([CHOSUNG_LIST[ch1], JUNGSUNG_LIST[ch2], JONGSUNG_LIST[ch3]])
else:
res.append([w])
print (res)
# print (res)

return res
94 changes: 92 additions & 2 deletions crud/pronounce.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ def analysis_pronounce_crud(text):
"구개음화": analysis_gugaeumhwa(text, dec),
"비음화": analysis_beumhwa(text, dec),
"유음화": analysis_yueumhwa(text, dec),
"연음화": analysis_yeoneumhwa(text, dec)
"연음화": analysis_yeoneumhwa(text, dec),
"경음화": anaylsis_gyeonumhwa(text, dec),
"겹받침 쓰기": doubleb_analysis(text, dec),
"거센소리": geosensori_analysis(text, dec)
}

def analysis_gugaeumhwa(text, dec):
Expand All @@ -19,6 +22,7 @@ def analysis_gugaeumhwa(text, dec):
for i, r in enumerate(dec):
r = [col for col in r if col.strip()]
if len(r)!=3: continue
if i+1>=len(dec): continue
if not (r[2]=='ㄷ' or r[2]=='ㅌ'): continue
print(r)
if (text[i+1]=='이' or text[i+1]=='히'):
Expand All @@ -33,6 +37,8 @@ def analysis_beumhwa(text, dec):
for i, r in enumerate(dec):
r = [col for col in r if col.strip()]
if len(r)!=3: continue
if i+1>=len(dec): continue

if r[2] in payeoleum and dec[i+1][0] in beeum:
beumhwa.append(text[i:i+2])

Expand All @@ -44,6 +50,8 @@ def analysis_yueumhwa(text, dec):
for i, r in enumerate(dec):
r = [col for col in r if col.strip()]
if len(r)!=3: continue
if i+1>len(dec): continue

if r[2]=='ㄹ' and dec[i+1][0]=='ㄴ':
yueumhwa.append(text[i:i+2])
elif r[2]=='ㄴ' and dec[i+1][0]=='ㄹ':
Expand All @@ -68,4 +76,86 @@ def analysis_yeoneumhwa(text, dec):
yeoneumhwa.append(forward+word)
# 받침 뒤에 ㅏ, ㅓ, ㅗ, ㅜ, ㅟ로 시작하는 실질 형태소가 오는 경우

return yeoneumhwa
return yeoneumhwa

def anaylsis_gyeonumhwa(text, dec):
    """Collect the spots in *text* where tensification (경음화) may apply.

    Three rules are checked:

    1. A syllable whose coda is an obstruent (or the clusters ㄼ/ㄾ) that is
       directly followed by a syllable starting with a plain consonant.
    2. A verb stem (POS tag ``VV``) ending in coda ㄴ/ㅁ followed by a
       morpheme whose first onset is ㄱ, ㄷ, ㅅ or ㅈ.
    3. An adnominal ending (POS tag containing ``ETM``) ending in coda ㄹ
       followed by a morpheme whose first onset is ㄱ, ㄷ, ㅂ, ㅅ or ㅈ.

    Args:
        text: The string being analysed.
        dec: Per-character jamo decomposition of ``text``. Entry ``i`` is read
            as ``[onset, vowel, coda]``; entries that do not have three
            non-blank jamo are skipped. Assumed to line up index-for-index
            with ``text`` -- confirm against the caller.

    Returns:
        A list of substrings: two-character slices of ``text`` for rule 1, and
        ``morpheme + next_morpheme`` concatenations for rules 2 and 3.
    """
    gyeongumhwa = []
    pos = pecab.pos(text)

    # Codas that trigger tensification of the following onset.
    b1list = ['ㄱ', 'ㄷ', 'ㅂ', 'ㄲ', 'ㅋ', 'ㄳ', 'ㄺ', 'ㅅ', 'ㅆ', 'ㅈ', 'ㅊ', 'ㅌ', 'ㅍ', 'ㄼ', 'ㄿ', 'ㅄ']
    b2list = ['ㄼ', 'ㄾ']

    # Onsets that can be tensified.
    n1list = ['ㄱ', 'ㄷ', 'ㅂ', 'ㅅ', 'ㅈ']
    n2list = ['ㄱ', 'ㄷ', 'ㅅ', 'ㅈ']

    def jamo_of(morpheme, syllable, slot):
        """Return one jamo of *morpheme*, or None if it cannot be read.

        ``decomposition`` yields a one-element entry for anything that is not
        a composed Hangul syllable, so the entry length is checked before
        indexing into it.
        """
        decomposed = crud.difficulty.decomposition(morpheme)
        if not decomposed or len(decomposed[syllable]) != 3:
            return None
        return decomposed[syllable][slot]

    # Rule 1: coda followed by a plain onset in the next syllable.
    for i, r in enumerate(dec):
        r = [col for col in r if col.strip()]
        if len(r) != 3 or i + 1 >= len(dec):
            continue

        onset = dec[i + 1][0]
        # A single combined test: ㄼ is in both coda lists, so two separate
        # checks would report the same pair twice.
        if (r[2] in b1list and onset in n1list) or (r[2] in b2list and onset in n2list):
            gyeongumhwa.append(text[i:i + 2])

    # Rule 2: verb stem ending in ㄴ/ㅁ followed by ㄱ, ㄷ, ㅅ, ㅈ.
    # Pairing each morpheme with its successor means the last morpheme,
    # which has nothing after it, is never looked up past the end of `pos`.
    for (word, tag), (next_word, _next_tag) in zip(pos, pos[1:]):
        if tag != 'VV':
            continue
        # The tensified sound is the FIRST onset of the following morpheme.
        if jamo_of(word, -1, 2) in ('ㄴ', 'ㅁ') and jamo_of(next_word, 0, 0) in n2list:
            gyeongumhwa.append(word + next_word)

    # Rule 3: adnominal ending -(으)ㄹ followed by ㄱ, ㄷ, ㅂ, ㅅ, ㅈ.
    for (word, tag), (next_word, _next_tag) in zip(pos, pos[1:]):
        if 'ETM' not in tag:
            continue
        if jamo_of(word, -1, 2) == 'ㄹ' and jamo_of(next_word, 0, 0) in n1list:
            gyeongumhwa.append(word + next_word)

    # Not handled yet: Sino-Korean words with coda ㄹ followed by ㄷ, ㅅ, ㅈ.
    return gyeongumhwa

def doubleb_analysis(text, dec):
    """Find the syllables of *text* written with a double final consonant.

    Args:
        text: The string being analysed.
        dec: Per-character jamo decomposition of ``text``. Entry ``i`` is read
            as ``[onset, vowel, coda]``; entries without three non-blank jamo
            (no coda, or not a decomposed syllable) are skipped.

    Returns:
        A list of ``text[i:i+2]`` slices, one per syllable ``i`` whose coda is
        a consonant cluster. The slice holds the syllable plus the character
        after it, or the syllable alone when it is the last character.
    """
    double_codas = {'ㄳ', 'ㄵ', 'ㄶ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅄ'}

    doubleb = []
    for i, r in enumerate(dec):
        r = [col for col in r if col.strip()]
        # Only the syllable's own coda matters, so the final syllable is
        # checked too; nothing here looks at the following entry of `dec`.
        if len(r) == 3 and r[2] in double_codas:
            doubleb.append(text[i:i + 2])

    return doubleb

def geosensori_analysis(text, dec):
    """Find adjacent syllable pairs in *text* where aspiration (거센소리) may occur.

    A pair is reported when the first syllable's coda is ㅎ/ㄶ/ㅀ and the
    next entry starts with one of the plain consonants below, or the other
    way round (plain coda followed by an entry starting with ㅎ/ㄶ/ㅀ).

    Args:
        text: The string being analysed.
        dec: Per-character jamo decomposition of ``text``. Entry ``i`` is read
            as ``[onset, vowel, coda]``; entries without three non-blank jamo
            are skipped as the first member of a pair.

    Returns:
        A list of two-character slices ``text[i:i+2]``, one per matching pair.
    """
    h_like = ('ㅎ', 'ㄶ', 'ㅀ')
    plain = ('ㄱ', 'ㄷ', 'ㅂ', 'ㅈ', 'ㄵ', 'ㄺ', 'ㄼ')

    matches = []
    # Walk each entry together with its successor; the last entry has no
    # successor and therefore never starts a pair.
    for idx, (current, following) in enumerate(zip(dec, dec[1:])):
        jamo = [piece for piece in current if piece.strip()]
        if len(jamo) != 3:
            continue

        coda = jamo[2]
        next_onset = following[0]
        h_then_plain = coda in h_like and next_onset in plain
        plain_then_h = coda in plain and next_onset in h_like
        # The two cases cannot both hold (the jamo sets are disjoint), so a
        # pair is appended at most once.
        if h_then_plain or plain_then_h:
            matches.append(text[idx:idx + 2])

    return matches
5 changes: 5 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Scratch script: print the part-of-speech tags PeCab assigns to a sample
# sentence.
from pecab import PeCab

pecab = PeCab()
text = "밥을 담고 할 사람? 열정있게"

# pos() output is printed as-is for manual inspection.
tagged = pecab.pos(text)
print(tagged)

0 comments on commit 33ab1c9

Please sign in to comment.