-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
233 lines (199 loc) · 7.79 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
from fastapi import FastAPI, HTTPException, Body, Query
from pydantic import BaseModel, Field
from typing import Dict
import os
from dotenv import load_dotenv
import json
from crud import difficulty, pronounce, score, ocr
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()
# Read a .env file (if any) so later os.getenv() lookups such as
# CLAUDE_API_KEY can see values defined there.
load_dotenv()
from enum import Enum
class PronounceRule(str, Enum):
    """Phonological rule categories a dictation problem can target.

    Every member's value is the Korean label of the rule. Because ``str`` is
    mixed in, a member compares equal to its label string.
    """

    # Palatalization.
    구개음화 = "구개음화"
    # Liaison (linking a final consonant to the next syllable).
    연음화 = "연음화"
    # Tensification.
    경음화 = "경음화"
    # Liquidization.
    유음화 = "유음화"
    # Nasalization.
    비음화 = "비음화"
    # No phonological rule applies (note the space in the value).
    음운규칙_없음 = "음운규칙 없음"
    # Spelling of double final consonants (note the space in the value).
    겹받침_쓰기 = "겹받침 쓰기"
    # Aspiration.
    기식음화 = "기식음화"
class ClaudeRequest(BaseModel):
    """Request body for the ``/claude`` problem-generation endpoint."""

    # Requested difficulty level; validated to 1..5, defaults to 3.
    difficulty: int = Field(default=3, ge=1, le=5)

    # Phonological rule the generated problems should exercise (required).
    rule: PronounceRule

    # How many problems to generate; validated to 1..20, defaults to 5.
    count: int = Field(default=5, ge=1, le=20)
@app.post("/phonological_rules", tags=['analysis'])
async def analysis_pronounce(text: Dict[int, str] = Body(
    example={
        "1": "맏이가 동생을 돌보았다",
        "2": "굳이 그렇게까지 할 필요는 없어",
        "3": "해돋이를 보러 산에 올랐다",
        "4": "옷이 낡아서 새로 샀다",
        "5": "같이 영화 보러 갈래?"
    }
)):
    """Run the pronunciation analysis on every numbered sentence.

    Args:
        text: Mapping of problem number to sentence.

    Returns:
        A dict with the same keys, each mapped to the result of
        ``pronounce.pronounce_crud`` for that sentence.

    Raises:
        HTTPException: 400 as soon as an empty sentence is encountered.
            Sentences earlier in iteration order have already been analysed
            by then.
    """
    results = {}
    for number, sentence in text.items():
        if sentence:
            results[number] = pronounce.pronounce_crud(sentence)
        else:
            raise HTTPException(status_code=400, detail="text에 빈 문자열이 포함되어 있습니다.")
    return results
@app.post("/claude", tags=['AI'])
async def generate_claude(request: ClaudeRequest):
    """Generate dictation problems for one phonological rule via Claude.

    Builds a user prompt from the requested difficulty, rule and problem
    count, sends it together with a fixed system prompt to the Anthropic
    Messages API, and decodes the text of the first content block of the
    reply as JSON.

    Args:
        request: Difficulty, phonological rule and number of problems.

    Returns:
        The value obtained by ``json.loads`` on the model's reply text.

    Raises:
        HTTPException: 500 carrying ``str(error)`` for any failure inside the
            handler (import, API call, or JSON decoding of the reply).
    """
    try:
        import anthropic
        from datetime import datetime

        client_claude = anthropic.Anthropic(
            # Author's note: may be omitted if the key is supplied through an
            # environment variable.
            api_key=os.getenv('CLAUDE_API_KEY'),
        )
        # Interpolate the enum's *value* explicitly. Formatting a
        # ``(str, Enum)`` member directly yields "PronounceRule.<name>" on
        # newer Python versions, which would put the wrong text in the prompt.
        rule_label = request.rule.value
        # NOTE(review): this call is not awaited, so it runs synchronously
        # inside the async handler.
        message = client_claude.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            # Temperature chosen to get varied results between calls.
            temperature=0.5,
            system="""너는 음운 규칙별 받아쓰기 문제를 생성하는거야.
음운 규칙에는 구개음화, 연음화, 경음화, 유음화, 비음화, 음운규칙 없음, 겹받침 쓰기, 기식음화가 있어.\n
내가 'n 난이도로 [m]유형으로 k문제 만들어줘' 라고 하면 맞춰서 받아쓰기 문제를 만들어줘.\n
n: 1~5 (초등학교 기준, 1: 단어, 2: 쉬운 단어가 있는 간단한 문장, 3: 쉬운 단어가 있는 짧은 문장, 4: 짧은 문장, 5: 문장)\n
m: 구개음화, 연음화, 경음화, 유음화, 비음화, 음운규칙 없음, 겹받침 쓰기, 기식음화\n
답변 형식: 문제번호:문제 형태로 json형식으로 반환""",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            # The timestamp "seed" makes every prompt unique.
                            "text": f"{request.difficulty} 난이도로 [{rule_label}] 유형으로 {request.count}문제 만들어줘. (seed: {datetime.now().isoformat()})"
                        }
                    ]
                }
            ]
        )
        generated_problem = message.content[0].text
        return json.loads(generated_problem)
    except Exception as e:
        # Top-level boundary: surface any failure as a 500 while keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
class DifficultyRequest(BaseModel):
    """Request body holding one text whose difficulty is to be rated."""

    # Text to rate; a sample sentence is used when the field is omitted.
    text: str = Field(default="맏이가 동생을 돌보았다")
@app.post('/difficulties', tags=['analysis'], summary='디버깅용')
async def difficulties_endpoint(texts: list[DifficultyRequest]):
    """Debug endpoint: rate several texts in one request.

    Prints the received payload, then awaits ``calc_difficulty`` for each
    item in order.

    Returns:
        A list with one ``calc_difficulty`` result per input, in input order.
    """
    print(texts)
    grades = []
    for item in texts:
        grades.append(await calc_difficulty(item))
    return grades
# Weight per consonant jamo, applied to the first list returned by
# difficulty_dec (presumably final consonants / batchim -- confirm against
# difficulty.decomposition).
_CONSONANT_GRADE = {
    'ㄱ': 2, 'ㄴ': 2, 'ㄹ': 2, 'ㅁ': 2, 'ㅇ': 2,
    'ㄷ': 3, 'ㅂ': 3, 'ㅅ': 3, 'ㅈ': 3, 'ㅎ': 3, 'ㅆ': 3,
    'ㅊ': 4, 'ㅋ': 4, 'ㅌ': 4, 'ㅍ': 4, 'ㄲ': 4,
    'ㄵ': 5, 'ㄶ': 5,
    'ㄺ': 6, 'ㄻ': 6, 'ㄼ': 6, 'ㅀ': 6, 'ㅄ': 6,
    'ㄳ': 7, 'ㄽ': 7, 'ㄾ': 7, 'ㄿ': 7,
}

# Weight per vowel jamo, applied to the second list returned by
# difficulty_dec.
_VOWEL_GRADE = {
    'ㅏ': 1, 'ㅓ': 1, 'ㅗ': 1, 'ㅜ': 1, 'ㅡ': 1, 'ㅣ': 1,
    'ㅐ': 2, 'ㅔ': 2,
    'ㅑ': 3, 'ㅕ': 3, 'ㅛ': 3,
    'ㅚ': 4, 'ㅟ': 4,
    'ㅘ': 5, 'ㅝ': 5, 'ㅢ': 5,
    'ㅖ': 6, 'ㅙ': 6, 'ㅞ': 6,
    'ㅒ': 7, 'ㅠ': 7,
}

# (level, inclusive upper bound of the score ratio), checked in order.
_DIFFICULTY_THRESHOLDS = (
    (1, 0.05),
    (2, 0.1),
    (3, 0.35),
    (4, 0.5),
    (5, 0.7),
)


def _length_adjustment(length: int) -> int:
    """Return the level offset applied for a text of *length* characters."""
    if length <= 3:
        return -3
    if length <= 5:
        return -2
    if length <= 7:
        return -1
    # The longer band must be tested first; checking ">= 9" before ">= 13"
    # would make the +2 case unreachable.
    if length >= 13:
        return 2
    if length >= 9:
        return 1
    return 0


def _jamo_score(consonants, vowels) -> int:
    """Sum the grade weights of the given jamo; unmapped jamo count as 0."""
    return (
        sum(_CONSONANT_GRADE.get(c, 0) for c in consonants)
        + sum(_VOWEL_GRADE.get(v, 0) for v in vowels)
    )


@app.post("/difficulty", tags=['analysis'])
async def calc_difficulty(text: DifficultyRequest):
    """Rate the dictation difficulty of a text on a 1-5 scale.

    The jamo weights of the segments reported by
    ``pronounce.pronounce_crud`` are divided by the jamo weight of the whole
    text plus its character count. The first threshold band that contains
    this ratio gives the base level, which is then shifted by a
    length-dependent offset and clamped to 1..5.

    Args:
        text: Request wrapping the text to rate.

    Returns:
        An integer difficulty level between 1 and 5 inclusive.

    Raises:
        HTTPException: 400 when the text is empty (the ratio would otherwise
            divide by zero).
    """
    s = text.text
    if not s:
        raise HTTPException(status_code=400, detail="text가 빈 문자열입니다.")

    analysis = pronounce.pronounce_crud(s)

    # Weight of the whole text; part of the denominator.
    all_consonants, all_vowels = difficulty_dec(s)
    tsum = _jamo_score(all_consonants, all_vowels)

    # Only the segments returned by the pronunciation analysis are scored;
    # entries with an empty value are ignored.
    spro = ''.join(''.join(v) for v in analysis.values() if v)
    rule_consonants, rule_vowels = difficulty_dec(spro)

    # TODO: normalization needed (original author's note).
    # The denominator is >= 1 here because s is non-empty.
    total = _jamo_score(rule_consonants, rule_vowels) / (tsum + len(s))

    offset = _length_adjustment(len(s))
    for level, threshold in _DIFFICULTY_THRESHOLDS:
        if total <= threshold:
            return max(1, min(5, level + offset))

    # Ratio above every band: the highest level, with no length offset
    # (unchanged from the original behaviour).
    # NOTE(review): confirm whether the length offset should apply here too.
    return 5
class ScoreRequest(BaseModel):
    """Request body for the ``/score`` endpoint."""

    # Workbook: problem number -> problem text.
    workbook: dict[int, str] = Field(description="문제집")

    # S3 address of the submitted answer.
    answer: str = Field(description="답안 S3 주소")
@app.post("/score", tags=['analysis'])
async def score_endpoint(s: ScoreRequest = Body(
    example={
        "workbook": {
            "1": "시끌벅적한 소리가",
            "2": "힘차게 밟았다",
            "3": "아까부터 그랬단다"
        },
        "answer": "https://bada-static-bucket.s3.ap-northeast-2.amazonaws.com/117398116290476725816_66b9f0945dd57334d0755ca4.png"
    }
)):
    """Score a submitted answer against its workbook.

    Delegates to ``score.score_crud`` and returns its result unchanged.
    """
    # Sample response left by the original author (shape unverified):
    # {"1": 80, "2": 90, "3": 47}
    return score.score_crud(s)
@app.get("/")
async def root():
    """Return a fixed greeting that identifies this server."""
    greeting = "한글바다 AI 서버입니다."
    return {"message": greeting}
if __name__ == "__main__":
    # Development entry point when this file is executed directly.
    # Command-line alternative noted by the author: uvicorn main:app --reload
    import uvicorn

    uvicorn.run(
        "main:app",
        host="127.0.0.1",
        port=8000,
        reload=True,
    )
def difficulty_dec(s: str):
    """Collect the jamo of *s* that take part in difficulty scoring.

    Each row returned by ``difficulty.decomposition(s)`` is stripped of blank
    entries. From every remaining row the second entry is collected, and the
    third entry as well when the row has exactly three entries.

    Args:
        s: Text to decompose.

    Returns:
        A ``(b_list, m_list)`` tuple: ``b_list`` holds the third entries and
        ``m_list`` the second entries, both in row order.
    """
    b_list = []
    m_list = []
    for row in difficulty.decomposition(s):
        parts = [col for col in row if col.strip()]
        # Rows with fewer than two non-blank entries have no second element;
        # skip them rather than raising IndexError.
        if len(parts) < 2:
            continue
        m_list.append(parts[1])
        if len(parts) == 3:
            b_list.append(parts[2])
    return b_list, m_list
@app.get('/ocr', tags=['AI'], summary="디버깅용")
async def ocr_endpoint(
    filepath: str = Query(
        default="https://bada-static-bucket.s3.ap-northeast-2.amazonaws.com/117398116290476725816_66b9f0945dd57334d0755ca4.png",
        description="s3 파일 주소",
    ),
):
    """Debug endpoint: run ``ocr.infer_ocr`` on the given file address.

    Args:
        filepath: Address of the file to process (described as an S3 file
            address); defaults to a sample image.

    Returns:
        Whatever ``ocr.infer_ocr`` returns for that address.
    """
    recognized = ocr.infer_ocr(filepath)
    return recognized
class LevenRequest(BaseModel):
    """Request body for the ``/levenshtein`` debug endpoint."""

    # Workbook: problem number -> expected text.
    workbook: dict[int, str] = Field(description="문제집")

    # Answers: problem number -> submitted text.
    answer: dict[int, str] = Field(description="답안")
@app.post('/levenshtein', tags=['AI'], summary='디버깅용')
async def leven_endpoint(req: LevenRequest = Body(
    example={
        "workbook": {
            1: "시끌벅적한 소리가",
            2: "힘차게 밟았다",
            3: "아까부터 그랬단다"
        },
        "answer": {
            1: "시끌벅적한 소리가",
            2: "차게 밟았다.",
            3: "아까부터 그렀단다."
        }
    }
)):
    """Debug endpoint: compare answers with the workbook.

    Passes both mappings to ``score.simillarity`` and returns its result
    unchanged.
    """
    expected = req.workbook
    submitted = req.answer
    return score.simillarity(workbook=expected, answer=submitted)