diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1f10b42 --- /dev/null +++ b/.gitignore @@ -0,0 +1,138 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyderworkspace + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +datasets/ + +*.bin \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0698b7f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +ARG PYTORCH="2.2.0" +ARG CUDA="12.1" +ARG CUDNN="8" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV DEBIAN_FRONTEND=noninteractive + +# Update package list and install software-properties-common +RUN apt update && apt install -y software-properties-common + +# Add deadsnakes PPA for Python 3.9 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt update + +RUN apt install -y git vim libgl1-mesa-glx libglib2.0-0 ninja-build libsm6 libxrender-dev libxext6 libgl1-mesa-glx python3.9 python3.9-dev python3.9-distutils wget net-tools zip unzip +RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +# Set Python 3.9 as the default python version +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 + +# Install pip for Python 3.9 +RUN wget https://bootstrap.pypa.io/get-pip.py +RUN python3.9 get-pip.py + +# Install Python Library +RUN pip install --upgrade pip +RUN pip install --upgrade setuptools +RUN pip install Pillow Flask Flask-Cors "tensorflow>=2.0.0" transformers + +# Set the default command to run when the container starts +WORKDIR /app + diff --git a/README.md b/README.md 
"""Extractive question answering with a fine-tuned DistilBERT checkpoint."""
import argparse
import functools
import os
import random

import numpy as np
import torch
from transformers import DistilBertForQuestionAnswering
from transformers import DistilBertTokenizerFast

# Demo pair substituted by init_args() when the caller passes None.
_DEFAULT_CONTEXT = "Stephen Silvagni (born 31 May 1967) is a former Australian rules footballer for the Carlton Football Club."
_DEFAULT_QUESTION = "What was the name of Stephen Silvagni's team?"


def init_args(context, question):
    """Build the settings dictionary used by :func:`predict`.

    If either ``context`` or ``question`` is ``None``, BOTH are replaced by
    the built-in demo pair (the default question only makes sense together
    with the default context).

    Args:
        context: Passage to search for the answer, or ``None``.
        question: Question about the passage, or ``None``.

    Returns:
        dict: Keys ``'context'``, ``'question'``, ``'seed'`` (42) and
        ``'save_dir'`` (``'save/baseline-01'``).
    """
    if context is None or question is None:
        context = _DEFAULT_CONTEXT
        question = _DEFAULT_QUESTION
    return {
        'context': context,
        'question': question,
        'seed': 42,
        'save_dir': 'save/baseline-01',
    }


def set_seed(seed):
    """Seed the ``random``, NumPy and PyTorch (CPU and all CUDA devices) RNGs.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


@functools.lru_cache(maxsize=2)
def _load_model_and_tokenizer(checkpoint_path, device_name):
    """Load the QA model and tokenizer once per (checkpoint, device) pair.

    Cached so that repeated calls to :func:`predict` do not re-read the
    checkpoint from disk on every request.

    Args:
        checkpoint_path: Directory passed to ``from_pretrained``.
        device_name: String form of the target ``torch.device``.

    Returns:
        tuple: ``(model, tokenizer)`` with the model moved to the device.
    """
    model = DistilBertForQuestionAnswering.from_pretrained(checkpoint_path)
    model.to(torch.device(device_name))
    tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
    return model, tokenizer


def predict(context, question):
    """Answer ``question`` by extracting a span from ``context``.

    The answer is the token span between the arg-max of the model's start
    logits and the arg-max of its end logits (inclusive). If the predicted
    end precedes the predicted start, the span is empty and the answer is
    an empty string.

    Args:
        context: Passage to search, or ``None`` to use the demo pair.
        question: Question text, or ``None`` to use the demo pair.

    Returns:
        dict: Keys ``'context'``, ``'question'``, ``'start_idx'``,
        ``'end_idx'`` (token positions in the encoded input) and
        ``'answer'`` (decoded span). The same dict is also printed.
    """
    args = init_args(context, question)
    set_seed(args['seed'])

    # Set before the fast tokenizer is created/used so the setting is
    # already in place when the tokenizer runs.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args['device'] = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    checkpoint_path = os.path.join(args['save_dir'], 'checkpoint')
    model, tokenizer = _load_model_and_tokenizer(checkpoint_path, str(args['device']))

    context = args['context']
    question = args['question']

    # Truncate only the context (second sequence) so that long passages do
    # not exceed the model's maximum input length and crash the forward pass.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
    ).to(args['device'])

    with torch.no_grad():
        outputs = model(**inputs)

    # int() works for 0-dim tensors on any device (unlike .numpy()).
    start_idx = int(outputs.start_logits.argmax())
    end_idx = int(outputs.end_logits.argmax())

    predict_tokens = inputs.input_ids[0, start_idx:end_idx + 1]
    predict_answer = tokenizer.decode(predict_tokens)

    result = {
        'context': context,
        'question': question,
        'start_idx': start_idx,
        'end_idx': end_idx,
        'answer': predict_answer,
    }
    print(result)
    return result


if __name__ == '__main__':
    context = "Stephen Silvagni (born 31 May 1967) is a former Australian rules footballer for the Carlton Football Club."
    question = "What was the name of Stephen Silvagni's team?"
    predict(context, question)
+[06.08.23 20:22:00] Epoch: 0 diff --git a/save/baseline-01/baseline-02/events.out.tfevents.1686224058.G292-Z40-00 b/save/baseline-01/baseline-02/events.out.tfevents.1686224058.G292-Z40-00 new file mode 100644 index 0000000..e9e139f Binary files /dev/null and b/save/baseline-01/baseline-02/events.out.tfevents.1686224058.G292-Z40-00 differ diff --git a/save/baseline-01/baseline-02/log_train.txt b/save/baseline-01/baseline-02/log_train.txt new file mode 100644 index 0000000..a5743c9 --- /dev/null +++ b/save/baseline-01/baseline-02/log_train.txt @@ -0,0 +1,26 @@ +[06.08.23 20:33:01] Args: { + "batch_size": 16, + "do_eval": false, + "do_test": false, + "do_train": true, + "eval": false, + "eval_datasets": "race,relation_extraction,duorc", + "eval_dir": "datasets/oodomain_test", + "eval_every": 5000, + "lr": 3e-05, + "num_epochs": 3, + "num_visuals": 10, + "recompute_features": false, + "run_name": "baseline", + "save_dir": "save/baseline-01/baseline-02", + "seed": 42, + "sub_file": "", + "train": false, + "train_datasets": "squad,nat_questions,newsqa", + "train_dir": "datasets/indomain_train", + "val_dir": "datasets/indomain_val", + "visualize_predictions": false +} +[06.08.23 20:33:01] Preparing Training Data... +[06.08.23 20:34:05] Preparing Validation Data... 
+[06.08.23 20:34:18] Epoch: 0 diff --git a/save/baseline-01/baseline-03/events.out.tfevents.1686224179.G292-Z40-00 b/save/baseline-01/baseline-03/events.out.tfevents.1686224179.G292-Z40-00 new file mode 100644 index 0000000..b485580 Binary files /dev/null and b/save/baseline-01/baseline-03/events.out.tfevents.1686224179.G292-Z40-00 differ diff --git a/save/baseline-01/baseline-03/log_train.txt b/save/baseline-01/baseline-03/log_train.txt new file mode 100644 index 0000000..697a944 --- /dev/null +++ b/save/baseline-01/baseline-03/log_train.txt @@ -0,0 +1,26 @@ +[06.08.23 20:35:04] Args: { + "batch_size": 16, + "do_eval": false, + "do_test": false, + "do_train": true, + "eval": false, + "eval_datasets": "race,relation_extraction,duorc", + "eval_dir": "datasets/oodomain_test", + "eval_every": 5000, + "lr": 3e-05, + "num_epochs": 3, + "num_visuals": 10, + "recompute_features": false, + "run_name": "baseline", + "save_dir": "save/baseline-01/baseline-03", + "seed": 42, + "sub_file": "", + "train": false, + "train_datasets": "squad,nat_questions,newsqa", + "train_dir": "datasets/indomain_train", + "val_dir": "datasets/indomain_val", + "visualize_predictions": false +} +[06.08.23 20:35:04] Preparing Training Data... +[06.08.23 20:36:06] Preparing Validation Data... 
+[06.08.23 20:36:19] Epoch: 0 diff --git a/save/baseline-01/baseline-04/events.out.tfevents.1686224596.G292-Z40-00 b/save/baseline-01/baseline-04/events.out.tfevents.1686224596.G292-Z40-00 new file mode 100644 index 0000000..41521f0 Binary files /dev/null and b/save/baseline-01/baseline-04/events.out.tfevents.1686224596.G292-Z40-00 differ diff --git a/save/baseline-01/baseline-04/log_train.txt b/save/baseline-01/baseline-04/log_train.txt new file mode 100644 index 0000000..286a748 --- /dev/null +++ b/save/baseline-01/baseline-04/log_train.txt @@ -0,0 +1,27 @@ +[06.08.23 20:40:51] Args: { + "batch_size": 16, + "do_eval": false, + "do_test": false, + "do_train": true, + "eval": false, + "eval_datasets": "race,relation_extraction,duorc", + "eval_dir": "datasets/oodomain_test", + "eval_every": 5000, + "lr": 3e-05, + "num_epochs": 3, + "num_visuals": 10, + "recompute_features": false, + "run_name": "baseline", + "save_dir": "save/baseline-01/baseline-04", + "seed": 42, + "sub_file": "", + "train": false, + "train_datasets": "squad,nat_questions,newsqa", + "train_dir": "datasets/indomain_train", + "val_dir": "datasets/indomain_val", + "visualize_predictions": false +} +[06.08.23 20:40:51] Preparing Training Data... +[06.08.23 20:42:57] Preparing Validation Data... +[06.08.23 20:43:16] Epoch: 0 +[06.08.23 20:43:17] Evaluating at step 0... 
diff --git a/save/baseline-01/checkpoint/.gitkeep b/save/baseline-01/checkpoint/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/save/baseline-01/checkpoint/config.json b/save/baseline-01/checkpoint/config.json new file mode 100644 index 0000000..562d7ba --- /dev/null +++ b/save/baseline-01/checkpoint/config.json @@ -0,0 +1,23 @@ +{ + "_name_or_path": "distilbert-base-uncased", + "activation": "gelu", + "architectures": [ + "DistilBertForQuestionAnswering" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "pad_token_id": 0, + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "transformers_version": "4.2.2", + "vocab_size": 30522 +} diff --git a/save/baseline-01/log_test.txt b/save/baseline-01/log_test.txt new file mode 100644 index 0000000..f99ff5b --- /dev/null +++ b/save/baseline-01/log_test.txt @@ -0,0 +1,8 @@ +[06.09.23 01:22:42] Eval F1: -1.00, EM: -1.00 +[06.09.23 01:22:42] Writing submission file to save/baseline-01/test_mtl_submission.csv... +[06.12.23 16:54:13] Eval F1: -1.00, EM: -1.00 +[06.12.23 16:54:13] Writing submission file to save/baseline-01/test_mtl_submission.csv... +[06.13.23 17:38:30] Eval F1: -1.00, EM: -1.00 +[06.13.23 17:38:30] Writing submission file to save/baseline-01/test_mtl_submission.csv... +[06.15.23 20:54:55] Eval F1: -1.00, EM: -1.00 +[06.15.23 20:54:55] Writing submission file to save/baseline-01/test_mtl_submission.csv... 
diff --git a/save/baseline-01/log_train.txt b/save/baseline-01/log_train.txt new file mode 100644 index 0000000..9653f2c --- /dev/null +++ b/save/baseline-01/log_train.txt @@ -0,0 +1,96 @@ +[11.01.22 10:43:46] Args: { + "batch_size": 16, + "do_eval": false, + "do_train": true, + "eval": false, + "eval_datasets": "race,relation_extraction,duorc", + "eval_dir": "datasets/oodomain_test", + "eval_every": 2000, + "lr": 3e-05, + "num_epochs": 3, + "num_visuals": 10, + "recompute_features": false, + "run_name": "baseline", + "save_dir": "save/baseline-01", + "seed": 42, + "sub_file": "", + "train": false, + "train_datasets": "squad,nat_questions,newsqa", + "train_dir": "datasets/indomain_train", + "val_dir": "datasets/indomain_val", + "visualize_predictions": false +} +[11.01.22 10:43:46] Preparing Training Data... +[11.01.22 10:45:30] Preparing Validation Data... +[11.01.22 10:45:50] Epoch: 0 +[11.01.22 10:45:50] Evaluating at step 0... +[11.01.22 10:48:24] Visualizing in TensorBoard... +[11.01.22 10:48:24] Eval F1: 07.69, EM: 00.04 +[11.01.22 10:54:49] Evaluating at step 2000... +[11.01.22 10:57:28] Visualizing in TensorBoard... +[11.01.22 10:57:28] Eval F1: 58.08, EM: 42.50 +[11.01.22 11:03:54] Evaluating at step 4000... +[11.01.22 11:06:30] Visualizing in TensorBoard... +[11.01.22 11:06:30] Eval F1: 63.52, EM: 47.34 +[11.01.22 11:12:57] Evaluating at step 6000... +[11.01.22 11:15:32] Visualizing in TensorBoard... +[11.01.22 11:15:32] Eval F1: 64.62, EM: 48.71 +[11.01.22 11:22:02] Evaluating at step 8000... +[11.01.22 11:24:54] Visualizing in TensorBoard... +[11.01.22 11:24:54] Eval F1: 65.99, EM: 50.04 +[11.01.22 11:31:19] Evaluating at step 10000... +[11.01.22 11:33:55] Visualizing in TensorBoard... +[11.01.22 11:33:55] Eval F1: 67.20, EM: 51.48 +[11.01.22 11:40:20] Evaluating at step 12000... +[11.01.22 11:42:56] Visualizing in TensorBoard... +[11.01.22 11:42:56] Eval F1: 68.45, EM: 52.97 +[11.01.22 11:49:22] Evaluating at step 14000... 
+[11.01.22 11:51:58] Visualizing in TensorBoard... +[11.01.22 11:51:58] Eval F1: 68.18, EM: 52.47 +[11.01.22 11:55:38] Epoch: 1 +[11.01.22 11:58:24] Evaluating at step 16000... +[11.01.22 12:01:01] Visualizing in TensorBoard... +[11.01.22 12:01:01] Eval F1: 69.09, EM: 53.22 +[11.01.22 12:07:45] Evaluating at step 18000... +[11.01.22 12:10:21] Visualizing in TensorBoard... +[11.01.22 12:10:21] Eval F1: 69.09, EM: 53.30 +[11.01.22 12:17:01] Evaluating at step 20000... +[11.01.22 12:19:37] Visualizing in TensorBoard... +[11.01.22 12:19:37] Eval F1: 69.28, EM: 53.62 +[11.01.22 12:26:15] Evaluating at step 22000... +[11.01.22 12:28:51] Visualizing in TensorBoard... +[11.01.22 12:28:51] Eval F1: 69.42, EM: 53.50 +[11.01.22 12:35:16] Evaluating at step 24000... +[11.01.22 12:37:52] Visualizing in TensorBoard... +[11.01.22 12:37:52] Eval F1: 69.80, EM: 53.79 +[11.01.22 12:44:17] Evaluating at step 26000... +[11.01.22 12:46:53] Visualizing in TensorBoard... +[11.01.22 12:46:53] Eval F1: 69.76, EM: 54.15 +[11.01.22 12:53:17] Evaluating at step 28000... +[11.01.22 12:55:53] Visualizing in TensorBoard... +[11.01.22 12:55:53] Eval F1: 70.16, EM: 54.50 +[11.01.22 13:02:18] Evaluating at step 30000... +[11.01.22 13:04:55] Visualizing in TensorBoard... +[11.01.22 13:04:55] Eval F1: 70.70, EM: 55.03 +[11.01.22 13:05:50] Epoch: 2 +[11.01.22 13:11:19] Evaluating at step 32000... +[11.01.22 13:14:01] Visualizing in TensorBoard... +[11.01.22 13:14:01] Eval F1: 70.08, EM: 54.36 +[11.01.22 13:20:26] Evaluating at step 34000... +[11.01.22 13:23:02] Visualizing in TensorBoard... +[11.01.22 13:23:02] Eval F1: 69.76, EM: 53.99 +[11.01.22 13:29:29] Evaluating at step 36000... +[11.01.22 13:32:06] Visualizing in TensorBoard... +[11.01.22 13:32:06] Eval F1: 69.85, EM: 53.85 +[11.01.22 13:38:34] Evaluating at step 38000... +[11.01.22 13:41:11] Visualizing in TensorBoard... +[11.01.22 13:41:11] Eval F1: 70.15, EM: 54.14 +[11.01.22 13:47:38] Evaluating at step 40000... 
"""FastAPI server exposing the question-answering model over HTTP."""
import model
from fastapi import FastAPI, Request, Form
from fastapi.concurrency import run_in_threadpool
from fastapi.templating import Jinja2Templates
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from starlette.middleware.cors import CORSMiddleware

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True lets
# any site issue credentialed requests. Left unchanged to avoid breaking
# existing cross-origin clients; restrict allow_origins or drop
# allow_credentials if this service ever relies on cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Templates are loaded from the "template" directory (singular).
templates = Jinja2Templates(directory="template")

# Static service metadata; not referenced elsewhere in the visible code.
SERVER_INFO = {
    "name": "Question and Answer API Server",
    "version": "1.0.0",
    "port": 50001
}


@app.get("/")
async def demo(request: Request):
    """Return the page from which the demo can be run.

    Html file: /template/index.html

    Args:
        request (Request): FastAPI Request object.

    Returns:
        TemplateResponse: The rendered HTML page.
    """
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/predict")
async def predict(context: str = Form(...), question: str = Form(...)):
    """Return the answer to a question.

    Args:
        context (str): Passage that provides the context for the question.
        question (str): The question.

    Returns:
        JSONResponse: The dict produced by ``model.predict``, JSON-encoded.
    """
    # model.predict is blocking; run it in the thread pool so the event loop
    # stays free to serve other requests while inference is in progress.
    response = await run_in_threadpool(model.predict, context, question)
    json_response = jsonable_encoder(response)
    return JSONResponse(content=json_response)
+ + +