diff --git a/.github/workflows/nlp-question-generator.yml b/.github/workflows/nlp-question-generator.yml
new file mode 100644
index 00000000..c2a72a61
--- /dev/null
+++ b/.github/workflows/nlp-question-generator.yml
@@ -0,0 +1,54 @@
+name: Question Generator
+
+on:
+ push:
+ branches:
+ - main
+ - v*-branch
+ paths:
+ - tasks/nlp-question-generator/**
+ - tests/datasets.py
+ - tests/server.py
+ - tests/test_nlp_question_generator.py
+
+ pull_request:
+ branches:
+ - main
+ - v*-branch
+ paths:
+ - tasks/nlp-question-generator/**
+ - tests/datasets.py
+ - tests/server.py
+ - tests/test_nlp_question_generator.py
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ # runs all of the steps inside the specified container rather than on the VM host.
+ # Because of this the network configuration changes from host based network to a container network.
+ container:
+ image: platiagro/platiagro-notebook-image:0.3.0
+
+ services:
+
+ minio:
+ image: bitnami/minio:latest
+ env:
+ MINIO_ACCESS_KEY: minio
+ MINIO_SECRET_KEY: minio123
+ ports:
+ - 9000:9000
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Test with pytest
+ run: |
+ pip install pytest
+ pytest -v tests/test_nlp_question_generator.py
+ timeout-minutes: 90
+ env:
+ MINIO_ENDPOINT: minio:9000
+ MINIO_ACCESS_KEY: minio
+ MINIO_SECRET_KEY: minio123
diff --git a/README.md b/README.md
index 62316b55..eb3559ef 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ Task | Status | License
[Sparse Document Retriever](tasks/nlp-sparse-document-retriever/) | [](https://github.com/platiagro/tasks/actions/workflows/nlp-sparse-document-retriever.yml) | TBD
[Dense Document Retriever](tasks/nlp-dense-document-retriever/) | [](https://github.com/platiagro/tasks/actions/workflows/nlp-dense-document-retriever.yml) | TBD
[Document Reader](tasks/nlp-document-reader/) | [](https://github.com/platiagro/tasks/actions/workflows/nlp-document-reader.yml) | TBD
+[Question Generator](tasks/nlp-question-generator/) | [](https://github.com/platiagro/tasks/actions/workflows/nlp-question-generator.yml) | TBD
[Normalizer](tasks/normalizer/) | [](https://github.com/platiagro/tasks/actions/workflows/normalizer.yml) | [BSD 3-Clause](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)
[Pre Selection](tasks/pre-selection/) | [](https://github.com/platiagro/tasks/actions/workflows/pre-selection.yml) | [BSD 3-Clause](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)
[Random Forest Classifier](tasks/random-forest-classifier/) | [](https://github.com/platiagro/tasks/actions/workflows/random-forest-classifier.yml) | [BSD 3-Clause](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)
diff --git a/tasks/nlp-question-generator/Deployment.ipynb b/tasks/nlp-question-generator/Deployment.ipynb
new file mode 100644
index 00000000..fba77312
--- /dev/null
+++ b/tasks/nlp-question-generator/Deployment.ipynb
@@ -0,0 +1,155 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question Generator - Implantação\n",
+ "\n",
+ "Utiliza um transformer T5 pré-treinado em português e disponibilizado pelo [Hugging Face](https://huggingface.co/unicamp-dl/ptt5-base-portuguese-vocab).
\n",
+ "\n",
+ "### **Em caso de dúvidas, consulte os [tutoriais da PlatIAgro](https://platiagro.github.io/tutorials/).**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Declaração de Classe para Predições em Tempo Real\n",
+ "\n",
+ "A tarefa de implantação cria um serviço REST para predições em tempo-real.
\n",
+ "Para isso você deve criar uma classe `Model` que implementa o método `predict`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Overwriting Model.py\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%writefile Model.py\n",
+ "import joblib\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from typing import List\n",
+ "from expander import DocExpander\n",
+ "from aux_functions import build_df_result\n",
+ "\n",
+ "\n",
+ "class Model:\n",
+ " \n",
+ " def __init__(self):\n",
+ " self.loaded = False\n",
+ " \n",
+ " \n",
+ " def load(self):\n",
+ " \n",
+ " artifacts = joblib.load(\"/tmp/data/qgenerator.joblib\")\n",
+ " self.model = artifacts[\"model\"]\n",
+ " self.expand_context = artifacts[\"expand_context\"]\n",
+ " self.infer_num_gen_sentences = artifacts[\"infer_num_gen_sentences\"]\n",
+ " self.column_context = artifacts[\"column_context\"]\n",
+ " self.column_question = artifacts[\"column_question\"]\n",
+ " self.loaded = True\n",
+ "\n",
+ " def class_names(self) -> List:\n",
+ " return ['doc_id','context','questions','expanded_context']\n",
+ " \n",
+ " def expand(self,df):\n",
+ " if self.expand_context:\n",
+ " exp = DocExpander() \n",
+ " df_final = exp.expand_sql(df,context_column_name=self.column_context,questions_column_name = self.column_question)\n",
+ " \n",
+ " return df_final\n",
+ "\n",
+ " def predict(self, X, feature_names, meta=None):\n",
+ " \n",
+ " if not self.loaded:\n",
+ " self.load()\n",
+ " \n",
+ " feature_names_pipeline = ['doc_id', 'context']\n",
+ " feature_names_qa = ['context']\n",
+ " \n",
+ " if feature_names != feature_names_pipeline and feature_names != feature_names_qa:\n",
+ " raise ValueError(f'feature_names deve ser {feature_names_pipeline} ou {feature_names_qa}')\n",
+ " \n",
+ " \n",
+ " df_input = pd.DataFrame(X,columns=feature_names)\n",
+ " contexts = df_input['context'].to_numpy()\n",
+ " gen_questions_dict = self.model.forward(contexts=contexts, num_gen_sentences=self.infer_num_gen_sentences)\n",
+ " df_result = build_df_result(gen_questions_dict,column_context=self.column_context,column_question=self.column_question)\n",
+ " df_result = self.expand(df_result)\n",
+ " \n",
+ " if feature_names == feature_names_pipeline:\n",
+ " df_input = df_input[['doc_id']] \n",
+ " df_input['index'] = df_input.index\n",
+ " df_result['index'] = df_result.index\n",
+ " df_result = pd.merge(df_input, df_result, on='index', how='outer')\n",
+ " del df_result['index']\n",
+ " \n",
+ " return df_result.to_numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import pandas as pd\n",
+ "# df = pd.read_csv(\"squad-test-v1.1.csv\")\n",
+ "# n_lines = 10\n",
+ "# contexts = df['context'][:n_lines]\n",
+ "# indexes = df.index[:n_lines]\n",
+ "\n",
+ "# df_small = pd.DataFrame({'doc_id':indexes,'context':contexts})\n",
+ "# X = df_small.to_numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# from Model import Model\n",
+ "# model = Model()\n",
+ "# result = model.predict(X,['doc_id','context'])\n",
+ "# result"
+ ]
+ }
+ ],
+ "metadata": {
+ "celltoolbar": "Tags",
+ "experiment_id": "dd63cfbd-7a97-41ac-bd9b-fd11711ba459",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.10"
+ },
+ "operator_id": "e4150bc8-88f2-4d98-b68a-6c246270c403",
+ "task_id": "ccfeb3fe-3d3a-43cf-bdc4-d0b07017e468"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tasks/nlp-question-generator/Experiment.ipynb b/tasks/nlp-question-generator/Experiment.ipynb
new file mode 100644
index 00000000..c38db481
--- /dev/null
+++ b/tasks/nlp-question-generator/Experiment.ipynb
@@ -0,0 +1,540 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question Generator - Experimento\n",
+ "\n",
+ "Utiliza um transformer T5 pré-treinado em português e disponibilizado pelo [Hugging Face](https://huggingface.co/unicamp-dl/ptt5-base-portuguese-vocab).
\n",
+ "### **Em caso de dúvidas, consulte os [tutoriais da PlatIAgro](https://platiagro.github.io/tutorials/).**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Declaração de parâmetros e hiperparâmetros\n",
+ "\n",
+ "Declare parâmetros com o botão
na barra de ferramentas.
\n",
+ "A variável `dataset` possui o caminho para leitura do arquivos importados na tarefa de \"Upload de dados\".
\n",
+ "Você também pode importar arquivos com o botão
na barra de ferramentas."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "tags": [
+ "parameters"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "dataset = \"/tmp/data/reports_contexts_small.csv\" #@param {type:\"string\"}\n",
+ "\n",
+ "### Dados para Treinamento\n",
+ "\n",
+ "# Dataset\n",
+ "column_context = \"context\" #@param [\"context\"] {type:\"string\",label:\"Coluna contexto\",description:\"Coluna em que estão contidas os contextos\"}\n",
+ "column_question = \"questions\" #@param {type:\"string\",label:\"Coluna pergunta\",description:\"Coluna em que estão contidas listas de perguntas por células. Apenas considerada se train_from_zero=True\"}\n",
+ "train_from_zero = False #@param {type:\"boolean\",label:\"Treinamento do algoritmo do zero\",description:\"Caso False utiliza o algoritmo com fine-tuning no SQuAD em português. Caso True retreina do zero\"}\n",
+ "train_from_squad = False #@param {type:\"boolean\",label:\"Treinamento do algoritmo pelo Squad\",description:\"Caso True treinará o algoritmo com fine-tuning no SQuAD em português. Caso False treinará com o dataset passado\"}\n",
+ "expand_context = True #@param {type:\"boolean\",label:\"Expansão de contextos\",description:\"Expande o contexto passado com as perguntas geradas separadas por espaço\"}\n",
+ "\n",
+ "#prepare_data\n",
+ "dev_size_from_data= 0.2 #@param {type:\"float\",label:\"Porcentagem para avaliação\",description:\"Parcela dos dados utilizadas para avaliação, sendo o restante utilizado para treino. Apenas considerada se train_from_zero=True e train_from_squad=True\"}\n",
+ "test_size_from_dev= 0.5 #@param {type:\"float\",label:\"Porcentagem para teste\",description:\"Parcela dos dados utilizados para avaliação que serão utilizados para teste, sendo o restante utilizado para validação. Apenas considerada se train_from_zero=True\"}\n",
+ "#batch_dataset_preparation = 30 #@param {type:\"float\",label:\"Batch para preparação dos dados\",description:\"Tamanho do batch que o tokenizador utilizará para preparar o dataset. Apenas considerada se train_from_zero=True\"}\n",
+ "\n",
+ "model_name = \"unicamp-dl/ptt5-base-portuguese-vocab\" #@param {type:\"string\",label:\"Modelo\",description:\"Modelo utilizado da base de modelo do hugginfaces\"}\n",
+ "PREFIX = \"gerador_perguntas:\" #@param {type:\"string\",label:\"Prefixo para o T5\",description:\"Incluindo em cada sentença passada ao transformers. Apenas considerado se train_from_zero=True\"}\n",
+ "num_gen_sentences = 2 #@param {type:\"integer\",label:\"Número de perguntas geradas na experimentação\",description:\"Apenas considerado se train_from_zero=True\"}\n",
+ "infer_num_gen_sentences = 10 #@param {type:\"integer\",label:\"Número de perguntas geradas na inferência\"}\n",
+ "train_batch_size= 2 #@param {type:\"integer\",label:\"Batch size para treino\",description:\"Tamanho do batch de treino. Está associado a num_gen_sentences. Apenas considerado se train_from_zero=True\"}\n",
+ "eval_batch_size= 8 #@param {type:\"integer\",label:\"Batch size para avaliação\",description:\"Tamanho do batch de validação e teste. Está associado a num_gen_sentences. Apenas considerado se train_from_zero=True\"}\n",
+ "infer_batch_size = 8 #@param {type:\"integer\",label:\"Batch size para inferência\"}\n",
+ "no_repeat_ngram_size= 2 #@param {type:\"float\",label:\"Sequência máxima de tokens repetidos\",description:\"Após a repetição de tokens configurada, força a trocar de token na geração do decoder\"}\n",
+ "temperature= 0.7 #@param {type:\"float\",label:\"Temperatura de randomização do decoder\",description:\"Pode ser entre 0 e 1. Quanto mais próxima de 0, mais próximo da decodificação gulosa (que procura tokens de maior probabilidade), quanto mais próximo de 1 randomiza entre os tokens contidos no top_p\"}\n",
+ "top_p= 0.92 #@param {type:\"float\",label:\"Porcentagem de consideração\",description:\"Considera apenas os tokens que compoẽ a porcentagem top_op no histograma de probabilidades dos tokens de saída https://huggingface.co/blog/how-to-generate\"}\n",
+ "source_max_length= 512 #@param {type:\"integer\",label:\"Tamanho do contexto de entrada\",description:\"Tamanho máximo contexto de entrada em tokens\"}\n",
+ "target_max_length= 100 #@param {type:\"integer\",label:\"Tamanho da sentença gerada\",description:\"Tamanho máximo da pergunta gerada em tokens\"}\n",
+ "learning_rate= 3.0e-5 #@param {type:\"float\",label:\"Taxa de aprendizado\"}\n",
+ "eps= 1.0e-08 #@param {type:\"integer\",float:\"Valor de estabilidade do otimizador Adam\"}\n",
+ "seed = 13 #@param {type:\"integer\",label:\"Semente de aleatoriedade\"}\n",
+ "\n",
+ "#lightning_params\n",
+ "num_gpus= 1 #@param {type:\"integer\",label:\"Numero de GPUs\"}\n",
+ "profiler= True #@param {type:\"integer\",label:\"Resumo\"}\n",
+ "max_epochs= 1 #@param {type:\"integer\",label:\"Máximo de épocas para treinamento\"}\n",
+ "accumulate_grad_batches= 16 #@param {type:\"integer\",label:\"Batchs acumulados\",description:\"Batchs acumulados antes de atualizar os pesos\"}\n",
+ "check_val_every_n_epoch= 1 #@param {type:\"integer\",label:\"Frequência Validação\",description:\"Frequência da chamada da validação em épocas\"}\n",
+ "progress_bar_refresh_rate= 1 #@param {type:\"integer\",label:\"Frequência de atualização da barra de progresso\"}\n",
+ "gradient_clip_val= 1.0 #@param {type:\"float\",label:\"Fator de corte dos gradientes\",description:\"O fator evita que os gradientes explodam definindo um limite para os mesmos\"}\n",
+ "fast_dev_run= False #@param {type:\"boolean\",label:\"Rodar um batch\",description:\"Utilizado para validar que todas as partes estão funcionando antes de treinar o modelo por inteiro\"}\n",
+ "\n",
+ "#early_stop_callback\n",
+ "monitor= 'avg_train_loss' #@param {type:\"string\",label:\"Métrica monitorada pelo critério de parada antecipada\"}\n",
+ "min_delta= 0.01 #@param {type:\"integer\",label:\"Variação mínima entre épocas\"}\n",
+ "patience= 1 #@param {type:\"integer\",label:\"Espera após atingir variação mínima\"}\n",
+ "verbose= False #@param {type:\"boolean\",label:\"Disponibilizar informações early stop\"}\n",
+ "mode= 'min' #@param [\"min\",\"max\"] {type:\"string\",label:\"Modo de parada\",description:\"Modo de funcionamento para critério de parada\"}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Leitura do conjunto de dados\n",
+ "\n",
+ "O exemplo abaixo faz a leitura de dados tabulares (ex: .csv).
\n",
+ "Modifique o código de acordo com o tipo de dado que desejar ler."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv(dataset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if \"context\" not in df.columns:\n",
+ " raise ValueError(\"A coluna context deve estar obrigatoriamente contida no dataset\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Download de arquivos necessários"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "GLOVE_WEIGHTS_PATH = \"glove_s300_portugues.txt\"\n",
+ "SQUAD_TRAIN_PATH = \"squad-train-v1.1.json\"\n",
+ "SQUAD_DEV_PATH = \"squad-dev-v1.1.json\"\n",
+ "if not os.path.exists(GLOVE_WEIGHTS_PATH):\n",
+ " !wget -nc https://storage.googleapis.com/platiagro/Vident/glove_s300_portugues.txt\n",
+ "if not os.path.exists(SQUAD_TRAIN_PATH):\n",
+ " !wget -nc https://storage.googleapis.com/platiagro/Vident/squad-train-v1.1.json\n",
+ "if not os.path.exists(SQUAD_DEV_PATH):\n",
+ " !wget -nc https://storage.googleapis.com/platiagro/Vident/squad-dev-v1.1.json"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Divide dataset em subconjuntos de treino, validação e teste\n",
+ "\n",
+ "Subconjunto de treino: amostra de dados usada para treinar o modelo.
\n",
+ "Subconjunto de validação: amostra de dados usada para validar o modelo.
\n",
+ "Subconjunto de teste: amostra de dados usada para fornecer uma avaliação imparcial do treinamento do modelo no subconjunto de dados de treino."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ " \n",
+ "if (train_from_zero and not train_from_squad):\n",
+ " df_train,df_ = train_test_split(df, test_size=dev_size_from_data)\n",
+ " df_valid, df_test = train_test_split(df_, train_size=test_size_from_dev)\n",
+ " train_output = 'df_squad_train_bert_chuncked.csv'\n",
+ " valid_output = 'df_squad_valid_bert_chuncked.csv'\n",
+ " test_output = 'df_squad_test_bert_chuncked.csv'\n",
+ " df_train.to_csv(os.path.join(train_output),index=False)\n",
+ " df_valid.to_csv(os.path.join(valid_output),index=False)\n",
+ " df_test.to_csv(os.path.join(test_output),index=False)\n",
+ "else:\n",
+ " df_test = df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Configurando Argumentos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Mapeando dirpaths\n",
+ "data_dir = root_dir = os.getcwd()\n",
+ "logs_dir = os.path.join(root_dir)\n",
+ "# Colocando parâmetros de entrada no formato esperado\n",
+ "hparams = {\n",
+ " \"model_name\":model_name,\n",
+ " \"PREFIX\":PREFIX,\n",
+ " \"num_gen_sentences\":num_gen_sentences,\n",
+ " \"infer_num_gen_sentences\":infer_num_gen_sentences,\n",
+ " \"no_repeat_ngram_size\":no_repeat_ngram_size,\n",
+ " \"temperature\":temperature,\n",
+ " \"top_p\":top_p,\n",
+ " \"train_batch_size\":train_batch_size,\n",
+ " \"eval_batch_size\":eval_batch_size,\n",
+ " \"infer_batch_size\":infer_batch_size,\n",
+ " \"source_max_length\":source_max_length,\n",
+ " \"target_max_length\":target_max_length,\n",
+ " \"learning_rate\":learning_rate,\n",
+ " \"eps\":eps,\n",
+ " \"seed\":seed,\n",
+ "}\n",
+ "\n",
+ "lightning_params = {\n",
+ " \"num_gpus\":num_gpus,\n",
+ " \"profiler\":profiler,\n",
+ " \"max_epochs\":max_epochs,\n",
+ " \"accumulate_grad_batches\":accumulate_grad_batches,\n",
+ " \"check_val_every_n_epoch\":check_val_every_n_epoch,\n",
+ " \"progress_bar_refresh_rate\":progress_bar_refresh_rate,\n",
+ " \"gradient_clip_val\":gradient_clip_val,\n",
+ " \"fast_dev_run\":fast_dev_run,\n",
+ "}\n",
+ "\n",
+ "early_stop_callback_params = {\n",
+ " \"monitor\":monitor,\n",
+ " \"min_delta\":min_delta,\n",
+ " \"patience\":patience,\n",
+ " \"verbose\":verbose,\n",
+ " \"mode\":mode, \n",
+ "}\n",
+ "\n",
+ "prepare_data_params = {\n",
+ " #\"batch_dataset_preparation\":batch_dataset_preparation,\n",
+ " \"test_size_from_dev\":test_size_from_dev,\n",
+ "}\n",
+ "\n",
+ "# Configurações\n",
+ "config = {'params':{'hparams':hparams,\n",
+ " 'lightning_params':lightning_params,\n",
+ " 'early_stop_callback_params':early_stop_callback_params,\n",
+ " 'prepare_data_params':prepare_data_params },\n",
+ "\n",
+ " 'dirpaths':{'data_dirpath':data_dir,\n",
+ " 'log_dirpath':logs_dir,\n",
+ " 'cwd_dirpath':root_dir},\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Treinamento do Zero / Recuperação dos pesos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...\n",
+ "[nltk_data] Unzipping corpora/stopwords.zip.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "08ac3e8118c54ec7a0613790deb89025",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/756k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "GPU available: True, used: True\n",
+ "TPU available: None, using: 0 TPU cores\n",
+ "Global seed set to 13\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d280dc4be51e488cbe6ebc6a4725f9ed",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/456 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1f27456dc5c048a0bebd4548bdd113e2",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/892M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pytorch_lightning as pl\n",
+ "from caller import Qgenerator_caller\n",
+ "\n",
+ "# Criando Caller\n",
+ "qgenerator_caller = Qgenerator_caller(config)\n",
+ "\n",
+ "#Fazendo Build\n",
+ "qgenerator_caller.build()\n",
+ "\n",
+ "\n",
+ "if train_from_zero:\n",
+ " \n",
+ " #Preparando dados\n",
+ " if train_from_squad:\n",
+ " squad_train_path = os.path.join(data_dir,'squad-train-v1.1.json')\n",
+ " squad_dev_path= os.path.join(data_dir,'squad-dev-v1.1.json')\n",
+ " \n",
+ " prepared_datapaths = qgenerator_caller.prepare_data(squad_train_path=squad_train_path,\n",
+ " squad_dev_path=squad_dev_path)\n",
+ " else:\n",
+ " \n",
+ " prepared_datapaths = {\n",
+ " 'prepared_data_train_path':train_output,\n",
+ " 'prepared_data_valid_path':valid_output,\n",
+ " 'prepared_data_test_path':test_output\n",
+ " }\n",
+ " \n",
+ " \n",
+ " # Treinamento\n",
+ " qgenerator_caller.train(train_path=prepared_datapaths['prepared_data_train_path'],\n",
+ " valid_path=prepared_datapaths['prepared_data_valid_path'],\n",
+ " test_path=prepared_datapaths['prepared_data_test_path'],\n",
+ " glove_weights_path=GLOVE_WEIGHTS_PATH)\n",
+ "\n",
+ " MODEL_PATH = \"trained_model.ckpt\"\n",
+ " qgenerator_caller.save_checkpoint(MODEL_PATH)\n",
+ " \n",
+ " # Avaliação\n",
+ " qgenerator_caller.evaluate()\n",
+ " #del qgenerator_caller\n",
+ "else:\n",
+ " MODEL_PATH = \"default_qgenerator_squad_pt.ckpt\"\n",
+ " if not os.path.exists(MODEL_PATH):\n",
+ " !wget -nc https://storage.googleapis.com/platiagro/Vident/default_qgenerator_squad_pt.ckpt\n",
+ " # Load do modelo\n",
+ " qgenerator_caller.load_model(checkpoint_path=MODEL_PATH)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Gerando as perguntas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 2/2 [00:20<00:00, 10.27s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "from aux_functions import build_df_result\n",
+ "contexts = df_test[column_context].to_numpy()\n",
+ "gen_questions_dict = qgenerator_caller.forward(contexts=contexts, num_gen_sentences=infer_num_gen_sentences)\n",
+ "df_result = build_df_result(gen_questions_dict,column_context=column_context,column_question=column_question)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Expandindo os contextos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from expander import DocExpander\n",
+ "\n",
+ "if expand_context:\n",
+ " exp = DocExpander() \n",
+ " df_final = exp.expand_sql(df_result,context_column_name=column_context,questions_column_name = column_question)\n",
+ "else:\n",
+ " df_final = df_result.copy()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Salvando os resultados"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhEAAAGmCAYAAADCudZCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAACA7klEQVR4nO3dd3xN9//A8VcSEYpYCYkVhCqtTYWsm4RshMTeitpUBa22Vu3Nt9UaNSqqpXYWUkEURQlKFRlGdhA78/z+yC+3rpuQXELo+/l49FHO53M+533v/dx33udzThw9RVEQQgghhCgo/dcdgBBCCCHeTFJECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQifFXvaAqanpcUZGhpVf9rhCvAypqWlZRkbFpXgWRY7MTVGUpaamxxsZGZo9vV2vEP6dCMXBZczLHlOIl+JA8FJkfoqiSOamKMoOBC8F0Ht6u1S9QgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUVUxQrG9OvtSuXKFQq8b0dPG6xbNyyEqIT41/sNatKvt6v67z+t/4rRI7xfY0Tibbd43khmTR+Sr779ersSsGNeocTRqmUDvDvZF3i/F8nrRZUUEUVUxYpl6d/HDTNdioj2Nti0kSJCFK73G9Sifx839d+Hj13MmnUBrzEiIV6NVh/Wx8dLhyLiBfJ6UfXSHwUu/mVgoM/YUV2wt2lCekYm4yd9w5gRPrxbtzpR1+KYu8CPx6npbN4whcNHztLw/dpcuxHPpxO/YdoXAwBYMn8UQXuPs25jEJ+N78W7datz/kIkM+duoHrVSixZMJpPJixHZduED1s24PCRcGrVNKdWTXNcnVvJUwEFAKXeKcHMaYOpXr0yf1+Kpo3VBxw9/hdmlSsw8OM5TPy0J/XercHAj+fQrOm7jBnhQ4UKxoT8dool/9tC44aWTBrfiwoVjImNS2b6rPUMG+wFZD/db84CP/r3duXoH3+x7Jtf6eLtQHcfRwwM9Nnlf4Qf1gcw8dOeNG3yLjduJlCvbnUWLPmZg4fP8MWkvli3bkhqWjp+m/ex5dcDr/fNEs9laGjAp2O6Y92mIUlJKcxZ4MesaYPZvusw0dfi+GJSPz4eOZ8xI3woVaokiqJQoYIx02au4/xfEcye8TFNG9fhcWo623YcZINfMP16u9KjqxNHj/9Fi+bvsf7HILZuD6V5s3p87tubmLhk3ilpxKPHaQA4OTRn0ABPSpY0Yuu2UDb+tBdT03LMnj6E0qXfISo6Ns/4a9U0Z/zY7tSuVYXzFyLx/exb9XilS5Xg4OFwFi37mT49XbRiunz1Bp062AHZc3+s73JMKpbVimXc6K40/MCS0Z8uZc2KiQTv/4O2Ds2Bf/P63IWbCv/DKmSyElGIPN3a4NquFbPmb+Tzr1Yyarg3ZcuWZviYRWRlZjF2ZBd139TUNJav+JVGH1jS6ANLFi79GYCpX6/lm++3M2xwR4yNSzFq3BIqlDemb08Xzl+IZJd/GJPG96KDpw0Llmxm4097uXY9ntBDp+nS66vX9dJFEePh3pr679Xky2mrSUtLz7Ofnp4eX0zqyz+XrzPhsxW4u1pha90IlV1THj5KZfiYRXy/ehcxMUls8AsGoEuvrwg9dFo9Rq2a5gwf4sXGn/byvxXb6NPThcaN6gBQoXwZ1v0YRPS1eDp72VG2bCmcHJqzet0ePp34DRcuRhbuGyFeCg+3NjjYN2PyV6u4+HcUvp/0YOk3W+nVvR2jhnvjt3kfUdFxQPYS/rxFPxEbm8zQQR0A2BNwhI+GzuOnn/fTr7crZcuWAsDIqDghB/7k1J+X6OKtAmD4EC8iomJZ9s1WTE3KAWBc5h0mjOtB6KHTzJm/kY/6e1C3TjW6
d3GifLkyTJ6yindKlsgzft9xPdDT12PY6IX89Mt+jI1LMWFcDw4c/JOvpv+Aq3Mr2jm1zDWmCxcjCdx7nITE23Tp9RXXrsXnGsv3q3dR6p0SfLt0HA8fPWaDX5BWXn8bSBFRiGrVNCc+4RbHjv/FpX+uUb5cGc7/FUFUdBxnzl7BwsJM3ff3Y+e5cDEKgNKlS3L33kMA7qTc5/79R1jUMKN6tUosXzQWixqVqfduDQC2/BpKFXMTIiJjCD97hYcPU8nMzOLx4zSSklJe+WsWRVPVKqbEJ9ziwsUoTv15SatdXz87FZQ1LkX5cmWws2nMwrkjMDDQp967NQje/wdpaRn8b/EndOviSMmSRjx4+AiApKQUUlP/LUwsalQG4FBYOAfDzgBQs0b2XI+JTeL8XxHcuJlI6VIlSUl5wLadh+jZtS3zZw+jrmW1wnwbxEtSs4YZxYrpM+frj3FyaE6tmmYc++MCd+7co3y5MmzfdUjd92pkDFcjbnL2/FWqVjGlWDEDHOybsWzRGPr3cUNfXx+TimWB7JOpsN/PEhUdR+lSJQGoVtWUM+GXuXzlBtHX4wGoUsWE4sUN8Wpvy1ef9wfgvXdrUK2qqcbx8lLLwpzfj54nKjqOP0//QxXzihQvbsiRo+c4HX6ZO3fuqefs0zGlp2fy+HEqWZlZJCWlULly+VxjefDwMYF7j1Gtqik7d4eRnp6pldffBnI5oxBFRsXi4dYaq1bvk5ycwu079/jg/drUtDCjSaM6RP9/pQ6QmZmF8sS+aWkZAFSvasrlK9eJvhZHZmYWazcEYGhogFFxQwAGDfDkakQMNS3MsLNpzKGwcNLS0qlYsSwVKxiTfOvuq3zJooi6cTMR13ataFC/Js2b1QMgLj6ZBvVrYm5WkQb1a5KenkHK3QfcvnOP8LNXCNr7BxUrGHMzJpFiBgYsXLqZd0qWYOnC0TT8oLZ6jtaqac6NmwnqY127nv1nO5vG3Pv/pBl1LY736tUgKyt7liuKgp6eHoaGxfjz9D/8uuMgH3/UgQ6eNuzYHfYq3xqhg6hrcaSmprPkf1u4e+8hNS3McHNuRenSJYmLv0X/Pm4s//ZXACxrVcGydlUafWDJzZhEatc0x8mhOX6b91GyRHE6e9kDegBa8wOy526TxnX54+RFLKpX5vyFSGJik0lLSyd43x8cOXaOquYm/HUxipo1zVHZNlEfLy+R0bG0af0Bh4+EU758GSIiY0lLy8C6dUOKGxpSrlwZoq7FYVa5Qq4xpaVl8E6pEpibVSQ+/nausVSoYEynDnb8fekaPbq2VRfi8G9ef/gwtVA+n1dJViIKkX/QUfaFnGDyhN7MnzWM/63Yxt17D/h22TiKFTNg6Te/5rnv9RvxXLgYxSeju9K9ixMrVu3k7r0HfDGpL8OGeFHcqDitW72PnU1jZs37kU0/72f0cG9Kly7Jbwf/pNEHtVmx7NNX+GpFURYQeJSLf0fx9dRB6lWHoL1/kJaWzpyvP+be/ewf9oqi8PWcDdS0MGfalwPw6awiIzOTOpZVmTdrGPNnD+Pc+QjOnL3CqdOXSEy8ww/fT6LhEwk7IjKG71fvpHdPF0b+/9J2+Nkrucalr6/HgL5urP1+Eg0/qM32XYcL/80QL2xPwO8cPHyGUcM685lvL2pUr8yggZ6sXuvPgiWb6ehpw/sNagKQlJzC+LHdMTevyPdrdhF1LZ7wc1fo4GHNO+/kfckhx7crd1C7VhU+Gd2V5FvZq6t37z5g7sJNfNiyPjO++ghX51akpqazeUsIt+/cZ/b0ITxOTctzzIVLNgOw8htf+vRw4e7dByxY8hOOquZMn/IRe/f/wb6Qk3nuf/TYeRQFNq3/ipLvGOUay5gRPly/kcCY8ct49DiVoYM6auX1t4GeoijP71UwitzMJ4qqA8FL//M3m7q0+5BJ43vRscvn3L374HWHI/7f2zg3F88byaPHaXz+1crXHYp4QQeCl0LOktET5HKGEEKIt1Y7
pxaMH9td/fe9ISdYuOTn1xjR20WKCCH+Y4L3/UHwvj9edxjiP+CTCf973SEQ9vs5/vr/m9YBHj54/PqCeQtJESGEEOKt9ehRKo8evfk3MBZVcmOlEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXTyzKd4pqamxxkZGVYuyIBPPnNdiKJG5qcoqmRuiqJMUZQsPT09g6e3P+9R4AV+rPf/Py5UCCGEEG8XrSpXLmcIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghXoo9e/YQHx//usMQQrxC+S4iBg3wZMeWWXz/v/FUr16pMGN6o9y9excPDw9UKhVWVlacPHky13537txh27ZtL3Ss0NBQpk6d+tx+U6dOZf/+/eq/29jY5Hv8L7744rn9oqKi6N27d77GzDF27FgyMzMLtI/Q3bp166hXrx4HDx4kKipKPW8iIiJwc3NDpVLh7e1NYmJigcbt378/V65c0dqelJTEli1bqFy5QM/re+WmTp1KaGjo6w5DILkzv4p67sxXEdG0cV16dHVi+sx1JCbdYezILoUd1xtjw4YNdO7cmdDQUMLCwqhXr16u/QryRcjKynqZIRaJYy5ZsgQDA60HwIlC5Ovri729vca2wYMHs2zZMkJDQxk3bhyjRo16Kce6cuUKs2fPfiljif8GyZ35U9RzZ76KiGZN3yUh8Q5/nvmH34+ep0mjOhQrVnRf1Kv0zjvvcPToUZKSkihWrBhlypQBYPr06ahUKhwdHYmKimLlypXs27cPlUpFYmIic+fOxdraGkdHR65duwZA48aN6d27N/PmzdM4xsCBA2nbti2rV69Wb1u9ejW2trbY2try559/5ivWK1eu4OzsjL29PV9//TWQfWY5cuRIXF1dAfjjjz9wc3PD0dGRW7duATB69Gjs7Ozw9PQkJSVFY8yNGzdiZWWFtbU14eHhAHz33XdYWVkxceJEVCoVACqVioyMDIKCglCpVLRo0YINGzYU5K0WOqpatSojR44kOjoaMzMz6tatC4C1tTUJCQlkZmaqPx9A/ZmtXbtW/Vnt3bs3z/H37NnDhAkT8PHxISgoSKMtJSUFT09P7OzsGD16tNa+VlZWDB48mCZNmqj3zW1OWVlZ8fHHH9O4cWPWrl1L586dadSokbp9z5492NnZ0aZNG60Ybt26hYODA25ubvzxxx8AZGRk0KNHD+zs7OjRo4f6tedQqVSMGjUKKysrVq5cma/3WRSM5M63JHcqivKs/xSV82hl154wJTIqVlE5j1amzVyrKIqidO7+haJyHq31339NWlqaMm3aNOWDDz5QnJyclNjYWCU8PFwZMmSIoiiKcuHCBWXIkCFKZGSk0qtXL0VRFCU2NlZxdnZWFEVRDh8+rAwdOlRRFEWpUKGCcv/+fY3xjx8/rgwaNEhRFEWZOXOmMmXKFCUxMVFp3769kpWVpSQnJysdO3bU2GfKlClKo0aNFHt7e8Xe3l6xtLRUFEVRunbtqly7dk1RFEXp3r27cv36daVfv37Kpk2bFEVRlAMHDiht27ZVFEVRNm/erMyePVv5448/lIEDByqKoig//vijMmfOHPVrycjIUFq2bKmkpaUpkZGRioeHh5Kenq60atVKycjIUI4dO6bY29sriqIo9vb2Snp6uvLgwQNFURQlPT1dadOmzcv5EISWtWvXKqtWrdLYdvToUWX8+PEa27p3767ExsaqPx9FUdSfWc5ndefOHaVdu3aKoihKv379lMuXL6v3z8zMVO+bmpqqODo6aow/b948ZcOGDYqiKMpHH32kHDt2TKO9bt26Snx8vHLjxg3Fy8sr1zmV0y8uLk65efOmUrlyZeXRo0fKkSNHlFGj
Rj03hrlz5yobN25UFEVRnJ2dlQMHDii//PKLMn36dEVRFGXGjBnK5s2bNfaxt7dXwsLClIyMDKV169ZKamrq895yUUCSO9/I3KlVJxTLT6GRcvcBJUsaAVCypBFZWVncvfugUIubN4WhoSFfffUVX331FT/99BNLliyhWbNmhIaGqitJc3NzjX2ioqJo1KgRAC1atGDatGkA1KtXj1KlSmn0jYiIoGnTpgA0b96co0ePEhERQXh4OA4ODnnGtXDhQtq2bQv8e13v0qVL9OnTB8heIrx586Z63Bw5x2rSpAn79u2jZs2aNGvWTB3rwYMH1X0TExOxsLDA0NCQmjVrkpKSQlJSEjVq1MDAwIAmTZpoxXXq1CmmTZtGeno6Fy5cyDN+8fKZm5sTExOjsS0hIYHy5cujp6en3qYoCgDBwcEsXboURVFISEjIdcykpCQuXryonmsJCQkoiqIe7+rVq7i7uwPZ8+fKlSu0atVKvb+pqSmVKmXfY3Xnzp1c51ROv5z7LSwtLSlRogRVqlTh9u3bz40hIiICT09PAPVcvnr1qsa8PnXqlNZra9q0KQYGBlhYWJCQkEC1atXy8S6L/JLc+XbkznwVEafPXKZnt7Y0a/oubaw+4Oy5q2RkFN0bPV6l6OhoqlSpgqGhIZUqVSIrK4t3330XZ2dnli9fDkB6erp62RigZs2a6uWrkydPYmlpCYC+vvbVpVq1anHgwAEATp8+rd7WsmVLtm7dqh4/P+rVq8eSJUswNzcnMzMTPT09VqxYoXHcnLjCw8OxtLTE0tJSvZT9ZKyQndijo6NJT0/n5s2blC1bFhMTE65fv05WVhZnz57VimHevHmsXr2aqlWr8u677+YrbvFyWFhYEBMTw+XLl6lbty5HjhyhQoUKGBkZUbZsWWJjYylZsiRxcXEAzJ49m4MHD5Kamoq1tXWuY5qYmNCwYUOCg4MxMDAgPT1doyCxtLTk1KlTvP/++5w8eZJBgwZp7P908ZLbnHq639P7PC+GWrVqER4eToMGDTh9+jQuLi7quDw8PDh58iR16tTRem3h4eF8+OGHREdHqwsd8fJI7nw7cme+iog/z/zDz1t/Y8rk/sTH3+brOUXoesxrdubMGbp27UrJkiUxNDRk7dq1VKtWDTMzM1QqFXp6evTo0YOPPvqIW7du4ePjw8qVK3FwcKBNmzYUL16c9evX5zl+q1atWLFiBU5OTlhYWFCjRg1MTU3x8PDAzs4OAwMDHB0d+fLLL58b68yZMxk4cCCpqakYGhry66+/avUxNDTE1dWVx48f8+uvv1KxYkXWrVuHra0tZcqUYdOmTdy5cwcAAwMDRowYga2tLfr6+nzzzTcUK1aMfv360aZNG1q3bo2hoaHG+J06daJjx440adKEcuXKaR1/1KhRLF++nKCgIDIzM/Hw8FBvEy9u1apVjBw5ktu3b/P48WP1nehDhgyhffv22NjYYGpqCqC+l+HDDz/M9bOC7OQ9btw4nJyc0NPTo0GDBnzzzTfq9sGDB9OzZ09WrVpFo0aNsLKyemZ8uc2p53leDIMGDcLb25sNGzZgZJS9ourl5cXWrVuxs7PD3NyciRMnao27ZcsWxo4dy4ABAyhevDjr1q2jYcOGNG/eXD0nn9wmCkZy59uRO/Vyli7zoDi4jCnQgAeCl75QQOLNl5GRQbFixTh+/Dg//PAD33///esO6T9n69atzJkzh4ULF2r9hgZAWloanp6efPHFF9jZ2b2GCIs2lUrF/v37KVYsX+dZQrwUb0Du1Ht6g3xDxEu3fPlyduzYQVpa2jPPFETh8fHxwcfHJ8/24sWLP/M3LoQQr96bmDtlJUIIIYQQ+aG1EiH/7LUQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF08sxnZ6SmpmUaGRUvUKGRlaWg
r6/1z2sLUSQoioKensxPUfTI3BRFmaIoWXp6egZPby+UB3DdvXuvgOEJ8WoYG5eR+SmKJJmboigzNi4D8gAuIYQQQrwsUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInxV53AKJoW7ZsGfHx8bRt2xYHB4fXHY54CwQFBdKsWXMqVar0ukMRolD8l/JmvlYiTCqWZc13EzkQvJSJn/Ys7JiKrNjYWGxtbalUyZSMjIw8+x0+fJgZM6YDUL16NTw9PfH09GDGjOmkp6cD0Lx5Mzw83PHwcGfcuE8AaNjwA9avX68xxvDhw/DwcKdGjeq4u7vh4eGOv7+/xvg544SFhTF79iwOHDgAQGhoKB4e7ri5udKrV09u3UoG4Pfff6d69WqkpaWpY86rb4sWLdiwYT3W1tbqvvv378fV1QUPD3c+//wzMjMzNV6/n58fzZs3o3379rRv357ffvst3/uKVyvnswoLCyM6OprZs2cBEBkZibd3Zzw83OnduzdJSUkFGnfYsKFcvXpVa3tycjI7dux44QJi0qSJec4dPz8/9fdIvH6SN7O9rXkzX0VERkYmewJ+Jy4uubDjKdLKly/Prl27aNmyZb73adCgAXv27GHPHn8MDQ1ZtWolACYmJvj7B+DvH8CiRYsBqFixIlu2/KKx/7ffrsDfP4CGDRuya9du/P0DMDY21hg/ZxwbGxv19qSkJObNm8vmzT8TGBjEtGnTSEvL/iLu3r0LL69OHDwYWuC+ycnJLFy4gF9/3Ya/fwAVK5qwbt06rdc9evQYdu/ezYYNG5g3by6xsbH53le8WqNHj9GYO9nbRjNv3jz8/QMYOXIkvr7jX8qxIiKuMmXK1BceZ86cuRgYaD1QsFApisJzHlgociF58+3Om/kqIu6k3Gf7rsM8epz2/M5vsRIlSlC+fHmd9x837lP27t2bZ7uRkRGtWrXSqEB1tXdvMN26dadMmTIA1KlTFzMzMwCuXLnCxIkT2bPHv8B9g4OD6NatO6VKlQJgxIgR7NmzO884ypcvT+/efThw4LcC7yterSpVqjBkyMdcu3aNypUrYWlZBwArKysSE5PIzMzEw8NdfTbp4eEOwMaNG/HwcMfe3p6QkJA8xw8KCuSrr76ib98+7N+/T6Pt7NmzuLu74ejowIIFCzTa0tLS6NLFBw8Pd/r166c+dkZGBhERETg6OtCjR3fat29PdHS0er/09HTat2+Pm5srvXv31jp7GzZsKCNGjMDNzZWZM78GYP/+ferX8tNPmwCYPXsWw4cPo1MnL5KT/9snUrqQvPl25025sfIVKl68uDoBJyUlqZfT5s6do+4zePAQ1qxZne8xL1y4oB4nJSVFvT0uLh4zs8pa/c+cOUPTpk2pVq0aiYkJZGVlFbivubmZuk+JEiXUS415MTMzIz4+Qad9xatjaGhIxYoViY+Pw8zMXKOtUiXTPC9pdO7cGX//AHbt2sXy5cty7ZOVlcXy5cvZvXsP/v4BLF2q2a9u3br4+wfw228HCA09wKNHj9RtN27cUJ+BPn0Gtnz5cubOncvGjX4kJSVqtBUrVoxffvmFwMAg6tV7l4MHD2rFZW9vR2BgEOHh4cTExNCmjTX+/gGEhISwdu1adT9Lyzrs2LETExOTXF+fKDySN4t23pQbK1+htLQ0ihXLfstzkuLTzMzMKF26NJcvX87XmDnLctrjVCY2Nk5r++7duwgLC+PkyZNcv36d48ePv1Dfx48fq19TXmJjYzEzq4yiKAXeV7x6lSubERcXq7EtMTGJcuXKoaenp96Ws7QfEhLCd9+tQFEUEhM1f5DnSE5O5tKlS3Ts2OH/x0tEURT1eNHR0Uye/DmPHj3i8uXLJCYmUqNGDQBq165NgwbvM2jQRzRp0pSRI0eqx42OjuL99z/AwMCA+vXraxzzwYMHjBkzmtjYWBISErC0
tNSKq1GjxgA0aPA+0dHRREVFMWfOHDIy0vn770vqfk2aNMnXeydePsmbRTtv5mslwsBAn+rVK1GsmAGlSpWkenW5q1oXS5cuwdXV9bn9Pv54KN99t+KFjuXs7MIvv/zMvXv3ALh69SpxcXGcPn2a4OC9bNu2HT+/TezZs7tAfdu1c2bz5p948OABAN988w0eHp55xnHnzh1++mkTDg6OBd5XvB41atQgLi6Oq1evAHDs2DHKly+PkZERxsbGxMXFkZycTHx8PACLFi1k69Zf2bTpJ/T1c08pFStWpEGD99XXp48c+V2jIFmzZjVjx35CQEAgtWvX1rj3IDU1lREjRrB69Rr2799HQkKCus3CoiYXLvxFZmYmf//9t8YxQ0JCqFOnDgEBgXTo0JHcbmc4f/4cABcvXqBGjRosXbqE5cuXs3PnLsqWLavul9frEoVP8mbRzpv5KmdMTMqxYfVkAKpXq4StdSMcXMYUamBFUXp6Ot7e3pw7d55OnToxZcpXVK9egw0bNuDr65vrPhcuXMDT0xNQaNWqFWPHZt9RnLMsB2Bubs7q1WvU+zRr1uyFriFCdsXu6zuBbt26oigK5cuXZ9y4cRrLse+++y4nTpzQ6JucnIylpWWefU1NTRk3bhze3p3R19enUaNGjBmjPReWLVvK1q1bARg/3ld9rfB5+549e5YzZ87Qt29fJk2ayMyZs/jrr7/U28SrsXTpMnx9x3Pnzh1SU1PZuXMXAP3796d7925YWVmp54erqytubq40b95c4wfvk/T19Rk5cgQdOrRHT0+PevXqsXDhInW7i4sLvr7jqVfvPQwNi2vse/36NUaMGEFmZiY1a9bE1NRU3TZq1CgGDx6Eqakp5cqVw9DQUN3WokULFixYwOnTpzE2Ns51JSIs7AirV6/G2tqaqlWr4unZnp49e9CwYcNcX0tuczJn26u+0fNNIXnz7c6bes+521gpaLFwIHgpd+/ee6GgxOvz1VdfMX369NcdRqExNi4j8/MJO3bsYPHixcycOVPrNzQgeym5a9cu+PpO0Ph1taIiIyODYsWKkZmZibNzO4KD9+Z7qXfYsKGMH++ba3HxOsjcfHO97XkTsucnoPf09qJzYUW8drNnz+L48WOvOwzxCnl5eeHl5ZVne/HixdmxY+erC6iAoqIiGTVqFA8ePKRPnz5F6lqx+G/4r+dNWYkQ/ylytieKKpmboijLayVC7hYSQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETp757IzU1LRMI6PiBSo0FEVBT0/rn9cWokiQ+SmKKpmboihTFCVLT09P63n3hfIALiGEEEK8deQBXEIIIYR4OaSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIeAMcOXKEy5cvv+4whHghN2/eZN++fa87DPEfIXnz1chXEdG4UR02rJlM8O4FrF4xEcvaVQs7rjdOTEwMzZo1o0SJEmRkZGi1z5w5kypVqvDFF1/kun9ISAgqlQo7Ozs6depEcnIyAOnp6fzvf/+jdu3aWvuoVCoA1q1bx6lTp174NeSMVxjmzJnDzZs3c207c+YMf/75Z6EdW2TPkXr16nHw4EGioqKYOnUqABEREbi5uaFSqfD29iYxMbFA4/bv358rV67kq+/cuXN57733tLZPnTqV0NBQzpw5w5o1awp0/NzkjPc8AQEBNG3a9KUcU+hG8uazvQl5s1h+OhU3LMaPm4K5cvUm07/6iEEDPPnsy+8LO7Y3SoUKFQgJCaFTp065tg8aNIg2bdoQEhKi1ZaYmMj06dPZs2cPZcqU4Z9//iEtLQ2A6OhovvzySwwMtB6epta/f/+X8hpehqysLPT1tWvT
SZMm5bnPmTNnyMjIoFmzZoUZ2n+er68v9vb2REVFqbcNHjyY7777jrp163LkyBFGjRrF5s2bX/qx09LScHd3p3r16nn2adKkCU2aNHnpx87Ljh07+Omnn3ItbMSrIXkz25ucN/O1EnHi1N/sCzlJZFQs0dfiKF26ZGHH9cYpUaIE5cuXz7O9cuXKeT7mNyAggD59+lCmTBkA3n33XczNzQkPD6dfv34MHDiQjRs3AnDixAmaNWtG165duX37NpB95rV//35SU1Pp0KEDrq6udO/enXXr1gEwevRo7Ozs8PT0JCUlRePYuY137do1HB0dsba2Zu7cuVrxfvDBB3Tp0oXmzZtz4sQJAKysrBg2bBjjx48nPDwca2trrKys1HHnnLE+evSIHj164OjoSLdu3UhPT2flypXMnz+fXr165fftFi+gatWqjBw5kujoaMzMzKhbty4A1tbWJCQkkJmZiUqlUp8Z5pxprV27FpVKRYsWLdi7d2+e4+/YsQMrKyscHBw4ePAgKSkpdO7cmVmzZjF69GgAbt26hYODA25ubvzxxx8AhIaGqs84v/jiC+zs7Bg1apQ62W/cuBErKyusra0JDw/XOGZu48XExODg4ICNjQ3Dhw/X6B8WFsauXbvo168fYWFhuY5tZWXF4MGDadKkCUFBQbq+3eIZJG+++XmzQPdENKhfkw9b1Ccw6FhhxfOfFBsbi7m5udb2L7/8Ej8/Pw4fPszy5ctJT09n+vTp7Nixgx9++IEbN25o9N+xYwdt2rQhKChI/cU8ceIEDx484NChQ3Tv3p3vvvtOY5/cxps7dy7Tpk3jyJEjHDhwgJiYGI19YmJiWLt2Lbt27WLatGkAJCUlMXnyZBYtWpRr3DlWr15Nhw4d+O2331CpVGzdupUhQ4bg6+uLn5/fi7+Z4rkMDQ0xMTEhNjaWKlWqaLRVrlw5z0sa3bp1IzQ0lJCQEBYsWJBrn6ysLGbOnMmBAwc4cOAAtra2rFy5km7dunHo0CEePnzI8ePHWb16NYMGDSIwMJDMzEyNMWJjY/nzzz85dOgQNjY2AGRmZrJs2TIOHz6Mn58fkydP1tgnt/FMTEzYt28fYWFh3L17V+P6uI2NDa6urvj5+dG6detcx7516xYzZ87E39+f77+XldeiRvJm0cib+S4ialqYMWv6EIL3/0FAsBQRL5O5ubnWhAO4ffs2NWvWxNDQkFq1apGQkMCdO3eoUaMGpUuX5t1339XoHxkZSaNGjQDUy8JXr15VL3e1aNFC6/p1buM9uU+TJk2IjIzU2KdOnTqULl2aqlWrqiv0SpUqUa1atTzjznHx4kWWLFmCSqVi/fr1Gm3i1cpt3iUkJFC+fHmNsz9FUQAIDg5GpVLRoUMHrl+/nuuYiYmJWFhYULJk9mqlvr5+rnMwIiKCpk2bAmgtx0ZHR/PBBx8A/87jnHENDQ2pWbOm1plhbuMlJyfj4+ODSqUiLCws1+/Ys8Y2NTWlUqVKVK1alTt37uTxLorXRfJm0ZCvIqKSaXnmzRxG9LU4Nm7aS/nyZQo7rv8Ud3d3Nm7cyL179wC4cuUKsbGxlCtXjqioKNLT04mIiKBSpUqULVuWGzdu8ODBA607j2vVqsW5c+cAOHv2LACWlpbqm4dOnjyJpaWlxj65jffkPqdPn6ZmzZoa+1y5coUHDx4QExODsbExgMb1vNzizlGvXj0mTJhAaGgox44dY/jw4RgaGmqdjYrCZ2FhQUxMjPpzP3LkCBUqVMDIyIiyZcsSGxtLUlIScXFxAMyePZvAwEB27tyZ6/VbyP7Be+3aNR4/fgxkr0zkNgdr1aqlvmxw+vRprbguXLgA/DuPTU1NiY6OJj09naioKMqWLauxT27jbdq0CS8vL0JDQ7G2tlYXQ7nFnNvYuRVSouiQvFk08ma+bqxs2qQupqblMDUtx+YfpxIXl0yPftMLO7Y3Snp6Om5uboSHh+Pi4sKsWbOwsLBgzZo1TJ48mTVr1vDt
t99y69Ytbt++zTfffKPe19TUlC+//BJPT08URaFChQqsWbOG6dOn07NnTzIzMxkxYgSGhoZ8+eWXdOjQgXfffZcaNWpoxODl5UWXLl1wcXGhdOnSGBoa0rJlS9atW4etrS1lypRh06ZNGvvkNt6ECRPo168faWlptG/fnqpVNX8bp3r16gwcOJArV67w7bffar0XucWdY8iQIQwePJhvv/0WRVGYPXs2VlZW9O/fn/Pnz6vfq8mTJzNnzhz69OmDgYGBept4uVatWsXIkSO5ffs2jx8/Zv/+/UD259S+fXtsbGwwNTUFwNPTEzs7Oz788EPKlSuX63j6+vp89tln2NvbU6pUKaZMmcLgwYPp2bMnq1atolGjRlhZWVG3bl28vb3ZsGEDRkZGGmOYm5vTpEkTbG1tadCgAYaGhhgYGDBixAhsbW3R19fX+P5A9g14T4/n6OhI37592bFjxzPfg+eN/aSxY8eycOFCzp07x6lTp/joo4/U2551E5/IneTNf72peVPvORW24uAypkADHghe+kIBiReTkZFBsWLFGDZsGH379qV169Yv/Rg2NjaEhYUVaJ8ePXqwYMECrS+WeDW2bt3KnDlzWLhwIfb29lrtaWlpeHp6qm9ofN1y5vHPP/9MREQEn3322esOSbzFJG/mm9ZdrvlaiRBvDg8PD+7fv0+dOnUK5Yugiw0bNvDo0aOi9EX4z/Hx8cHHxyfP9uLFiz/zNy5etcmTJ3P06FEMDAz45ZdfXnc44i0neVN3shIhhBBCiPzQWomQf/ZaCCGEEDqRIkIIIYQQOpEiQgghhBA6kSJCCCGEEDqRIkIIIYQQOpEiQgghhBA6kSJCCCGEEDqRIkIIIYQQOpEiQgghhBA6kSJCCCGEEDqRIkIIIYQQOnnmszNSU9MyjYyKF6jQyMpS0NfX+ue1hSgSFEVBT0/mpyh6ZG6KokxRlCw9PT2t590XygO47t69V8DwhHg1jI3LyPwURZLMTVGUGRuXAXkAlxBCCCFeFikihBBCCKETKSKEEEIIoRMpIoQQQgihEykihBBCCKETKSKEEEIIoRMpIoQQQgihEykihBBCCKETKSKEEEIIoRMpIoQQQgihEykihBBCCKGTfBURlrWr8vPGqQTvXsgP30/i3brVCzuut0JsbCy2trZUqmRKRkaGVvuYMWNwdm6Hi4sz58+f12iLjo6mTh1LPDzc6dChA4mJic893tmzZ9mwYUOe7T/++GPBX0QefH3H59m2aNEiYmJi8j2Wn58f69evfxlhiWfw8/OjefNmhIWFER0dzezZswCoXr0aHh7ueHi4M3funNccZf75+flx+vTpXNuio6M5ePDgSznO875X4uWT3Jk/RSF35quIuH//IbPmbWTY6IUYl3mHLp1VhRzW26F8+fLs2rWLli1b5tr+ySefsHfvPr799lvmzNFO3g4ODvj7B9CvXz/WrVur3q4oCrk9OK1Ro0b07ds3z3g2bnw5X4SsrCzmz1+QZ/u4ceOoUqXKSzmWeLlGjx6DjY2NxrYGDRrg7x+Av38AEydOek2R5S0rKyvX7b169aJp06a5tl27do1Dh168iMjKynru90q8fJI73xz5KiLiE24TfvYKN2MSeZyazvUbCYUd11uhRIkSlC9fPs/2mjVrAlCsmCEGBlpPWFVr1KgRN2/GMHv2LIYPH0anTl4kJyczcOAA3NxcGThwABkZGRw+fJgZM6YDsH79elxdXXB1deHMmTOsXbuWCxcu4OHhzl9//cXPP2/GyckRZ+d2nDt3TuN4KSkpdO3aBTc3VyZM8AWyK97+/fvTpYsP58+fx8XFGYCAgADs7OwYPXq0etuwYUO5evUqfn5+9O7dGx8fb3x8vFEUhbNnz+Lu7oajowMLFuT9ZRKFq0qVKgwZ8nGe7a1bWzF48CBat7Zi69YtdOnig52dHTdv3tToFxUVhaOjA927d8PNzVW9wnHgwAEgey5ER0fz6NEjBg4cgKenJ/37
9yc9PV1jHCcnR4YNG4q9vT1BQYEAeHi48+WXX/Lxx0O4fv06np6eODu3Y/HixQDq4yiKwieffIKnpydduvhw+/Zt1q1by88//0z79u0BmDDBFzc3V7p27UJKSorGsc+dO4ezczucnBz5+efN6rjHj/+Uzp07aXyvxKshufPNyZ35vieiV/d2+G+fS9mypfjzzOXCjOk/Z9q0qQwdOjTP9iNHjlCnTh0ALC3rsGPHTg4fPsx7771HYGAQ9evXZ+fOner+ycnJBAYGEBgYxKZNPzFv3lwGDBigPuN87733+O677wgKCmbVqtVaCXLdunV07tyZwMAgHj58xMmTJwAoW7YsW7ZspVGjRuq+S5YsJjAwkEmTJpGQoF1cVq1aha1bf8XcvArnz5+nbt26+PsH8NtvBwgNPcCjR49e6L0TujE0NKRixYoA6gTp4eHO2rXZZ22JiYksX/4/lixZytKly/j5518YNWok27dv1xhn2bKlzJo1m40b/bh161aex9uwYT3u7u7s2bMHGxsbdu7codGelJTE559PJiAggEWLFqm3t2/vyapVq1myZDGff/45e/fu4/DhQ8TGxqr7BAUFUa1aNfbs2cOQIUP44Ycf6N9/AN26dWP37t2cOnWKBw8eEhgYhLe3Nz/88IPGsWfO/JpVq1YTFBTM999/ry5wrKys2LFjJ6Loktz5+hXLb8dd/kf44+RFxo/tzvCPvRg+etHzdxLP9e233/Dee+/RunVrrbYDBw7g6emBubk5ixYt5n//W06TJk0AiIyMpHHjxgA0bdqU06fPUKlSJQCioiI5f/48np4euR4zKSmJ6tVrYGhoiIWFBXfv3tVoj4yMxNnZWT12REQEgPrYTzIwMKBUqVKUKlVK/UPpSfXrNwCgShVzUlJSiI6OZvLkz3n06BGXL1/O1/VKUbhyEuSTateuTYkSJTAzM6NevXfR19fHzMycS5cuafSLioqiUaNGFCtWjPfffx8APT09dXvO0vGlS5fUZ3WPHz/Gx8dHY5wKFSpQvXr2vVZPnlk2aZJ9ueLJ+d6wYSOio6PVfS5dusS2bb/y228hZGRk0LLlhxpjR0ZG0qRJznelGWFhyzXa79y5g4WFBQAWFhbqOZnbfBdFh+TOopE781VEfPB+bTIyMnjw8DGZmZmkZ2QWdlz/CSEhIRw//gfr1q3Ltd3BwYFVq1ZrbNPXz148qlWrFmfOnMHFxZXTp09Tq1ZtdR8Li5o0bdpMfTNQzplVTnI3MTHh+vVrpKenExMTg7GxscYxssc+Tf369Tl9+jT9+vXln38uq4/9pKysLB4+fEhKSgrJycla7U//QFmzZjVjx36Cra0tLi7OuV6fFK/fk59bbkVBDguLmpw/f47mzVtw4cIFAIyNjYmPj0NRFC5e/BuAunXrYm+vomPHjgBalzNu377NzZs3KV++PJmZ/+aXp+e7tbU1Z8+G8/HH/16KqVu3Lt2792DUqFHqsU+dOkVmZpZ63wMHfgPg9Ok/qVWrlsaxy5YtS3R0NFWqVCEqKgpTU9P/f93yy2tFleTOopM78/UtqVrFhNnTP2bVN76gp8c3K7YVdlxvhfT0dDp06MC5c+fp1KkTJ0+eID4+nvnz5wPZ12mjo6Pw9PRgzJgxBRrb09OTixf/xs3Nlb/+uqBOzpA90V1cXHBzc8XT04PFi7NXjapWrUrv3r25evUqgwcPxtXVhUGDPmLy5C80xu7Xrx+//vorrq4uGBkV1zqze9KYMWNxc3Nj5syZ6mr+WVxcXPD1HU+/fv0wNCyu1Z5z5/KTd94/625m8eKevJwxefLnBdp31KhRTJo0iV69emJqmv35t2/fgW+/XUG/fv0oV64cAP37D2DPnt20b98eT09PwsPPaIxTsWJFZs+ejZubG2PHfqJ1nDFjxjJr1kzatWuLjY2txs1n7u7uXLsWjaenJ56enuzbt4/69etz/Pgx+vfvT/PmzSlRogSuri5s2bKFgQMHaoz9+eeT
GTToI1xcnBk8eDCGhoZ5vt6cubh//z6Cg4M0tomXR3KntqKaO/WeU80oDi4F+4AOBC/l7t17LxSU0M3+/fs5ffo0vr6+r+yYGRkZFCtWjJiYGMaMGc2WLVtf2bF1YWxc5j87P3fs2MHixYuZOXOm1m9ovAzDhg1l0qTP1JcGCsLFxZng4L0F2mfq1Km4urpiZWVV4OMVRf/lufm6Se58PmPjMgB6T2+X9bq3RExMDPPnz8PV1fWVHnfHjh24u7vRo0cPxo9/dV9AUXBeXl4cPHiwUAqIV23//n2cPHlSfW1bCF1J7nwxshIh/lPkbE8UVTI3RVEmKxFCCCGEeKmkiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETp757IzU1LRMI6PiBSo0FEXReA66EEWJzE9RVMncFEWZoihZenp6Bk9vL5QHcAkhhBDirSMP4BJCCCHEyyFFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0UqAiYsKnPTkQvBQ7m8aFFc8bKSYmhmbNmlGiRAkyMjK02seMGYO9vT2tWrXiyJEjGm2hoaFYWFigUqno2LEjjx8/zvUYNjY2hRJ7XhYtWoSdnR02NjaMGTPmtcQgXp5169ZRr149Dh48SFRUFFOnTgUgIiICNzc3VCoV3t7eJCYmvt5AC0ilUuX6nRNvBsmdb758FxFmlSugsm1SiKG8uSpUqEBISAhWVla5ti9YsICDBw/yyy+/MGvWLK32Pn36EBoaSps2bdi6detLjS0rK6vA+wQGBnLp0iUOHjxIWFgYnTt3fqkxidfD19cXe3t7jW2DBw9m2bJlhIaGMm7cOHXSe9vp8r0QL5/kzjdfvouIXt3bsTfkRGHG8sYqUaIE5cuXz7Pd0NAQgPv379O4cd6rOE2aNOHGjRvq6tvW1pZr165p9Fm7di0qlYoWLVqwd+9eUlNTadeunbrdycmJtLQ0rKysGDZsGOPHjycoKEi9z4YNGwD45ptvsLKywsHBgT///FPjGL/88gu+vr7qxxLn/ODJyMhg8ODBNGnShKCgIACtWH///XcmTpwIwK1bt+jYsSMA06dPR6VS4ejoSFRUFACjRo169hsrCkXVqlUZOXIk0dHRmJmZUbduXQCsra2Ji4sjMzNTo//AgQNp27YtvXv3ZurUqURFRdG7d28g+2wwZ1Vj9erV2NraYmtrqzWnCjoHrays6N+/Py1atGDPnj0AzJ49W31Wevr06eeOb29vr/5B0KVLF+Li4lCpVEyYMIG+ffty5swZ9Xg5P6C2b9/Ohx9+iKOjIwEBAS/0Povnk9z5FuRORVGe9Z+ich6tdOn1lZKUdEfpP3i2oiiK8tX0NYrKeXSu//2X2dvbK+np6bm2eXl5KVWqVFFCQkI0th84cECZPHmyoiiK8vnnnyu//vqr8uDBA0VRFGXfvn3K559/riiKolhbWyuKoqjb7ty5o7Rr105RFEXp1auXcu3aNSUyMlLp27evoiiKYmlpqVy/fl1jn/T0dKVNmzaKoihKu3btlIcPHyqKoihZWVkaMbm4uCj379/Xeg1169ZV4uPjlRs3biheXl4aY+fEmpWVpdjZ2SmKoihr165V1qxZo4SHhytDhgxRFEVRLly4oP6zeHXWrl2rrFq1SmPb0aNHlfHjx2ts6969uxIXF6f++/Hjx5VBgwYpiqIoM2fOVKZMmaJERkYqvXr1UhQle/5OmTJFSUxMVNq3b69kZWUpycnJSseOHTXGLegcrF27thIdHa3cv39f3T9njMuXLys9e/ZUFOXf71xu43/22WfKoUOHlAcPHiiurq7q/r///ruiKIry8OFD9XFVKpXy8OFDpXfv3kpkZGSuMYnC
I7nzjcmdWnVCsfwUGl06O7DL/3fS0tIA0NfXeqS4eI7t27dz48YNfHx8OHbsmEbbjz/+yJEjR2jQoAEdO3ZkxowZhISEkJ6eTv369TX6BgcHs3TpUhRFISEhAYDOnTuzdetWsrKy8Pb2BqBSpUpUq1YNgFOnTjFt2jTS09O5cOECANOmTWPYsGEUL16cGTNmMG/ePE6dOsWkSZMwNzcnJiZGfYaaw9TUlEqVKgFw584dAObNm6cRq56eHo0bN+b06dPs2rWL1atXs3//fkJDQ1GpVACYm5u/vDdW6Cznc35SfHw8JiYm6r9HRETQtGlTAJo3b87Ro0fVZ1mQfRKS0y88PBwHB4dcj5WfOVi5cmV1/4oVK1KjRg0ADAwMgOzviZ+fH/r6+hox5DW+j48P69evJyEhATc3N3Xf5s2bAxAZGcmnn37Kw4cPuXTpEgkJCUyePJmvv/6ajIwMJk+ezI4dO/D396d///70798/n++seJkkdxbx3JlbZaE8tRJx+Ei4VjnSf/BsWYl4Sl7V9OPHjxVFUZTbt28rjo6OGm1PVtOKoihJSUmKvb29oiiKsnfvXqVfv36KovxbTbds2VJ5+PChcvv2baVBgwaKomSfUbm4uCguLi7qY+X0VxRF8fT0VCIjI5W0tDSlZs2a6n0URVH8/PyUOXPmaMQUEBCgDBkyRF1lHzx4UGtMe3v7PGM9ePCgMmrUKKV9+/aKoijK6dOnlZEjR6r3TUtLy/0NFIUmt5UIRck+A//nn38URVGUsLAwpVu3bhrtx44dU5/9zJ49W5kyZYpy+/ZtxcnJSVEURVm6dKkyZcoUJSEhQfH29lbv9/RnXNA5mHM2+ODBA/UZYKNGjZTMzEzln3/+Uc+7nO9cbuMriqLY2dkpPXr0UG7cuKHRX1EUZeTIkcqBAwcURcme25GRkeqYjhw5ogwdOvS576t4OSR39lP3L+K5U6tOyNc9EStW7uDjkQv4/KuVAHy/eic3Y96su7gLU3p6Om3btiU8PBwXFxeOHz9OXFwcM2fOBKBbt26oVCrat2/PtGnTnjlW+fLlKV26NI6Ojvj7+2u1e3p6Ymdnx+TJkylXrhwAJUuWpFy5cpiYmGBkZKS1T6dOnejYsSODBg1S7zN06FDs7OxYunQp7du31+jv5uZGvXr1sLe3x8bGhl9//bVAsdrY2LBt2zY8PT2B7OuVZmZmqFQqHBwcWLt2LZD7db2cbUFBQeoxi9T1v7fMqlWrGDlyJPb29ixYsIBly5ZptLdq1YrU1FScnJz4559/AChXrhw1atSgbdu26rMzU1NTPDw8sLOzw8HBgTlz5miMU9A5aGJiwtSpU7Gzs1NfJ/7www+xs7NTz5/njZ8T/40bN6hatarWPh4eHowcOZKuXbtSvHhxAKZOnYq9vT2jRo2iW7duGv3PnDnDmjVrABg7diyZmZka20TBSe5883OnnvL/y5F5UBxcCna39oHgpS8UkBDi5du6dStz5sxh4cKFWr+hkV+hoaEaN1IWJhsbG8LCwgr9OEKIAtG6lyFf90QIId5sPj4++Pj4vO4whBBvGVmJEEIIIUR+aK1EyD97LYQQQgidSBEhhBBCCJ1IESGEEEIInUgRIYQQQgidSBEhhBBCCJ1IESGEEEIInUgRIYQQQgidSBEhhBBCCJ1IESGEEEIInUgRIYQQQgidSBEhhBBCCJ0889kZqalpmUZGxQtUaGRlKejra/3z2kIUCYqioKcn81MUPTI3RVGmKEqWnp6ewdPbC+UBXHfv3itgeEK8GsbGZWR+iiJJ5qYoyoyNy4A8gEsIIYQQL4sUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEaJQBQUFkpCQ8LrDEK+YfO5C6O5N+v7kq4ioXLkCB4KXqv/7ZHTXwo6ryIuNjcXW1pZKlUzJyMjItc+jR4+o
W7cOBw4c0Nh++PBhPvjgfTw9PejYsSO3biW/ipBzdfv2bYYO/Zj09PQC7efi4qz+c0pKCh4e7nh4uFO9ejU8PNwZPnwYycnJ7Nixg0qVKr3ssEUB+Pn50bx5M8LCwoiOjmb27FkAREZG4u3dGQ8Pd3r37k1SUlKBxh02bChXr17V2v6mfO6zZ8/i8OHDrzuM/xTJm29f3ixWkM7DxywiMekOjx6lFVY8b4zy5cuza9cuevfulWefDRvW06DB+7m2devWjS+//Iqff97M1q1bGTLk48IK9ZkuXLjArFmzMTQ01HmMsmXL4u8fAGR/SXL+fOLEH0yZMvVlhCle0OjRY7CxsSE6OvqJbaNZsmQxlpZ1OHbsGL6+41m7dt0LHysi4upb+blnZWWhry+Lty9C8ua/3pa8WaBvxOwZHzNr2hCqVzMtrHjeGCVKlKB8+fJ5tqelpXHixAmsrFo9c5yUlBT1n+fOnYOHhzuenp5ER0dz9epV2rZ1wtPTgwULFgBgZdWKvn37Ymdnx6lTpwD4+efNODk54uzcjnPnzgHg5OTIqFGjsLGxZv/+faSlpdGliw8eHu7069cPgIULFzJz5td4e3cmPDycmJgYBgzoD0BmZiYeHu4ArF+/HldXF1xdXThz5ky+3p+goEC++uor+vbtw/79+wDYs2cPTk6OeHp6EBYWhp+fH71798bHxxsfH2+e80RZ8RJUqVKFIUM+5tq1a1SuXAlLyzoAWFlZkZiYpP7cc84Sc+bAxo0b8fBwx97enpCQkDzHz+1zz5GSkkLXrl1wc3NlwgRfrX2fnrOQ99weM2YM1tZt2LhxI7169aJNm9bq9qCgQNzcXGnXrq1WDLdu3cLT0wNv787q709GRgYDBw7Azc2VgQMHaJ0he3i48+WXX/Lxx0Py9yaLPEnefLY3MW/mayXi4cPHTJ+1jhs3E/H9pAefjOrKxyMXFHZsbzQ/Pz+6devGyZMnc23/+eef2b8/hEePHrJv337Onz9PTEws/v4BXLp0iUWLFtG8eXMGDBhIr1691BMlNjaW/ftDuHv3LmPHjuGnnzbz3XffsXfvPmJiYvD1Hc8vv2zh9u3bfPXVV6Snp+PrO57atS0xMTFhxYrv1GMNHTqUTz/9lMuXLzN37hxWr17DnTt3ePz4MSdOnKBNmzYkJycTGBhAYGAQt2/fZuTIEWza9NMzX3tWVhbLly9n9+49ZGVl4e3tjaOjEwsWzCcwMIiSJUuSlZVFdHQ0VatWYe7ceYwaNYrz58/TsGHDl/tBCA2GhoZUrFiRiIirmJmZa7RVqmSa5yWNzp0707t3b1JSUujXry9OTk5afXL73Nu2baduX7duHZ07d6Z79x6MHDmSkydP0KJFS3X703PWwcExz7k9efJkMjMzsbOz5dy584SHh/PjjxuYM2fuM2PYsGEDffv2pWvXbnTq5AXA7t27ee+99/jhh7XMnz+PnTt34u3trfHa2rf35MMPn/2DTbw4yZtvXt7MVxFx795DDhw8DcDhI+H07NbuOXv8t2VkZBASEsLGjRvz/DLkLMsNGzaUGzdu8M8//xAWdlhdxVauXBkvLy/mzJnNoEEf0a1bN9q1c6Z27dqULl2a0qVLc/fuXZKSkqhevQaGhoZYWFhw9+5dAExMTDA1zV4xSklJoXbt2jRo8D6DBn1EkyZNGTlyJAEB/gQFBVG8eHHi4uIAcHJqy759+zh06CD9+vUnKiqS8+fP4+npke/Xn5yczKVLl+jYsQMAiYmJ6jhLliwJoF4Wrl+/AQBVqphrnF2IwlW5shlxcbEa2xITkyhXrhx6enrqbTmJMyQkhO++W4GiKCQmJuY6Zm6fu6Io6vEiIyNxds6+Jty0aVMiIiI0ioin5+yz5nbO9eJatWpRokQJzMzMuHPnznNjiIqKwtXVFYDGjZuo42rcuLE6rtOnz2i9tiZNmj73PRUvRvLmm5k381VENG9Wj4oVjPnn8nVaW33AtevxhR3XGy0h
IYEbN67TuXMnIiIiCA4OpkmTJrku440b9ymzZ89i7NhPcHR0ZP787BWe9PR0MjIymDVrNmlpaTg7t6NdO2ciIiJ48OABd+/epUyZMpiYmHD9+jXS09OJiYnB2NgYQOsHQWpqKiNGjEBfXx8vr4507dqVpUuXERYWxqVLlxg37hMAOnbsyLRpU4mLi+eDDz4gKSmJpk2b8eOPP6rjep6KFSvSoMH7bN++HQMDA9LT0zEwMODGjes8fvyYEiVKkJWVlWuc4tWoUaMGcXFxXL16RX1PRPny5TEyMsLY2Ji4uDhKlixJfHz2d33RooUEBASSmpqqcXPYk3L73J/8fGvVqsWZM6epX78+p0+fpl+/vhr7Pz0X8jO3n97neTFYWFhw/vw53nvvPc6eDcfJyen/4zqDi4srp0+fplat2lqvTe6FKHySN9/MvJmvIuLx41R693CmcqXyXLsez/xFz16W+S9IT0/H29ubc+fO06lTJ6ZM+Yrq1WuwYcMGfH19CQ09CGTfAW5l1TrP64B169YlKSmZypUrU6lSZTw83NHT08Pb2wdjY2NWrVrJw4eP6NYt+zdiqlWrxogRI4iIiGDRooUYGBgwePBgXF1d0NfXZ8GChbke5/r1a4wYMYLMzExq1qyJqakpjRs3wtm5HdbWNup+1atXJzo6GhsbWyC7MndxccHNzRUDAwPs7OyYMGHiM98bfX19Ro4cQYcO7dHT06NevXosXLiIceM+xd3djXfeKcWkSZNy3ffs2bOcOXOGvn37MmnSRGbOnMVff/2l3iZenqVLl+HrO547d+6QmprKzp27AOjfvz/du3fDysoKExMTAFxdXXFzc6V58+aULVs21/Hy+txz9OvXj0GDPmL9+vW8//77tGz54TPjy+/cLlgMfenTpw8//fQTRkZGAHh6ejJ48E7c3FypXNmMsWM/yXN8X9/xzJ+/AD8/Pxo0aEDTpk3V28TzSd7M25uaN/WeU8UoDi5jCjTggeCl3L1774WCEnlzcXEmOHjv6w7jjWVsXOY/Nz937NjB4sWLmTlzJjY2NlrtaWlpdO3aBV/fCVhbW7+GCAX8N+fmqyJ588UZG5cB0Ht6e4F+xVMI8ebx8vLCy8srz/bixYuzY8fOVxeQEOKtIRf63jBSTQshRMFI3iw8UkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ898imdqalqmkVHxAhUaiqJoPOtciKJE5qcoqmRuiqJMUZQsPT09g6e3F8qjwIUQQgjx1tGqcuVyhhBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkXEf9SCBQsYP348+/fvf92hiP+QPXv2EB8f/7rDEEJnkjs15buIaNn8Pdav/pzAnfMZN7prYcZUpBw/fpw2bdpgY2PDJ598kme/qKgoevfuDUDNmjVZvXo1AKGhoXzxxRcMGDAAlUpFuXLlsLe3R6VSsXPnTiwsLHBwcKBdu3YkJycD8PDhQ3V/BwcHTp06BcC6deuoV68eTk5OODk5sW/fPo1j5Jg6dSqhoaEAhISEoFKpsLOzo1OnTupjtGrVilWrVmFnZ6feLzg4GFtbW1QqFePGjSMzM1PrdT5+/JgyZcpw7Ngxnd7Pp2MVr0/OfDp48CBRUVFMnToVgIiICNzc3FCpVHh7e5OYmFigcfv378+VK1e0ticlJbFlyxYqV678MsJXCwoKwt/fP9/958yZw82bN/PdX+asbiR3anpbc2e+igjjMu8w7cuBnP8rkmGjF3HmrHaCeFtZWFjw22+/ERYWRkJCAufOnXvuPqampvj5+WlsW7t2LaGhoTRp0oSQkBBCQ0MpW7Ysffr04cCBA/Tr14+f
fvoJgGnTpuHs7ExoaCg///wzo0ePJi0tDQBfX19CQkLYunUr06dPJyYmJs84EhMTmT59Ort37+bQoUPMnTtXPc62bdvo2rUrISEhQHaCnzlzJkFBQYSGhmJqasqqVau0xgwODqZ79+5s3749f2+gKNJ8fX2xt7fX2DZ48GCWLVtGaGgo48aNY9SoUS/lWFeuXGH27NkvZawcWVlZuLq64uHhke99Jk2aRNWqVV9qHEKb5E5Nb2vuzFcR0erDBpQsacTaDQFERcfyW+ifhR1XkWFmZkaJEiUAMDQ0xMBA60moWoyMjLC2tlZXu/lx584d9Z+PHDlCjx49AKhUqRKOjo5a1Wv58uUZOHDgM48REBBAnz59KFOmDADvvvsu5ubmAPzzzz989dVX7NixAwB/f3/69OlDqVKlAPjkk09ynew7d+5kxowZuSaE7du38+GHH+Lo6EhAQACKojBs2DAcHR3x8PDg9u3bGv1Xr16Nra0ttra2/Pnnf2dOFUVVq1Zl5MiRREdHY2ZmRt26dQGwtrYmISGBzMxMVCoVGRkZAKhUKiA7watUKlq0aMHevXvzHH/Pnj1MmDABHx8fgoKCNNpSUlLw9PTEzs6O0aNHa+1rZWVF//79adGiBXv27FEff8KECfTt25d169axevVqoqKisLW1xdvbm+bNm3Pjxg0AvvjiC2xsbHB0dOTOnTvqlZJ169bRsWNHXFxc6NixI2lpacTExODg4ICNjQ3Dhw9/4ff1v0xyp6a3NXfmq4gwqViOzMwspk/5iF82TqO9h3Vhx1XknD17lsTERBo0aJCv/iNGjODbb799br8ff/yRFi1a8O2339KnTx8A9PQ0n7ZarVo1YmNjtfatUqUKcXFxeY4dGxurnvhP+vPPP2nRogXVq1cnPj6erKwsYmNjqVKlirpPiRIl1JV3joyMDO7cuYOZmRmNGzfmr7/+0mjftm0bv/zyC7/99htubm7s2bOHGjVq8NtvvzFy5Ei+++47dd+kpCR27drFoUOH2LlzJ9OnT3/GuyQKm6GhISYmJlrzAKBy5cp5XtLo1q0boaGhhISEsGDBglz7ZGVlsWDBAn777TdCQ0OZP3++RvvKlSvp1q0bhw4d4uHDhxw/flyjPees8ODBgxorGZ06dWLjxo0afe/fv8+WLVsYN24cv/76K6dPnyYiIoKwsDBCQkIoW7asRv9KlSoRHBxMmzZt2LZtGyYmJuzbt4+wsDDu3r3L5cuXn/3GieeS3Pl2585i+el0//5DDAz0Cd77B7VrV2HMCG/2hZzg8eO05+/8Frh16xYjR47kl19+yfc+5ubmlClThkuXLj2zX58+ffj666/p378/165do2HDhiiKotHnxo0b1K9fn4iICI3tN2/exNzcnBIlSpCamqre/vjxY0qWLIm5uXmuS3bbtm0jNDSU48ePEx0dze+//67V9/HjxxgaGmrsFxoayt9//42rqyv37t2jVKlSvP/+++r2yZMn8/XXX5ORkcHkyZO5ePEimzdvJjg4mIyMDFq3bq3uGxERQXh4OA4ODs98f8SrlducSUhIoHz58hoJOmeOBgcHs3TpUhRFISEhIdcxk5KSuHjxIm3btlWPpyiKeryrV6/i7u4OQIsWLbhy5QqtWrVS71+xYkVq1KgBoHE227x5c61jNWjQAH19fapWrcqVK1f4559/aNOmDaD9AwagadOmADRp0oQTJ06QnJzMsGHDuHPnDlFRUc9c8hbPJ7kz29ucO/O1EnHm7BWysrJIT88gIz2DzEyFrCzl+Tu+BTIyMujduzcLFizAzMysQPuOHj2apUuX5qvvZ599xqxZswBo3bq1+hpfQkICISEhWFlZafS/c+cO69evp127dtStW5fTp0+TlZVFVlYWf/75J3Xr1sXd3Z2NGzdy7949IPuadGxsLCdPniQsLIygoCC2b9/O9u3bcXNzY8OGDTx48ACAxYsX4+XlpXHMbdu2sXv3boKCgjhy5AhHjx7V
aLewsGD16tUMGTKERYsWUa9ePfr27UtoaChhYWHq1wdQq1YtWrZsSWhoKKGhoQVavhSFx8LCgpiYGPUZ+JEjR6hQoQJGRkaULVuW2NhYkpKS1Gdxs2fPJjAwkJ07d6Kvn3s6MTExoWHDhurr2eHh4Ro/0C0tLdU3wJ08eRJLS0uN/W/dusWNGzd4+PChxg1ruR3v6UKnXr16GsvZT/+QCQ8PV//f0tKSTZs24eXlRWhoKNbW1lr9Rf5J7vzX25w781VEXL+RwIqVOxk00BNbm8YsXLqZtLT0wo6tSNiyZQsnTpxgwoQJqFQq9Yefn5vNWrRoQYUKFfJ1nHr16pGYmEhcXBxTp04lKCgIe3t7unbtytKlSylevDgA8+fPx8nJCW9vb7744gvMzc2pWLEi3t7e6mtkPj4+VKhQAVNTU7788ks8PT15//33GT9+PNeuXcPU1FR93Pfee49jx45RqVIlPvvsM1xdXbG3tyc+Pp4hQ4ao+ymKwqlTpzQSvLGxMdHR0eq/T506FXt7e0aNGkW3bt3o0KEDUVFRODo64ujoSGBgoLqvqakpHh4e2NnZ4eDgwJw5czTe1yfvuH9ZN/aJ/Fm1ahWjRo2iVatWDB8+XL20PGTIENq3b8/UqVPVcyjnXobJkydTrly5XMfT19dn3LhxODk54eDgwNixYzXaBw8ezObNm7G1tcXIyEgr6ZuYmDB16lTs7OyYOHFigV5LkyZNsLCwwNraGkdHR1JSUjTak5OTcXZ2JiwsjM6dO+Po6MjChQvx8vJS/1B4kszP/JPcme1tz516z6m0FQeXMQUa8EBw/qpH8WpNnDiRuXPnvu4wRBGydetW5syZw8KFC7V+QwMgLS0NT09PvvjiC41fZ3vVbGxsCAsLe+njrlu3joyMDAYNGvTSxxZvD8mdGrSuCebrngjxZps6dSpHjhx53WGIIsbHxwcfH58824sXL/7M37gQ4m0nufP5ZCVCCCGEEPmhtRIh/+y1EEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdCJFhBBCCCF0IkWEEEIIIXQiRYQQQgghdPLMZ2ekpqZlGhkVL1ChkZWloK+v9c9rC1EkKIqCnp7MT1H0yNwURZmiKFl6enoGT28vlAdw3b17r4DhCfFqGBuXkfkpiiSZm6IoMzYuA/IALiGEEEK8LFJECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ/kqIvr1duVA8FL1f37rvizsuN4oFy5coF27tri6ujB8+DCe9U+J79+/H1dXFzw83Pn888/IzMwEoHnzZnh4uOPo6MB3360AwM/Pj/Xr1xMdHU2dOpZ4eLjj4eHO2rVr8fbujIeHO9WrV8PDwx1v787q/gMHDuDChQvqY3777Tds2uTHw4cPGT58GB4e7nh6enD69GkArl+/Tvv27fHwcKddu7bcuHFDK+7Nm3+iZcsWBX5vXFycAZg0aaL6tYrC5+fnR/PmzQgLCyM6OprZs2cBEBkZqZ47vXv3JikpqdBjOXz4sPr4QjxJcmfe3pTcma8iYsu2A3Tp9RVden1FXFwy5/+KKOy43ih169Zl3779BAUFA/Dnn3/m2i85OZmFCxfw66/b8PcPoGJFE9atWweAiYkJ/v4BhIT8xs8//6y1r4ODA/7+Afj7BzBgwAD1GA0aNMDfP4Bff92m7uvp2R5//z3qvwcGBuHq6sacOXNwdHTE3z+AtWvXMXHiBNLS0vjuu+/49NNP8fcPYPfuPZiYmGgdPzg4mObNm3Pp0iWd3qM5c+ZiYKD17BYNiqI8M4mIghk9egw2NjZPbRvNvHnz8PcPYOTIkfj6jn9N0b0aWVlZRXIskU1y5/MV9dyZryLi4cNUkpJSqFihLGZmFQned6Kw43qjGBoa
qv9cvLgR1apVy7VfcHAQ3bp1p1SpUgCMGDGCPXt2a/RJS0ujePHiLxSPs7MzISEhACQlJVGsmAEVKlTg+PFj+Ph0AcDU1BQ7OztOnDjBO++UJCzsMHfv3qVEiRKUKFFCY7yHDx+SmZlFv3792b17NzExMQwY0B+AzMxMPDzcAejXrx/u7m54eXXk7t27GmN4eLiTkZHB/v378PBwx97enp9+2gTA7NmzGD58GJ06eZGcnPxCr11oq1KlCkOGfMy1a9eoXLkSlpZ1ALCysiIxMUn9GWZkZACoP8+rV6/i5dURd3c35s+fB8CwYUP55JNPcHZux4wZ0xk//lPs7Oz48ccftY47YsRwOnTowIYN69Xb1q9fj6urC66uLpw5c4YtW35h9erVAPz111+MG/cJiqLwySef4OnpSZcuPty+fVtj3KFDP8bd3Q1PTw+ysrLyjHP8+E/p3LmTxr4HDhzAyckRJydHDhw4oNF2+PBhvLw64u3dGU9PT27dukV0dDSenh706dMHPz8/nT8DkTvJnW9+7izQPRHOTi2IT7jN6fDLhRXPGysgIAArq1YkJiZQoUKFXPvExcVjbm6m/nuJEiVIT08Hsiesh4c7H37YEnt7e619Dxw4oF6SCwgIeGYspUuXpnz58ty8eZOAgADc3bMn6tOPGa5SpSpxcXGMHj2GR48e4eCgom/fvjx48ECj3/79+3F2dsbKyoo//zxFlSpVuHPnDo8fP+b333+nTZs2AKxYsYKAgEA6derMtm3byE2bNtb/f9YQwtq1a9XbLS3rsGPHzlwrefFiDA0NqVixIvHxcZiZmWu0VapkmucljRkzZvC//31DQEAgFy9e5ObNmwA4Ojqyd+8+duzYQZ8+fdm3bx8//rhBY99Tp06ir2/Arl27qFfvPSD7bDIwMIDAwCA2bfqJefPm4urqRnBwEAC7du2kY0cvgoKCqFatGnv27GHIkCH88MMP6nHT09O5eTOGgIBAdu/eg76+fp5xWllZsWPHTo245syZzfbtO9i+fQezZs3Ues2KAr/+uo0BAwaoz3QTExNZt24dffr0ye9bLgpAcuebnTvzXUTo6+vjoGrG/t9OypJzLtzd3Tl27DhVqlQhKCgo1z5mZpWJjY1T//3x48cUK1YM+HdJ7syZcC5d+odr165p7PvkklzOxH4WDw9P/P334O+/Bw8PTwCtzy0m5iZmZmaUKVOGmTNncerUnzRp0oTNmzdr9AsMDGDz5s14e3fm3Lnz3LhxAyentuzbt0+d+DMzM/nyyy9wc3Nl1aqVxMXF5hrXmTNn6NChAx06tOfvv/9d3mvSpMlzX5N4MZUrm2l9LomJSZQrV04jSebMkytXLvPxx0Pw8HDnn3/+ITY2BoAGDeoDYGZmRoMGDTAyMtJKspGRUTRq1Aj497ONiork/Pnz/39m35uUlBTKlClD8eJGJCcnc+TI79jY2HDp0iW2bfsVDw93FixYoLESYWhoSM+ePRg8eBAzZswgKysrzzhzm1N6enoYGxtjbGyc6xJxTsyNGjUiIiL7su0HH3zw3OVkoTvJnW927sx3EfFhy/qUL1eGvfvlUsbTUlNT1X8uU8aYkiVL5NqvXTtnNm/+SV2tfvPNN+pJmkNPT4/SpUuTkpLyQjG5u7uxbds27t27R5UqVQBo2fJDtm7dAmSfXR08eJCWLVsSERGh/pKYmJhoXPtNT0/n1q1b7Nmzh23btrN48SL8/ffQsWNHdu7cwcWLf/PBBx9w9uxZHj58SGBgEIMGDc6z0Fy6dAnLly9n585dlC1bVr1dX19+Uaiw1ahRg7i4OK5evQLAsWPHKF++PEZGRhgbGxMXF0dycjLx8fEA1KlTlzVrfsDfP4CDBw/RrFnz/x/p34Lh6eIhR82aFpw/fx6As2fPAmBhUZOmTZupE3rOKoGnpwdLlizB0tISAwMD6tatS/fuPfD3DyA4eC9TpkxRj5uZmYmPTxdWrVpNUlISf/55Ks849fS0
51RWVhZ3797l7t27ud6sdv78OQDOnTtHrVq1AJmbhUly55ufO/N9dGenFvx96RrXrscXZjxvpP379+Pu7oa7uxsJCQk4OjoRHx/P/PnzNfqZmpoybtw4vL074+7uRmJiAgMGDAD+XZJzc3OlWLFiNGzYEEVR1GdATy7JLV269LkxVahQEUPD4rRt20697bPPPlPH2r9/P+bMmUvx4sUJDQ3FyckRT08PAgL86d69u3qfgwcP0rBhQ/XfraxaExgYSPXq1YmOjqZly5ZA9g1SERERdO7ciVOnTuUZl6dne3r27MGoUSM1vgg5cu5EPnv2LBs2bNDYJl7c0qXL8PX1xdHRgU8/HceiRYsA6N+/P927d2P27FnqZdEvv/ySESOG4+npiY+PNw8fPsz3cVq0aElaWirt27dXFy0mJia4uLjg5uaKp6cHixdnH9vNzZ1Vq1bSoUMHIPvM9Nq1aDw9PfH09GTfvn3qce/du0eHDu1xdm7HzZs3aNDg/QLFOXHiJLy8OuLl1ZGJEydptRsaGtK5cyfWrFmtvnadY//+fepLLzk3pD65TRSc5M43P3fqPefShOLgMqZAAx4IXsrdu/deKCiRbd68uTRt2pR27ZxfdyhvDWPjMv+J+bljxw4WL17MzJkztX5DA7JvQuvatQu+vhOwtrZ+DREWPYcPHyY09ABffvnVazn+f2VuvgqSO18+Y+My8OQy5P8r9upDEfmxYsW3HD16lDFjxr7uUMQbyMvLCy8vrzzbixcvrnXToRBvA8mdr5asRIj/FDnbE0WVzE1RlOW1EiF3DAkhhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQifPfABXampappFR8QIVGoqioKen9YwOIYoEmZ+iqJK5KYoyRVGy9PT0DJ7eXihP8RRCCCHEW0ee4imEEEKIl0OKCCGEEELoRIoIIYQQQuhEigghhBBC6ESKCCGEEELoRIoIIYQQQuhEigghhBBC6ESKCCGEEELoRIoI8Vxnz57lr7/+et1hCCHEG+W/kDuliBDPtXz5curUqfO6wxBvkD179hAfH/+6wxDitfov5M58FRHFixvy9dRBBOycxy8bp+Fg37Sw4yoSFi9ejI2NTZ7toaGhFCtWjISEBABOnDiBnp4eUVFRADx+/JgyZcpw7NgxDh48iEqlokmTJlhYWKBSqVi6dCkqlQp7e3tUKhWdO3cGUG9r3bo1K1euVB/riy++UB970KBBDB06VP33qVOnsn///lzj7N+/P61atUKlUqFSqUhLS1O3RUVFoaenx8mTJwGIj4+nWLFihIaGEhoaioWFBZcvX8bFxYWdO3eqxyhbtiwqlYoBAwYAMGfOHOzs7LC1tWX9+vXqsStXroyTkxP29vYsWbJEfdyIiAjc3NxQqVR4e3uTmJiYr89EFNy6deuoV68eBw8eJCoqiqlTpwIv/hn079+fK1euaG1PSkpiy5YtVK5c+WWEX2imTp1KaGjoc/uNHTuWzMzMwg/oLSK587+TO4vlp1OrlvWxbt2Q6bPW0c6pJUM+6sCBg6cLO7bXKjU1lTNnzjy3X5MmTdi5cyeDBw9m+/bttGjRQt0WHBxM9+7d2b59O3PnzlVPrv379/P1118DsH37dkJCQihWTPOjCAkJQV9fHxsbG4YMGaLRlpmZSWxsLJmZmfl+aI+fn1+eFXHz5s3Vse/cuZOmTf8tEvv06aOOFaBjx44A2NjYqBNwYGAgkZGRHDp0iIyMDDp16kTLli155513aNeuHRs3biQzM5OPP/6YHTt24OXlxeDBg/nuu++oW7cuR44cYcyYMWzatOm5r0PoxtfXF3t7e3WSBrQ+g1GjRrF58+YXPtaVK1eYPXv2C49TVDyZwMXzSe7M9l/JnflaiYiJTSYjI5OY2GTu3n1Aamra
83d6w61Zs4Z+/fo9t5+joyMhISEA/PXXX7z//vvqtp07dzJjxgzOnTunUwypqak8fvxYa/uhQ4dQqVTY2Nhw9OhRncZ+0nvvvcfFixcB2L9/P23bti3Q/r/88gvjx48HoFixYowdO5atW7dq9DEwMGDy5Mns3LmT6OhozMzMqFu3LgDW1tbExcXJ2d4rULVqVUaOHJnrZ5CQkEBmZiYqlYqMjAwg+8wOYO3atahUKlq0aMHevXvzHH/Pnj1MmDABHx8fgoKCNNpSUlLw9PTEzs6O0aNHa+1rZWXF4MGDadKkiXrfjRs3YmVlhbW1NeHh4ep+H3/8MY0bN2bt2rV07tyZRo0aqdv37NmDnZ0dbdq00Yrh1q1bODg44Obmxh9//AFARkYGPXr0wM7Ojh49eqhfe44n3w/xfJI78+9tyJ35KiJu3EzgrwuRfLPkE9o6tmDNOv/Cjuu1Sk9PJzQ0FEdHx+f2LV68OCVKlODYsWPUr19fvT0jI4M7d+5gZmZG48aNn3lzjZOTEyqViuHDh2tsq1GjBh9//LFW/+3bt+Pj40OXLl3Yvn17vl5Tr169NJb9nvbee+9x/PhxSpQogZGRkXr7jz/+qF6Gy0m6T4uNjaVKlSrqv1erVo3Y2FitflWqVCEuLk6rP0DlypVJSkrK12sRujM0NMTExCTPzyCvpdFu3boRGhpKSEgICxYsyLVPVlYWCxYs4LfffiM0NJT58+drtK9cuZJu3bpx6NAhHj58yPHjxzXab926xcyZM/H39+f7778nMzOTZcuWcfjwYfz8/Jg8ebK63/Tp0wkMDOSzzz5j06ZNfPfdd6xZs+a5MaxevZpBgwYRGBioTrzbt2+nQYMGHDp0iPfff59ff/01/2+o0CC587+XO/N1OcO1XSsaN6rD51+txMG+GWNG+HDk6Dmysp75GPE31o8//kjPnj3z3d/d3Z2hQ4eycuVKvv32WyD7Otzff/+Nq6sr9+7do1SpUhqV9pPyWpK7fPkyX3/9tcaXQVEUfvvtN/755x+AfN+89qwlOQAvLy+GDBnC559/zoULF9Tbn16Sy425uTkxMTHq6vjGjRuYm5tr9bt58ybm5ubq/k+Kj4/HxMQkX69FvLjcPoOEhATKly+vscSrKNnf8eDgYJYuXYqiKOrr2E9LSkri4sWL6rOxhIQEjSXjq1ev4u7uDkCLFi24cuUKrVq1Uu9vampKpUqVALhz5w6JiYlYWFhgaGhIzZo1SUlJUffLud/C0tKSEiVKUKVKFW7fvv3cGCIiIvD09ASgWbNm6rhy/tyiRQtOnTpV8DdUAJI7/4u5M18rEVmKQlZWFqlp6WRkZlK2bGn09d/eX+y4dOkSK1aswNXVlb/++ovly5c/s7+7uzvNmzenZcuW6m3btm1j9+7dBAUFceTIEZ2WzurXr4+iKOrlMsi+AalTp04EBQURFBSEi4uLzkt+T2rZsiXNmzdXJ/mC8PHxUZ+dZmRksGTJEq2qPTMzkzlz5uDl5YWFhQUxMTFcvnwZgCNHjlCpUiUMDAxe+HWI/MntM6hQoQJGRkaULVuW2NhYkpKSiIuLA2D27NkEBgayc+fOPL/7JiYmNGzYkJCQEEJDQwkPD9coSCwtLdU/oE+ePImlpaXG/k8XL6ampkRHR5Oenk5UVBRly5bV6vf0Ps+LoVatWurLHqdPn85XXCL/JHcWzNuQO/O1ErEv5ARtWr3P11MH8ehhKitW7iAj4+29fj137lz1n21sbBg1ahRxcXGsWbNGvaT6pNKlS7NmzRr13xVF4dSpUxrJyNjYmOjo6FyP5+TkhJ6eHgYGBuprhDkGDRrEihUr1BNr+/btGtfdVCoV27ZtA2DChAlUqFABgC1btlC+fHl1v169elGyZEl1m6mpqcZx9PT0NF5Djh9//JGwsDAAPvroI/r06aPVx8PDg/DwcGxtbVEUhY8++ogPPviAqKgo9u3bh6OjI1lZWXh5edGhQwcAVq1axYgRI3j8
+DEVKlTg+++/B2DUqFEsX76coKAgMjMz8fDwUG8TL9eqVasYOXIkt2/f5vHjx+o71IcMGUL79u2xsbFRz5Ocexk+/PBDypUrl+t4+vr6jBs3Tj2fGzRowDfffKNuHzx4MD179mTVqlU0atQIKyurZ8ZnYGDAiBEjsLW1RV9fX2OsvDwvhkGDBuHt7c2GDRvUS89eXl5s3boVOzs7zM3NmThxYq5j5zYnn9wmJHc+6b+SO/VylivzoDi4jCnQgAeCl75QQEKIl2vr1q3MmTOHhQsXYm9vr9WelpaGp6cnX3zxBXZ2dq8hQiHEG0Lr11mkiBBCCCFEfmgVEW/vjQ1CCCGEKFRSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQifPfIpnampappFR8QIVGllZCvr6Wg/6EqJIUBQFPT2Zn6LokbkpijJFUbL09PQMnt5eKI8Cv3v3XgHDE+LVMDYuI/NTFEkyN0VRZmxcBuRR4EIIIYR4WaSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETvJVRBQrZsBnvr3ZvW0OS+aPwqRi2cKO6433zTf/w8XFWWv7/PnzqVfvXWbMmK7Vdvjw4Vy362L27FkcOHCAs2fPsmHDhhcaa9SoUfz666/Mnz//pcQmxJNu3rzJ/v37X3cYooiQ3PlmyVcR4dy2Jfa2jfH97FvKly/D4IHtCzuuN1pqairnzp3Lta1fv36sWrX6lcXSqFEj+vbt+0JjtG5txf3792nZsuVLikoUBX5+fjRv3oywsDCio6OZPXsWAJGRkXh7d8bDw53evXuTlJRUqHEsWLCAevXqPbdfTnLXla/veAD27g3GxsaGDRs2qLeJokFy55snX0XEu3WqE59wm78vXeOfy9dp1vTdwo7rjbZhwwZ69OiZa1ulSpUK9KS+xYsX4+zcDk9PT65fvw7A6tWrcXJyxNPTg8uXL7N//z48PNyxt7fnp582aeyfU6GfOnUSDw93PDzcqVatKjdu3GDhwoW4u7vh6OhAeHg4AMeOHcPZuR0eHu78+uuvxMbGsmnTJjZt8mPXrp0AZGRkMHDgANzcXBk4cAAZGRnEx8fnWm1PmOCLm5srXbt2ISUlRaNt2LChjBgxAjc3V2bO/Drf74l4eUaPHoONjc1T20Yzb948/P0DGDlyJBMnTii04z969Ah3dzeqV69eaMfIMX/+AgD27PHnhx9+oG/fvuptomiQ3PmvNyV35quIuHX7HhUqGFPqnRJUq1qJMqVLFnZcb6z09HTCwsKwt7d/4bHi4+M5dOgge/fuY/LkySxatIjExER27NjB3r372LPHH0tLS9q0scbfP4CQkBDWrl2b61jNm7fA3z+AsWPH4uPThWrVqjFs2DACAgJZtWo1y5cvA2DatKn89NNm/P0D6NSpExUrVmTHjp0EB+/l3r17XL16hd27d/Pee+8RGBhE/fr12blzJ5UrV8bX11fjmKdOneLBg4cEBgbh7e3NDz/8oBWXvb0dgYFBhIeHExMT88LvmdBNlSpVGDLkY65du0blypWwtKwDgJWVFfHxCWRmZmr0HzFiOB06dGDw4EHMnj2L6OhoBg8eBGQn35xVjfXr1+Pq6oKrqwtnzpzRGCMpKYkBA/qzdOlSxo37RCumpxN+jtjYWDw9PXBxcVbvd/z4cRwdHfD09GDDhg1cvXqVtm2d8PT0YMGC7ELBxcWZo0ePEhgYwNChH3P06FH1svk///yj/kGxYsW3pKen0759e9zcXOndu7fW6xcvn+TOf71JubNYfjrtDjiCc9sW7Nw6m0ePHpOUnPL8nf6jNm/eTJcuXV7KWNeuRfP++x8A0LRpU+bMmUN0
dBSNGzfGwCD7se76+vqcOXOGOXPmkJGRzt9/X8pzvMjISFasWMFPP21Wx7plyy/o6+urK3xFUahYsaJ67Fu3bjFu3CekpKRw7do1YmPjiIyMpHHjxuq4Tp8+k+fxmjTJ6deMsLDlWn0aNcpub9DgfaKjo6lSpUpB3ybxEhgaGlKxYkUiIq5iZmau0VapkinJyclUqlQJgFOnTqKvb8CuXbtYsGAB6elpuY6ZnJxMYGAAgYFB3L59m5EjR7Bp00/q9sWLFzFu3Dg+/LAVX331FX/8cZwPP2wFoJHwDQwMyMrKUu+Xk5yLFSvG4MGDuHr1Cvv27WXatOnY2tqiKAo//vgjAwYMpFevXiiKot63devWtG3blvHjfbG0tFRvnzZtKosXL+Hdd98lKysLPT09fvnlF0qWLMmMGdM5ePAgjo6OL/5GizxJ7tQ83puSO/O1EpGRkcmUGWuZ9MV3xMQmE3roTCGH9ea6fPkya9aspnPnTly8+Dfff/+dzmPVqGHB+fPZ1wdPnz5NrVq1qFmzFmfPnlUn1aysLJYuXcLy5cvZuXMXZcvmftPrw4cPGTt2LMuX/w8jIyMA1qxZjb9/AMuWLVMnWj09PW7dSlaPvWXLFjw8PPH3D6BVq1YoikKtWrXUZ5U5ceVGs9+fufbLeX0XL16gRo0aOrxL4mWqXNmMuLhYjW0JCYnq5AgQGRlFo0aNAGjSpAmAxjJzzlyKiork/PnzeHp60KdPb60l2UuXLjF16lQ8PNw5ePAgsbFx6rbcEn6OW7du0bdvHzw83Dl27BixsXF89NEgtm/fxuDBg/jzz1N4eXnx11/nGTToI/bv3/fc152cnMy7776rPtaDBw8YOXIE7u5u7Ny5U+s9ES+f5M5/vUm5M18rEaYm5Zg/exiGhsX4/eg5ftwUXNhxvbGmT//3DmEXF2c+/ngo8fHx/38Tly8bNmxg9erV3L59mzt37rBw4SKN/X/5ZQsnTpwEwNfXF1tbO9q1a0vx4sVZseI7TExM6NChA+3ataVkyZIsXrwET8/29OzZg4YNG+b5Rdi1aydXrlxmyJDBAPzww1qaNWuOm5srbdpYq/tNmTKVbt26kZKSwuTJX2BnZ8fQoR/j779H3cfT05PBg3fi5uZK5cpmjB37icZrzNG8eXM2bfLD1dWF0qVLs3r1Gq24wsKOsHr1aqytralatSr79+8jMzMTFxdXfH3HM3/+Ao1tonDVqFGDuLg4rl69gqVlHY4dO4apqYn6hzlAzZoWHD58GICzZ88CYGxsTHx8AgAXLvwFgIVFTZo2bcaPP/4IZC9XP6lu3bp07dqNpk2bAtnXi/89xr8JX19fX2MlIic59+rVi0GDPkJRFMqVK8eiRYuJjY1l5MgRbNzox6xZs0lLS8PZuR3t2mnf7f+kihVNuHLlMnXq1CUrK4uQkBDq1KnDmjU/MH36dJ5YzBCFRHLnm5k79ZRnfzsUB5cxBRrwQPBS7t6990JBidfv669n8MUXXxbqMYYNG6q1rFzYjI3LyPz8f35+fmRkZNCvXz+N7VevXmX8+E9JTU2lfPnyLFmyFFNTU40+w4YN5caNm9SoUZ1q1arx2WefM2LEcK5fv0Ht2rWpXLkSn332ORs3bsTPbyMGBgbY2dkxYcJE9RhJSUmMGTOalJS76Ovrs3z5ciwsLNTtq1atYvPmn9QJf+vWLVhZtaZChQoMHfqx+uxs2LDhnDt3lt27d3P//gPGjh37//uv5OHDR3Tr1pURI0bi4uJMcPBejXmXs+2ff/5h7Ngx6Onp4enpSYcOHenevTvm5mYYGxvj4OBIr1691MnZz8+PBg0a0LRpU/W2FyVz8+3wNudOQOvOVikihJZJkyYSGxvH+vXrC/U4UkS8Xjt27GDx4sXMnDlT6zc08uvw4cOEhR3ms88+f8nR/ffI3Hzzve25EykixH+dJOqXS4qIl0fmpijK8ioi8nVPhBBC5MbW1hZbW9vXHYYQ4jWRZ2cIIYQQ
QidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ1JECCGEEEInUkQIIYQQQidSRAghhBBCJ898AFdqalqmkVHxAhUaiqKgp6f1jA4higSZn6KokrkpijJFUbL09PQMnt5eKE/xFEIIIcRbR6vKlcsZQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNCJFBFCCCGE0IkUEUIIIYTQiRQRQgghhNBJvooIk4plWfPdRA4EL2Xipz0BqFG9Miu/8WXHlll81N+jUIN8k6SkpKBSqVCpVJQtWxaVSsWAAQM0+tjY2ORrLJVKRUZGxnP7dezYkXLlyrF///5n9ouMjMTT0xN7e3scHBw4ceJEvuLIj9DQUCIiIvLd/3//+x979ux5accXRdeNGzcIDg5+3WGIIk5yZ/4UtdyZryIiIyOTPQG/ExeXrN42bkxXYuOSmTF7Pb17ONPoA8tCC/JNUrZsWUJDQwkNDaVhw4aEhoaydu3aQj3md999x9ixY5/bb9CgQSxcuJCDBw+ya9cuDAy0nqWiJSsrK18xFOSLkJaWRq1atfD09MxXf6GbdevWUa9ePQ4ePEhUVBRTp04FICIiAjc3N1QqFd7e3iQmJhZqHDNnzqR+/fpa2z/55BNsbGxo3bo1K1euBKBevXqoVCpatWrFsmXLtPZJSkqie/fuqFQqbGxs2LlzZ64/eJ6V9L///nv+97//qf9+5swZrR9WADNmzMDNze1lvAUiHyR3Pl9RzJ35KiLupNxn+67DPHqcBkCxYgY0fL82R4//xak/L5GQeJsWzesVaqBvqjFjxmBvb4+trS3Xrl0DICMjg/79+9OiRQt1RXn8+HFUKhXW1tZaX5ygoCBUKhUtWrRgw4YNWscwNzd/bhzR0dGYm5tTr17251SmTBmaNWvGmTNnsLe3p1WrVsyaNQvI/uHTrVs3PDw8OHv2LF27dsXe3h5nZ2fu3r0LwIoVK7CyssLBwYFLly6xbt06Pv30Uz799FMePXpEjx49cHR0pFu3bqSnp2uMeeHCBWbPng1Av3791Ik+v186kX++vr7Y29trbBs8eDDLli0jNDSUcePGMWZMwR6yVxCPHj2iQ4cO1KhRQ2P7+fPnSUpKIiwsjKNHj9KlSxcATE1NCQ0N5dixY2zcuFFrvFGjRjFixAhCQ0P57bffqFSpUq4/eJ6V9Dt27Mju3bvVf9++fTudO3fWOtbRo0d55513SElJeVlvhygAyZ1vSO5UFOVZ/ykq59Hq/yIiY5TA4GNK5+5fKIqiKFO//kFROY9WoqJjlV17whSV82hF/Mva2lp58OCBoiiKsm/fPuXzzz9XFEVRateurURHRyv3799X2rRpoyiKojg7OyspKSlKVlaW4uTkpKSmpir29vZKenq6eoz09HR1/6dNmTJF2bdvX56xHD16VBk/frzW9ocPHypZWVmKoiiKSqVSHj58qKxdu1YZMmSIuk/O8VetWqWsXLlSiY+PVxwcHJSMjAxFURQlMzNT4/jLli1TNm3apCiKonz77bfKpk2btMa0trZW0tLSFEdHR0VRFHUM4uVZu3atsmrVKkVRFCUtLU1JTExUoqKilJ49e2r0e/KzzDFgwADFyclJ6dWrlzJlyhQlMjJS6dWrl6IoinLgwAFlypQpiqJkzwkbGxvFxsZGOXXqlMYYCQkJSvv27RWVSqUMGzZMo+3KlStKy5YtlcuXL2tst7a2VhRFUR4/fqz+c46MjAzF3t4+z9eb0z8qKkoda15UKpVy+/ZtRVEUpXXr1sqjR4802iMiIpThw4crP/74o+Ln5/fMscTLJ7mzyOZOrTqhmC6Fx927DwAoWcIIgHdKluBOyv2XV9m8RebNm0dISAjp6enqJd2KFSuqz8xyzpDCw8Pp0KEDkL1k++QS86lTp5g2bRrp6elcuHBBpzjM
zc2JiYnR2h4ZGcmnn37Kw4cPuXTpEgkJCQA0b94cgMzMTHx9fTl37hx3796lU6dOREZG0qxZM3Xs+vqaC1oXL17k1KlTfP/99zx+/JgePXpQtmxZ9Zg5DA0N6devH71798bCwoIZM2ZojSVeDkNDQ0xMTLhy5QpVqlTRaKtcuTJJSUlUrlwZgD/++AMDAwP279/PrFmzSEtLy3XMpKQkdu3axaFDh7h9+zYDBw5kx44d6vY5c+bw2Wef0bp1ayZOnMjRo0dp3bo1AJaWlowdO5YBAwZw+/ZtVq5cSZs2bUhMTESlUnH9+nV69+6tcbzExERMTU2f+1pjY2Ofe4bZvn179uzZQ+vWralatSolSpTQaN++fTs+Pj40b96cESNG0LNnz+ceV7xckjvfjNyZryLCwECfKlVMKFbMgFKlSmJuXpGz56/SpvUHJCbdwdS0HKdO/1PYsb5xkpOTCQ0N5fDhw+zbtw8/Pz8Abt26xY0bN6hQoQKZmZkANG3alK1bt1KqVCnS09MxNDRUjzNv3jxWr15N1apVeffdd3WKxcLCgri4OC5dukS9evW4f/8+ly9f5ocffmDixInqa8yKogD/Tu4zZ87w4MEDDh06xKpVq7h58ya1a9fm9OnTZGVloa+vT1ZWFoaGhurXUq9ePZycnPD29gYgPT0dPz8/rUmemZlJjx496Nu3L0OGDOHEiRO0atVKp9cn8ie3hBgfH4+JiYn67xERETRt2hTITohHjx5FT09P3Z4zRyIiIggPD8fBwSHXY128eJFJkyahp6fH/fv3+fDDDzXae/bsSc+ePYmMjGTgwIEcOHBAfTlDURS6detGdHQ0FhYWQPaljvzcv5FX0n9Sp06dmDBhAnFxcXTq1EmrPSAggKCgIPT19bl8+TKPHz/WKjRE4ZHc+ebkzvz9doZJOTasnkz1apWwtW7EhtWTWbzsF8wqV+DLz/vht3kf4WevFHasb5zy5ctTunRpHB0d8ff3V283MTFh6tSp2NnZMXHiRACmTZtG+/btcXBwoHv37hrjdOrUiY4dOzJo0CDKlSundZzRo0ezYcMGJkyYoL5BbdSoUVr9Vq9ezaeffopKpcLT05PMzEw8PDwYOXIkXbt2pXjx4lr71KtXjytXruDq6soff/wBZCdzb29v2rRpg4ODA5cvX0alUjFz5kymT5/OkCFD2L59O05OTjg6OvLnn3/m+v7cu3cPJycnrK2tuX79Og0bNmTdunWcOnVK4zU8uU28GAsLC2JiYrh8+TIAR44coVKlShr3DNSqVYvw8HAATp8+DWTf9BYXFwfAuXPn1P1atmypvidh3759GseqV68eixYtIjQ0lJMnT9KxY0d1261bt7h9+zYAFSpU0ChSAPT09ChTpgx37txRbzMwMMDc3JzDhw8D2Qn22LFjub7GnKQPcP/+ffXrePI1xsfHs23bNjw8NH+7LC4ujmrVqrF3716CgoLw9fXVem2icEnufHNyp15O9ZQHxcGlYDddHQhe+kIBCSFejnXr1pGRkcGgQYM0tl+5coURI0bw+PFjKlSowPfff0+lSpU0+vTv35/r169jYWFBjRo1mDp1KgMHDuTatWvUqVMHMzMzpk6dytq1a1m7di0GBgY4Ojry5ZdfqsdITExkyJAhpKSkoK+vz+rVq6lZs6Y6hoEDBwLZd7FPnz4dR0dH6tWrh7m5OVlZWdSvX5/vv/9eI66kpCRGjhxJXFwcGRkZfPHFF7i6ugLZv/4XFhYGZC81jxo1ivv3sy+zLliwgBYtWmiMNWPGDI4cOUJQUJDG9u+//55ixYrx0UcfAfD3338zd+5cunXrpv7hMWrUKJYvX05QUJB6mxD/AXpaG6SIEOLttHXrVubMmcPChQu1fkMjv3JWGXJ+PVQI8Z+mVUTodGOlEKLo8/HxwcfH53WHIYR4i0kRIYTIU84/5CSEELmR36cTQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBC
CCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE6kiBBCCCGETp75FM/U1PQ4IyPDygUZUFGULD09PSlORFGVhRTPomiSuSmKsnjA7OmNz3sUuBBCCCFErqTqFUIIIYROpIgQQgghhE6kiBBCCCGETqSIEEIIIYROpIgQQgghhE7+DzLeaBhCsMOpAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "from platiagro.plotting import plot_data_table\n",
+ "ax = plot_data_table(df_final)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_final.to_csv(dataset, index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Salva resultados da tarefa\n",
+ "\n",
+    "A plataforma guarda o conteúdo de `/tmp/data/` para as tarefas subsequentes.\n",
+ "Use essa pasta para salvar modelos, metadados e outros resultados."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['/tmp/data/qgenerator.joblib']"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from joblib import dump\n",
+ "\n",
+ "artifacts = {\n",
+ " \"model\":qgenerator_caller,\n",
+ " \"expand_context\":expand_context,\n",
+ " \"infer_num_gen_sentences\":infer_num_gen_sentences,\n",
+ " \"column_context\":column_context,\n",
+ " \"column_question\":column_question\n",
+ "} \n",
+ "\n",
+ "dump(artifacts, \"/tmp/data/qgenerator.joblib\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Liberando Memória da GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "qgenerator_caller.free_memory()\n",
+ "del qgenerator_caller\n",
+ "torch.cuda.empty_cache() "
+ ]
+ }
+ ],
+ "metadata": {
+ "celltoolbar": "Tags",
+ "experiment_id": "dd63cfbd-7a97-41ac-bd9b-fd11711ba459",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.10"
+ },
+ "operator_id": "e4150bc8-88f2-4d98-b68a-6c246270c403",
+ "task_id": "ccfeb3fe-3d3a-43cf-bdc4-d0b07017e468"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tasks/nlp-question-generator/aux_functions.py b/tasks/nlp-question-generator/aux_functions.py
new file mode 100644
index 00000000..be5fbf73
--- /dev/null
+++ b/tasks/nlp-question-generator/aux_functions.py
@@ -0,0 +1,7 @@
+import pandas as pd
+
+def build_df_result(gen_questions_dict,column_context="context",column_question="questions"):  # {id: {"context":..., "questions":[...]}} -> two-column DataFrame
+    context_list = [v["context"] for k,v in gen_questions_dict.items()]  # one context per entry, in dict insertion order
+    questions_list = [v["questions"] for k,v in gen_questions_dict.items()]  # parallel list of generated-question lists
+    df_result = pd.DataFrame({column_context: context_list,column_question: questions_list})  # column names come from the parameters; value keys are fixed
+    return df_result
\ No newline at end of file
diff --git a/tasks/nlp-question-generator/caller.py b/tasks/nlp-question-generator/caller.py
new file mode 100644
index 00000000..8b4fb796
--- /dev/null
+++ b/tasks/nlp-question-generator/caller.py
@@ -0,0 +1,407 @@
+import os
+import gc
+import sys
+import yaml
+import torch
+import itertools
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from tqdm import tqdm
+from transformers import T5Tokenizer
+from multiprocessing import cpu_count
+from torch.utils.data import DataLoader
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping,GPUStatsMonitor
+
+# Classes and functions from the project
+from dataset import CustomDataset
+from model import T5Finetuner
+from io_utils import IO_Utils
+
+
+# TODO: still needs work (original note: "Precisa Fazer")
+class Qgenerator_caller():
+    """Builds, trains, loads and serves the T5 question-generation model; call order: build() -> (train() | load_model()) -> forward()/evaluate()."""
+    def __init__(self, cfg):
+        #self.config = Config.from_json(cfg)
+        self.config = cfg  # plain nested dict; expected layout is unpacked in build()
+        self.io_utils = IO_Utils()  # json/csv/pickle helpers
+        self.MODEL = None  # set later by train()/load_model()
+
+        # Flags enforcing the call order: build() -> (train() | load_model()) -> forward()/evaluate()
+        self.build_called = False
+        self.train_called = False
+        self.load_called = False
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence HF tokenizer fork warnings
+
+    def build(self,**kwargs):
+        """
+        Unpacks the config into attributes and creates the tokenizer, device and Lightning trainer.
+        """
+        # Call-order bookkeeping
+        self.build_called = True
+
+        # Directory paths
+        self.data_dirpath = self.config['dirpaths']['data_dirpath']
+        self.log_dirpath = self.config['dirpaths']['log_dirpath']
+        self.cwd_dirpath = self.config['dirpaths']['cwd_dirpath']
+
+        # Parameter groups from the config
+        self.hparams = self.config['params']['hparams']
+        self.lightning_params = self.config['params']['lightning_params']
+        self.early_stop_callback_params = self.config['params']['early_stop_callback_params']
+        self.prepare_data_params = self.config['params']['prepare_data_params']
+        #-
+        self.test_size_from_dev = self.prepare_data_params['test_size_from_dev']
+        #-
+        self.model_name = self.hparams['model_name']
+        self.num_gen_sentences = self.hparams['num_gen_sentences']
+        self.no_repeat_ngram_size = self.hparams['no_repeat_ngram_size']
+        self.train_batch_size = self.hparams['train_batch_size']
+        self.eval_batch_size = self.hparams['eval_batch_size']
+        self.source_max_length = self.hparams['source_max_length']
+        self.target_max_length = self.hparams['target_max_length']
+        self.temperature = self.hparams['temperature']
+        self.top_p = self.hparams['top_p']
+        self.learning_rate = self.hparams['learning_rate']
+        self.eps = self.hparams['eps']
+        self.seed = self.hparams['seed']
+        #-
+        self.num_gpus = self.lightning_params['num_gpus'] if torch.cuda.is_available() else 0  # force CPU when no GPU is present
+        self.profiler = self.lightning_params['profiler']
+        self.max_epochs = self.lightning_params['max_epochs']
+        self.accumulate_grad_batches = self.lightning_params['accumulate_grad_batches']
+        self.check_val_every_n_epoch = self.lightning_params['check_val_every_n_epoch']
+        self.progress_bar_refresh_rate = self.lightning_params['progress_bar_refresh_rate']
+        self.gradient_clip_val = self.lightning_params['gradient_clip_val']
+        self.fast_dev_run = self.lightning_params['fast_dev_run']
+        #-
+        self.monitor = self.early_stop_callback_params['monitor']
+        self.min_delta = self.early_stop_callback_params['min_delta']
+        self.patience = self.early_stop_callback_params['patience']
+        self.verbose = self.early_stop_callback_params['verbose']
+        self.mode = self.early_stop_callback_params['mode']
+
+        # Derived objects
+        self.tokenizer = T5Tokenizer.from_pretrained(self.config['params']['hparams']['model_name'])
+        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        self.MODEL = None
+
+        # Trainer
+        if self.fast_dev_run:
+            self.TRAINER = pl.Trainer(
+                gpus=self.num_gpus,
+                checkpoint_callback=False,
+                fast_dev_run=True # Disable checkpoint saving.
+            )
+        else:
+
+            checkpoint_callback = ModelCheckpoint(
+                dirpath=self.data_dirpath, save_top_k=-1
+            )
+
+            early_stop_callback = EarlyStopping(
+                monitor=self.early_stop_callback_params['monitor'],
+                min_delta=self.early_stop_callback_params['min_delta'],
+                patience=self.early_stop_callback_params['patience'],
+                verbose=self.early_stop_callback_params['verbose'],
+                mode=self.early_stop_callback_params['mode']
+            )
+
+            gpu_stats = GPUStatsMonitor()
+            tb_logger = pl.loggers.TensorBoardLogger(f"{self.log_dirpath}")
+
+            self.TRAINER = pl.Trainer(
+                gpus= self.lightning_params['num_gpus'],
+                profiler=self.lightning_params['profiler'],
+                max_epochs=self.lightning_params['max_epochs'],
+                accumulate_grad_batches = self.lightning_params['accumulate_grad_batches'],
+                check_val_every_n_epoch=self.lightning_params['check_val_every_n_epoch'],
+                progress_bar_refresh_rate=self.lightning_params['progress_bar_refresh_rate'],
+                callbacks = [early_stop_callback,gpu_stats,checkpoint_callback],
+                resume_from_checkpoint=None,
+                logger = tb_logger
+            )
+
+    def free_memory(self):  # drop model/trainer/tokenizer refs and clear the CUDA cache
+        del self.MODEL
+        del self.TRAINER
+        del self.tokenizer
+        del self.device
+        del self.hparams
+        del self.config
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    def load_model(self,**kwargs):  # kwargs: checkpoint_path (required)
+        def verify_args(checkpoint_path):
+            if not checkpoint_path:
+                raise ValueError("checkpoint_path é um argumento obrigatório")
+
+        # Call-order check: build() must have run first
+        if not (self.build_called ):
+            raise AssertionError("Para chamar o método load é nececssário chamar o método build")
+        self.load_called = True
+
+        # kwargs
+        checkpoint_path = kwargs.get('checkpoint_path',None)
+        verify_args(checkpoint_path)
+
+        # Hyper-parameters adjusted for inference (no metric tracking)
+        hparams = self.hparams.copy()
+        hparams['device'] = self.device
+        hparams['track_metrics'] = False
+
+        # Load the checkpoint onto the configured device
+        self.MODEL = T5Finetuner.load_from_checkpoint(checkpoint_path = checkpoint_path,map_location=self.device,hparams=hparams)
+        self.MODEL.to(self.device)
+
+    def train(self,**kwargs):  # kwargs: train_path, valid_path, test_path, glove_weights_path (required); MODEL_PATH (optional resume checkpoint)
+        def verify_args(train_path,valid_path,test_path,glove_weights_path):
+            if not train_path:
+                raise ValueError("train_path é um argumento obrigatório")
+            if not valid_path:
+                raise ValueError("valid_path é um argumento obrigatório")
+            if not test_path:
+                raise ValueError("test_path é um argumento obrigatório")
+            if not glove_weights_path:
+                raise ValueError("glove_weights_path é um argumento obrigatório")
+
+        # Call-order check
+        if not (self.build_called ):
+            raise AssertionError("Para chamar o método train é nececssário chamar o método build")
+        self.train_called = True
+
+        # kwargs
+        MODEL_PATH = kwargs.get('MODEL_PATH',None)
+        train_path = kwargs.get('train_path',None)
+        valid_path = kwargs.get('valid_path',None)
+        test_path = kwargs.get('test_path',None)
+        glove_weights_path = kwargs.get('glove_weights_path',None)
+        verify_args(train_path,valid_path,test_path,glove_weights_path)
+
+        # Build train/valid/test datasets from the CSV splits (paths relative to data_dirpath)
+        df_result_train= self.io_utils.read_csv_to_df(filepath=os.path.join(self.data_dirpath,train_path))
+        df_result_valid= self.io_utils.read_csv_to_df(filepath=os.path.join(self.data_dirpath,valid_path))
+        df_result_test= self.io_utils.read_csv_to_df(filepath=os.path.join(self.data_dirpath,test_path))
+
+        X_train,y_train = np.array(df_result_train['context']),np.array(df_result_train['question'])
+        X_valid,y_valid = np.array(df_result_valid['context']),np.array(df_result_valid['question'])
+        X_test ,y_test = np.array(df_result_test['context']),np.array(df_result_test['question'])
+
+        train_dataset = CustomDataset(PREFIX=self.hparams['PREFIX'],
+                                      tokenizer=self.tokenizer,
+                                      X_context=X_train,
+                                      y_question=y_train,
+                                      source_max_length=self.hparams['source_max_length'],
+                                      target_max_length=self.hparams['target_max_length'],
+                                      step='Experiment',
+                                      )
+
+        valid_dataset = CustomDataset(PREFIX=self.hparams['PREFIX'],
+                                      tokenizer=self.tokenizer,
+                                      X_context=X_valid,
+                                      y_question=y_valid,
+                                      source_max_length=self.hparams['source_max_length'],
+                                      target_max_length=self.hparams['target_max_length'],
+                                      step='Experiment',
+                                      )
+
+        test_dataset = CustomDataset(PREFIX=self.hparams['PREFIX'],
+                                     tokenizer=self.tokenizer,
+                                     X_context=X_test,
+                                     y_question=y_test,
+                                     source_max_length=self.hparams['source_max_length'],
+                                     target_max_length=self.hparams['target_max_length'],
+                                     step='Experiment',
+                                     )
+
+
+        # Extend hparams with the objects the LightningModule needs
+        hparams = self.hparams.copy()
+        hparams['tokenizer'] = self.tokenizer
+        hparams['device'] = self.device
+        hparams['glove_weights_path'] = glove_weights_path
+        hparams['overfit'] = False
+        hparams['track_metrics'] = True
+        hparams['train_dataset'] = train_dataset
+        hparams['valid_dataset'] = valid_dataset
+        hparams['test_dataset'] = test_dataset
+
+        # Resume from a checkpoint when MODEL_PATH is given; otherwise train from scratch
+        if MODEL_PATH:
+            self.MODEL = T5Finetuner.load_from_checkpoint(
+                checkpoint_path = MODEL_PATH,
+                map_location=self.device,
+                hparams=hparams
+            )
+        else:
+            self.MODEL = T5Finetuner(
+                hparams=hparams
+            )
+
+        # Fit
+        self.TRAINER.fit(self.MODEL)
+
+    def save_checkpoint(self,checkpoint_path):
+        # Call-order check: requires a prior train()
+        if not (self.train_called):
+            raise AssertionError("Para chamar o método save_checkpoint é nececssário chamar o método train")
+
+        self.TRAINER.save_checkpoint(checkpoint_path)
+
+    def evaluate(self,**kwargs):
+        # Call-order check: build() plus train() or load_model()
+        if not (self.build_called and (self.train_called or self.load_called)):
+            raise AssertionError("Para chamar o método evaluate é nececssário chamar o método build e em seguida o método train ou o método load")
+
+        # Run the Lightning test loop
+        self.TRAINER.test(self.MODEL)
+
+        # Persist the tracked metrics as JSON in the log directory
+        valid_results_output_path = os.path.join(self.log_dirpath,'valid_results.json')
+        test_results_output_path = os.path.join(self.log_dirpath,'test_results.json')
+        valid_results = self.MODEL.valid_metrics_calculator.list_dict_track
+        test_results = self.MODEL.test_metrics_calculator.list_dict_track
+        self.io_utils.dump_json(filepath=valid_results_output_path,d=valid_results)
+        self.io_utils.dump_json(filepath=test_results_output_path,d=test_results)
+
+        return {'valid_results':valid_results,
+                'test_results':test_results
+                }
+
+    def forward(self,**kwargs):  # kwargs: contexts (list of str), num_gen_sentences (int); returns {index: {'context':..., 'questions':[...]}}
+
+        def verify_args(contexts,num_gen_sentences):
+
+            if not all(isinstance(elem, str) for elem in contexts):
+                raise ValueError(f"contexts deve ser uma lista de strings mas é {contexts}")
+            if not num_gen_sentences:
+                raise ValueError("contexts é um argumento obrigatório")  # NOTE(review): message says "contexts" but the check is on num_gen_sentences
+
+        # Call-order check
+        if not (self.build_called and (self.train_called or self.load_called)):
+            raise AssertionError("Para chamar o método forward é nececssário chamar o método build e em seguida o método train ou o método load")
+
+        # kwargs
+        num_gen_sentences = kwargs.get('num_gen_sentences',None)
+        contexts = kwargs.get('contexts',None)
+        verify_args(contexts,num_gen_sentences)
+
+        X_test = np.array(contexts)
+
+        inference_dataset = CustomDataset(PREFIX=self.hparams['PREFIX'],
+                                          tokenizer=self.tokenizer,
+                                          X_context=X_test,
+                                          y_question=[],
+                                          source_max_length=self.hparams['source_max_length'],
+                                          target_max_length=self.hparams['target_max_length'],
+                                          step='Deployment',
+                                          )
+
+
+        with torch.no_grad():
+
+            self.MODEL.eval()
+            self.MODEL.to(self.device)
+            inference_dataloader = DataLoader(inference_dataset, batch_size=self.hparams['eval_batch_size'], shuffle=False,num_workers=cpu_count())
+
+            result = {}
+            j = 0  # running offset so keys stay unique across batches
+            for i,batch in enumerate(tqdm(inference_dataloader)):
+                source_token_ids, source_masks, original_source = batch
+                batch_size = len(original_source)
+                source_token_ids = source_token_ids.to(self.device)
+                source_masks = source_masks.to(self.device)
+                logits = self.MODEL.forward(source_token_ids, source_masks,info_requested='logits',num_gen_sentences=num_gen_sentences)
+                gen_quesitons = [self.tokenizer.decode(l, skip_special_tokens=True) for l in logits]
+                questions_per_context = [gen_quesitons[s:s+num_gen_sentences] for s in list(range(0,len(gen_quesitons),num_gen_sentences))]
+                result_batch = {f'{j+k}':{'context':original_source[k],'questions':questions_per_context[k]} for k in range(batch_size)}
+                result.update(result_batch)
+                j += batch_size
+
+
+        return result
+
+    def prepare_data(self,**kwargs):  # kwargs: squad_train_path, squad_dev_path (required); writes train/valid/test CSVs and returns their paths
+        def verify_args(squad_train_path,squad_dev_path):
+            if not squad_train_path:
+                raise ValueError("squad_train_path é um argumento obrigatório")
+            if not squad_dev_path:
+                raise ValueError("squad_dev_path é um argumento obrigatório")
+
+        # kwargs
+        squad_train_path = kwargs.get('squad_train_path',None)
+        squad_dev_path = kwargs.get('squad_dev_path',None)
+        verify_args(squad_train_path,squad_dev_path)
+
+        # SQuAD JSON -> DataFrames; the dev set is split into valid/test
+        train_json = self.io_utils.read_json(os.path.join(self.data_dirpath,squad_train_path))
+        dev_json = self.io_utils.read_json(os.path.join(self.data_dirpath,squad_dev_path))
+        self._read_squad_json_as_dataframe(train_json)  # NOTE(review): result discarded — duplicate of the next line; candidate for removal
+        df_train = self._read_squad_json_as_dataframe(train_json)
+        df_dev = self._read_squad_json_as_dataframe(dev_json)
+        df_valid, df_test = train_test_split(df_dev, test_size=self.test_size_from_dev)
+
+        # Chunking currently disabled; frames pass through unchanged
+        df_result_train = df_train #self._convert_tokenized_examples_to_dataset(df=df_train)
+        df_result_valid = df_valid #self._convert_tokenized_examples_to_dataset(df=df_valid)
+        df_result_test = df_test #self._convert_tokenized_examples_to_dataset(df=df_test)
+        #df_result = pd.concat([df_result_train, df_result_valid, df_result_test],ignore_index=True)
+
+
+        # Save the splits as CSV in data_dirpath
+        train_output = os.path.join(self.data_dirpath,'squad-train-v1.1.csv')
+        valid_output = os.path.join(self.data_dirpath,'squad-valid-v1.1.csv')
+        test_output = os.path.join(self.data_dirpath,'squad-test-v1.1.csv')
+        #complete_output = os.path.join(self.data_dirpath,'squad-v1.1.csv')
+
+        df_result_train.to_csv(os.path.join(train_output),index=False)
+        df_result_valid.to_csv(os.path.join(valid_output),index=False)
+        df_result_test.to_csv(os.path.join(test_output),index=False)
+        #df_result.to_csv(os.path.join(complete_output),index=False)
+
+        return {
+            'prepared_data_train_path':train_output,
+            'prepared_data_valid_path':valid_output,
+            'prepared_data_test_path':test_output,
+        }
+
+    def _apply_preprocessing(self,text):  # collapse runs of whitespace into single spaces
+        text = " ".join(text.split()).strip()
+        return text
+
+    def _read_squad_json_as_dataframe(self,json_file):  # flatten SQuAD-style JSON into a context/question/answer/answer_start frame (one row per answer)
+
+        context, question, answer, answer_start = [], [], [], []
+
+        for d in json_file['data']:
+
+            for c in d['paragraphs']:
+
+                for q in c['qas']:
+
+                    for a in q['answers']:
+
+                        context.append(self._apply_preprocessing(c['context']))
+                        question.append(self._apply_preprocessing(q['question']))
+                        answer.append(self._apply_preprocessing(a['text']))
+                        answer_start.append(a['answer_start'])
+
+        df = pd.DataFrame({'context': context, 'question': question, 'answer': answer, 'answer_start': answer_start})
+
+        return df
+
+    def build_complete_json(self,gen_questions_dict,reports_contents):  # annotate each generated-question entry with its report/section names
+
+        gen_questions_dict_copy = gen_questions_dict.copy()  # NOTE(review): shallow copy — inner dicts are shared with the input and mutated below
+        for k, v in gen_questions_dict.items():
+            rp_infos = reports_contents[k]
+            report_name = rp_infos['report_name']
+            section_name = rp_infos['section_name']
+            gen_questions_dict_copy[k]['report_name'] = report_name
+            gen_questions_dict_copy[k]['section_name'] = section_name
+
+        return gen_questions_dict_copy
diff --git a/tasks/nlp-question-generator/dataset.py b/tasks/nlp-question-generator/dataset.py
new file mode 100644
index 00000000..2d05ef31
--- /dev/null
+++ b/tasks/nlp-question-generator/dataset.py
@@ -0,0 +1,63 @@
+import torch
+import numpy as np
+from typing import List, Union, Optional
+from torch.utils.data import Dataset
+
+class CustomDataset(Dataset):
+    def __init__(self,PREFIX,tokenizer,X_context:np.ndarray,y_question:Optional[np.ndarray]=[],
+                 source_max_length: int = 32, target_max_length: int = 32,step="Experiment"):
+        self.tokenizer = tokenizer  # T5 tokenizer used by encoder_plus
+        self.X_context = X_context  # array of context strings
+
+        self.y_question = y_question  # array of target questions; empty in Deployment
+        self.source_max_length = min(source_max_length + len(PREFIX.split(' ')),512)  # reserve room for the prefix tokens, capped at 512
+        self.target_max_length = target_max_length
+        self.step = step  # "Experiment" (train/eval, needs targets) or "Deployment" (inference, no targets)
+        self.PREFIX = PREFIX  # task prefix prepended to every source
+
+
+        if step == "Experiment" and len(y_question)==0:
+            raise Exception("Na fase de experimento o componente tem de haver um y de referência")
+
+
+        if step == "Deployment" and len(y_question)>0:
+            raise Exception("Na fase de implantação o componente tem deve possuir y=None")
+
+    def __len__(self):
+        return len(self.X_context)
+
+    def __getitem__(self, idx):
+        # Encode the source: context prefixed with the task PREFIX
+        original_source = self.X_context[idx]
+        source = f"{self.PREFIX} {original_source}"
+        source_encoder = self.encoder_plus(source,self.source_max_length)
+        source_token_ids = source_encoder['input_ids']
+        source_mask = source_encoder['attention_mask']
+        source_token_ids = torch.tensor(source_token_ids).type(torch.long)
+        source_mask = torch.tensor(source_mask).type(torch.long)
+
+        if self.step=="Experiment":
+            # Encode the target question as well (training/evaluation only)
+            original_target = self.y_question[idx]
+            target = f"{original_target}"
+            target_encoder = self.encoder_plus(target,self.target_max_length)
+            target_token_ids = target_encoder['input_ids']
+            target_mask = target_encoder['attention_mask']
+            target_token_ids = torch.tensor(target_token_ids).type(torch.long)
+            target_mask = torch.tensor(target_mask).type(torch.long)
+
+            retorno = (source_token_ids, source_mask, target_token_ids, target_mask, original_source, original_target)
+
+        if self.step=="Deployment":
+            retorno = (source_token_ids, source_mask, original_source)
+
+        return retorno  # NOTE(review): unbound if step is neither "Experiment" nor "Deployment" — confirm step is validated upstream
+
+    def encoder_plus(self,text,L):
+        # padding - "max_length": pad to the max_length argument; True: pad to the longest
+        # sentence in the batch. The performance of each option still needs evaluation:
+        # True looks better, while "max_length" seems more likely to work everywhere.
+        return self.tokenizer.encode_plus(text,
+                                          max_length = L,
+                                          truncation=True,
+                                          padding="max_length")
\ No newline at end of file
diff --git a/tasks/nlp-question-generator/expander.py b/tasks/nlp-question-generator/expander.py
new file mode 100644
index 00000000..205d5bfc
--- /dev/null
+++ b/tasks/nlp-question-generator/expander.py
@@ -0,0 +1,57 @@
+import os
+import pandas as pd
+
+from io_utils import IO_Utils
+from typing import List
+
+class DocExpander:
+    def __init__(self):
+        '''
+        Expand documents with the questions generated from them.
+        Documents and questions (and the questions between themselves) are separated by whitespace.
+        '''
+        pass
+
+    def expand_nosql(self,context_questions_map,context_key='context',questions_key = 'questions',apply_filter=False,apply_low_case=False):
+        """
+        Returns the expanded contexts in noSQL form keyed by context id, with the
+        contexts, expanded contexts and questions as values.
+        """
+
+        context_questions_map_internal = context_questions_map.copy()  # shallow copy; inner dicts gain an 'expanded_context' key below
+
+        if apply_low_case:
+            context_questions_map = self.lower_case_dict(context_questions_map)  # NOTE(review): assigns to the argument, not to ..._internal, so lowercasing has no effect; lower_case_dict is also not defined in this file — confirm
+
+        if apply_filter:
+            context_questions_map_internal = self.filter_post_content(content=context_questions_map_internal,
+                                                                      section_names_to_keep=['Capítulo 6', 'Capitulo 6'],
+                                                                      min_context_length_in_tokens=20)
+
+        for k,v in context_questions_map_internal.items():
+            context = v[context_key]
+            questions = v[questions_key]
+            expanded_context = context + ' ' + ' '.join(questions)  # context followed by all of its generated questions
+            expanded_context = expanded_context.strip()
+            context_questions_map_internal[k]['expanded_context'] = expanded_context
+
+        return context_questions_map_internal
+
+    # TODO: develop the SQL technique
+    def expand_sql(self,df,context_column_name='context',questions_column_name = 'questions'):
+        """
+        Returns the expanded contexts as a new column appended to the dataframe
+        """
+
+        df_copy = df.copy()
+        expanded_context_list = []
+        for index, row in df_copy.iterrows():
+            questions = row[questions_column_name]
+            context = row[context_column_name]
+            expanded_context = context + ' ' + ' '.join(questions)  # same concatenation rule as expand_nosql
+            expanded_context = expanded_context.strip()
+            expanded_context_list.append(expanded_context)
+
+        df_copy.insert(df.shape[1], "expanded_context", expanded_context_list)  # append as the last column
+
+        return df_copy
\ No newline at end of file
diff --git a/tasks/nlp-question-generator/io_utils.py b/tasks/nlp-question-generator/io_utils.py
new file mode 100644
index 00000000..7047bb9d
--- /dev/null
+++ b/tasks/nlp-question-generator/io_utils.py
@@ -0,0 +1,75 @@
+import os
+import json
+import pandas as pd
+from typing import List
+from pickle import load as read_pickle
+from pickle import dump as dump_pickle
+
+class IO_Utils(object):
+    """
+    Class with utilities for reading and writing (json, pickle, txt, csv)
+    """
+    def __init__(self):
+        pass
+
+    def read_json(self,filepath:str):  # parse a JSON file into a Python object
+        with open(filepath) as f:
+            json_result = json.load(f)
+        return json_result
+
+    def reads_json(self,filepath:str):  # for double-encoded JSON: the file holds a JSON string that itself contains JSON
+        with open(filepath) as f:
+            json_result = json.loads(f.read())
+            json_result = json.loads(json_result)
+        return json_result
+
+    def dump_json(self,filepath:str,d,ensure_ascii=False,command='a'):  # default 'a' appends — repeated calls concatenate JSON documents in one file
+        with open(filepath, command) as fp:
+            json.dump(d, fp, ensure_ascii=ensure_ascii)
+
+    def dumps_json(self,filepath:str,d,ensure_ascii=False,command='a'):  # writes d as a JSON-encoded *string* (double-encoded); pair with reads_json
+        with open(filepath, command) as fp:
+            d = json.dumps(d, ensure_ascii=ensure_ascii)
+            json.dump(d, fp, ensure_ascii=ensure_ascii)
+
+    def read_pickle(self,filepath:str):  # calls the module-level alias of pickle.load, not this method
+        with open(filepath, 'rb') as f:
+            content = read_pickle(f)
+        return content
+
+    def save_pickle(self,filepath:str,info):
+        """
+        Save info in a pickle file
+        """
+        with open(filepath, 'wb') as f:
+            dump_pickle(info, f)
+
+    def create_folder_structure(self,folder:str):
+        """ Create the complete folder structure if it does not exist """
+        if not os.path.exists(folder):
+            os.makedirs(folder)
+
+    def read_line_spaced_txt_file(self,filepath:str):  # one list element per line, newlines stripped
+        with open(filepath, 'r') as infile:
+            data = infile.read().splitlines()
+        return data
+
+    def save_line_spaced_txt_file(self,filepath:str,text_list:List[str]):  # one element per line
+        with open(filepath, "w") as output:
+            for row in text_list:
+                output.write(str(row) + '\n')
+
+    def save_df_to_csv(self,filepath:str,df:pd.DataFrame,zipped=False):  # zipped=True rewrites the extension to .csv.gz and gzip-compresses
+        if filepath.split(".")[-1] != "csv":
+            raise ValueError(f"{filepath} tem de ter a extensão .csv")
+
+        filepath = filepath.split(".csv")[0]+".csv.gz" if zipped else filepath
+        compression = 'gzip' if zipped else 'infer'
+        df.to_csv(filepath, compression=compression,index=False)
+
+
+    def read_csv_to_df(self,filepath:str):  # accepts .csv or .csv.gz (pandas infers compression)
+        if ".csv" not in filepath and ".csv.gz" not in filepath:
+            raise ValueError(f"{filepath} tem de ter a extensão .csv ou csv.gz")
+        df = pd.read_csv(filepath)
+        return df
\ No newline at end of file
diff --git a/tasks/nlp-question-generator/metrics_calculator.py b/tasks/nlp-question-generator/metrics_calculator.py
new file mode 100644
index 00000000..ac913281
--- /dev/null
+++ b/tasks/nlp-question-generator/metrics_calculator.py
@@ -0,0 +1,232 @@
+import numpy as np
+#from nlgeval import NLGEval
+from gensim.models import KeyedVectors
+import torch
+import numpy as np
+import nltk
+nltk.download('stopwords')
+
+class Metrics_Calculator(object):
+    """Tracks generation-quality metrics for generated questions.
+
+    Only the GloVe cosine-similarity metric is currently active; the
+    nlg-eval metrics (BLEU 1-4, CIDEr, ROUGE-L) are kept commented out.
+    Per-row results are accumulated in `self.list_dict_track`.
+    """
+
+    def __init__(self,hparams,glove_comparer):
+        # hparams must expose `num_gen_sentences` and `tokenizer`
+        # (both used in generate_sentences_and_track_metrics_batch).
+        # glove_comparer: Glove_Embeddings_Comparer used for similarity.
+        super(Metrics_Calculator, self).__init__()
+        #self.nlg_eval = NLGEval(metrics_to_omit=['EmbeddingAverageCosineSimilairty', 'EmbeddingAverageCosineSimilarity','GreedyMatchingScore','SkipThoughtCS','VectorExtremaCosineSimilarity'])
+        self.list_dict_track = {"data":[]}  # one entry per processed row
+        self.hparams = hparams
+        self.glove_comparer = glove_comparer
+
+    def build_json_results(self,
+                           context,
+                           generated_question_list,
+                           target_question_list,
+                           row_mean_metrics):
+        """
+        Builds the per-row JSON dict that is appended to
+        self.list_dict_track to monitor the metrics.
+        """
+        new_info = {}
+        new_info["context"] =context
+        new_info["generated_question_list"] =generated_question_list
+        new_info["target_question_list"] =target_question_list
+        new_info["row_mean_metrics"] =row_mean_metrics
+
+        return new_info
+
+    def track_metrics_row(self,original_target,gen_target_options_list):
+        """
+        Computes the metrics for one question-context pair: the target
+        question is compared against each generated candidate and the
+        per-candidate scores are averaged.
+        """
+        # bleu_1_list = []
+        # bleu_2_list = []
+        # bleu_3_list = []
+        # bleu_4_list = []
+        # CIDEr_list = []
+        # ROUGE_L_list = []
+        cossine_similarity_list = []
+
+        for gen_target_option in gen_target_options_list:
+
+            # metrics_dict = self.nlg_eval.compute_individual_metrics(ref=[original_target],hyp=gen_target_option)#ref:List[str] , hyp:str
+            # bleu_1_list.append(metrics_dict['Bleu_1'])
+            # bleu_2_list.append(metrics_dict['Bleu_2'])
+            # bleu_3_list.append(metrics_dict['Bleu_3'])
+            # bleu_4_list.append(metrics_dict['Bleu_4'])
+            # CIDEr_list.append(metrics_dict['CIDEr'])
+            # ROUGE_L_list.append(metrics_dict['ROUGE_L'])
+
+            cs = self.glove_comparer.compare_sentences_with_cossine_similarity(original_target,gen_target_option)
+            cossine_similarity_list.append(cs)
+
+        # row_metrics_dict = {"Bleu_1":np.mean(bleu_1_list),
+        #                     "Bleu_2":np.mean(bleu_2_list),
+        #                     "Bleu_3":np.mean(bleu_3_list),
+        #                     "Bleu_4":np.mean(bleu_4_list),
+        #                     "CIDEr":np.mean(CIDEr_list),
+        #                     "ROUGE_L":np.mean(ROUGE_L_list),
+        row_metrics_dict = {
+            "Glove_Cossine_Similarity":np.mean(cossine_similarity_list)}
+
+        return row_metrics_dict
+
+    def generate_sentences_and_track_metrics_batch(self,logits,original_targets_batch,original_sources_batch,save_track_dict=False):
+        """
+        Computes metrics for the whole batch.
+
+        `logits` holds the generated token-id sequences, with
+        hparams.num_gen_sentences consecutive generations per source;
+        they are decoded with hparams.tokenizer before scoring. When
+        `save_track_dict` is True each row's results are also appended to
+        self.list_dict_track. Returns a dict of batch-mean metrics.
+        """
+        # batch_bleu_1_list = []
+        # batch_bleu_2_list = []
+        # batch_bleu_3_list = []
+        # batch_bleu_4_list = []
+        # batch_CIDEr_list = []
+        # batch_ROUGE_L_list = []
+        batch_Glove_Cossine_Similarity_list = []
+
+        #batch
+        for i,(original_target,original_source) in enumerate(zip(original_targets_batch,original_sources_batch)):
+            # row i: its generations occupy logits[i*k : (i+1)*k], k = num_gen_sentences
+            relevant_logits = logits[i*self.hparams.num_gen_sentences:self.hparams.num_gen_sentences+i*self.hparams.num_gen_sentences]
+            gen_target_options_list = [self.hparams.tokenizer.decode(l, skip_special_tokens=True) for l in relevant_logits]
+            row_metrics_dict = self.track_metrics_row(original_target=original_target,gen_target_options_list=gen_target_options_list)
+
+            if save_track_dict:
+                self.list_dict_track["data"].append(self.build_json_results(context=original_source,
+                                                                            generated_question_list=gen_target_options_list,
+                                                                            target_question_list=original_target,
+                                                                            row_mean_metrics = row_metrics_dict))
+
+            # batch_bleu_1_list.append(row_metrics_dict['Bleu_1'])
+            # batch_bleu_2_list.append(row_metrics_dict['Bleu_2'])
+            # batch_bleu_3_list.append(row_metrics_dict['Bleu_3'])
+            # batch_bleu_4_list.append(row_metrics_dict['Bleu_4'])
+            # batch_CIDEr_list.append(row_metrics_dict['CIDEr'])
+            # batch_ROUGE_L_list.append(row_metrics_dict['ROUGE_L'])
+            batch_Glove_Cossine_Similarity_list.append(row_metrics_dict['Glove_Cossine_Similarity'])
+
+        # batch_metrics_dict = {"Batch_Bleu_1":np.mean(batch_bleu_1_list),
+        #                       "Batch_Bleu_2":np.mean(batch_bleu_2_list),
+        #                       "Batch_Bleu_3":np.mean(batch_bleu_3_list),
+        #                       "Batch_Bleu_4":np.mean(batch_bleu_4_list),
+        #                       "Batch_CIDEr":np.mean(batch_CIDEr_list),
+        #                       "Batch_ROUGE_L":np.mean(batch_ROUGE_L_list),
+        batch_metrics_dict = {
+            "Batch_Glove_Cossine_Similarity":np.mean(batch_Glove_Cossine_Similarity_list)
+        }
+
+        return batch_metrics_dict
+
+
+class Glove_Embeddings_Comparer(object):
+    """
+    Class responsible for loading the GloVe embedding matrix and comparing
+    sentences via cosine similarity of their mean word embeddings.
+    """
+    def __init__(self,glove_weights_path:str,device:str):
+        super(Glove_Embeddings_Comparer , self).__init__()
+
+        self.device = device
+        self.glove_path = glove_weights_path
+        self.glove = None        # gensim KeyedVectors, set by extract_glove_properties()
+        self.glove_infos = None  # dict of shape/dim/vocab info, set below
+        self.stopwords = nltk.corpus.stopwords.words('portuguese')
+        self.extract_glove_properties()
+
+    def load_glove_vector(self):
+        """
+        Loads the GloVe vectors in word2vec format.
+
+        The weights file may or may not carry a word2vec header line: try
+        with a header first, fall back to headerless on ValueError
+        (`no_header` requires gensim >= 4).
+        """
+        #glove = KeyedVectors.load_word2vec_format(self.glove_path)
+        try:
+            glove = KeyedVectors.load_word2vec_format(self.glove_path,no_header=False)
+            print("load_word2vec_format with no_header=False")
+        except ValueError:
+            glove = KeyedVectors.load_word2vec_format(self.glove_path,no_header=True)
+            print("load_word2vec_format with no_header=True")
+
+        return glove
+
+    def extract_glove_properties(self):
+        """
+        Extracts all properties of the GloVe vectors, including the
+        word-to-index mapping, and stores them on the instance.
+        """
+        glove = self.load_glove_vector()
+        glove_shape = glove.vectors.shape
+        glove_dim = glove.vector_size
+        glove_words = glove.index_to_key
+        glove_vectors = torch.from_numpy(glove.vectors).to(self.device)
+        glove_vocab = {word:i for i, word in enumerate(glove_words)}
+
+        glove_infos = {'glove_shape':glove_shape,
+                       'glove_dim':glove_dim,
+                       'glove_words':glove_words,
+                       'glove_vectors':glove_vectors,
+                       'glove_vocab':glove_vocab}
+
+        self.glove = glove
+        self.glove_infos = glove_infos
+
+    def separate_punctuation_from_words(self,text):
+        """
+        Punctuation is separated from the words because, if attached, the
+        word would not be found in the embedding vocabulary.
+        """
+        punctuation_list = '!(),.:;?'
+        for punct in list(punctuation_list):
+            text = text.replace(punct,f" {punct} ")
+
+        text = text.strip()
+        return text
+
+    def tokenize_text(self,text: list = None):
+        """
+        Splits the text into a list of words, dropping Portuguese stopwords
+        and tokens absent from the GloVe vocabulary.
+
+        NOTE(review): despite the `list` annotation, a `str` is expected —
+        the value goes through str.replace/str.split.
+        """
+        text = self.separate_punctuation_from_words(text)
+        tokenize_list = text.split(" ")
+        tokenize_list = [token for token in tokenize_list if ((token not in self.stopwords) and (token in self.glove_infos['glove_vocab']))]
+        return tokenize_list
+
+    def cosine_similarity_calculator(self,a, b):
+        """
+        Computes the cosine similarity between two vectors.
+
+        NOTE(review): a zero-norm input makes the denominator 0 and the
+        result nan/inf.
+        """
+        nominator = np.dot(a, b)
+
+        a_norm = np.sqrt(np.sum(a**2))
+        b_norm = np.sqrt(np.sum(b**2))
+
+        denominator = a_norm * b_norm
+
+        cosine_similarity = nominator / denominator
+
+        return cosine_similarity
+
+    def compare_sentences_with_cossine_similarity(self,text1,text2):
+        """
+        Compares two sentences by the cosine similarity of their mean GloVe
+        embeddings; returns a numpy float64 scalar.
+
+        NOTE(review): if every token of a sentence is filtered out by
+        tokenize_text, np.mean of an empty list yields nan.
+        """
+        tokenize_list1 = self.tokenize_text(text1)
+        tokenize_list2 = self.tokenize_text(text2)
+
+        embeddigns_sentence1 = [self.glove.get_vector(t1) for t1 in tokenize_list1]
+        embeddigns_sentence1_mean = np.mean(embeddigns_sentence1,axis=0)
+        embeddigns_sentence2 = [self.glove.get_vector(t2) for t2 in tokenize_list2]
+        embeddigns_sentence2_mean = np.mean(embeddigns_sentence2,axis=0)
+        cosine_similarity = self.cosine_similarity_calculator(embeddigns_sentence1_mean,embeddigns_sentence2_mean)
+        cosine_similarity = np.float64(cosine_similarity)
+        return cosine_similarity
+
+    def batch_average_cossine_similarity(self,list_ref_texts,list_gen_texts):
+        # Placeholder — not implemented yet.
+        pass
+
+
diff --git a/tasks/nlp-question-generator/model-question-generator.py b/tasks/nlp-question-generator/model-question-generator.py
deleted file mode 100644
index 7c150ec7..00000000
--- a/tasks/nlp-question-generator/model-question-generator.py
+++ /dev/null
@@ -1,463 +0,0 @@
-import torch
-import nltk
-from tqdm import tqdm
-from multiprocessing import cpu_count
-from typing import List, Union, Optional
-import numpy as np
-import pytorch_lightning as pl
-from torch.utils.data import DataLoader
-from transformers import T5ForConditionalGeneration
-from nlgeval import NLGEval
-from gensim.models import KeyedVectors
-nltk.download('stopwords')
-
-class Metrics_Calculator(object):
-
- def __init__(self,hparams,glove_comparer):
-
-
- super(Metrics_Calculator, self).__init__()
- self.nlg_eval = NLGEval(metrics_to_omit=['EmbeddingAverageCosineSimilairty', 'EmbeddingAverageCosineSimilarity','GreedyMatchingScore','SkipThoughtCS','VectorExtremaCosineSimilarity'])
- self.list_dict_track = {"data":[]}
- self.hparams = hparams
- self.glove_comparer = glove_comparer
-
-
- def build_json_results(self,
- context,
- generated_question_list,
- target_question_list,
- row_mean_metrics):
-
- """
- Cria json para cada linha que será salvo para monitorar as métricas em self.list_dict_track
- """
- new_info = {}
- new_info["context"] =context
- new_info["generated_question_list"] =generated_question_list
- new_info["target_question_list"] =target_question_list
- new_info["row_mean_metrics"] =row_mean_metrics
-
-
- return new_info
-
- def track_metrics_row(self,original_target,gen_target_options_list):
- """
- Calcula as métricas para cada par question-context
- """
- bleu_1_list = []
- bleu_2_list = []
- bleu_3_list = []
- bleu_4_list = []
- CIDEr_list = []
- ROUGE_L_list = []
- cossine_similarity_list = []
-
- for gen_target_option in gen_target_options_list:
-
- metrics_dict = self.nlg_eval.compute_individual_metrics(ref=[original_target],hyp=gen_target_option)#ref:List[str] , hyp:str
- bleu_1_list.append(metrics_dict['Bleu_1'])
- bleu_2_list.append(metrics_dict['Bleu_2'])
- bleu_3_list.append(metrics_dict['Bleu_3'])
- bleu_4_list.append(metrics_dict['Bleu_4'])
- CIDEr_list.append(metrics_dict['CIDEr'])
- ROUGE_L_list.append(metrics_dict['ROUGE_L'])
- cs = self.glove_comparer.compare_sentences_with_cossine_similarity(original_target,gen_target_option)
- cossine_similarity_list.append(cs)
-
-
-
- row_metrics_dict = {"Bleu_1":np.mean(bleu_1_list),
- "Bleu_2":np.mean(bleu_2_list),
- "Bleu_3":np.mean(bleu_3_list),
- "Bleu_4":np.mean(bleu_4_list),
- "CIDEr":np.mean(CIDEr_list),
- "ROUGE_L":np.mean(ROUGE_L_list),
- "Glove_Cossine_Similarity":np.mean(cossine_similarity_list)}
-
- return row_metrics_dict
-
-
-
- def generate_sentences_and_track_metrics_batch(self,logits,original_targets_batch,original_sources_batch,save_track_dict=False):
- """
- Calcula métricas para todo o batch
- """
- batch_bleu_1_list = []
- batch_bleu_2_list = []
- batch_bleu_3_list = []
- batch_bleu_4_list = []
- batch_CIDEr_list = []
- batch_ROUGE_L_list = []
- batch_Glove_Cossine_Similarity_list = []
-
-
- #batch
- for i,(original_target,original_source) in enumerate(zip(original_targets_batch,original_sources_batch)):
- #linha
- relevant_logits = logits[i*self.hparams.num_gen_sentences:self.hparams.num_gen_sentences+i*self.hparams.num_gen_sentences]
- gen_target_options_list = [self.hparams.tokenizer.decode(l, skip_special_tokens=True) for l in relevant_logits]
- row_metrics_dict = self.track_metrics_row(original_target=original_target,gen_target_options_list=gen_target_options_list)
-
- if save_track_dict:
- self.list_dict_track["data"].append(self.build_json_results(context=original_source,
- generated_question_list=gen_target_options_list,
- target_question_list=original_target,
- row_mean_metrics = row_metrics_dict))
-
- batch_bleu_1_list.append(row_metrics_dict['Bleu_1'])
- batch_bleu_2_list.append(row_metrics_dict['Bleu_2'])
- batch_bleu_3_list.append(row_metrics_dict['Bleu_3'])
- batch_bleu_4_list.append(row_metrics_dict['Bleu_4'])
- batch_CIDEr_list.append(row_metrics_dict['CIDEr'])
- batch_ROUGE_L_list.append(row_metrics_dict['ROUGE_L'])
- batch_Glove_Cossine_Similarity_list.append(row_metrics_dict['Glove_Cossine_Similarity'])
-
-
- batch_metrics_dict = {"Batch_Bleu_1":np.mean(batch_bleu_1_list),
- "Batch_Bleu_2":np.mean(batch_bleu_2_list),
- "Batch_Bleu_3":np.mean(batch_bleu_3_list),
- "Batch_Bleu_4":np.mean(batch_bleu_4_list),
- "Batch_CIDEr":np.mean(batch_CIDEr_list),
- "Batch_ROUGE_L":np.mean(batch_ROUGE_L_list),
- "Batch_Glove_Cossine_Similarity":np.mean(batch_Glove_Cossine_Similarity_list)
- }
-
- return batch_metrics_dict
-
-
-class Glove_Embeddings_Comparer(object):
- """
- Classes reponsável por criar a matriz de glove embeddings com os textos fornecidos
- """
- def __init__(self,glove_weights_path:str,device:str):
- super(Glove_Embeddings_Comparer , self).__init__()
-
- self.device = device
- self.glove_path = glove_weights_path
- self.glove = None
- self.glove_infos = None
- self.stopwords = nltk.corpus.stopwords.words('portuguese')
- self.extract_glove_properties()
-
-
- def load_glove_vector(self):
- """
- Carrega os vetores glove no formato word2vec
- """
-
- #glove = KeyedVectors.load_word2vec_format(self.glove_path)
- try:
- glove = KeyedVectors.load_word2vec_format(self.glove_path,no_header=False)
- print("load_word2vec_format with no_header=False")
- except ValueError:
- glove = KeyedVectors.load_word2vec_format(self.glove_path,no_header=True)
- print("load_word2vec_format with no_header=True")
-
- return glove
-
- def extract_glove_properties(self):
- """
- Extrai todas as propriedades dos vetores glove considerando o mapeamento ente palavras e vetores
- """
- glove = self.load_glove_vector()
- glove_shape = glove.vectors.shape
- glove_dim = glove.vector_size
- glove_words = glove.index_to_key
- glove_vectors = torch.from_numpy(glove.vectors).to(self.device)
- glove_vocab = {word:i for i, word in enumerate(glove_words)}
-
- glove_infos = {'glove_shape':glove_shape,
- 'glove_dim':glove_dim,
- 'glove_words':glove_words,
- 'glove_vectors':glove_vectors,
- 'glove_vocab':glove_vocab}
-
-
- self.glove = glove
- self.glove_infos = glove_infos
-
- def separate_punctuation_from_words(self,text):
- """"
- Pontuações são separadas das palavras porque caso estejam juntas esta palavra não estará no vetor de embeddings
- """
- punctuation_list = '!(),.:;?'
- for punct in list(punctuation_list):
- text = text.replace(punct,f" {punct} ")
-
- text = text.strip()
- return text
-
- def tokenize_text(self,text: list = None):
- """
- Transforma o texto em lista de palavras
- """
- text = self.separate_punctuation_from_words(text)
- tokenize_list = text.split(" ")
- tokenize_list = [token for token in tokenize_list if ((token not in self.stopwords) and (token in self.glove_infos['glove_vocab']))]
- return tokenize_list
-
- def cosine_similarity_calculator(self,a, b):
- """
- Caclula a similaridade de cossenos entre dois vetores
- """
- nominator = np.dot(a, b)
-
- a_norm = np.sqrt(np.sum(a**2))
- b_norm = np.sqrt(np.sum(b**2))
-
- denominator = a_norm * b_norm
-
- cosine_similarity = nominator / denominator
-
- return cosine_similarity
-
- def compare_sentences_with_cossine_similarity(self,text1,text2):
- """
- Compara duas sentenças com similaridade de cossenos
- """
- tokenize_list1 = self.tokenize_text(text1)
- tokenize_list2 = self.tokenize_text(text2)
-
- embeddigns_sentence1 = [self.glove.get_vector(t1) for t1 in tokenize_list1]
- embeddigns_sentence1_mean = np.mean(embeddigns_sentence1,axis=0)
- embeddigns_sentence2 = [self.glove.get_vector(t2) for t2 in tokenize_list2]
- embeddigns_sentence2_mean = np.mean(embeddigns_sentence2,axis=0)
- cosine_similarity = self.cosine_similarity_calculator(embeddigns_sentence1_mean,embeddigns_sentence2_mean)
- cosine_similarity = np.float64(cosine_similarity)
- return cosine_similarity
-
-
- def batch_average_cossine_similarity(self,list_ref_texts,list_gen_texts):
- pass
-
-
-class T5Finetuner(pl.LightningModule):
-
- def __init__(self,
- hparams):
-
- super(T5Finetuner, self).__init__()
-
-
- self.hparams = hparams
-
- # ---------- fixing seeds
- # self.seed_everything()
- pl.utilities.seed.seed_everything(seed = self.hparams.seed)
-
-
- # ---------- Model
- self.model = T5ForConditionalGeneration.from_pretrained(self.hparams.model_name)
-
- #----------Other infos
- self.i = 0
- self.step = "Experiment"
- self.softmax = torch.nn.Softmax(dim=1)
- self.loss_funtion = torch.nn.CrossEntropyLoss()
-
-
- #----------Metrics Trackers
- if self.hparams.track_metrics == True:
- glove_comparer = Glove_Embeddings_Comparer(glove_weights_path=self.hparams.glove_weights_path,device=self.hparams.device)
- self.valid_metrics_calculator = Metrics_Calculator(self.hparams,glove_comparer)
- self.test_metrics_calculator = Metrics_Calculator(self.hparams,glove_comparer)
-
- def predict(self,X_context:np.ndarray,num_gen_sentences=10):
- self.step = "Deployment"
- self.model.eval()
- self.hparams["all_data"] = {'X_test':np.array(X_context),'y_test':[]}
- self.hparams.num_gen_sentences = num_gen_sentences
- result = {}
- j = 0
- for i,batch in enumerate(tqdm(self.test_dataloader())):
- source_token_ids, source_masks, original_source = batch
- logits = self.forward(source_token_ids, source_masks,info_requested='logits')
- gen_quesitons = [self.hparams.tokenizer.decode(l, skip_special_tokens=True) for l in logits]
- questions_per_context = [gen_quesitons[s:s+self.hparams.num_gen_sentences] for s in list(range(0,len(gen_quesitons),self.hparams.num_gen_sentences))]
- result_batch = {j+k:{'context':original_source[k],'questions':questions_per_context[k]} for k in range(len(original_source))}
- result.update(result_batch)
- j +=len(batch)
-
- return result
-
-
- def forward(self, source_token_ids, source_mask, target_token_ids=None,
- target_mask=None,info_requested='loss'):
-
-
- if info_requested=='loss':
-
- # TODO calcular a loss dado os target_token_ids
- outputs = self.model(input_ids = source_token_ids, attention_mask = source_mask,labels = target_token_ids)
-
- # loss, predicted_token_ids = outputs[:2]
- loss = outputs[0]
- result = loss
- if info_requested=='logits':
- #num_return_sequences must be 1
- if info_requested=='logits':
- decoder_output = self.model.generate(
- input_ids =source_token_ids,
- attention_mask=source_mask,
- max_length= self.hparams.target_max_length,
- do_sample=True,
- num_return_sequences=self.hparams.num_gen_sentences,
- temperature = self.hparams.temperature,
- top_p=self.hparams.top_p,
- top_k=0)
-
- result = decoder_output
-
- return result
-
- def training_step(self, batch, batch_nb):
- # batch
- source_token_ids, source_masks, target_token_ids, target_masks, original_sources, original_targets = batch
-
- # fwd
- loss = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='loss')
- batch_metrics_dict = {'loss':loss}
- return batch_metrics_dict
-
-
- def validation_step(self, batch, batch_nb):
- # batch
- source_token_ids, source_masks, target_token_ids, target_masks, original_sources, original_targets = batch
-
- # fwd
- loss = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='loss')
- logits = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='logits')
-
- #Calc Metrics and Saving Results
- batch_metrics_dict = self.valid_metrics_calculator.generate_sentences_and_track_metrics_batch(logits,original_targets,original_sources,save_track_dict=True)
-
- tensorboard_logs = {'valid_'+key: value for (key, value) in batch_metrics_dict.items()}
- tensorboard_logs['valid_loss'] = loss.item()
-
- #include special values to batch metrics dict
- batch_metrics_dict['loss'] = loss
- batch_metrics_dict['log'] = tensorboard_logs
-
- for key, value in batch_metrics_dict.items():
- self.log(key, value, on_step=True, prog_bar=True, logger=True)
-
- return batch_metrics_dict
-
- def test_step(self, batch, batch_nb):
-
- # batch
- source_token_ids, source_masks, target_token_ids, target_masks, original_sources, original_targets = batch
-
- # fwd
- logits = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='logits')
-
- #Calc Metrics and Saving Results
- batch_metrics_dict = self.test_metrics_calculator.generate_sentences_and_track_metrics_batch(logits,original_targets,original_sources,save_track_dict=True)
-
- tensorboard_logs = {'test_'+key: value for (key, value) in batch_metrics_dict.items()}
-
- #include special values to batch metrics dict
- batch_metrics_dict['log'] = tensorboard_logs
- for key, value in batch_metrics_dict.items():
- self.log(key, value, on_step=True, prog_bar=True, logger=True)
-
- return batch_metrics_dict
-
- def get_epoch_results(self,outputs,step='train'):
-
- tensorboard_logs = {}
-
- if step != "test":
- temp_avg_loss_batch = [x["loss"] for x in outputs]
- avg_loss = torch.stack(temp_avg_loss_batch).mean()
-
- if step != "train":
- temp_avg_bleu1_batch = [x["Batch_Bleu_1"] for x in outputs]
- temp_avg_bleu2_batch = [x["Batch_Bleu_2"] for x in outputs]
- temp_avg_bleu3_batch = [x["Batch_Bleu_3"] for x in outputs]
- temp_avg_bleu4_batch = [x["Batch_Bleu_4"] for x in outputs]
- temp_avg_cider_batch = [x["Batch_CIDEr"] for x in outputs]
- temp_avg_rougeL_batch = [x["Batch_ROUGE_L"] for x in outputs]
- temp_avg_glove_cossine_similarity = [x["Batch_Glove_Cossine_Similarity"] for x in outputs]
-
- avg_bleu1 = np.stack(temp_avg_bleu1_batch).mean()
- avg_bleu2 = np.stack(temp_avg_bleu2_batch).mean()
- avg_bleu3 = np.stack(temp_avg_bleu3_batch).mean()
- avg_bleu4 = np.stack(temp_avg_bleu4_batch).mean()
- avg_cider = np.stack(temp_avg_cider_batch).mean()
- avg_rougeL = np.stack(temp_avg_rougeL_batch).mean()
- avg_glove_cossine_similarity = np.stack(temp_avg_glove_cossine_similarity).mean()
-
- tensorboard_logs[f"avg_{step}_bleu1"] = avg_bleu1
- tensorboard_logs[f"avg_{step}_bleu2"] = avg_bleu2
- tensorboard_logs[f"avg_{step}_bleu3"] = avg_bleu3
- tensorboard_logs[f"avg_{step}_bleu4"] = avg_bleu4
- tensorboard_logs[f"avg_{step}_cider"] = avg_cider
- tensorboard_logs[f"avg_{step}_rougeL"] = avg_rougeL
- tensorboard_logs[f"avg_{step}_rougeL"] = avg_glove_cossine_similarity
-
- if step != "test":
- tensorboard_logs[f"avg_{step}_loss"] = avg_loss.item()
-
- epoch_dict = tensorboard_logs.copy()
- epoch_dict['log'] = tensorboard_logs
-
- for key, value in epoch_dict.items():
- self.log(key, value, on_epoch=True, prog_bar=True, logger=True)
-
- return epoch_dict
-
- def training_epoch_end(self, outputs):
- if not outputs:
- return {}
- epoch_dict = self.get_epoch_results(outputs,'train')
-
-
- def validation_epoch_end(self, outputs):
- epoch_dict = self.get_epoch_results(outputs,'valid')
- return epoch_dict #must do to save checkpoints
-
- def test_epoch_end(self, outputs):
- epoch_dict = self.get_epoch_results(outputs,'test')
-
-
- def configure_optimizers(self):
- return torch.optim.AdamW(
- [p for p in self.parameters() if p.requires_grad],
- lr=self.hparams.learning_rate, eps=self.hparams.eps)
-
- def train_dataloader(self):
- self.train_dataset = self.hparams.CustomDataset(PREFIX=self.hparams.PREFIX,
- tokenizer=self.hparams.tokenizer,
- X_context=self.hparams.all_data['X_train'],
- y_question=self.hparams.all_data['y_train'],
- source_max_length=self.hparams.source_max_length,
- target_max_length=self.hparams.target_max_length,
- step=self.step,
- )
- shuffle = False if self.hparams.overfit else True
- return DataLoader(self.train_dataset, batch_size=self.hparams.train_batch_size, shuffle=shuffle,num_workers=cpu_count())
-
- def val_dataloader(self):
- self.valid_dataset = self.hparams.CustomDataset(PREFIX=self.hparams.PREFIX,
- tokenizer=self.hparams.tokenizer,
- X_context=self.hparams.all_data['X_valid'],
- y_question=self.hparams.all_data['y_valid'],
- source_max_length=self.hparams.source_max_length,
- target_max_length=self.hparams.target_max_length,
- step=self.step,
- )
- return DataLoader(self.valid_dataset, batch_size=self.hparams.eval_batch_size, shuffle=False,num_workers=cpu_count())
-
- def test_dataloader(self):
- self.test_dataset = self.hparams.CustomDataset(PREFIX=self.hparams.PREFIX,
- tokenizer=self.hparams.tokenizer,
- X_context=self.hparams.all_data['X_test'],
- y_question=self.hparams.all_data['y_test'],
- source_max_length=self.hparams.source_max_length,
- target_max_length=self.hparams.target_max_length,
- step=self.step,
- )
- return DataLoader(self.test_dataset, batch_size=self.hparams.eval_batch_size,shuffle=False, num_workers=cpu_count())
-
diff --git a/tasks/nlp-question-generator/model.py b/tasks/nlp-question-generator/model.py
new file mode 100644
index 00000000..9f2c8ffd
--- /dev/null
+++ b/tasks/nlp-question-generator/model.py
@@ -0,0 +1,199 @@
+import torch
+from tqdm import tqdm
+from multiprocessing import cpu_count
+from typing import List, Union, Optional
+import numpy as np
+import pandas as pd
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader
+from transformers import T5ForConditionalGeneration
+from metrics_calculator import Glove_Embeddings_Comparer, Metrics_Calculator
+
+class T5Finetuner(pl.LightningModule):
+    """PyTorch-Lightning fine-tuner for T5 question generation.
+
+    All configuration (model name, device, tokenizer, sampling parameters,
+    batch sizes, and the train/valid/test datasets) travels on the
+    `hparams` namespace supplied by the caller.
+    """
+
+    def __init__(self,
+                 hparams):
+
+        super(T5Finetuner, self).__init__()
+
+        self.hparams = hparams
+
+        # ---------- fixing seeds
+        # self.seed_everything()
+        pl.utilities.seed.seed_everything(seed = self.hparams.seed)
+
+        # ---------- Model
+        self.model = T5ForConditionalGeneration.from_pretrained(self.hparams.model_name)
+        self.model.to(self.hparams.device)
+
+        # #----------Metrics Trackers
+        # Only built when tracking is requested — loading GloVe weights is costly.
+        if self.hparams.track_metrics == True:
+            glove_comparer = Glove_Embeddings_Comparer(glove_weights_path=self.hparams.glove_weights_path,device=self.hparams.device)
+            self.valid_metrics_calculator = Metrics_Calculator(self.hparams,glove_comparer)
+            self.test_metrics_calculator = Metrics_Calculator(self.hparams,glove_comparer)
+
+    def forward(self, source_token_ids, source_mask, target_token_ids=None,
+                target_mask=None,info_requested='loss',num_gen_sentences = None):
+        """Dual-purpose forward: with info_requested='loss' returns the LM
+        loss given the labels; with 'logits' returns sampled generated
+        sequences (num_gen_sentences per source, top-p sampling)."""
+
+        if info_requested=='loss':
+
+            # TODO compute the loss given the target_token_ids
+            outputs = self.model(input_ids = source_token_ids, attention_mask = source_mask,labels = target_token_ids)
+
+            # loss, predicted_token_ids = outputs[:2]
+            loss = outputs[0]
+            result = loss
+        if info_requested=='logits':
+            #num_return_sequences must be 1
+            # NOTE(review): this nested duplicate check is redundant — the
+            # same condition was just tested on the line above.
+            if info_requested=='logits':
+
+                # fall back to the configured generation count when not overridden
+                num_gen_sentences = num_gen_sentences if num_gen_sentences else self.hparams.num_gen_sentences
+
+                decoder_output = self.model.generate(
+                    input_ids =source_token_ids,
+                    attention_mask=source_mask,
+                    max_length= self.hparams.target_max_length,
+                    do_sample=True,
+                    num_return_sequences=num_gen_sentences,
+                    temperature = self.hparams.temperature,
+                    top_p=self.hparams.top_p,
+                    top_k=0)
+
+                result = decoder_output
+
+        return result
+
+    def training_step(self, batch, batch_nb):
+        # batch
+        source_token_ids, source_masks, target_token_ids, target_masks, original_sources, original_targets = batch
+
+        # fwd
+        loss = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='loss')
+        # NOTE(review): the next two lines are dead code — the dict is
+        # rebuilt immediately below, discarding the 'train_loss' entry.
+        batch_metrics_dict = {}
+        batch_metrics_dict['train_loss'] = loss.item()
+        batch_metrics_dict = {'loss':loss}
+        return batch_metrics_dict
+
+    def validation_step(self, batch, batch_nb):
+        # batch
+        source_token_ids, source_masks, target_token_ids, target_masks, original_sources, original_targets = batch
+
+        # fwd: teacher-forced loss, then sampled generations for the metrics
+        loss = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='loss')
+        logits = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='logits')
+
+        #Calc Metrics and Saving Results
+        batch_metrics_dict = self.valid_metrics_calculator.generate_sentences_and_track_metrics_batch(logits,original_targets,original_sources,save_track_dict=True)
+
+        # prefix metric names with the step for logging
+        batch_metrics_dict = {'valid_'+key: value for (key, value) in batch_metrics_dict.items()}
+        batch_metrics_dict['valid_loss'] = loss.item()
+
+        #include special values to batch metrics dict
+        batch_metrics_dict['loss'] = loss
+
+        for key, value in batch_metrics_dict.items():
+            self.log(key, value, on_step=True, prog_bar=True, logger=True)
+
+        return batch_metrics_dict
+
+    def test_step(self, batch, batch_nb):
+
+        # batch
+        source_token_ids, source_masks, target_token_ids, target_masks, original_sources, original_targets = batch
+
+        # fwd: generation only — no loss is computed at test time
+        logits = self.forward(source_token_ids, source_masks, target_token_ids,info_requested='logits')
+
+        #Calc Metrics and Saving Results
+        batch_metrics_dict = self.test_metrics_calculator.generate_sentences_and_track_metrics_batch(logits,original_targets,original_sources,save_track_dict=True)
+
+        batch_metrics_dict = {'test_'+key: value for (key, value) in batch_metrics_dict.items()}
+
+        #include special values to batch metrics dict
+        for key, value in batch_metrics_dict.items():
+            self.log(key, value, on_step=True, prog_bar=True, logger=True)
+
+        return batch_metrics_dict
+
+    def get_epoch_results(self,outputs,step='train'):
+        """Aggregates the per-batch outputs of one epoch into averages,
+        logs them, and returns the epoch dict. `step` is 'train', 'valid'
+        or 'test'."""
+        tensorboard_logs = {}
+
+        if step != "test":
+            # test_step emits no 'loss' entry
+            temp_avg_loss_batch = [x["loss"] for x in outputs]
+            avg_loss = torch.stack(temp_avg_loss_batch).mean()
+
+        if step != "train":
+            # training_step emits no generation metrics
+            # temp_avg_bleu1_batch = [x[f"{step}_Batch_Bleu_1"] for x in outputs]
+            # temp_avg_bleu2_batch = [x[f"{step}_Batch_Bleu_2"] for x in outputs]
+            # temp_avg_bleu3_batch = [x[f"{step}_Batch_Bleu_3"] for x in outputs]
+            # temp_avg_bleu4_batch = [x[f"{step}_Batch_Bleu_4"] for x in outputs]
+            # temp_avg_cider_batch = [x[f"{step}_Batch_CIDEr"] for x in outputs]
+            # temp_avg_rougeL_batch = [x[f"{step}_Batch_ROUGE_L"] for x in outputs]
+            temp_avg_glove_cossine_similarity = [x[f"{step}_Batch_Glove_Cossine_Similarity"] for x in outputs]
+
+            # avg_bleu1 = np.stack(temp_avg_bleu1_batch).mean()
+            # avg_bleu2 = np.stack(temp_avg_bleu2_batch).mean()
+            # avg_bleu3 = np.stack(temp_avg_bleu3_batch).mean()
+            # avg_bleu4 = np.stack(temp_avg_bleu4_batch).mean()
+            # avg_cider = np.stack(temp_avg_cider_batch).mean()
+            # avg_rougeL = np.stack(temp_avg_rougeL_batch).mean()
+            avg_glove_cossine_similarity = np.stack(temp_avg_glove_cossine_similarity).mean()
+
+            # tensorboard_logs[f"avg_{step}_bleu1"] = avg_bleu1
+            # tensorboard_logs[f"avg_{step}_bleu2"] = avg_bleu2
+            # tensorboard_logs[f"avg_{step}_bleu3"] = avg_bleu3
+            # tensorboard_logs[f"avg_{step}_bleu4"] = avg_bleu4
+            # tensorboard_logs[f"avg_{step}_cider"] = avg_cider
+            # tensorboard_logs[f"avg_{step}_rougeL"] = avg_rougeL
+            tensorboard_logs[f"avg_{step}_glove_cossine_similarity"] = avg_glove_cossine_similarity
+
+        if step != "test":
+            tensorboard_logs[f"avg_{step}_loss"] = avg_loss.item()
+
+        epoch_dict = tensorboard_logs.copy()
+        epoch_dict['log'] = tensorboard_logs
+
+        for key, value in epoch_dict.items():
+            self.log(key, value, on_epoch=True, prog_bar=True, logger=True)
+
+        return epoch_dict
+
+    def training_epoch_end(self, outputs):
+        # outputs can be empty (e.g. fast_dev_run); nothing to aggregate then
+        if not outputs:
+            return {}
+        epoch_dict = self.get_epoch_results(outputs,'train')
+
+    def validation_epoch_end(self, outputs):
+        epoch_dict = self.get_epoch_results(outputs,'valid')
+        return epoch_dict #must do to save checkpoints
+
+    def test_epoch_end(self, outputs):
+        epoch_dict = self.get_epoch_results(outputs,'test')
+
+    def configure_optimizers(self):
+        # AdamW over all trainable parameters; lr/eps come from hparams
+        return torch.optim.AdamW(
+            [p for p in self.parameters() if p.requires_grad],
+            lr=self.hparams.learning_rate, eps=self.hparams.eps)
+
+    def train_dataloader(self):
+        # When overfitting on purpose, keep sample order deterministic
+        shuffle = False if self.hparams.overfit else True
+        return DataLoader(self.hparams.train_dataset, batch_size=self.hparams.train_batch_size, shuffle=shuffle,num_workers=cpu_count())
+
+    def val_dataloader(self):
+
+        return DataLoader(self.hparams.valid_dataset, batch_size=self.hparams.eval_batch_size, shuffle=False,num_workers=cpu_count())
+
+    def test_dataloader(self):
+
+        return DataLoader(self.hparams.test_dataset, batch_size=self.hparams.eval_batch_size,shuffle=False, num_workers=cpu_count())
+
diff --git a/tasks/nlp-question-generator/params.yaml b/tasks/nlp-question-generator/params.yaml
new file mode 100644
index 00000000..c622c00a
--- /dev/null
+++ b/tasks/nlp-question-generator/params.yaml
@@ -0,0 +1,38 @@
+# https://geekflare.com/python-yaml-intro/
+prepare_data:
+  test_size_from_dev: 0.5
+
+hparams:
+  model_name: "unicamp-dl/ptt5-base-portuguese-vocab"
+  PREFIX: "gerador_perguntas:"
+  save_every: 5000
+  num_gen_sentences: 2
+  # num_gen_sentences_infer: 10
+  no_repeat_ngram_size: 2
+  temperature: 0.7
+  top_p: 0.92
+  train_batch_size: 4
+  eval_batch_size: 32
+  inference_batch_size: 16
+  source_max_length: 512
+  target_max_length: 100
+  learning_rate: 3.0e-5
+  eps: 1.0e-08
+  seed: 13
+
+lightning_params:
+  num_gpus: 1
+  profiler: true  # canonical lowercase boolean (yamllint `truthy`)
+  max_epochs: 1
+  accumulate_grad_batches: 16
+  check_val_every_n_epoch: 1
+  progress_bar_refresh_rate: 1
+  gradient_clip_val: 1.0
+  fast_dev_run: false
+
+early_stop_callback:
+  monitor: 'avg_train_loss'  # matches the `avg_{step}_loss` key logged by the module
+  min_delta: 0.01
+  patience: 1
+  verbose: false
+  mode: 'min'
\ No newline at end of file
diff --git a/tasks/nlp-question-generator/select_queries.py b/tasks/nlp-question-generator/select_queries.py
new file mode 100644
index 00000000..fa9dc437
--- /dev/null
+++ b/tasks/nlp-question-generator/select_queries.py
@@ -0,0 +1,83 @@
+import os
+import pandas as pd
+from vident.io_utils import IO_Utils
+import faiss
+
+from vident.document_retriever.sparse_similarity.similarity import TfidfVectorizer
+
+from sklearn.cluster import KMeans
+from sklearn import metrics
+import matplotlib.pyplot as plt
+
+#import faiss
+import numpy as np
+
+
+class FaissKMeans:  # sklearn-style fit/predict facade backed by faiss.Kmeans
+    def __init__(self, n_clusters=8, n_init=10, max_iter=300):
+        self.n_clusters = n_clusters
+        self.n_init = n_init  # passed to faiss as nredo (number of restarts)
+        self.max_iter = max_iter  # passed to faiss as niter
+        self.kmeans = None  # set by fit()
+        self.cluster_centers_ = None  # (n_clusters, n_features) float32 centroids after fit
+        self.inertia_ = None  # final objective value after fit
+
+    def fit(self, X, y=None):  # y is ignored; optional default keeps the sklearn estimator API
+        self.kmeans = faiss.Kmeans(d=X.shape[1],
+                                   k=self.n_clusters,
+                                   niter=self.max_iter,
+                                   nredo=self.n_init)
+        self.kmeans.train(X.astype(np.float32))  # faiss requires float32 input
+        self.cluster_centers_ = self.kmeans.centroids
+        self.inertia_ = self.kmeans.obj[-1]  # objective at the last iteration
+
+    def predict(self, X):
+        return self.kmeans.index.search(X.astype(np.float32), 1)[1]  # search returns (distances, ids); [1] = nearest-centroid ids, shape (n, 1)
+
+
+if __name__ == '__main__':
+    root_dir = os.path.join(os.path.abspath(os.getcwd()).replace('= ',''))  # NOTE(review): .replace('= ','') looks like a paste artifact — confirm it is intentional
+    data_dir = os.path.join(root_dir,"data")
+    qgenerator_dir = os.path.join(data_dir,'qgenerator')
+    context_questions_map_path = os.path.join(qgenerator_dir,'context_questions_map.json')
+
+    io_utils = IO_Utils()
+    context_questions_map = io_utils.read_json(filepath=context_questions_map_path)  # keys like '9' map to dicts with 'context' and 'questions'
+    # import pdb;pdb.set_trace()
+    example_context = context_questions_map['9']['context']  # sample context; literal value quoted in the comment below
+    # '4.6 Análise Estatística: Os dados obtidos foram submetidos ao estudo da homogeneidade da variância (para estabilizar ou reduzir a variabilidade existente) através do método Box-Cox contido no PROC TRANSREG do Sistema SAS. Como para valores nulos a família de transformações de Box-Cox fica restrita, utilizou-se a variável somada a uma constante (+1.0). fitotoxicidade aos 6 e 18 DAAreinfestação de trapoeraba aos 41 DAAfoi sugerida a transformação dos dados com valor de lambda (+0.0), (-3.0) e (+0.0), respectivamente. Após a transformação dos dados as variáveis fitotoxicidade aos 18 DAAreinfestação de trapoeraba aos 41 DAAnão apresentaram distribuição normal, portanto para estabilizar a variabilidade dos tratamentos foi utilizada a estatística não paramétrica, através do Teste de Friedman. Os dados, então, foram submetidos a análise de variância, sendo a comparação das médias quando significativas realizadas pelo teste LSD ao nível de 5 % de probabilidade. Para a análise dos dados foi utilizado o software SAS.'
+    example_questions = context_questions_map['9']['questions']  # generated questions for that context; literal values quoted below
+    # ['Quando foi utilizado o método Box-Cox?',
+    # 'Qual foi o critério utilizado para analisar os dados para a análise de variância?',
+    # 'Qual foi a frequência do teste paramétrica?',
+    # 'Em que nível a análise estatística é realizada?',
+    # 'Qual foi o valor de lambda usado para estabilizar a variabilidade dos tratamentos?',
+    # 'Quantos dados foram submetidos a análise de variância?',
+    # 'Qual foi a função de equação usada para estabilizar a variabilidade dos tratamentos?',
+    # 'O que foi adicionado para estabilizar a variabilidade dos tratamentos?',
+    # 'Quantos tratamentos foram submetidos ao estudo da homogeneidade da variância?',
+    # 'Qual foi a análise de variância?']
+
+    print('example_questions:\n',example_questions)
+
+    vectorizer = TfidfVectorizer()  # TF-IDF features over the question strings (project vident implementation)
+    vectorizer.fit(example_questions)
+    vec = vectorizer.transform(example_questions)
+    X = vec.toarray()  # dense (n_questions, vocab_size) matrix for KMeans
+
+
+    kmeans = KMeans(n_clusters=2)  # cluster the questions into 2 groups; file name suggests this supports query selection
+    kmeans.fit(X)
+    y_kmeans = kmeans.predict(X)
+
+    print(kmeans.cluster_centers_)
+
+    print(kmeans.labels_)
+
+    #plt.scatter(X[:,0],X[:,1], c=kmeans.labels_, cmap='rainbow')
+
+    plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='viridis')  # NOTE(review): plots only the first two TF-IDF dimensions — illustrative, not a faithful projection
+
+    centers = kmeans.cluster_centers_
+    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)  # overlay cluster centroids
+    plt.show()
\ No newline at end of file
diff --git a/tests/datasets.py b/tests/datasets.py
index 254c53da..509d380c 100644
--- a/tests/datasets.py
+++ b/tests/datasets.py
@@ -292,7 +292,7 @@ def report_contexts_test_data():
return data
-def document_reader_test_data():
+def report_contexts_test_data():
data = {
"data": {
"ndarray": [["Na região da Amazônia, o fertilizante foliar Ômega utilizado na dessecação pré-colheita do feijão, var. Imperador Vermelho, realizado com 84,2 porcento de vagens maduras, resultou em dessecação eficaz (97%) quando utilizado na dose de 4.000 mL.ha-1 , com desempenho similar a Trunfo (1.500 mL.ha-1 ) e Reglone (2.000 mL.ha-1 ). O aumento da dose de Ômega para 5.000 ou 10.000 mL.ha-1 não resultou em diferença significativa na dessecação das plantas (Figura 6);","Qual o resultado da utilização do fertilizante foliar Ômega, quando utilizado cerrado?"],
diff --git a/tests/test_nlp_dense_document_retriever.py b/tests/test_nlp_dense_document_retriever.py
index a6580ae8..7de85537 100644
--- a/tests/test_nlp_dense_document_retriever.py
+++ b/tests/test_nlp_dense_document_retriever.py
@@ -50,11 +50,7 @@ def test_experiment_report_contexts(self):
"/dev/null",
)
data = datasets.report_contexts_test_data()
- print("######################################################")
- print(data)
with server.Server() as s:
- print("######################################################")
- print("s")
response = s.test(data=data,timeout=10)
ndarray = response["ndarray"]
self.assertEqual(len(ndarray[0]), 4) # 1 feature
\ No newline at end of file
diff --git a/tests/test_nlp_document_reader.py b/tests/test_nlp_document_reader.py
index ceff5c01..d4ce743f 100644
--- a/tests/test_nlp_document_reader.py
+++ b/tests/test_nlp_document_reader.py
@@ -73,7 +73,7 @@ def test_experiment(self):
"/dev/null",
)
- data = datasets.document_reader_test_data()
+ data = datasets.report_contexts_test_data()
with server.Server() as s:
response = s.test(data=data)
diff --git a/tests/test_nlp_question_generator.py b/tests/test_nlp_question_generator.py
new file mode 100644
index 00000000..f175f2c2
--- /dev/null
+++ b/tests/test_nlp_question_generator.py
@@ -0,0 +1,89 @@
+import os
+import unittest
+import uuid
+
+import papermill
+
+from tests import datasets, server
+
+EXPERIMENT_ID = str(uuid.uuid4())  # fresh ids per test run; exported as env vars in setUp for the notebooks
+OPERATOR_ID = str(uuid.uuid4())
+RUN_ID = str(uuid.uuid4())
+
+class TestQuestionGenerator(unittest.TestCase):  # end-to-end: execute Experiment/Deployment notebooks, then query the served model
+
+    def setUp(self):
+        # Set environment variables needed to run notebooks
+        os.environ["EXPERIMENT_ID"] = EXPERIMENT_ID
+        os.environ["OPERATOR_ID"] = OPERATOR_ID
+        os.environ["RUN_ID"] = RUN_ID
+
+        datasets.reports_contexts_small()  # presumably materializes /tmp/data/reports_contexts_small.csv — verify in tests/datasets.py
+
+        os.chdir("tasks/nlp-question-generator")  # notebooks are executed from the task directory
+
+    def tearDown(self):
+        datasets.clean()  # remove fixtures created in setUp
+        os.chdir("../../")  # restore repository root for subsequent tests
+
+    def test_experiment(self):
+
+        papermill.execute_notebook(
+            "Experiment.ipynb",
+            "/dev/null",  # discard the executed notebook
+            parameters=dict(
+                dataset="/tmp/data/reports_contexts_small.csv",
+                column_context="context",
+                column_question="question",
+                column_answer_start="answer_start",
+                column_answer_end="answer_end",
+                train_from_zero=False,
+                train_from_squad=False,
+                expand_context=True,
+                dev_size_from_data=0.2,
+                test_size_from_dev=0.5,
+                model_name="unicamp-dl/ptt5-base-portuguese-vocab",
+                PREFIX="gerador_perguntas:",
+                num_gen_sentences=2,
+                infer_num_gen_sentences=10,
+                train_batch_size=2,  # smaller than params.yaml defaults to keep CI fast
+                eval_batch_size=8,
+                infer_batch_size=8,
+                no_repeat_ngram_size=2,
+                temperature=0.7,
+                top_p=0.92,
+                source_max_length=512,
+                target_max_length=100,
+                learning_rate=3.0e-5,
+                eps=1.0e-08,
+                seed=13,
+                num_gpus=1,
+                profiler=True,
+                max_epochs=1,
+                accumulate_grad_batches=16,
+                check_val_every_n_epoch=1,
+                progress_bar_refresh_rate=1,
+                gradient_clip_val=1.0,
+                fast_dev_run=False,
+                monitor='avg_train_loss',
+                min_delta=0.01,
+                patience=1,
+                verbose=False,
+                mode='min'
+            ),
+        )
+
+        papermill.execute_notebook(
+            "Deployment.ipynb",
+            "/dev/null",
+        )
+
+        data = datasets.report_contexts_test_data()  # request payload for the deployed endpoint
+
+        with server.Server() as s:
+            response = s.test(data=data)
+
+        names = response["names"]
+        ndarray = response["ndarray"]
+        self.assertEqual(len(ndarray[0]), 4)  # each response row carries 4 values
+        self.assertEqual(len(names), 4)  # matching number of column names
\ No newline at end of file