From afbc805bd81bde2053406bfb095a0d2446d0db88 Mon Sep 17 00:00:00 2001
From: tabergma
Date: Thu, 20 Sep 2018 16:38:57 +0200
Subject: [PATCH 1/7] GH-16: Move weight extraction to training utils.

---
 flair/trainers/text_classification_trainer.py | 49 ++---------------
 flair/training_utils.py                       | 54 +++++++++++++++++--
 2 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/flair/trainers/text_classification_trainer.py b/flair/trainers/text_classification_trainer.py
index 7eef04fc27..c3eabeedd5 100644
--- a/flair/trainers/text_classification_trainer.py
+++ b/flair/trainers/text_classification_trainer.py
@@ -1,6 +1,4 @@
 import random
-from collections import defaultdict
-from functools import reduce
 from typing import List
 
 import torch
@@ -8,8 +6,8 @@
 
 from flair.data import Sentence, TaggedCorpus, Dictionary
 from flair.models.text_classification_model import TextClassifier
-from flair.training_utils import convert_labels_to_one_hot, calculate_micro_avg_metric, init_output_file, clear_embeddings, \
-    calculate_class_metrics
+from flair.training_utils import convert_labels_to_one_hot, calculate_micro_avg_metric, init_output_file, \
+    clear_embeddings, calculate_class_metrics, WeightExtractor
 
 MICRO_AVG_METRIC = 'MICRO_AVG'
@@ -54,9 +52,8 @@ def train(self,
         loss_txt = init_output_file(base_path, 'loss.txt')
         with open(loss_txt, 'a') as f:
             f.write('EPOCH\tITERATION\tDEV_LOSS\tTRAIN_LOSS\tDEV_F_SCORE\tTRAIN_F_SCORE\tDEV_ACC\tTRAIN_ACC\n')
-        weights_txt = init_output_file(base_path, 'weights.txt')
 
-        weights_index = defaultdict(lambda: defaultdict(lambda: list()))
+        weight_extractor = WeightExtractor(base_path)
 
         optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)
@@ -111,7 +108,7 @@ def train(self,
                         print("epoch {0} - iter {1}/{2} - loss {3:.8f} - lr {4:.4f} - bad epochs {5}".format(
                             epoch + 1, batch_no, len(batches), current_loss / seen_sentences, learning_rate, scheduler.num_bad_epochs))
                         iteration = epoch * len(batches) + batch_no
-                        self._extract_weights(iteration, weights_index, weights_txt)
+                        weight_extractor.extract_weights(self.model.state_dict(), iteration)
 
             current_loss /= len(train_data)
@@ -226,41 +223,3 @@ def evaluate(self, sentences: List[Sentence], eval_class_metrics: bool = False,
         metrics_dict = {metric.name: metric for metric in metrics}
 
         return metrics_dict, eval_loss
-
-    def _extract_weights(self, iteration, weights_index, weights_txt):
-        for key in self.model.state_dict().keys():
-
-            vec = self.model.state_dict()[key]
-            weights_to_watch = min(10, reduce(lambda x, y: x*y, list(vec.size())))
-
-            if key not in weights_index:
-                self._init_weights_index(key, weights_index, weights_to_watch)
-
-            for i in range(weights_to_watch):
-                vec = self.model.state_dict()[key]
-                for index in weights_index[key][i]:
-                    vec = vec[index]
-
-                value = vec.item()
-
-                with open(weights_txt, 'a') as f:
-                    f.write('{}\t{}\t{}\t{}\n'.format(iteration, key, i, float(value)))
-
-    def _init_weights_index(self, key, weights_index, weights_to_watch):
-        indices = {}
-
-        i = 0
-        while len(indices) < weights_to_watch:
-            vec = self.model.state_dict()[key]
-            cur_indices = []
-
-            for x in range(len(vec.size())):
-                index = random.randint(0, len(vec) - 1)
-                vec = vec[index]
-                cur_indices.append(index)
-
-            if cur_indices not in list(indices.values()):
-                indices[i] = cur_indices
-                i += 1
-
-        weights_index[key] = indices
\ No newline at end of file
diff --git a/flair/training_utils.py b/flair/training_utils.py
index c3aaf58a09..555e167ed4 100644
--- a/flair/training_utils.py
+++ b/flair/training_utils.py
@@ -1,9 +1,9 @@
-from typing import List, Dict
-
+import random
 import os
-import numpy as np
-
+from collections import defaultdict
+from typing import List
 from flair.data import Dictionary, Sentence
+from functools import reduce
 
 
 class Metric(object):
@@ -56,6 +56,52 @@ def print(self):
         print(self)
 
 
+class WeightExtractor(object):
+
+    def __init__(self, directory: str, number_of_weights: int = 10):
+        self.weights_file = init_output_file(directory, 'weights.txt')
+        self.weights_dict = defaultdict(lambda: defaultdict(lambda: list()))
+        self.number_of_weights = number_of_weights
+
+    def extract_weights(self, state_dict, iteration):
+        for key in state_dict.keys():
+
+            vec = state_dict[key]
+            weights_to_watch = min(self.number_of_weights, reduce(lambda x, y: x*y, list(vec.size())))
+
+            if key not in self.weights_dict:
+                self._init_weights_index(key, state_dict, weights_to_watch)
+
+            for i in range(weights_to_watch):
+                vec = state_dict[key]
+                for index in self.weights_dict[key][i]:
+                    vec = vec[index]
+
+                value = vec.item()
+
+                with open(self.weights_file, 'a') as f:
+                    f.write('{}\t{}\t{}\t{}\n'.format(iteration, key, i, float(value)))
+
+    def _init_weights_index(self, key, state_dict, weights_to_watch):
+        indices = {}
+
+        i = 0
+        while len(indices) < weights_to_watch:
+            vec = state_dict[key]
+            cur_indices = []
+
+            for x in range(len(vec.size())):
+                index = random.randint(0, len(vec) - 1)
+                vec = vec[index]
+                cur_indices.append(index)
+
+            if cur_indices not in list(indices.values()):
+                indices[i] = cur_indices
+                i += 1
+
+        self.weights_dict[key] = indices
+
+
 def clear_embeddings(sentences: List[Sentence]):
     """
     Clears the embeddings from all given sentences.

From 352637e6af64f7ea3ff587beeb904675a4462c76 Mon Sep 17 00:00:00 2001
From: tabergma
Date: Thu, 20 Sep 2018 17:35:29 +0200
Subject: [PATCH 2/7] GH-16: Standardized log output in text classifier and sequence labeler.

---
 flair/trainers/sequence_tagger_trainer.py     | 75 +++++++++----------
 flair/trainers/text_classification_trainer.py | 44 ++++++-----
 flair/training_utils.py                       |  4 +
 3 files changed, 64 insertions(+), 59 deletions(-)

diff --git a/flair/trainers/sequence_tagger_trainer.py b/flair/trainers/sequence_tagger_trainer.py
index 7fa766c3a7..e36f2493eb 100644
--- a/flair/trainers/sequence_tagger_trainer.py
+++ b/flair/trainers/sequence_tagger_trainer.py
@@ -12,7 +12,7 @@
 from flair.file_utils import cached_path
 from flair.models.sequence_tagger_model import SequenceTagger
 from flair.data import Sentence, Token, TaggedCorpus
-from flair.training_utils import Metric
+from flair.training_utils import Metric, init_output_file, WeightExtractor
 
 
 class SequenceTaggerTrainer:
@@ -36,10 +36,11 @@ def train(self,
         if self.model.tag_type in ['pos', 'upos']: evaluation_method = 'accuracy'
         print(evaluation_method)
 
-        os.makedirs(base_path, exist_ok=True)
+        loss_txt = init_output_file(base_path, 'loss.txt')
+        with open(loss_txt, 'a') as f:
+            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\tTRAIN_METRICS\tDEV_LOSS\tDEV_METRICS\tTEST_LOSS\tTEST_METRICS\n')
 
-        loss_txt = os.path.join(base_path, "loss.txt")
-        open(loss_txt, "w", encoding='utf-8').close()
+        weight_extractor = WeightExtractor(base_path)
 
         optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)
@@ -56,12 +57,8 @@ def train(self,
 
         # At any point you can hit Ctrl + C to break out of training early.
         try:
-            for epoch in range(0, max_epochs):
-
-                current_loss: int = 0
-
-                for group in optimizer.param_groups:
-                    learning_rate = group['lr']
+            for epoch in range(max_epochs):
+                print('-' * 100)
 
                 if not self.test_mode:
                     random.shuffle(train_data)
@@ -69,14 +66,15 @@ def train(self,
 
                 self.model.train()
 
-                batch_no: int = 0
+                current_loss: float = 0
+                seen_sentences = 0
+                modulo = max(1, int(len(batches) / 10))
 
-                for batch in batches:
-                    batch: List[Sentence] = batch
-                    batch_no += 1
+                for group in optimizer.param_groups:
+                    learning_rate = group['lr']
 
-                    if batch_no % 100 == 0:
-                        print("%d of %d (%f)" % (batch_no, len(batches), float(batch_no / len(batches))))
+                for batch_no, batch in enumerate(batches):
+                    batch: List[Sentence] = batch
 
                     optimizer.zero_grad()
@@ -84,35 +82,34 @@ def train(self,
                     loss = self.model.neg_log_likelihood(batch, self.model.tag_type)
 
                     current_loss += loss.item()
+                    seen_sentences += len(batch)
 
                     loss.backward()
-                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)
-                    optimizer.step()
-                    sys.stdout.write('.')
-                    sys.stdout.flush()
+                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)
+                    optimizer.step()
 
                     if not embeddings_in_memory:
                         self.clear_embeddings_in_batch(batch)
 
+                    if batch_no % modulo == 0:
+                        print("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
+                            epoch + 1, batch_no, len(batches), current_loss / seen_sentences))
+                        iteration = epoch * len(batches) + batch_no
+                        weight_extractor.extract_weights(self.model.state_dict(), iteration)
+
                 current_loss /= len(train_data)
 
                 # switch to eval mode
                 self.model.eval()
 
+                print('-' * 100)
+
                 if not train_with_dev:
-                    print('.. evaluating... dev... ')
-                    dev_score, dev_fp, dev_result = self.evaluate(self.corpus.dev, base_path,
+                    dev_score, dev_metric = self.evaluate(self.corpus.dev, base_path,
                                                            evaluation_method=evaluation_method,
                                                            embeddings_in_memory=embeddings_in_memory)
-                else:
-                    dev_fp = 0
-                    dev_result = '_'
 
-                print('test... ')
-                test_score, test_fp, test_result = self.evaluate(self.corpus.test, base_path,
+                test_score, test_metric = self.evaluate(self.corpus.test, base_path,
                                                           evaluation_method=evaluation_method,
                                                           embeddings_in_memory=embeddings_in_memory)
@@ -122,16 +119,16 @@ def train(self,
                 # anneal against train loss if training with dev, otherwise anneal against dev score
                 scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_score)
 
-                summary = '%d' % epoch + '\t({:%H:%M:%S})'.format(datetime.datetime.now()) \
-                          + '\t%f\t%d\t%f\tDEV %d\t' % (
-                    current_loss, scheduler.num_bad_epochs, learning_rate, dev_fp) + dev_result
-                summary = summary.replace('\n', '')
-                summary += '\tTEST \t%d\t' % test_fp + test_result
+                if not train_with_dev:
+                    print("{0:<7} epoch {1} - lr {2:.4f} - bad epochs {3} - f-score {4:.4f} - acc {5:.4f}".format(
+                        'DEV', epoch + 1, learning_rate, scheduler.num_bad_epochs, dev_metric.f_score(), dev_metric.accuracy()))
+                print("{0:<7} epoch {1} - lr {2:.4f} - bad epochs {3} - f-score {4:.4f} - acc {5:.4f}".format(
+                    'TEST', epoch + 1, learning_rate, scheduler.num_bad_epochs, test_metric.f_score(), test_metric.accuracy()))
 
-                print(summary)
-                with open(loss_txt, "a") as loss_file:
-                    loss_file.write('%s\n' % summary)
-                    loss_file.close()
+                with open(loss_txt, 'a') as f:
+                    dev_metric_str = dev_metric.to_csv() if dev_metric is not None else '_'
+                    f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
+                        epoch, datetime.datetime.now(), '_', '_', '_', dev_metric_str, '_', test_metric.to_csv()))
 
                 # save if model is current best and we use dev data for model selection
                 if save_model and not train_with_dev and dev_score == scheduler.best:
@@ -210,11 +207,11 @@ def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method:
 
         if evaluation_method == 'accuracy':
             score = metric.accuracy()
-            return score, metric._fp, str(score)
+            return score, metric
 
         if evaluation_method == 'F1':
             score = metric.f_score()
-            return score, metric._fp, str(metric)
+            return score, metric
 
     def clear_embeddings_in_batch(self, batch: List[Sentence]):
         for sentence in batch:
diff --git a/flair/trainers/text_classification_trainer.py b/flair/trainers/text_classification_trainer.py
index c3eabeedd5..c741e60f92 100644
--- a/flair/trainers/text_classification_trainer.py
+++ b/flair/trainers/text_classification_trainer.py
@@ -1,3 +1,4 @@
+import datetime
 import random
 from typing import List
 
 import torch
@@ -51,7 +52,7 @@ def train(self,
 
         loss_txt = init_output_file(base_path, 'loss.txt')
         with open(loss_txt, 'a') as f:
-            f.write('EPOCH\tITERATION\tDEV_LOSS\tTRAIN_LOSS\tDEV_F_SCORE\tTRAIN_F_SCORE\tDEV_ACC\tTRAIN_ACC\n')
+            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\tTRAIN_METRICS\tDEV_LOSS\tDEV_METRICS\tTEST_LOSS\tTEST_METRICS\n')
 
         weight_extractor = WeightExtractor(base_path)
@@ -86,6 +87,9 @@ def train(self,
                 seen_sentences = 0
                 modulo = max(1, int(len(batches) / 10))
 
+                for group in optimizer.param_groups:
+                    learning_rate = group['lr']
+
                 for batch_no, batch in enumerate(batches):
                     scores = self.model.forward(batch)
                     loss = self.model.calculate_loss(scores, batch)
@@ -102,11 +106,8 @@ def train(self,
                         clear_embeddings(batch)
 
                     if batch_no % modulo == 0:
-                        for group in optimizer.param_groups:
-                            learning_rate = group['lr']
-
-                        print("epoch {0} - iter {1}/{2} - loss {3:.8f} - lr {4:.4f} - bad epochs {5}".format(
-                            epoch + 1, batch_no, len(batches), current_loss / seen_sentences, learning_rate, scheduler.num_bad_epochs))
+                        print("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
+                            epoch + 1, batch_no, len(batches), current_loss / seen_sentences))
                         iteration = epoch * len(batches) + batch_no
                         weight_extractor.extract_weights(self.model.state_dict(), iteration)
@@ -116,27 +117,30 @@ def train(self,
 
                 print('-' * 100)
 
-                train_f_score = train_acc = train_loss = 0
+                dev_metric = train_metric = None
+                dev_loss = train_loss = '_'
+
                 if eval_on_train:
-                    train_acc, train_f_score, train_loss = self._calculate_evaluation_results_for(
-                        'TRAIN', self.corpus.train, embeddings_in_memory, epoch, eval_mini_batch_size)
+                    train_metric, train_loss = self._calculate_evaluation_results_for(
+                        'TRAIN', self.corpus.train, embeddings_in_memory, epoch, eval_mini_batch_size, learning_rate, scheduler.num_bad_epochs)
 
-                dev_f_score = dev_acc = dev_loss = 0
                 if not train_with_dev:
-                    dev_acc, dev_f_score, dev_loss = self._calculate_evaluation_results_for(
-                        'DEV', self.corpus.dev, embeddings_in_memory, epoch, eval_mini_batch_size)
+                    dev_metric, dev_loss = self._calculate_evaluation_results_for(
+                        'DEV', self.corpus.dev, embeddings_in_memory, epoch, eval_mini_batch_size, learning_rate, scheduler.num_bad_epochs)
 
                 with open(loss_txt, 'a') as f:
-                    f.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
-                        epoch, epoch * len(batches), dev_loss, train_loss, dev_f_score, train_f_score, dev_acc, train_acc))
+                    train_metric_str = train_metric.to_csv() if train_metric is not None else '_'
+                    dev_metric_str = dev_metric.to_csv() if dev_metric is not None else '_'
+                    f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
+                        epoch, datetime.datetime.now(), train_loss, train_metric_str, dev_loss, dev_metric_str, '_', '_'))
 
                 self.model.train()
 
                 # anneal against train loss if training with dev, otherwise anneal against dev score
-                scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_f_score)
+                scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_metric.f_score())
 
                 is_best_model_so_far: bool = False
-                current_score = dev_f_score if not train_with_dev else train_f_score
+                current_score = dev_metric.f_score() if not train_with_dev else train_metric.f_score()
 
                 if current_score > best_score:
                     best_score = current_score
@@ -174,17 +178,17 @@ def train(self,
                 model_save_file.close()
             print('done')
 
-    def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in_memory, epoch, mini_batch_size):
+    def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in_memory, epoch, mini_batch_size, learning_rate, num_bad_epochs):
         metrics, loss = self.evaluate(dataset, mini_batch_size=mini_batch_size,
                                       embeddings_in_memory=embeddings_in_memory)
 
         f_score = metrics[MICRO_AVG_METRIC].f_score()
         acc = metrics[MICRO_AVG_METRIC].accuracy()
 
-        print("{0:<7} epoch {1} - loss {2:.8f} - f-score {3:.4f} - acc {4:.4f}".format(
-            dataset_name, epoch + 1, loss, f_score, acc))
+        print("{0:<7} epoch {1} - lr {2:.4f} - bad epochs {3} - loss {4:.8f} - f-score {5:.4f} - acc {6:.4f}".format(
+            dataset_name, epoch + 1, learning_rate, num_bad_epochs, loss, f_score, acc))
 
-        return acc, f_score, loss
+        return metrics[MICRO_AVG_METRIC], loss
 
     def evaluate(self, sentences: List[Sentence], eval_class_metrics: bool = False, mini_batch_size: int = 16,
                  embeddings_in_memory: bool = True) -> (dict, float):
diff --git a/flair/training_utils.py b/flair/training_utils.py
index 555e167ed4..837175e04d 100644
--- a/flair/training_utils.py
+++ b/flair/training_utils.py
@@ -48,6 +48,10 @@ def accuracy(self):
             return (self._tp + self._tn) / (self._tp + self._tn + self._fp + self._fn)
         return 0.0
 
+    def to_csv(self):
+        return '{},{},{},{},{},{},{},{}'.format(
+            self._tp, self._tn, self._fp, self._fn, self.precision(), self.recall(), self.f_score(), self.accuracy())
+
     def __str__(self):
         return '{0:<20}\tprecision: {1:.4f} - recall: {2:.4f} - accuracy: {3:.4f} - f1-score: {4:.4f}'.format(
             self.name, self.precision(), self.recall(), self.accuracy(), self.f_score())

From 3c25909296b8c4c1b0a3de3e52569eb2e5c0d133 Mon Sep 17 00:00:00 2001
From: tabergma
Date: Thu, 20 Sep 2018 17:53:02 +0200
Subject: [PATCH 3/7] GH-16: Adapt epoch summary.

---
 flair/trainers/sequence_tagger_trainer.py     |  9 +++++----
 flair/trainers/text_classification_trainer.py | 11 ++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/flair/trainers/sequence_tagger_trainer.py b/flair/trainers/sequence_tagger_trainer.py
index e36f2493eb..b31532f4d2 100644
--- a/flair/trainers/sequence_tagger_trainer.py
+++ b/flair/trainers/sequence_tagger_trainer.py
@@ -119,11 +119,12 @@ def train(self,
                 # anneal against train loss if training with dev, otherwise anneal against dev score
                 scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_score)
 
+                print("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
                 if not train_with_dev:
-                    print("{0:<7} epoch {1} - lr {2:.4f} - bad epochs {3} - f-score {4:.4f} - acc {5:.4f}".format(
-                        'DEV', epoch + 1, learning_rate, scheduler.num_bad_epochs, dev_metric.f_score(), dev_metric.accuracy()))
-                print("{0:<7} epoch {1} - lr {2:.4f} - bad epochs {3} - f-score {4:.4f} - acc {5:.4f}".format(
-                    'TEST', epoch + 1, learning_rate, scheduler.num_bad_epochs, test_metric.f_score(), test_metric.accuracy()))
+                    print("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                        'DEV', dev_metric.f_score(), dev_metric.accuracy(), dev_metric._tp, dev_metric._fp, dev_metric._fn, dev_metric._tn))
+                print("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                    'TEST', test_metric.f_score(), test_metric.accuracy(), test_metric._tp, test_metric._fp, test_metric._fn, test_metric._tn))
 
                 with open(loss_txt, 'a') as f:
                     dev_metric_str = dev_metric.to_csv() if dev_metric is not None else '_'
diff --git a/flair/trainers/text_classification_trainer.py b/flair/trainers/text_classification_trainer.py
index c741e60f92..1148d41f6b 100644
--- a/flair/trainers/text_classification_trainer.py
+++ b/flair/trainers/text_classification_trainer.py
@@ -116,17 +116,18 @@ def train(self,
                 self.model.eval()
 
                 print('-' * 100)
+                print("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
 
                 dev_metric = train_metric = None
                 dev_loss = train_loss = '_'
 
                 if eval_on_train:
                     train_metric, train_loss = self._calculate_evaluation_results_for(
-                        'TRAIN', self.corpus.train, embeddings_in_memory, epoch, eval_mini_batch_size, learning_rate, scheduler.num_bad_epochs)
+                        'TRAIN', self.corpus.train, embeddings_in_memory, eval_mini_batch_size)
 
                 if not train_with_dev:
                     dev_metric, dev_loss = self._calculate_evaluation_results_for(
-                        'DEV', self.corpus.dev, embeddings_in_memory, epoch, eval_mini_batch_size, learning_rate, scheduler.num_bad_epochs)
+                        'DEV', self.corpus.dev, embeddings_in_memory, eval_mini_batch_size)
 
                 with open(loss_txt, 'a') as f:
                     train_metric_str = train_metric.to_csv() if train_metric is not None else '_'
@@ -178,15 +179,15 @@ def train(self,
                 model_save_file.close()
             print('done')
 
-    def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in_memory, epoch, mini_batch_size, learning_rate, num_bad_epochs):
+    def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in_memory, mini_batch_size):
         metrics, loss = self.evaluate(dataset, mini_batch_size=mini_batch_size,
                                       embeddings_in_memory=embeddings_in_memory)
 
         f_score = metrics[MICRO_AVG_METRIC].f_score()
         acc = metrics[MICRO_AVG_METRIC].accuracy()
 
-        print("{0:<7} epoch {1} - lr {2:.4f} - bad epochs {3} - loss {4:.8f} - f-score {5:.4f} - acc {6:.4f}".format(
-            dataset_name, epoch + 1, learning_rate, num_bad_epochs, loss, f_score, acc))
+        print("{0:<5}: loss {1:.8f} - f-score {2:.4f} - acc {3:.4f}".format(
+            dataset_name, loss, f_score, acc))
 
         return metrics[MICRO_AVG_METRIC], loss

From 145acda6874d71c1ec7af213f4a90bc2c9be7d5e Mon Sep 17 00:00:00 2001
From: tabergma
Date: Thu, 20 Sep 2018 18:03:19 +0200
Subject: [PATCH 4/7] GH-16: Add logger.

---
 flair/__init__.py                             |  9 ++++++
 flair/trainers/sequence_tagger_trainer.py     | 29 +++++++++----------
 flair/trainers/text_classification_trainer.py | 26 +++++++++--------
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/flair/__init__.py b/flair/__init__.py
index fe00a5bb01..37250c116b 100644
--- a/flair/__init__.py
+++ b/flair/__init__.py
@@ -1,2 +1,11 @@
 from . import data
 from . import models
+
+import sys
+import logging
+
+logger = logging.getLogger(__name__)
+
+FORMAT = '%(asctime)-15s %(message)s'
+
+logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
\ No newline at end of file
diff --git a/flair/trainers/sequence_tagger_trainer.py b/flair/trainers/sequence_tagger_trainer.py
index b31532f4d2..5a86ca16d2 100644
--- a/flair/trainers/sequence_tagger_trainer.py
+++ b/flair/trainers/sequence_tagger_trainer.py
@@ -1,19 +1,18 @@
-from subprocess import run, PIPE
 from typing import List
 
 import datetime
 import os
 import random
-import re
-import sys
+import logging
 
 import torch
 from torch.optim.lr_scheduler import ReduceLROnPlateau
 
-from flair.file_utils import cached_path
 from flair.models.sequence_tagger_model import SequenceTagger
 from flair.data import Sentence, Token, TaggedCorpus
 from flair.training_utils import Metric, init_output_file, WeightExtractor
 
+log = logging.getLogger(__name__)
+
 
 class SequenceTaggerTrainer:
     def __init__(self, model: SequenceTagger, corpus: TaggedCorpus, test_mode: bool = False) -> None:
@@ -34,7 +33,7 @@ def train(self,
         evaluation_method = 'F1'
         if self.model.tag_type in ['pos', 'upos']: evaluation_method = 'accuracy'
-        print(evaluation_method)
+        log.info(evaluation_method)
 
         loss_txt = init_output_file(base_path, 'loss.txt')
         with open(loss_txt, 'a') as f:
@@ -58,7 +57,7 @@ def train(self,
         try:
             for epoch in range(max_epochs):
-                print('-' * 100)
+                log.info('-' * 100)
 
                 if not self.test_mode:
                     random.shuffle(train_data)
@@ -92,7 +91,7 @@ def train(self,
                         self.clear_embeddings_in_batch(batch)
 
                     if batch_no % modulo == 0:
-                        print("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
+                        log.info("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
                             epoch + 1, batch_no, len(batches), current_loss / seen_sentences))
                         iteration = epoch * len(batches) + batch_no
                         weight_extractor.extract_weights(self.model.state_dict(), iteration)
@@ -102,7 +101,7 @@ def train(self,
                 # switch to eval mode
                 self.model.eval()
 
-                print('-' * 100)
+                log.info('-' * 100)
 
                 if not train_with_dev:
                     dev_score, dev_metric = self.evaluate(self.corpus.dev, base_path,
@@ -119,11 +118,11 @@ def train(self,
                 # anneal against train loss if training with dev, otherwise anneal against dev score
                 scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_score)
 
-                print("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
+                log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
                 if not train_with_dev:
-                    print("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                    log.info("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
                         'DEV', dev_metric.f_score(), dev_metric.accuracy(), dev_metric._tp, dev_metric._fp, dev_metric._fn, dev_metric._tn))
-                print("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                log.info("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
                     'TEST', test_metric.f_score(), test_metric.accuracy(), test_metric._tp, test_metric._fp, test_metric._fn, test_metric._tn))
 
                 with open(loss_txt, 'a') as f:
@@ -139,11 +138,11 @@ def train(self,
                 if save_model and train_with_dev:
                     self.model.save(base_path + "/final-model.pt")
         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
-            print('saving model')
+            log.info('-' * 89)
+            log.info('Exiting from training early')
+            log.info('saving model')
             self.model.save(base_path + "/final-model.pt")
-            print('done')
+            log.info('done')
 
     def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method: str = 'F1',
                  embeddings_in_memory: bool = True):
diff --git a/flair/trainers/text_classification_trainer.py b/flair/trainers/text_classification_trainer.py
index 1148d41f6b..9533985766 100644
--- a/flair/trainers/text_classification_trainer.py
+++ b/flair/trainers/text_classification_trainer.py
@@ -1,5 +1,6 @@
 import datetime
 import random
+import logging
 from typing import List
 
 import torch
@@ -12,6 +13,7 @@
 
 MICRO_AVG_METRIC = 'MICRO_AVG'
 
+log = logging.getLogger(__name__)
 
 class TextClassifierTrainer:
     """
@@ -73,7 +75,7 @@ def train(self,
         best_score = 0
 
         for epoch in range(max_epochs):
-            print('-' * 100)
+            log.info('-' * 100)
 
             if not self.test_mode:
                 random.shuffle(train_data)
@@ -106,7 +108,7 @@ def train(self,
                         clear_embeddings(batch)
 
                     if batch_no % modulo == 0:
-                        print("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
+                        log.info("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
                             epoch + 1, batch_no, len(batches), current_loss / seen_sentences))
                         iteration = epoch * len(batches) + batch_no
                         weight_extractor.extract_weights(self.model.state_dict(), iteration)
@@ -115,8 +117,8 @@ def train(self,
 
                 self.model.eval()
 
-                print('-' * 100)
-                print("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
+                log.info('-' * 100)
+                log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
 
                 dev_metric = train_metric = None
                 dev_loss = train_loss = '_'
@@ -156,8 +158,8 @@ def train(self,
             if save_model:
                 self.model = TextClassifier.load_from_file(base_path + "/model.pt")
 
-            print('-' * 100)
-            print('Testing using best model ...')
+            log.info('-' * 100)
+            log.info('Testing using best model ...')
 
             self.model.eval()
 
             test_metrics, test_loss = self.evaluate(
@@ -168,16 +170,16 @@ def train(self,
                 metric.print()
             self.model.train()
 
-            print('-' * 100)
+            log.info('-' * 100)
 
         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
-            print('saving model')
+            log.info('-' * 89)
+            log.info('Exiting from training early')
+            log.info('saving model')
 
             with open(base_path + "/final-model.pt", 'wb') as model_save_file:
                 torch.save(self.model, model_save_file, pickle_protocol=4)
                 model_save_file.close()
 
-            print('done')
+            log.info('done')
 
     def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in_memory, mini_batch_size):
         metrics, loss = self.evaluate(dataset, mini_batch_size=mini_batch_size,
@@ -186,7 +188,7 @@ def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in
         f_score = metrics[MICRO_AVG_METRIC].f_score()
         acc = metrics[MICRO_AVG_METRIC].accuracy()
 
-        print("{0:<5}: loss {1:.8f} - f-score {2:.4f} - acc {3:.4f}".format(
+        log.info("{0:<5}: loss {1:.8f} - f-score {2:.4f} - acc {3:.4f}".format(
             dataset_name, loss, f_score, acc))
 
         return metrics[MICRO_AVG_METRIC], loss

From ffc0b8612c3ef0e9bd3e8799201c54786f159244 Mon Sep 17 00:00:00 2001
From: tabergma
Date: Tue, 25 Sep 2018 10:04:55 +0200
Subject: [PATCH 5/7] GH-16: Metric to tsv

---
 flair/trainers/sequence_tagger_trainer.py     | 10 +++++---
 flair/trainers/text_classification_trainer.py | 13 +++++-----
 flair/training_utils.py                       | 25 +++++++++++++++----
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/flair/trainers/sequence_tagger_trainer.py b/flair/trainers/sequence_tagger_trainer.py
index 5a86ca16d2..e20bec6e79 100644
--- a/flair/trainers/sequence_tagger_trainer.py
+++ b/flair/trainers/sequence_tagger_trainer.py
@@ -35,9 +35,10 @@ def train(self,
         if self.model.tag_type in ['pos', 'upos']: evaluation_method = 'accuracy'
         log.info(evaluation_method)
 
-        loss_txt = init_output_file(base_path, 'loss.txt')
+        loss_txt = init_output_file(base_path, 'loss.tsv')
         with open(loss_txt, 'a') as f:
-            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\tTRAIN_METRICS\tDEV_LOSS\tDEV_METRICS\tTEST_LOSS\tTEST_METRICS\n')
+            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n'.format(
+                Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'), Metric.tsv_header('TEST')))
 
         weight_extractor = WeightExtractor(base_path)
@@ -103,6 +104,7 @@ def train(self,
 
                 log.info('-' * 100)
 
+                dev_score = dev_metric = None
                 if not train_with_dev:
                     dev_score, dev_metric = self.evaluate(self.corpus.dev, base_path,
                                                           evaluation_method=evaluation_method,
@@ -126,9 +128,9 @@ def train(self,
                     'TEST', test_metric.f_score(), test_metric.accuracy(), test_metric._tp, test_metric._fp, test_metric._fn, test_metric._tn))
 
                 with open(loss_txt, 'a') as f:
-                    dev_metric_str = dev_metric.to_csv() if dev_metric is not None else '_'
+                    dev_metric_str = dev_metric.to_tsv() if dev_metric is not None else Metric.to_empty_tsv()
                     f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
-                        epoch, datetime.datetime.now(), '_', '_', '_', dev_metric_str, '_', test_metric.to_csv()))
+                        epoch, datetime.datetime.now(), '_', Metric.to_empty_tsv(), '_', dev_metric_str, '_', test_metric.to_tsv()))
 
                 # save if model is current best and we use dev data for model selection
                 if save_model and not train_with_dev and dev_score == scheduler.best:
diff --git a/flair/trainers/text_classification_trainer.py b/flair/trainers/text_classification_trainer.py
index 9533985766..f220196bcf 100644
--- a/flair/trainers/text_classification_trainer.py
+++ b/flair/trainers/text_classification_trainer.py
@@ -9,7 +9,7 @@
 from flair.data import Sentence, TaggedCorpus, Dictionary
 from flair.models.text_classification_model import TextClassifier
 from flair.training_utils import convert_labels_to_one_hot, calculate_micro_avg_metric, init_output_file, \
-    clear_embeddings, calculate_class_metrics, WeightExtractor
+    clear_embeddings, calculate_class_metrics, WeightExtractor, Metric
 
 MICRO_AVG_METRIC = 'MICRO_AVG'
@@ -52,9 +52,10 @@ def train(self,
         or not
         """
 
-        loss_txt = init_output_file(base_path, 'loss.txt')
+        loss_txt = init_output_file(base_path, 'loss.tsv')
         with open(loss_txt, 'a') as f:
-            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\tTRAIN_METRICS\tDEV_LOSS\tDEV_METRICS\tTEST_LOSS\tTEST_METRICS\n')
+            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n'.format(
+                Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'), Metric.tsv_header('TEST')))
 
         weight_extractor = WeightExtractor(base_path)
@@ -132,10 +133,10 @@ def train(self,
                         'DEV', self.corpus.dev, embeddings_in_memory, eval_mini_batch_size)
 
                 with open(loss_txt, 'a') as f:
-                    train_metric_str = train_metric.to_csv() if train_metric is not None else '_'
-                    dev_metric_str = dev_metric.to_csv() if dev_metric is not None else '_'
+                    train_metric_str = train_metric.to_tsv() if train_metric is not None else Metric.to_empty_tsv()
+                    dev_metric_str = dev_metric.to_tsv() if dev_metric is not None else Metric.to_empty_tsv()
                     f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
-                        epoch, datetime.datetime.now(), train_loss, train_metric_str, dev_loss, dev_metric_str, '_', '_'))
+                        epoch, datetime.datetime.now(), train_loss, train_metric_str, dev_loss, dev_metric_str, '_', Metric.to_empty_tsv()))
 
                 self.model.train()
diff --git a/flair/training_utils.py b/flair/training_utils.py
index 837175e04d..fbe70303fc 100644
--- a/flair/training_utils.py
+++ b/flair/training_utils.py
@@ -1,4 +1,5 @@
 import random
+import logging
 import os
 from collections import defaultdict
 from typing import List
@@ -6,6 +7,9 @@
 from functools import reduce
 
 
+log = logging.getLogger(__name__)
+
+
 class Metric(object):
 
     def __init__(self, name):
@@ -48,17 +52,28 @@ def accuracy(self):
             return (self._tp + self._tn) / (self._tp + self._tn + self._fp + self._fn)
         return 0.0
 
-    def to_csv(self):
-        return '{},{},{},{},{},{},{},{}'.format(
+    def to_tsv(self):
+        return '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
             self._tp, self._tn, self._fp, self._fn, self.precision(), self.recall(), self.f_score(), self.accuracy())
 
+    def print(self):
+        log.info(self)
+
+    @staticmethod
+    def tsv_header(prefix=None):
+        if prefix:
+            return '{0}_TP\t{0}_TN\t{0}_FP\t{0}_FN\t{0}_PRECISION\t{0}_RECALL\t{0}_F-SCORE\t{0}_ACCURACY'.format(prefix)
+
+        return 'TP\tTN\tFP\tFN\tPRECISION\tRECALL\tF-SCORE\tACCURACY'
+
+    @staticmethod
+    def to_empty_tsv():
+        return '_\t_\t_\t_\t_\t_\t_\t_'
+
     def __str__(self):
         return '{0:<20}\tprecision: {1:.4f} - recall: {2:.4f} - accuracy: {3:.4f} - f1-score: {4:.4f}'.format(
             self.name, self.precision(), self.recall(), self.accuracy(), self.f_score())
 
-    def print(self):
-        print(self)
-

From 167c496ce2c614d6aeeedf5bc58e619b35c574e8 Mon Sep 17 00:00:00 2001
From: tabergma
Date: Thu, 27 Sep 2018 09:09:25 +0200
Subject: [PATCH 6/7] GH-16: Replace print with log.info

---
 flair/__init__.py                         |  3 ++-
 flair/data.py                             | 27 +++++++++++++++++------
 flair/data_fetcher.py                     |  7 ++++--
 flair/trainers/language_model_trainer.py  | 27 +++++++++++++----------
 flair/trainers/sequence_tagger_trainer.py |  2 +-
 5 files changed, 43 insertions(+), 23 deletions(-)

diff --git a/flair/__init__.py b/flair/__init__.py
index 37250c116b..c4a7b48757 100644
--- a/flair/__init__.py
+++ b/flair/__init__.py
@@ -8,4 +8,5 @@
 
 FORMAT = '%(asctime)-15s %(message)s'
 
-logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
\ No newline at end of file
+logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
+logging.getLogger('flair').setLevel(logging.INFO)
\ No newline at end of file
diff --git a/flair/data.py b/flair/data.py
index 4ca3953402..1bc71889d8 100644
--- a/flair/data.py
+++ b/flair/data.py
@@ -1,6 +1,7 @@
 from typing import List, Dict, Union
 
 import torch
+import logging
 
 from collections import Counter
 from collections import defaultdict
@@ -10,6 +11,9 @@
 from segtok.tokenizer import word_tokenizer
 
 
+log = logging.getLogger(__name__)
+
+
 class Dictionary:
     """
     This class holds a dictionary that maps strings to IDs, used to generate one-hot encodings of strings.
@@ -601,14 +605,23 @@ def _print_statistics_for(sentences, name):
         classes_to_count = TaggedCorpus._get_classes_to_count(sentences)
         tokens_per_sentence = TaggedCorpus._get_tokens_per_sentence(sentences)
 
-        print(name)
-        print("total size: " + str(len(sentences)))
+        size_dict = {}
         for l, c in classes_to_count.items():
-            print("size of class {}: {}".format(l, c))
-        print("total # of tokens: " + str(sum(tokens_per_sentence)))
-        print("min # of tokens: " + str(min(tokens_per_sentence)))
-        print("max # of tokens: " + str(max(tokens_per_sentence)))
-        print("avg # of tokens: " + str(sum(tokens_per_sentence) / len(sentences)))
+            size_dict[l] = c
+        size_dict['total'] = len(sentences)
+
+        stats = {
+            'dataset': name,
+            'number_of_documents': size_dict,
+            'number_of_tokens': {
+                'total': sum(tokens_per_sentence),
+                'min': min(tokens_per_sentence),
+                'max': max(tokens_per_sentence),
+                'avg': sum(tokens_per_sentence) / len(sentences)
+            }
+        }
+
+        log.info(stats)
 
     @staticmethod
     def _get_tokens_per_sentence(sentences):
diff --git a/flair/data_fetcher.py b/flair/data_fetcher.py
index 35ddaf12eb..73e17f1225 100644
--- a/flair/data_fetcher.py
+++ b/flair/data_fetcher.py
@@ -1,9 +1,12 @@
 from typing import List, Dict
 import re
 import os
+import logging
 from enum import Enum
 
-from flair.data import Sentence, TaggedCorpus, Token, Label
+from flair.data import Sentence, TaggedCorpus, Token
+
+log = logging.getLogger(__name__)
 
 
 class NLPTask(Enum):
@@ -43,7 +46,7 @@ def fetch_data(task: NLPTask) -> TaggedCorpus:
         """
         data_folder = os.path.join('resources', 'tasks', str(task.value).lower())
-        print("reading data from {}".format(data_folder))
+        log.info("Reading data from {}".format(data_folder))
 
         # the CoNLL 2000 task on chunking has three columns: text, pos and np (chunk)
         if task == NLPTask.CONLL_2000:
diff --git a/flair/trainers/language_model_trainer.py b/flair/trainers/language_model_trainer.py
index 8fa3129306..d3e2023597 100644
--- a/flair/trainers/language_model_trainer.py
+++ b/flair/trainers/language_model_trainer.py
@@ -1,7 +1,7 @@
 import time, datetime
 import os
 import random
-
+import logging
 import math
 import torch
 from torch.autograd import Variable
@@ -10,6 +10,10 @@
 from flair.data import Dictionary
 from flair.models import LanguageModel
 
+
+log = logging.getLogger(__name__)
+
+
 class TextCorpus(object):
     def __init__(self, path, dictionary: Dictionary, forward: bool = True, character_level: bool = True):
@@ -70,7 +74,6 @@ def charsplit(self, path: str, expand_vocab=False, forward=True, split_on_char=T
             else:
                 chars = line.split()
 
-            # print(chars)
             tokens += len(chars)
 
             # Add chars to the dictionary
@@ -185,7 +188,7 @@ def train(self,
 
             for split in range(1, max_epochs + 1):
 
-                print('Split %d' % split + '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))
+                log.info('Split %d' % split + '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))
 
                 for group in optimizer.param_groups:
                     learning_rate = group['lr']
@@ -193,7 +196,7 @@ def train(self,
                 train_slice = self.corpus.get_next_train_slice()
 
                 train_data = self._batchify(train_slice, mini_batch_size)
-                print('\t({:%H:%M:%S})'.format(datetime.datetime.now()))
+                log.info('\t({:%H:%M:%S})'.format(datetime.datetime.now()))
 
                 # go into train mode
                 self.model.train()
@@ -237,7 +240,7 @@ def train(self,
                     if batch % self.log_interval == 0 and batch > 0:
                         cur_loss = total_loss.item() / self.log_interval
                         elapsed = time.time() - start_time
-                        print('| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
+                        log.info('| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                               'loss {:5.2f} | ppl {:8.2f}'.format(
                             split, number_of_splits, batch, len(train_data) // sequence_length,
                             elapsed * 1000 / self.log_interval, cur_loss,
@@ -245,7 +248,7 @@ def train(self,
                         total_loss = 0
                         start_time = time.time()
 
-            print('training done! \t({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('training done! \t({:%H:%M:%S})'.format(datetime.datetime.now()))
 
             ###############################################################################
             # TEST
@@ -254,7 +257,7 @@ def train(self,
                 val_loss = self.evaluate(val_data, mini_batch_size, sequence_length)
                 scheduler.step(val_loss)
 
-                print('best loss so far {:5.2f}'.format(best_val_loss))
+                log.info('best loss so far {:5.2f}'.format(best_val_loss))
 
                 # Save the model if the validation loss is the best we've seen so far.
                 if val_loss < best_val_loss:
@@ -264,7 +267,7 @@ def train(self,
                 ###############################################################################
                 # print info
                 ###############################################################################
-                print('-' * 89)
+                log.info('-' * 89)
 
                 local_split_number = split % number_of_splits
                 if local_split_number == 0:
                     local_split_number = number_of_splits
@@ -281,12 +284,12 @@ def train(self,
                 with open(loss_txt, "a") as myfile:
                     myfile.write('%s\n' % summary)
 
-                print(summary)
-                print('-' * 89)
+                log.info(summary)
+                log.info('-' * 89)
 
         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
+            log.info('-' * 89)
+            log.info('Exiting from training early')
 
     def evaluate(self, data_source, eval_batch_size, sequence_length):
         # Turn on evaluation mode which disables dropout.
diff --git a/flair/trainers/sequence_tagger_trainer.py b/flair/trainers/sequence_tagger_trainer.py
index e20bec6e79..39dca80a0e 100644
--- a/flair/trainers/sequence_tagger_trainer.py
+++ b/flair/trainers/sequence_tagger_trainer.py
@@ -33,7 +33,7 @@ def train(self,
         evaluation_method = 'F1'
         if self.model.tag_type in ['pos', 'upos']: evaluation_method = 'accuracy'
-        log.info(evaluation_method)
+        log.info('Evaluation method: {}'.format(evaluation_method))
 
         loss_txt = init_output_file(base_path, 'loss.tsv')
         with open(loss_txt, 'a') as f:

From c57557237d837cd4619e12224a6cd41f51f9c82d Mon Sep 17 00:00:00 2001
From: tabergma
Date: Thu, 27 Sep 2018 09:10:58 +0200
Subject: [PATCH 7/7] GH-16: Update log level.

---
 flair/__init__.py                             | 2 +-
 flair/trainers/sequence_tagger_trainer.py     | 8 ++++----
 flair/trainers/text_classification_trainer.py | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/flair/__init__.py b/flair/__init__.py
index c4a7b48757..2b74ad0143 100644
--- a/flair/__init__.py
+++ b/flair/__init__.py
@@ -8,5 +8,5 @@
 
 FORMAT = '%(asctime)-15s %(message)s'
 
-logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
+logging.basicConfig(level=logging.WARNING, format=FORMAT, stream=sys.stdout)
 logging.getLogger('flair').setLevel(logging.INFO)
\ No newline at end of file
diff --git a/flair/trainers/sequence_tagger_trainer.py b/flair/trainers/sequence_tagger_trainer.py
index 39dca80a0e..bdff9c4c94 100644
--- a/flair/trainers/sequence_tagger_trainer.py
+++ b/flair/trainers/sequence_tagger_trainer.py
@@ -140,11 +140,11 @@ def train(self,
                 if save_model and train_with_dev:
                     self.model.save(base_path + "/final-model.pt")
         except KeyboardInterrupt:
-            log.info('-' * 89)
-            log.info('Exiting from training early')
-            log.info('saving model')
+            log.info('-' * 100)
+            log.info('Exiting from training early.')
+            log.info('Saving model ...')
             self.model.save(base_path + "/final-model.pt")
-            log.info('done')
+            log.info('Done.')
 
     def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method: str = 'F1',
                  embeddings_in_memory: bool = True):
diff --git a/flair/trainers/text_classification_trainer.py b/flair/trainers/text_classification_trainer.py
index f220196bcf..66e222d018 100644
--- a/flair/trainers/text_classification_trainer.py
+++ b/flair/trainers/text_classification_trainer.py
@@ -174,13 +174,13 @@ def train(self,
             log.info('-' * 100)
 
         except KeyboardInterrupt:
-            log.info('-' * 89)
-            log.info('Exiting from training early')
-            log.info('saving model')
+            log.info('-' * 100)
+            log.info('Exiting from training early.')
+            log.info('Saving model ...')
 
             with open(base_path + "/final-model.pt", 'wb') as model_save_file:
                 torch.save(self.model, model_save_file, pickle_protocol=4)
                 model_save_file.close()
-            log.info('done')
+            log.info('Done.')
 
     def _calculate_evaluation_results_for(self, dataset_name, dataset, embeddings_in_memory, mini_batch_size):
         metrics, loss = self.evaluate(dataset, mini_batch_size=mini_batch_size,
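
Usage sketch (illustrative, not part of the patches): the snippet below shows how the two helpers this series converges on fit together, WeightExtractor sampling weights into weights.txt and the TSV-emitting Metric whose header and rows make up loss.tsv. The toy Linear model, the output directory, and the direct assignment of the private _tp/_fp/_fn/_tn counters are assumptions made for the example; it also assumes init_output_file creates the target directory, which the trainers rely on once patch 2 drops os.makedirs.

    import torch
    from flair.training_utils import Metric, WeightExtractor

    # WeightExtractor picks up to number_of_weights (default 10) random positions
    # per state_dict entry the first time it sees a key, then appends one
    # "<iteration>\t<key>\t<i>\t<value>" row per tracked weight to weights.txt.
    extractor = WeightExtractor('resources/sketch')  # hypothetical output directory
    model = torch.nn.Linear(4, 2)                    # stand-in for a flair model

    for iteration in range(3):
        extractor.extract_weights(model.state_dict(), iteration)

    # Metric serializes one tab-separated block per dataset; tsv_header('DEV')
    # yields the matching DEV_TP ... DEV_ACCURACY column names used in loss.tsv.
    metric = Metric('MICRO_AVG')
    metric._tp, metric._fp, metric._fn, metric._tn = 8, 2, 1, 5  # counters set directly, for illustration only
    print(Metric.tsv_header('DEV'))
    print(metric.to_tsv())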