GH-16: Standardize logging output for sequence tagger and text classifier #123

Merged · 7 commits · Sep 27, 2018
10 changes: 10 additions & 0 deletions flair/__init__.py
@@ -1,2 +1,12 @@
 from . import data
 from . import models
+
+import sys
+import logging
+
+logger = logging.getLogger(__name__)
+
+FORMAT = '%(asctime)-15s %(message)s'
+
+logging.basicConfig(level=logging.WARNING, format=FORMAT, stream=sys.stdout)
+logging.getLogger('flair').setLevel(logging.INFO)
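
Note: with this package-level setup, any logger in the flair.* namespace emits INFO records to stdout while third-party loggers stay at WARNING. A standalone sketch of the resulting behavior (illustrative only, not part of this diff):

    import logging
    import sys

    # Reproduce the configuration from flair/__init__.py above:
    logging.basicConfig(level=logging.WARNING,
                        format='%(asctime)-15s %(message)s', stream=sys.stdout)
    logging.getLogger('flair').setLevel(logging.INFO)

    logging.getLogger('flair.data').info('visible: flair loggers inherit INFO')
    logging.getLogger('some.library').info('suppressed: root stays at WARNING')
    logging.getLogger('some.library').warning('visible: warnings still pass')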
27 changes: 20 additions & 7 deletions flair/data.py
@@ -1,6 +1,7 @@
 from typing import List, Dict, Union

 import torch
+import logging

 from collections import Counter
 from collections import defaultdict
@@ -10,6 +11,9 @@
 from segtok.tokenizer import word_tokenizer


+log = logging.getLogger(__name__)
+
+
 class Dictionary:
     """
     This class holds a dictionary that maps strings to IDs, used to generate one-hot encodings of strings.
@@ -601,14 +605,23 @@ def _print_statistics_for(sentences, name):
         classes_to_count = TaggedCorpus._get_classes_to_count(sentences)
         tokens_per_sentence = TaggedCorpus._get_tokens_per_sentence(sentences)

-        print(name)
-        print("total size: " + str(len(sentences)))
+        size_dict = {}
         for l, c in classes_to_count.items():
-            print("size of class {}: {}".format(l, c))
-        print("total # of tokens: " + str(sum(tokens_per_sentence)))
-        print("min # of tokens: " + str(min(tokens_per_sentence)))
-        print("max # of tokens: " + str(max(tokens_per_sentence)))
-        print("avg # of tokens: " + str(sum(tokens_per_sentence) / len(sentences)))
+            size_dict[l] = c
+        size_dict['total'] = len(sentences)
+
+        stats = {
+            'dataset': name,
+            'number_of_documents': size_dict,
+            'number_of_tokens': {
+                'total': sum(tokens_per_sentence),
+                'min': min(tokens_per_sentence),
+                'max': max(tokens_per_sentence),
+                'avg': sum(tokens_per_sentence) / len(sentences)
+            }
+        }
+
+        log.info(stats)

     @staticmethod
     def _get_tokens_per_sentence(sentences):
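Note: the corpus statistics now go out as one structured record instead of eight separate prints. A self-contained sketch with invented values (not taken from this diff) of what the logged dict contains:

    # Invented example data; mirrors the _print_statistics_for logic above.
    tokens_per_sentence = [7, 12, 9, 15]
    classes_to_count = {'POSITIVE': 3, 'NEGATIVE': 1}

    size_dict = {label: count for label, count in classes_to_count.items()}
    size_dict['total'] = len(tokens_per_sentence)

    stats = {
        'dataset': 'TRAIN',
        'number_of_documents': size_dict,
        'number_of_tokens': {
            'total': sum(tokens_per_sentence),  # 43
            'min': min(tokens_per_sentence),    # 7
            'max': max(tokens_per_sentence),    # 15
            'avg': sum(tokens_per_sentence) / len(tokens_per_sentence),  # 10.75
        },
    }
    print(stats)  # a single line that is easy to grep or parse
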
7 changes: 5 additions & 2 deletions flair/data_fetcher.py
@@ -1,9 +1,12 @@
 from typing import List, Dict
 import re
 import os
+import logging
 from enum import Enum

-from flair.data import Sentence, TaggedCorpus, Token, Label
+from flair.data import Sentence, TaggedCorpus, Token
+
+log = logging.getLogger(__name__)


 class NLPTask(Enum):
@@ -43,7 +46,7 @@ def fetch_data(task: NLPTask) -> TaggedCorpus:
     """

     data_folder = os.path.join('resources', 'tasks', str(task.value).lower())
-    print("reading data from {}".format(data_folder))
+    log.info("Reading data from {}".format(data_folder))

     # the CoNLL 2000 task on chunking has three columns: text, pos and np (chunk)
     if task == NLPTask.CONLL_2000:
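Note: because module loggers are created via logging.getLogger(__name__), their names ('flair.data_fetcher' here) are dotted children of 'flair' and inherit the INFO level set in flair/__init__.py with no per-module configuration. A quick illustrative check:

    import logging

    logging.getLogger('flair').setLevel(logging.INFO)   # done once in flair/__init__.py
    log = logging.getLogger('flair.data_fetcher')       # what __name__ resolves to here
    print(log.getEffectiveLevel() == logging.INFO)      # True: level is inherited
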
27 changes: 15 additions & 12 deletions flair/trainers/language_model_trainer.py
@@ -1,7 +1,7 @@
 import time, datetime
 import os
 import random
-
+import logging
 import math
 import torch
 from torch.autograd import Variable
@@ -10,6 +10,10 @@
 from flair.data import Dictionary
 from flair.models import LanguageModel

+
+log = logging.getLogger(__name__)
+
+
 class TextCorpus(object):
     def __init__(self, path, dictionary: Dictionary, forward: bool = True, character_level: bool = True):

@@ -70,7 +74,6 @@ def charsplit(self, path: str, expand_vocab=False, forward=True, split_on_char=T
                 else:
                     chars = line.split()

-                # print(chars)
                 tokens += len(chars)

                 # Add chars to the dictionary
@@ -185,15 +188,15 @@ def train(self,

         for split in range(1, max_epochs + 1):

-            print('Split %d' % split + '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('Split %d' % split + '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))

             for group in optimizer.param_groups:
                 learning_rate = group['lr']

             train_slice = self.corpus.get_next_train_slice()

             train_data = self._batchify(train_slice, mini_batch_size)
-            print('\t({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('\t({:%H:%M:%S})'.format(datetime.datetime.now()))

             # go into train mode
             self.model.train()
@@ -237,15 +240,15 @@ def train(self,
                 if batch % self.log_interval == 0 and batch > 0:
                     cur_loss = total_loss.item() / self.log_interval
                     elapsed = time.time() - start_time
-                    print('| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
+                    log.info('| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                             'loss {:5.2f} | ppl {:8.2f}'.format(
                         split, number_of_splits, batch, len(train_data) // sequence_length,
                         elapsed * 1000 / self.log_interval, cur_loss,
                         math.exp(cur_loss)))
                     total_loss = 0
                     start_time = time.time()

-            print('training done! \t({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('training done! \t({:%H:%M:%S})'.format(datetime.datetime.now()))

             ###############################################################################
             # TEST
@@ -254,7 +257,7 @@ def train(self,
             val_loss = self.evaluate(val_data, mini_batch_size, sequence_length)
             scheduler.step(val_loss)

-            print('best loss so far {:5.2f}'.format(best_val_loss))
+            log.info('best loss so far {:5.2f}'.format(best_val_loss))

             # Save the model if the validation loss is the best we've seen so far.
             if val_loss < best_val_loss:
@@ -264,7 +267,7 @@ def train(self,
             ###############################################################################
             # print info
             ###############################################################################
-            print('-' * 89)
+            log.info('-' * 89)

             local_split_number = split % number_of_splits
             if local_split_number == 0: local_split_number = number_of_splits
@@ -281,12 +284,12 @@ def train(self,
                 with open(loss_txt, "a") as myfile:
                     myfile.write('%s\n' % summary)

-                print(summary)
-                print('-' * 89)
+                log.info(summary)
+                log.info('-' * 89)

         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
+            log.info('-' * 89)
+            log.info('Exiting from training early')

     def evaluate(self, data_source, eval_batch_size, sequence_length):
         # Turn on evaluation mode which disables dropout.
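Note: this file swaps print for log.info but keeps eager string building with % and .format. As a hedged aside (not part of this diff), the stdlib also supports lazy %-style arguments, which defer interpolation until the record actually passes the level check:

    import logging

    log = logging.getLogger('flair.trainers.language_model_trainer')
    split, cur_loss = 3, 1.2345

    log.info('Split {} - loss {:5.2f}'.format(split, cur_loss))  # eager, as in the diff
    log.info('Split %d - loss %5.2f', split, cur_loss)           # lazy, formatted on emit
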
95 changes: 47 additions & 48 deletions flair/trainers/sequence_tagger_trainer.py
@@ -1,18 +1,17 @@
-from subprocess import run, PIPE
 from typing import List

 import datetime
 import os
 import random
-import re
-import sys
+import logging
 import torch
 from torch.optim.lr_scheduler import ReduceLROnPlateau

 from flair.file_utils import cached_path
 from flair.models.sequence_tagger_model import SequenceTagger
 from flair.data import Sentence, Token, TaggedCorpus
-from flair.training_utils import Metric
+from flair.training_utils import Metric, init_output_file, WeightExtractor

+log = logging.getLogger(__name__)

 class SequenceTaggerTrainer:
@@ -34,12 +33,14 @@ def train(self,

         evaluation_method = 'F1'
         if self.model.tag_type in ['pos', 'upos']: evaluation_method = 'accuracy'
-        print(evaluation_method)
+        log.info('Evaluation method: {}'.format(evaluation_method))

         os.makedirs(base_path, exist_ok=True)
+        loss_txt = init_output_file(base_path, 'loss.tsv')
+        with open(loss_txt, 'a') as f:
+            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n'.format(
+                Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'), Metric.tsv_header('TEST')))

-        loss_txt = os.path.join(base_path, "loss.txt")
-        open(loss_txt, "w", encoding='utf-8').close()
+        weight_extractor = WeightExtractor(base_path)

         optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)

@@ -56,63 +57,60 @@ def train(self,
         # At any point you can hit Ctrl + C to break out of training early.
         try:

-            for epoch in range(0, max_epochs):
-
-                current_loss: int = 0
-
-                for group in optimizer.param_groups:
-                    learning_rate = group['lr']
+            for epoch in range(max_epochs):
+                log.info('-' * 100)

                 if not self.test_mode: random.shuffle(train_data)

                 batches = [train_data[x:x + mini_batch_size] for x in range(0, len(train_data), mini_batch_size)]

                 self.model.train()

-                batch_no: int = 0
+                current_loss: float = 0
+                seen_sentences = 0
+                modulo = max(1, int(len(batches) / 10))

-                for batch in batches:
-                    batch: List[Sentence] = batch
-                    batch_no += 1
+                for group in optimizer.param_groups:
+                    learning_rate = group['lr']

-                    if batch_no % 100 == 0:
-                        print("%d of %d (%f)" % (batch_no, len(batches), float(batch_no / len(batches))))
+                for batch_no, batch in enumerate(batches):
+                    batch: List[Sentence] = batch

                     optimizer.zero_grad()

                     # Step 4. Compute the loss, gradients, and update the parameters by calling optimizer.step()
                     loss = self.model.neg_log_likelihood(batch, self.model.tag_type)

                     current_loss += loss.item()
+                    seen_sentences += len(batch)

                     loss.backward()

                     torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)

                     optimizer.step()

-                    sys.stdout.write('.')
-                    sys.stdout.flush()

                     if not embeddings_in_memory:
                         self.clear_embeddings_in_batch(batch)

+                    if batch_no % modulo == 0:
+                        log.info("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
+                            epoch + 1, batch_no, len(batches), current_loss / seen_sentences))
+                        iteration = epoch * len(batches) + batch_no
+                        weight_extractor.extract_weights(self.model.state_dict(), iteration)

                 current_loss /= len(train_data)

                 # switch to eval mode
                 self.model.eval()

+                log.info('-' * 100)

+                dev_score = dev_metric = None
                 if not train_with_dev:
-                    print('.. evaluating... dev... ')
-                    dev_score, dev_fp, dev_result = self.evaluate(self.corpus.dev, base_path,
+                    dev_score, dev_metric = self.evaluate(self.corpus.dev, base_path,
                                                           evaluation_method=evaluation_method,
                                                           embeddings_in_memory=embeddings_in_memory)
-                else:
-                    dev_fp = 0
-                    dev_result = '_'

-                print('test... ')
-                test_score, test_fp, test_result = self.evaluate(self.corpus.test, base_path,
+                test_score, test_metric = self.evaluate(self.corpus.test, base_path,
                                                          evaluation_method=evaluation_method,
                                                          embeddings_in_memory=embeddings_in_memory)

@@ -122,16 +120,17 @@ def train(self,

                 # anneal against train loss if training with dev, otherwise anneal against dev score
                 scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_score)

-                summary = '%d' % epoch + '\t({:%H:%M:%S})'.format(datetime.datetime.now()) \
-                          + '\t%f\t%d\t%f\tDEV %d\t' % (
-                    current_loss, scheduler.num_bad_epochs, learning_rate, dev_fp) + dev_result
-                summary = summary.replace('\n', '')
-                summary += '\tTEST \t%d\t' % test_fp + test_result
+                log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
+                if not train_with_dev:
+                    log.info("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                        'DEV', dev_metric.f_score(), dev_metric.accuracy(), dev_metric._tp, dev_metric._fp, dev_metric._fn, dev_metric._tn))
+                log.info("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                    'TEST', test_metric.f_score(), test_metric.accuracy(), test_metric._tp, test_metric._fp, test_metric._fn, test_metric._tn))

-                print(summary)
-                with open(loss_txt, "a") as loss_file:
-                    loss_file.write('%s\n' % summary)
-                    loss_file.close()
+                with open(loss_txt, 'a') as f:
+                    dev_metric_str = dev_metric.to_tsv() if dev_metric is not None else Metric.to_empty_tsv()
+                    f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
+                        epoch, datetime.datetime.now(), '_', Metric.to_empty_tsv(), '_', dev_metric_str, '_', test_metric.to_tsv()))

                 # save if model is current best and we use dev data for model selection
                 if save_model and not train_with_dev and dev_score == scheduler.best:
@@ -141,11 +140,11 @@ def train(self,
             if save_model and train_with_dev: self.model.save(base_path + "/final-model.pt")

         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
-            print('saving model')
+            log.info('-' * 100)
+            log.info('Exiting from training early.')
+            log.info('Saving model ...')
             self.model.save(base_path + "/final-model.pt")
-            print('done')
+            log.info('Done.')

     def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method: str = 'F1',
                  embeddings_in_memory: bool = True):
@@ -210,11 +209,11 @@ def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method:

         if evaluation_method == 'accuracy':
             score = metric.accuracy()
-            return score, metric._fp, str(score)
+            return score, metric

         if evaluation_method == 'F1':
             score = metric.f_score()
-            return score, metric._fp, str(metric)
+            return score, metric

     def clear_embeddings_in_batch(self, batch: List[Sentence]):
         for sentence in batch:
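Note: evaluate() now returns a uniform (score, Metric) pair for both evaluation methods, and loss.tsv gains the header written at the top of train(), so runs can be inspected as plain TSV. A minimal reading sketch (the path is hypothetical; columns beyond EPOCH and TIMESTAMP depend on Metric.tsv_header):

    import csv

    # Hypothetical output directory of a training run:
    with open('resources/taggers/example-ner/loss.tsv') as f:
        for row in csv.DictReader(f, delimiter='\t'):
            print(row['EPOCH'], row['TIMESTAMP'])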