GH-16: Standardize logging output for sequence tagger and text classifier #123

Merged · 7 commits · Sep 27, 2018
10 changes: 10 additions & 0 deletions flair/__init__.py
@@ -1,2 +1,12 @@
 from . import data
 from . import models
+
+import sys
+import logging
+
+logger = logging.getLogger(__name__)
+
+FORMAT = '%(asctime)-15s %(message)s'
+
+logging.basicConfig(level=logging.WARNING, format=FORMAT, stream=sys.stdout)
+logging.getLogger('flair').setLevel(logging.INFO)
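
Note: with this package-level setup, any logger in the flair.* namespace emits INFO records to stdout while third-party loggers stay at WARNING. A standalone sketch of the resulting behavior (illustrative only, not part of this diff):

    import logging
    import sys

    # Reproduce the configuration from flair/__init__.py above:
    logging.basicConfig(level=logging.WARNING,
                        format='%(asctime)-15s %(message)s', stream=sys.stdout)
    logging.getLogger('flair').setLevel(logging.INFO)

    logging.getLogger('flair.data').info('visible: flair loggers inherit INFO')
    logging.getLogger('some.library').info('suppressed: root stays at WARNING')
    logging.getLogger('some.library').warning('visible: warnings still pass')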
27 changes: 20 additions & 7 deletions flair/data.py
@@ -1,6 +1,7 @@
 from typing import List, Dict, Union

 import torch
+import logging

 from collections import Counter
 from collections import defaultdict
@@ -10,6 +11,9 @@
 from segtok.tokenizer import word_tokenizer


+log = logging.getLogger(__name__)
+
+
 class Dictionary:
     """
     This class holds a dictionary that maps strings to IDs, used to generate one-hot encodings of strings.
@@ -601,14 +605,23 @@ def _print_statistics_for(sentences, name):
         classes_to_count = TaggedCorpus._get_classes_to_count(sentences)
         tokens_per_sentence = TaggedCorpus._get_tokens_per_sentence(sentences)

-        print(name)
-        print("total size: " + str(len(sentences)))
+        size_dict = {}
         for l, c in classes_to_count.items():
-            print("size of class {}: {}".format(l, c))
-        print("total # of tokens: " + str(sum(tokens_per_sentence)))
-        print("min # of tokens: " + str(min(tokens_per_sentence)))
-        print("max # of tokens: " + str(max(tokens_per_sentence)))
-        print("avg # of tokens: " + str(sum(tokens_per_sentence) / len(sentences)))
+            size_dict[l] = c
+        size_dict['total'] = len(sentences)
+
+        stats = {
+            'dataset': name,
+            'number_of_documents': size_dict,
+            'number_of_tokens': {
+                'total': sum(tokens_per_sentence),
+                'min': min(tokens_per_sentence),
+                'max': max(tokens_per_sentence),
+                'avg': sum(tokens_per_sentence) / len(sentences)
+            }
+        }
+
+        log.info(stats)

     @staticmethod
     def _get_tokens_per_sentence(sentences):
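Note: the corpus statistics now go out as one structured record instead of eight separate prints. A self-contained sketch with invented values (not taken from this diff) of what the logged dict contains:

    # Invented example data; mirrors the _print_statistics_for logic above.
    tokens_per_sentence = [7, 12, 9, 15]
    classes_to_count = {'POSITIVE': 3, 'NEGATIVE': 1}

    size_dict = {label: count for label, count in classes_to_count.items()}
    size_dict['total'] = len(tokens_per_sentence)

    stats = {
        'dataset': 'TRAIN',
        'number_of_documents': size_dict,
        'number_of_tokens': {
            'total': sum(tokens_per_sentence),  # 43
            'min': min(tokens_per_sentence),    # 7
            'max': max(tokens_per_sentence),    # 15
            'avg': sum(tokens_per_sentence) / len(tokens_per_sentence),  # 10.75
        },
    }
    print(stats)  # a single line that is easy to grep or parse
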
7 changes: 5 additions & 2 deletions flair/data_fetcher.py
@@ -1,9 +1,12 @@
 from typing import List, Dict
 import re
 import os
+import logging
 from enum import Enum

-from flair.data import Sentence, TaggedCorpus, Token, Label
+from flair.data import Sentence, TaggedCorpus, Token
+
+log = logging.getLogger(__name__)


 class NLPTask(Enum):
@@ -43,7 +46,7 @@ def fetch_data(task: NLPTask) -> TaggedCorpus:
     """

     data_folder = os.path.join('resources', 'tasks', str(task.value).lower())
-    print("reading data from {}".format(data_folder))
+    log.info("Reading data from {}".format(data_folder))

     # the CoNLL 2000 task on chunking has three columns: text, pos and np (chunk)
     if task == NLPTask.CONLL_2000:
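Note: because module loggers are created via logging.getLogger(__name__), their names ('flair.data_fetcher' here) are dotted children of 'flair' and inherit the INFO level set in flair/__init__.py with no per-module configuration. A quick illustrative check:

    import logging

    logging.getLogger('flair').setLevel(logging.INFO)   # done once in flair/__init__.py
    log = logging.getLogger('flair.data_fetcher')       # what __name__ resolves to here
    print(log.getEffectiveLevel() == logging.INFO)      # True: level is inherited
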
27 changes: 15 additions & 12 deletions flair/trainers/language_model_trainer.py
@@ -1,7 +1,7 @@
 import time, datetime
 import os
 import random
-
+import logging
 import math
 import torch
 from torch.autograd import Variable
@@ -10,6 +10,10 @@
 from flair.data import Dictionary
 from flair.models import LanguageModel

+
+log = logging.getLogger(__name__)
+
+
 class TextCorpus(object):
     def __init__(self, path, dictionary: Dictionary, forward: bool = True, character_level: bool = True):

@@ -70,7 +74,6 @@ def charsplit(self, path: str, expand_vocab=False, forward=True, split_on_char=T
                 else:
                     chars = line.split()

-                # print(chars)
                 tokens += len(chars)

                 # Add chars to the dictionary
@@ -185,15 +188,15 @@ def train(self,

         for split in range(1, max_epochs + 1):

-            print('Split %d' % split + '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('Split %d' % split + '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))

             for group in optimizer.param_groups:
                 learning_rate = group['lr']

             train_slice = self.corpus.get_next_train_slice()

             train_data = self._batchify(train_slice, mini_batch_size)
-            print('\t({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('\t({:%H:%M:%S})'.format(datetime.datetime.now()))

             # go into train mode
             self.model.train()
@@ -237,15 +240,15 @@ def train(self,
                 if batch % self.log_interval == 0 and batch > 0:
                     cur_loss = total_loss.item() / self.log_interval
                     elapsed = time.time() - start_time
-                    print('| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
+                    log.info('| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                             'loss {:5.2f} | ppl {:8.2f}'.format(
                         split, number_of_splits, batch, len(train_data) // sequence_length,
                         elapsed * 1000 / self.log_interval, cur_loss,
                         math.exp(cur_loss)))
                     total_loss = 0
                     start_time = time.time()

-            print('training done! \t({:%H:%M:%S})'.format(datetime.datetime.now()))
+            log.info('training done! \t({:%H:%M:%S})'.format(datetime.datetime.now()))

             ###############################################################################
             # TEST
@@ -254,7 +257,7 @@ def train(self,
             val_loss = self.evaluate(val_data, mini_batch_size, sequence_length)
             scheduler.step(val_loss)

-            print('best loss so far {:5.2f}'.format(best_val_loss))
+            log.info('best loss so far {:5.2f}'.format(best_val_loss))

             # Save the model if the validation loss is the best we've seen so far.
             if val_loss < best_val_loss:
@@ -264,7 +267,7 @@ def train(self,
             ###############################################################################
             # print info
             ###############################################################################
-            print('-' * 89)
+            log.info('-' * 89)

             local_split_number = split % number_of_splits
             if local_split_number == 0: local_split_number = number_of_splits
@@ -281,12 +284,12 @@ def train(self,
                 with open(loss_txt, "a") as myfile:
                     myfile.write('%s\n' % summary)

-                print(summary)
-                print('-' * 89)
+                log.info(summary)
+                log.info('-' * 89)

         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
+            log.info('-' * 89)
+            log.info('Exiting from training early')

     def evaluate(self, data_source, eval_batch_size, sequence_length):
         # Turn on evaluation mode which disables dropout.
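Note: this file swaps print for log.info but keeps eager string building with % and .format. As a hedged aside (not part of this diff), the stdlib also supports lazy %-style arguments, which defer interpolation until the record actually passes the level check:

    import logging

    log = logging.getLogger('flair.trainers.language_model_trainer')
    split, cur_loss = 3, 1.2345

    log.info('Split {} - loss {:5.2f}'.format(split, cur_loss))  # eager, as in the diff
    log.info('Split %d - loss %5.2f', split, cur_loss)           # lazy, formatted on emit
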
95 changes: 47 additions & 48 deletions flair/trainers/sequence_tagger_trainer.py
@@ -1,18 +1,17 @@
-from subprocess import run, PIPE
 from typing import List

 import datetime
 import os
 import random
-import re
-import sys
+import logging
 import torch
 from torch.optim.lr_scheduler import ReduceLROnPlateau

 from flair.file_utils import cached_path
 from flair.models.sequence_tagger_model import SequenceTagger
 from flair.data import Sentence, Token, TaggedCorpus
-from flair.training_utils import Metric
+from flair.training_utils import Metric, init_output_file, WeightExtractor

+log = logging.getLogger(__name__)

 class SequenceTaggerTrainer:
@@ -34,12 +33,14 @@ def train(self,

         evaluation_method = 'F1'
         if self.model.tag_type in ['pos', 'upos']: evaluation_method = 'accuracy'
-        print(evaluation_method)
+        log.info('Evaluation method: {}'.format(evaluation_method))

         os.makedirs(base_path, exist_ok=True)
+        loss_txt = init_output_file(base_path, 'loss.tsv')
+        with open(loss_txt, 'a') as f:
+            f.write('EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n'.format(
+                Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'), Metric.tsv_header('TEST')))

-        loss_txt = os.path.join(base_path, "loss.txt")
-        open(loss_txt, "w", encoding='utf-8').close()
+        weight_extractor = WeightExtractor(base_path)

         optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)

@@ -56,63 +57,60 @@ def train(self,
         # At any point you can hit Ctrl + C to break out of training early.
         try:

-            for epoch in range(0, max_epochs):
-
-                current_loss: int = 0
-
-                for group in optimizer.param_groups:
-                    learning_rate = group['lr']
+            for epoch in range(max_epochs):
+                log.info('-' * 100)

                 if not self.test_mode: random.shuffle(train_data)

                 batches = [train_data[x:x + mini_batch_size] for x in range(0, len(train_data), mini_batch_size)]

                 self.model.train()

-                batch_no: int = 0
+                current_loss: float = 0
+                seen_sentences = 0
+                modulo = max(1, int(len(batches) / 10))

-                for batch in batches:
-                    batch: List[Sentence] = batch
-                    batch_no += 1
+                for group in optimizer.param_groups:
+                    learning_rate = group['lr']

-                    if batch_no % 100 == 0:
-                        print("%d of %d (%f)" % (batch_no, len(batches), float(batch_no / len(batches))))
+                for batch_no, batch in enumerate(batches):
+                    batch: List[Sentence] = batch

                     optimizer.zero_grad()

                     # Step 4. Compute the loss, gradients, and update the parameters by calling optimizer.step()
                     loss = self.model.neg_log_likelihood(batch, self.model.tag_type)

                     current_loss += loss.item()
+                    seen_sentences += len(batch)

                     loss.backward()

                     torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)

                     optimizer.step()

-                    sys.stdout.write('.')
-                    sys.stdout.flush()

                     if not embeddings_in_memory:
                         self.clear_embeddings_in_batch(batch)

+                    if batch_no % modulo == 0:
+                        log.info("epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
+                            epoch + 1, batch_no, len(batches), current_loss / seen_sentences))
+                        iteration = epoch * len(batches) + batch_no
+                        weight_extractor.extract_weights(self.model.state_dict(), iteration)

                 current_loss /= len(train_data)

                 # switch to eval mode
                 self.model.eval()

+                log.info('-' * 100)

+                dev_score = dev_metric = None
                 if not train_with_dev:
-                    print('.. evaluating... dev... ')
-                    dev_score, dev_fp, dev_result = self.evaluate(self.corpus.dev, base_path,
+                    dev_score, dev_metric = self.evaluate(self.corpus.dev, base_path,
                                                           evaluation_method=evaluation_method,
                                                           embeddings_in_memory=embeddings_in_memory)
-                else:
-                    dev_fp = 0
-                    dev_result = '_'

-                print('test... ')
-                test_score, test_fp, test_result = self.evaluate(self.corpus.test, base_path,
+                test_score, test_metric = self.evaluate(self.corpus.test, base_path,
                                                          evaluation_method=evaluation_method,
                                                          embeddings_in_memory=embeddings_in_memory)

@@ -122,16 +120,17 @@ def train(self,

                 # anneal against train loss if training with dev, otherwise anneal against dev score
                 scheduler.step(current_loss) if train_with_dev else scheduler.step(dev_score)

-                summary = '%d' % epoch + '\t({:%H:%M:%S})'.format(datetime.datetime.now()) \
-                          + '\t%f\t%d\t%f\tDEV %d\t' % (
-                    current_loss, scheduler.num_bad_epochs, learning_rate, dev_fp) + dev_result
-                summary = summary.replace('\n', '')
-                summary += '\tTEST \t%d\t' % test_fp + test_result
+                log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(epoch + 1, learning_rate, scheduler.num_bad_epochs))
+                if not train_with_dev:
+                    log.info("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                        'DEV', dev_metric.f_score(), dev_metric.accuracy(), dev_metric._tp, dev_metric._fp, dev_metric._fn, dev_metric._tn))
+                log.info("{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}".format(
+                    'TEST', test_metric.f_score(), test_metric.accuracy(), test_metric._tp, test_metric._fp, test_metric._fn, test_metric._tn))

-                print(summary)
-                with open(loss_txt, "a") as loss_file:
-                    loss_file.write('%s\n' % summary)
-                    loss_file.close()
+                with open(loss_txt, 'a') as f:
+                    dev_metric_str = dev_metric.to_tsv() if dev_metric is not None else Metric.to_empty_tsv()
+                    f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
+                        epoch, datetime.datetime.now(), '_', Metric.to_empty_tsv(), '_', dev_metric_str, '_', test_metric.to_tsv()))

                 # save if model is current best and we use dev data for model selection
                 if save_model and not train_with_dev and dev_score == scheduler.best:
@@ -141,11 +140,11 @@ def train(self,
             if save_model and train_with_dev: self.model.save(base_path + "/final-model.pt")

         except KeyboardInterrupt:
-            print('-' * 89)
-            print('Exiting from training early')
-            print('saving model')
+            log.info('-' * 100)
+            log.info('Exiting from training early.')
+            log.info('Saving model ...')
             self.model.save(base_path + "/final-model.pt")
-            print('done')
+            log.info('Done.')

     def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method: str = 'F1',
                  embeddings_in_memory: bool = True):
@@ -210,11 +209,11 @@ def evaluate(self, evaluation: List[Sentence], out_path=None, evaluation_method:

         if evaluation_method == 'accuracy':
             score = metric.accuracy()
-            return score, metric._fp, str(score)
+            return score, metric

         if evaluation_method == 'F1':
             score = metric.f_score()
-            return score, metric._fp, str(metric)
+            return score, metric

     def clear_embeddings_in_batch(self, batch: List[Sentence]):
         for sentence in batch:
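Note: evaluate() now returns a uniform (score, Metric) pair for both evaluation methods, and loss.tsv gains the header written at the top of train(), so runs can be inspected as plain TSV. A minimal reading sketch (the path is hypothetical; columns beyond EPOCH and TIMESTAMP depend on Metric.tsv_header):

    import csv

    # Hypothetical output directory of a training run:
    with open('resources/taggers/example-ner/loss.tsv') as f:
        for row in csv.DictReader(f, delimiter='\t'):
            print(row['EPOCH'], row['TIMESTAMP'])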