From 98ab11844ba806c03ea569b86bf188f7c404d790 Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 10:51:41 +0100 Subject: [PATCH 01/11] GH-177: Fix tests. --- tests/conftest.py | 2 ++ tests/test_data.py | 3 --- tests/test_embeddings.py | 10 +++++----- tests/test_language_model_trainer.py | 2 -- tests/test_model_integration.py | 8 ++++---- tests/test_sequence_tagger.py | 5 +++-- tests/test_visual.py | 1 - 7 files changed, 14 insertions(+), 17 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1937241396..f2f491b0ab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,12 @@ import pytest from pathlib import Path + @pytest.fixture(scope="module") def resources_path(): return Path(__file__).parent / 'resources' + @pytest.fixture(scope="module") def tasks_base_path(resources_path): return resources_path / 'tasks' diff --git a/tests/test_data.py b/tests/test_data.py index ea23c79440..b2aaf1af03 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -237,9 +237,6 @@ def test_label_set_confidence(): assert (0.2 == label.score) - # with pytest.raises(ValueError): - # label.name = '' - def test_tagged_corpus_make_label_dictionary(): sentence_1 = Sentence('sentence 1', labels=[Label('class_1')]) diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py index 2265a09ee1..480c230505 100644 --- a/tests/test_embeddings.py +++ b/tests/test_embeddings.py @@ -114,7 +114,7 @@ def test_document_lstm_embeddings(): sentence, glove, charlm = init_document_embeddings() embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128, - bidirectional=False, use_first_representation=False) + bidirectional=False) embeddings.embed(sentence) @@ -131,7 +131,7 @@ def test_document_bidirectional_lstm_embeddings(): sentence, glove, charlm = init_document_embeddings() embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128, - bidirectional=True, use_first_representation=False) + bidirectional=True) embeddings.embed(sentence) @@ -148,7 +148,7 @@ def test_document_bidirectional_lstm_embeddings_using_first_representation(): sentence, glove, charlm = init_document_embeddings() embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128, - bidirectional=True, use_first_representation=True) + bidirectional=True) embeddings.embed(sentence) @@ -165,7 +165,7 @@ def test_document_lstm_embeddings_using_first_representation(): sentence, glove, charlm = init_document_embeddings() embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128, - bidirectional=False, use_first_representation=True) + bidirectional=False) embeddings.embed(sentence) @@ -221,4 +221,4 @@ def load_and_apply_char_lm_embeddings(emb_type: str): token.clear_embeddings() - assert(len(token.get_embedding()) == 0) \ No newline at end of file + assert(len(token.get_embedding()) == 0) diff --git a/tests/test_language_model_trainer.py b/tests/test_language_model_trainer.py index 80a4bf60db..d821acc0c6 100644 --- a/tests/test_language_model_trainer.py +++ b/tests/test_language_model_trainer.py @@ -32,5 +32,3 @@ def test_training(): # clean up results directory shutil.rmtree('./results', ignore_errors=True) - - diff --git a/tests/test_model_integration.py b/tests/test_model_integration.py index 3ddd9419e7..ddd49ab6f3 100644 --- a/tests/test_model_integration.py +++ b/tests/test_model_integration.py @@ -170,7 +170,7 @@ def test_train_load_use_classifier(): assert (0.0 <= l.score <= 1.0) assert (type(l.score) is float) - loaded_model = TextClassifier.load_from_file('./results/final-model.pt') + loaded_model = TextClassifier.load_from_file('./results/final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -204,7 +204,7 @@ def test_train_charlm_load_use_classifier(): assert (0.0 <= l.score <= 1.0) assert (type(l.score) is float) - loaded_model = TextClassifier.load_from_file('./results/final-model.pt') + loaded_model = TextClassifier.load_from_file('./results/final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -239,7 +239,7 @@ def test_train_charlm__nocache_load_use_classifier(): assert (0.0 <= l.score <= 1.0) assert (type(l.score) is float) - loaded_model = TextClassifier.load_from_file('./results/final-model.pt') + loaded_model = TextClassifier.load_from_file('./results/final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -249,4 +249,4 @@ def test_train_charlm__nocache_load_use_classifier(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') \ No newline at end of file + shutil.rmtree('./results') diff --git a/tests/test_sequence_tagger.py b/tests/test_sequence_tagger.py index 47ae2fc766..c326c53c64 100644 --- a/tests/test_sequence_tagger.py +++ b/tests/test_sequence_tagger.py @@ -5,17 +5,18 @@ from flair.data import Sentence from flair.models import SequenceTagger + @pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") def test_tag_sentence(): # test tagging sentence = Sentence('I love Berlin') - tagger = SequenceTagger.load('ner') + tagger = SequenceTagger.load('pos') tagger.predict(sentence) # test re-tagging - tagger = SequenceTagger.load('pos') + tagger = SequenceTagger.load('ner') tagger.predict(sentence) diff --git a/tests/test_visual.py b/tests/test_visual.py index fed83a298a..145d8ea5fe 100644 --- a/tests/test_visual.py +++ b/tests/test_visual.py @@ -1,5 +1,4 @@ import os -import shutil import pytest From c4973e996a806347a81258b07a181957a20e3cc2 Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 11:07:40 +0100 Subject: [PATCH 02/11] GH-177: Remove duplicate tests. --- tests/test_sequence_labeler_trainer.py | 28 ------------- tests/test_text_classifier_trainer.py | 56 -------------------------- 2 files changed, 84 deletions(-) delete mode 100644 tests/test_sequence_labeler_trainer.py delete mode 100644 tests/test_text_classifier_trainer.py diff --git a/tests/test_sequence_labeler_trainer.py b/tests/test_sequence_labeler_trainer.py deleted file mode 100644 index 6d1806135a..0000000000 --- a/tests/test_sequence_labeler_trainer.py +++ /dev/null @@ -1,28 +0,0 @@ -import shutil - -from flair.data_fetcher import NLPTaskDataFetcher, NLPTask -from flair.embeddings import WordEmbeddings -from flair.models import SequenceTagger -from flair.trainers import SequenceTaggerTrainer - - -def test_training(tasks_base_path): - - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, tasks_base_path) - tag_dictionary = corpus.make_tag_dictionary('ner') - - embeddings = WordEmbeddings('glove') - - tagger: SequenceTagger = SequenceTagger(hidden_size=256, - embeddings=embeddings, - tag_dictionary=tag_dictionary, - tag_type='ner', - use_crf=False) - - # initialize trainer - trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - - trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=10) - - # clean up results directory - shutil.rmtree('./results') diff --git a/tests/test_text_classifier_trainer.py b/tests/test_text_classifier_trainer.py deleted file mode 100644 index 5a4703db0e..0000000000 --- a/tests/test_text_classifier_trainer.py +++ /dev/null @@ -1,56 +0,0 @@ -import shutil - -from flair.data import Sentence - -from flair.data_fetcher import NLPTaskDataFetcher, NLPTask -from flair.embeddings import WordEmbeddings, DocumentMeanEmbeddings, DocumentLSTMEmbeddings -from flair.models.text_classification_model import TextClassifier -from flair.trainers.text_classification_trainer import TextClassifierTrainer - - -def test_text_classifier_single_label(tasks_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, tasks_base_path) - label_dict = corpus.make_label_dictionary() - - glove_embedding: WordEmbeddings = WordEmbeddings('en-glove') - document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove_embedding], 128, 1, False, 64, False, False) - - model = TextClassifier(document_embeddings, label_dict, False) - - trainer = TextClassifierTrainer(model, corpus, label_dict, False) - trainer.train('./results', max_epochs=2) - - sentence = Sentence("Berlin is a really nice city.") - - for s in model.predict(sentence): - for l in s.labels: - assert(l.value is not None) - assert(0.0 <= l.score <= 1.0) - assert(type(l.score) is float) - - # clean up results directory - shutil.rmtree('./results') - - -def test_text_classifier_mulit_label(tasks_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, tasks_base_path) - label_dict = corpus.make_label_dictionary() - - glove_embedding: WordEmbeddings = WordEmbeddings('en-glove') - document_embeddings: DocumentMeanEmbeddings = DocumentMeanEmbeddings([glove_embedding]) - - model = TextClassifier(document_embeddings, label_dict, True) - - trainer = TextClassifierTrainer(model, corpus, label_dict, False) - trainer.train('./results', max_epochs=2) - - sentence = Sentence("Berlin is a really nice city.") - - for s in model.predict(sentence): - for l in s.labels: - assert(l.value is not None) - assert(0.0 <= l.score <= 1.0) - assert(type(l.score) is float) - - # clean up results directory - shutil.rmtree('./results') From 2808bc19159206c2441deb17e8e51a2d4b204352 Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 11:08:57 +0100 Subject: [PATCH 03/11] GH-177: Mark slow tests. --- tests/conftest.py | 16 +++++++++++ tests/test_embeddings.py | 41 ++++++++++++++-------------- tests/test_language_model_trainer.py | 3 ++ tests/test_model_integration.py | 9 ++++++ tests/test_sequence_tagger.py | 7 ++--- tests/test_visual.py | 6 ++-- 6 files changed, 53 insertions(+), 29 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f2f491b0ab..9d3db9ac56 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,3 +10,19 @@ def resources_path(): @pytest.fixture(scope="module") def tasks_base_path(resources_path): return resources_path / 'tasks' + + +def pytest_addoption(parser): + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py index 480c230505..e598cea217 100644 --- a/tests/test_embeddings.py +++ b/tests/test_embeddings.py @@ -1,4 +1,3 @@ -import os import pytest from flair.embeddings import WordEmbeddings, TokenEmbeddings, CharLMEmbeddings, StackedEmbeddings, \ @@ -7,77 +6,77 @@ from flair.data import Sentence -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_en_glove(): load_and_apply_word_embeddings('en-glove') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_en_numberbatch(): load_and_apply_word_embeddings('en-numberbatch') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_en_extvec(): load_and_apply_word_embeddings('en-extvec') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_en_crawl(): load_and_apply_word_embeddings('en-crawl') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_en_news(): load_and_apply_word_embeddings('en-news') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_de_fasttext(): load_and_apply_word_embeddings('de-fasttext') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_de_numberbatch(): load_and_apply_word_embeddings('de-numberbatch') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_sv_fasttext(): load_and_apply_word_embeddings('sv-fasttext') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_news_forward(): load_and_apply_char_lm_embeddings('news-forward') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_news_backward(): load_and_apply_char_lm_embeddings('news-backward') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_mix_forward(): load_and_apply_char_lm_embeddings('mix-forward') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_mix_backward(): load_and_apply_char_lm_embeddings('mix-backward') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_german_forward(): load_and_apply_char_lm_embeddings('german-forward') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_german_backward(): load_and_apply_char_lm_embeddings('german-backward') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_stacked_embeddings(): text = 'I love Berlin.' sentence: Sentence = Sentence(text) @@ -109,7 +108,7 @@ def init_document_embeddings(): return sentence, glove, charlm -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_document_lstm_embeddings(): sentence, glove, charlm = init_document_embeddings() @@ -126,7 +125,7 @@ def test_document_lstm_embeddings(): assert (len(sentence.get_embedding()) == 0) -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_document_bidirectional_lstm_embeddings(): sentence, glove, charlm = init_document_embeddings() @@ -143,7 +142,7 @@ def test_document_bidirectional_lstm_embeddings(): assert (len(sentence.get_embedding()) == 0) -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_document_bidirectional_lstm_embeddings_using_first_representation(): sentence, glove, charlm = init_document_embeddings() @@ -160,7 +159,7 @@ def test_document_bidirectional_lstm_embeddings_using_first_representation(): assert (len(sentence.get_embedding()) == 0) -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_document_lstm_embeddings_using_first_representation(): sentence, glove, charlm = init_document_embeddings() @@ -177,7 +176,7 @@ def test_document_lstm_embeddings_using_first_representation(): assert (len(sentence.get_embedding()) == 0) -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_document_mean_embeddings(): text = 'I love Berlin. Berlin is a great place to live.' sentence: Sentence = Sentence(text) diff --git a/tests/test_language_model_trainer.py b/tests/test_language_model_trainer.py index d821acc0c6..261377a88a 100644 --- a/tests/test_language_model_trainer.py +++ b/tests/test_language_model_trainer.py @@ -1,4 +1,6 @@ import shutil +import pytest + from pathlib import Path from flair.data import Dictionary, Sentence @@ -7,6 +9,7 @@ from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus +@pytest.mark.slow def test_training(): # get default dictionary dictionary: Dictionary = Dictionary.load('chars') diff --git a/tests/test_model_integration.py b/tests/test_model_integration.py index ddd49ab6f3..ce069b0e89 100644 --- a/tests/test_model_integration.py +++ b/tests/test_model_integration.py @@ -1,4 +1,5 @@ import os +import pytest import shutil from flair.data import Sentence @@ -8,6 +9,7 @@ from flair.trainers import SequenceTaggerTrainer, TextClassifierTrainer +@pytest.mark.slow def test_train_load_use_tagger(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) @@ -39,6 +41,7 @@ def test_train_load_use_tagger(): shutil.rmtree('./results') +@pytest.mark.slow def test_train_charlm_load_use_tagger(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) @@ -70,6 +73,7 @@ def test_train_charlm_load_use_tagger(): shutil.rmtree('./results') +@pytest.mark.slow def test_train_charlm_changed_chache_load_use_tagger(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) @@ -106,6 +110,7 @@ def test_train_charlm_changed_chache_load_use_tagger(): shutil.rmtree('./results') +@pytest.mark.slow def test_train_charlm_nochache_load_use_tagger(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) @@ -137,6 +142,7 @@ def test_train_charlm_nochache_load_use_tagger(): shutil.rmtree('./results') +@pytest.mark.slow def test_load_use_serialized_tagger(): loaded_model: SequenceTagger = SequenceTagger.load('ner') @@ -149,6 +155,7 @@ def test_load_use_serialized_tagger(): loaded_model.predict([sentence_empty]) +@pytest.mark.slow def test_train_load_use_classifier(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() @@ -183,6 +190,7 @@ def test_train_load_use_classifier(): shutil.rmtree('./results') +@pytest.mark.slow def test_train_charlm_load_use_classifier(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() @@ -217,6 +225,7 @@ def test_train_charlm_load_use_classifier(): shutil.rmtree('./results') +@pytest.mark.slow def test_train_charlm__nocache_load_use_classifier(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() diff --git a/tests/test_sequence_tagger.py b/tests/test_sequence_tagger.py index c326c53c64..1792149791 100644 --- a/tests/test_sequence_tagger.py +++ b/tests/test_sequence_tagger.py @@ -1,12 +1,7 @@ -import os - -import pytest - from flair.data import Sentence from flair.models import SequenceTagger -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") def test_tag_sentence(): # test tagging @@ -16,6 +11,8 @@ def test_tag_sentence(): tagger.predict(sentence) + sentence.clear_embeddings() + # test re-tagging tagger = SequenceTagger.load('ner') diff --git a/tests/test_visual.py b/tests/test_visual.py index 145d8ea5fe..5cd91056fb 100644 --- a/tests/test_visual.py +++ b/tests/test_visual.py @@ -11,7 +11,7 @@ from flair.visual.training_curves import Plotter -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_visualize_word_emeddings(resources_path): with open('./resources/visual/snippet.txt') as f: @@ -31,7 +31,7 @@ def test_visualize_word_emeddings(resources_path): os.remove('./resources/visual/sentence_embeddings.html') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_visualize_word_emeddings(): with open('./resources/visual/snippet.txt') as f: @@ -48,7 +48,7 @@ def test_visualize_word_emeddings(): os.remove('./resources/visual/sentence_embeddings.html') -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.") +@pytest.mark.slow def test_visualize(): with open('./resources/visual/snippet.txt') as f: From 0ecd02f5dfee5155fac13f47cd831a156e5f1f1c Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 11:18:41 +0100 Subject: [PATCH 04/11] GH-177: Move test train language model. --- tests/test_language_model_trainer.py | 37 -------------------------- tests/test_model_integration.py | 39 +++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 41 deletions(-) delete mode 100644 tests/test_language_model_trainer.py diff --git a/tests/test_language_model_trainer.py b/tests/test_language_model_trainer.py deleted file mode 100644 index 261377a88a..0000000000 --- a/tests/test_language_model_trainer.py +++ /dev/null @@ -1,37 +0,0 @@ -import shutil -import pytest - -from pathlib import Path - -from flair.data import Dictionary, Sentence -from flair.embeddings import CharLMEmbeddings -from flair.models import LanguageModel -from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus - - -@pytest.mark.slow -def test_training(): - # get default dictionary - dictionary: Dictionary = Dictionary.load('chars') - - # init forward LM with 128 hidden states and 1 layer - language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1) - - # get the example corpus and process at character level in forward direction - corpus: TextCorpus = TextCorpus(str(Path(__file__).parent / 'resources/corpora/lorem_ipsum'), - dictionary, - language_model.is_forward_lm, - character_level=True) - - # train the language model - trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus) - trainer.train('./results', sequence_length=10, mini_batch_size=10, max_epochs=5) - - # use the character LM as embeddings to embed the example sentence 'I love Berlin' - char_lm_embeddings = CharLMEmbeddings('./results/best-lm.pt') - sentence = Sentence('I love Berlin') - char_lm_embeddings.embed(sentence) - print(sentence[1].embedding.size()) - - # clean up results directory - shutil.rmtree('./results', ignore_errors=True) diff --git a/tests/test_model_integration.py b/tests/test_model_integration.py index ce069b0e89..959a519164 100644 --- a/tests/test_model_integration.py +++ b/tests/test_model_integration.py @@ -1,12 +1,15 @@ import os -import pytest import shutil +import pytest -from flair.data import Sentence +from pathlib import Path + +from flair.data import Dictionary, Sentence from flair.data_fetcher import NLPTaskDataFetcher, NLPTask from flair.embeddings import WordEmbeddings, CharLMEmbeddings, DocumentLSTMEmbeddings, TokenEmbeddings -from flair.models import SequenceTagger, TextClassifier +from flair.models import SequenceTagger, TextClassifier, LanguageModel from flair.trainers import SequenceTaggerTrainer, TextClassifierTrainer +from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus @pytest.mark.slow @@ -226,7 +229,7 @@ def test_train_charlm_load_use_classifier(): @pytest.mark.slow -def test_train_charlm__nocache_load_use_classifier(): +def test_train_charlm_nocache_load_use_classifier(): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() @@ -259,3 +262,31 @@ def test_train_charlm__nocache_load_use_classifier(): # clean up results directory shutil.rmtree('./results') + + +@pytest.mark.slow +def test_train_language_model(): + # get default dictionary + dictionary: Dictionary = Dictionary.load('chars') + + # init forward LM with 128 hidden states and 1 layer + language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1) + + # get the example corpus and process at character level in forward direction + corpus: TextCorpus = TextCorpus(str(Path(__file__).parent / 'resources/corpora/lorem_ipsum'), + dictionary, + language_model.is_forward_lm, + character_level=True) + + # train the language model + trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus) + trainer.train('./results', sequence_length=10, mini_batch_size=10, max_epochs=5) + + # use the character LM as embeddings to embed the example sentence 'I love Berlin' + char_lm_embeddings = CharLMEmbeddings('./results/best-lm.pt') + sentence = Sentence('I love Berlin') + char_lm_embeddings.embed(sentence) + print(sentence[1].embedding.size()) + + # clean up results directory + shutil.rmtree('./results', ignore_errors=True) From fe5aa16b209bad386f03084e070b8b030968ffb5 Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 11:59:36 +0100 Subject: [PATCH 05/11] GH-177: Fix warning and use pathlib --- flair/embeddings.py | 8 +-- tests/conftest.py | 5 ++ tests/test_embeddings.py | 21 ++++---- tests/test_model_integration.py | 86 ++++++++++++++++++--------------- tests/test_sequence_tagger.py | 19 -------- tests/test_text_classifier.py | 1 - tests/test_utils.py | 1 - 7 files changed, 65 insertions(+), 76 deletions(-) delete mode 100644 tests/test_sequence_tagger.py diff --git a/flair/embeddings.py b/flair/embeddings.py index 5316c29752..80b50d625e 100644 --- a/flair/embeddings.py +++ b/flair/embeddings.py @@ -207,10 +207,10 @@ def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]: word_embedding = self.precomputed_word_embeddings[token.text] elif token.text.lower() in self.precomputed_word_embeddings: word_embedding = self.precomputed_word_embeddings[token.text.lower()] - elif re.sub('\d', '#', token.text.lower()) in self.precomputed_word_embeddings: - word_embedding = self.precomputed_word_embeddings[re.sub('\d', '#', token.text.lower())] - elif re.sub('\d', '0', token.text.lower()) in self.precomputed_word_embeddings: - word_embedding = self.precomputed_word_embeddings[re.sub('\d', '0', token.text.lower())] + elif re.sub(r'\d', '#', token.text.lower()) in self.precomputed_word_embeddings: + word_embedding = self.precomputed_word_embeddings[re.sub(r'\d', '#', token.text.lower())] + elif re.sub(r'\d', '0', token.text.lower()) in self.precomputed_word_embeddings: + word_embedding = self.precomputed_word_embeddings[re.sub(r'\d', '0', token.text.lower())] else: word_embedding = np.zeros(self.embedding_length, dtype='float') diff --git a/tests/conftest.py b/tests/conftest.py index 9d3db9ac56..48821d4563 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,11 @@ def tasks_base_path(resources_path): return resources_path / 'tasks' +@pytest.fixture(scope="module") +def results_base_path(resources_path): + return resources_path / 'results' + + def pytest_addoption(parser): parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py index e598cea217..c56b7639e5 100644 --- a/tests/test_embeddings.py +++ b/tests/test_embeddings.py @@ -97,17 +97,6 @@ def test_stacked_embeddings(): assert(len(token.get_embedding()) == 0) -@pytest.fixture -def init_document_embeddings(): - text = 'I love Berlin. Berlin is a great place to live.' - sentence: Sentence = Sentence(text) - - glove: TokenEmbeddings = WordEmbeddings('en-glove') - charlm: TokenEmbeddings = CharLMEmbeddings('mix-backward') - - return sentence, glove, charlm - - @pytest.mark.slow def test_document_lstm_embeddings(): sentence, glove, charlm = init_document_embeddings() @@ -195,6 +184,16 @@ def test_document_mean_embeddings(): assert (len(sentence.get_embedding()) == 0) +def init_document_embeddings(): + text = 'I love Berlin. Berlin is a great place to live.' + sentence: Sentence = Sentence(text) + + glove: TokenEmbeddings = WordEmbeddings('en-glove') + charlm: TokenEmbeddings = CharLMEmbeddings('mix-backward') + + return sentence, glove, charlm + + def load_and_apply_word_embeddings(emb_type: str): text = 'I love Berlin.' sentence: Sentence = Sentence(text) diff --git a/tests/test_model_integration.py b/tests/test_model_integration.py index 959a519164..1a714a0807 100644 --- a/tests/test_model_integration.py +++ b/tests/test_model_integration.py @@ -2,8 +2,6 @@ import shutil import pytest -from pathlib import Path - from flair.data import Dictionary, Sentence from flair.data_fetcher import NLPTaskDataFetcher, NLPTask from flair.embeddings import WordEmbeddings, CharLMEmbeddings, DocumentLSTMEmbeddings, TokenEmbeddings @@ -13,7 +11,7 @@ @pytest.mark.slow -def test_train_load_use_tagger(): +def test_train_load_use_tagger(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) tag_dictionary = corpus.make_tag_dictionary('ner') @@ -29,9 +27,9 @@ def test_train_load_use_tagger(): # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) - loaded_model: SequenceTagger = SequenceTagger.load_from_file('./results/final-model.pt') + loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -41,11 +39,11 @@ def test_train_load_use_tagger(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) @pytest.mark.slow -def test_train_charlm_load_use_tagger(): +def test_train_charlm_load_use_tagger(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) tag_dictionary = corpus.make_tag_dictionary('ner') @@ -61,9 +59,9 @@ def test_train_charlm_load_use_tagger(): # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) - loaded_model: SequenceTagger = SequenceTagger.load_from_file('./results/final-model.pt') + loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -73,18 +71,19 @@ def test_train_charlm_load_use_tagger(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) @pytest.mark.slow -def test_train_charlm_changed_chache_load_use_tagger(): +def test_train_charlm_changed_chache_load_use_tagger(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) tag_dictionary = corpus.make_tag_dictionary('ner') # make a temporary cache directory that we remove afterwards - os.makedirs('./results/cache/', exist_ok=True) - embeddings = CharLMEmbeddings('news-forward-fast', cache_directory='./results/cache/') + cache_dir = results_base_path / 'cache' + os.makedirs(cache_dir, exist_ok=True) + embeddings = CharLMEmbeddings('news-forward-fast', cache_directory=cache_dir) tagger: SequenceTagger = SequenceTagger(hidden_size=256, embeddings=embeddings, @@ -95,12 +94,12 @@ def test_train_charlm_changed_chache_load_use_tagger(): # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) # remove the cache directory - shutil.rmtree('./results/cache') + shutil.rmtree(cache_dir) - loaded_model: SequenceTagger = SequenceTagger.load_from_file('./results/final-model.pt') + loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -110,11 +109,11 @@ def test_train_charlm_changed_chache_load_use_tagger(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) @pytest.mark.slow -def test_train_charlm_nochache_load_use_tagger(): +def test_train_charlm_nochache_load_use_tagger(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) tag_dictionary = corpus.make_tag_dictionary('ner') @@ -130,9 +129,9 @@ def test_train_charlm_nochache_load_use_tagger(): # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) - loaded_model: SequenceTagger = SequenceTagger.load_from_file('./results/final-model.pt') + loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -142,10 +141,9 @@ def test_train_charlm_nochache_load_use_tagger(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) -@pytest.mark.slow def test_load_use_serialized_tagger(): loaded_model: SequenceTagger = SequenceTagger.load('ner') @@ -157,9 +155,18 @@ def test_load_use_serialized_tagger(): loaded_model.predict([sentence, sentence_empty]) loaded_model.predict([sentence_empty]) + sentence.clear_embeddings() + sentence_empty.clear_embeddings() + + loaded_model: SequenceTagger = SequenceTagger.load('pos') + + loaded_model.predict(sentence) + loaded_model.predict([sentence, sentence_empty]) + loaded_model.predict([sentence_empty]) + @pytest.mark.slow -def test_train_load_use_classifier(): +def test_train_load_use_classifier(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() @@ -170,7 +177,7 @@ def test_train_load_use_classifier(): model = TextClassifier(document_embeddings, label_dict, False) trainer = TextClassifierTrainer(model, corpus, label_dict, False) - trainer.train('./results', max_epochs=2) + trainer.train(str(results_base_path), max_epochs=2) sentence = Sentence("Berlin is a really nice city.") @@ -180,7 +187,7 @@ def test_train_load_use_classifier(): assert (0.0 <= l.score <= 1.0) assert (type(l.score) is float) - loaded_model = TextClassifier.load_from_file('./results/final-model.pt') + loaded_model = TextClassifier.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -190,11 +197,11 @@ def test_train_load_use_classifier(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) @pytest.mark.slow -def test_train_charlm_load_use_classifier(): +def test_train_charlm_load_use_classifier(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() @@ -205,7 +212,7 @@ def test_train_charlm_load_use_classifier(): model = TextClassifier(document_embeddings, label_dict, False) trainer = TextClassifierTrainer(model, corpus, label_dict, False) - trainer.train('./results', max_epochs=2) + trainer.train(str(results_base_path), max_epochs=2) sentence = Sentence("Berlin is a really nice city.") @@ -215,7 +222,7 @@ def test_train_charlm_load_use_classifier(): assert (0.0 <= l.score <= 1.0) assert (type(l.score) is float) - loaded_model = TextClassifier.load_from_file('./results/final-model.pt') + loaded_model = TextClassifier.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -225,11 +232,11 @@ def test_train_charlm_load_use_classifier(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) @pytest.mark.slow -def test_train_charlm_nocache_load_use_classifier(): +def test_train_charlm_nocache_load_use_classifier(results_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) label_dict = corpus.make_label_dictionary() @@ -241,7 +248,7 @@ def test_train_charlm_nocache_load_use_classifier(): model = TextClassifier(document_embeddings, label_dict, False) trainer = TextClassifierTrainer(model, corpus, label_dict, False) - trainer.train('./results', max_epochs=2) + trainer.train(str(results_base_path), max_epochs=2) sentence = Sentence("Berlin is a really nice city.") @@ -251,7 +258,7 @@ def test_train_charlm_nocache_load_use_classifier(): assert (0.0 <= l.score <= 1.0) assert (type(l.score) is float) - loaded_model = TextClassifier.load_from_file('./results/final-model.pt') + loaded_model = TextClassifier.load_from_file(results_base_path / 'final-model.pt') sentence = Sentence('I love Berlin') sentence_empty = Sentence(' ') @@ -261,11 +268,11 @@ def test_train_charlm_nocache_load_use_classifier(): loaded_model.predict([sentence_empty]) # clean up results directory - shutil.rmtree('./results') + shutil.rmtree(results_base_path) @pytest.mark.slow -def test_train_language_model(): +def test_train_language_model(results_base_path, resources_path): # get default dictionary dictionary: Dictionary = Dictionary.load('chars') @@ -273,20 +280,19 @@ def test_train_language_model(): language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1) # get the example corpus and process at character level in forward direction - corpus: TextCorpus = TextCorpus(str(Path(__file__).parent / 'resources/corpora/lorem_ipsum'), + corpus: TextCorpus = TextCorpus(str(resources_path / 'corpora/lorem_ipsum'), dictionary, language_model.is_forward_lm, character_level=True) # train the language model trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus) - trainer.train('./results', sequence_length=10, mini_batch_size=10, max_epochs=5) + trainer.train(str(results_base_path), sequence_length=10, mini_batch_size=10, max_epochs=5) # use the character LM as embeddings to embed the example sentence 'I love Berlin' - char_lm_embeddings = CharLMEmbeddings('./results/best-lm.pt') + char_lm_embeddings = CharLMEmbeddings(str(results_base_path / 'best-lm.pt')) sentence = Sentence('I love Berlin') char_lm_embeddings.embed(sentence) - print(sentence[1].embedding.size()) # clean up results directory - shutil.rmtree('./results', ignore_errors=True) + shutil.rmtree(results_base_path, ignore_errors=True) diff --git a/tests/test_sequence_tagger.py b/tests/test_sequence_tagger.py deleted file mode 100644 index 1792149791..0000000000 --- a/tests/test_sequence_tagger.py +++ /dev/null @@ -1,19 +0,0 @@ -from flair.data import Sentence -from flair.models import SequenceTagger - - -def test_tag_sentence(): - - # test tagging - sentence = Sentence('I love Berlin') - - tagger = SequenceTagger.load('pos') - - tagger.predict(sentence) - - sentence.clear_embeddings() - - # test re-tagging - tagger = SequenceTagger.load('ner') - - tagger.predict(sentence) diff --git a/tests/test_text_classifier.py b/tests/test_text_classifier.py index d8dfa36bc5..1336cb96a1 100644 --- a/tests/test_text_classifier.py +++ b/tests/test_text_classifier.py @@ -7,7 +7,6 @@ from flair.models.text_classification_model import TextClassifier -@pytest.fixture def init(tasks_base_path) -> Tuple[TaggedCorpus, Dictionary, TextClassifier]: corpus = NLPTaskDataFetcher.fetch_data(NLPTask.AG_NEWS, tasks_base_path) label_dict = corpus.make_label_dictionary() diff --git a/tests/test_utils.py b/tests/test_utils.py index 7dad1ed69f..76d54671a8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,6 @@ from flair.training_utils import calculate_micro_avg_metric, calculate_class_metrics, convert_labels_to_one_hot -@pytest.fixture def init(): y_true = [[0, 1, 1], [0, 0, 1], [1, 1, 0]] y_pred = [[0, 1, 1], [0, 0, 0], [1, 0, 0]] From 236066efe2f86d387d13a07ef80a3d0e558c880d Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 12:00:06 +0100 Subject: [PATCH 06/11] GH-177: Update documentation --- .travis.yml | 2 -- README.md | 8 ++++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4063862c02..5923347cea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,3 @@ -env: - - TRAVIS=true language: python sudo: false python: diff --git a/README.md b/README.md index 691163ef5e..2b75d55018 100644 --- a/README.md +++ b/README.md @@ -148,11 +148,15 @@ the code should hopefully be easy. You need [Pipenv](https://pipenv.readthedocs.io/) for this: -``` +```bash pipenv install --dev && pipenv shell -TRAVIS=true pytest +pytest ``` +If you also want to run the slow tests, execute: +```bash +pytest --runslow +``` ## [License](/LICENSE) From 1e2242f8c85ecc5fad0157eacf1cb44289ddeed9 Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 12:58:54 +0100 Subject: [PATCH 07/11] GH-177: Fix tests - Use pathlib --- tests/test_model_integration.py | 31 +++++++++++++++++-------------- tests/test_visual.py | 22 +++++++++++----------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/tests/test_model_integration.py b/tests/test_model_integration.py index 1a714a0807..88e3ce14cb 100644 --- a/tests/test_model_integration.py +++ b/tests/test_model_integration.py @@ -11,9 +11,9 @@ @pytest.mark.slow -def test_train_load_use_tagger(results_base_path): +def test_train_load_use_tagger(results_base_path, tasks_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) tag_dictionary = corpus.make_tag_dictionary('ner') embeddings = WordEmbeddings('glove') @@ -43,9 +43,9 @@ def test_train_load_use_tagger(results_base_path): @pytest.mark.slow -def test_train_charlm_load_use_tagger(results_base_path): +def test_train_charlm_load_use_tagger(results_base_path, tasks_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) tag_dictionary = corpus.make_tag_dictionary('ner') embeddings = CharLMEmbeddings('news-forward-fast') @@ -75,9 +75,9 @@ def test_train_charlm_load_use_tagger(results_base_path): @pytest.mark.slow -def test_train_charlm_changed_chache_load_use_tagger(results_base_path): +def test_train_charlm_changed_chache_load_use_tagger(results_base_path, tasks_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) tag_dictionary = corpus.make_tag_dictionary('ner') # make a temporary cache directory that we remove afterwards @@ -113,9 +113,9 @@ def test_train_charlm_changed_chache_load_use_tagger(results_base_path): @pytest.mark.slow -def test_train_charlm_nochache_load_use_tagger(results_base_path): +def test_train_charlm_nochache_load_use_tagger(results_base_path, tasks_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION) + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) tag_dictionary = corpus.make_tag_dictionary('ner') embeddings = CharLMEmbeddings('news-forward-fast', use_cache=False) @@ -166,8 +166,9 @@ def test_load_use_serialized_tagger(): @pytest.mark.slow -def test_train_load_use_classifier(results_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) +def test_train_load_use_classifier(results_base_path, tasks_base_path): + + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, base_path=tasks_base_path) label_dict = corpus.make_label_dictionary() glove_embedding: WordEmbeddings = WordEmbeddings('en-glove') @@ -201,8 +202,9 @@ def test_train_load_use_classifier(results_base_path): @pytest.mark.slow -def test_train_charlm_load_use_classifier(results_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) +def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path): + + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, base_path=tasks_base_path) label_dict = corpus.make_label_dictionary() glove_embedding: TokenEmbeddings = CharLMEmbeddings('news-forward-fast') @@ -236,8 +238,9 @@ def test_train_charlm_load_use_classifier(results_base_path): @pytest.mark.slow -def test_train_charlm_nocache_load_use_classifier(results_base_path): - corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB) +def test_train_charlm_nocache_load_use_classifier(results_base_path, tasks_base_path): + + corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, base_path=tasks_base_path) label_dict = corpus.make_label_dictionary() glove_embedding: TokenEmbeddings = CharLMEmbeddings('news-forward-fast', use_cache=False) diff --git a/tests/test_visual.py b/tests/test_visual.py index 5cd91056fb..c88d6cd5f8 100644 --- a/tests/test_visual.py +++ b/tests/test_visual.py @@ -14,7 +14,7 @@ @pytest.mark.slow def test_visualize_word_emeddings(resources_path): - with open('./resources/visual/snippet.txt') as f: + with open(resources_path / 'visual/snippet.txt') as f: sentences = [x for x in f.read().split('\n') if x] sentences = [Sentence(x) for x in sentences] @@ -25,16 +25,16 @@ def test_visualize_word_emeddings(resources_path): embeddings = StackedEmbeddings([charlm_embedding_backward, charlm_embedding_forward]) visualizer = Visualizer() - visualizer.visualize_word_emeddings(embeddings, sentences, './resources/visual/sentence_embeddings.html') + visualizer.visualize_word_emeddings(embeddings, sentences, str(resources_path / 'visual/sentence_embeddings.html')) # clean up directory - os.remove('./resources/visual/sentence_embeddings.html') + os.remove(resources_path / 'visual/sentence_embeddings.html') @pytest.mark.slow -def test_visualize_word_emeddings(): +def test_visualize_word_emeddings(resources_path): - with open('./resources/visual/snippet.txt') as f: + with open(resources_path / 'visual/snippet.txt') as f: sentences = [x for x in f.read().split('\n') if x] sentences = [Sentence(x) for x in sentences] @@ -42,16 +42,16 @@ def test_visualize_word_emeddings(): charlm_embedding_forward = CharLMEmbeddings('news-forward') visualizer = Visualizer() - visualizer.visualize_char_emeddings(charlm_embedding_forward, sentences, './resources/visual/sentence_embeddings.html') + visualizer.visualize_char_emeddings(charlm_embedding_forward, sentences, str(resources_path / 'visual/sentence_embeddings.html')) # clean up directory - os.remove('./resources/visual/sentence_embeddings.html') + os.remove(resources_path / 'visual/sentence_embeddings.html') @pytest.mark.slow -def test_visualize(): +def test_visualize(resources_path): - with open('./resources/visual/snippet.txt') as f: + with open(resources_path / 'visual/snippet.txt') as f: sentences = [x for x in f.read().split('\n') if x] sentences = [Sentence(x) for x in sentences] @@ -73,10 +73,10 @@ def test_visualize(): trans_ = tSNE() reduced = trans_.fit(X) - visualizer.visualize(reduced, contexts, './resources/visual/char_embeddings.html') + visualizer.visualize(reduced, contexts, str(resources_path / 'visual/char_embeddings.html')) # clean up directory - os.remove('./resources/visual/char_embeddings.html') + os.remove(resources_path / 'visual/char_embeddings.html') def test_highlighter(resources_path): From cd9d28dfc27fdd2d8e2d23f66bc258fc318df96b Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 13:06:08 +0100 Subject: [PATCH 08/11] GH-177: Run slowtest on travis. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5923347cea..132f86af05 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,4 @@ install: - pip install -r requirements.txt -q before_script: cd tests script: - - pytest \ No newline at end of file + - pytest --runslow From e03fb19e006d7f40f292a6c3f2a265a658a8696f Mon Sep 17 00:00:00 2001 From: tabergma Date: Thu, 1 Nov 2018 13:17:24 +0100 Subject: [PATCH 09/11] GH-177: Revert running slowtest. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 132f86af05..7d10d62821 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,4 @@ install: - pip install -r requirements.txt -q before_script: cd tests script: - - pytest --runslow + - pytest From a576b492f957d9242985b688ea7be565688f5c52 Mon Sep 17 00:00:00 2001 From: tabergma Date: Tue, 6 Nov 2018 13:33:44 +0100 Subject: [PATCH 10/11] GH-177: Run integration tests on travis. --- .travis.yml | 2 +- README.md | 11 ++++++++++- tests/conftest.py | 22 +++++++++++++++------ tests/test_model_integration.py | 35 +++++++++++++++++---------------- 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7d10d62821..099372cca2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,4 @@ install: - pip install -r requirements.txt -q before_script: cd tests script: - - pytest + - pytest --runintegration diff --git a/README.md b/README.md index 2b75d55018..3f07be3fc6 100644 --- a/README.md +++ b/README.md @@ -153,11 +153,20 @@ pipenv install --dev && pipenv shell pytest ``` -If you also want to run the slow tests, execute: +To run integration tests execute: +```bash +pytest --runintegration +``` +The integration tests will train small models. +Afterwards, the trained model will be loaded for prediction. + +To also run slow tests, such as loading and using the embeddings provided by flair, you should execute: ```bash pytest --runslow ``` + + ## [License](/LICENSE) The MIT License (MIT) diff --git a/tests/conftest.py b/tests/conftest.py index 48821d4563..211931ce52 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,13 +21,23 @@ def pytest_addoption(parser): parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" ) + parser.addoption( + "--runintegration", action="store_true", default=False, help="run integration tests" + ) def pytest_collection_modifyitems(config, items): - if config.getoption("--runslow"): - # --runslow given in cli: do not skip slow tests + if config.getoption("--runslow") and config.getoption("--runintegration"): return - skip_slow = pytest.mark.skip(reason="need --runslow option to run") - for item in items: - if "slow" in item.keywords: - item.add_marker(skip_slow) + + if not config.getoption("--runslow"): + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + if not config.getoption("--runintegration"): + skip_integration = pytest.mark.skip(reason="need --runintegration option to run") + for item in items: + if "integration" in item.keywords: + item.add_marker(skip_integration) diff --git a/tests/test_model_integration.py b/tests/test_model_integration.py index 88e3ce14cb..b3587e6f0b 100644 --- a/tests/test_model_integration.py +++ b/tests/test_model_integration.py @@ -10,7 +10,7 @@ from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus -@pytest.mark.slow +@pytest.mark.integration def test_train_load_use_tagger(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) @@ -27,7 +27,7 @@ def test_train_load_use_tagger(results_base_path, tasks_base_path): # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=2) loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') @@ -42,7 +42,7 @@ def test_train_load_use_tagger(results_base_path, tasks_base_path): shutil.rmtree(results_base_path) -@pytest.mark.slow +@pytest.mark.integration def test_train_charlm_load_use_tagger(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) @@ -59,7 +59,7 @@ def test_train_charlm_load_use_tagger(results_base_path, tasks_base_path): # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=2) loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') @@ -74,7 +74,7 @@ def test_train_charlm_load_use_tagger(results_base_path, tasks_base_path): shutil.rmtree(results_base_path) -@pytest.mark.slow +@pytest.mark.integration def test_train_charlm_changed_chache_load_use_tagger(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) @@ -94,7 +94,7 @@ def test_train_charlm_changed_chache_load_use_tagger(results_base_path, tasks_ba # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=2) # remove the cache directory shutil.rmtree(cache_dir) @@ -112,7 +112,7 @@ def test_train_charlm_changed_chache_load_use_tagger(results_base_path, tasks_ba shutil.rmtree(results_base_path) -@pytest.mark.slow +@pytest.mark.integration def test_train_charlm_nochache_load_use_tagger(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, base_path=tasks_base_path) @@ -129,7 +129,7 @@ def test_train_charlm_nochache_load_use_tagger(results_base_path, tasks_base_pat # initialize trainer trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True) - trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=3) + trainer.train(str(results_base_path), learning_rate=0.1, mini_batch_size=2, max_epochs=2) loaded_model: SequenceTagger = SequenceTagger.load_from_file(results_base_path / 'final-model.pt') @@ -144,6 +144,7 @@ def test_train_charlm_nochache_load_use_tagger(results_base_path, tasks_base_pat shutil.rmtree(results_base_path) +@pytest.mark.integration def test_load_use_serialized_tagger(): loaded_model: SequenceTagger = SequenceTagger.load('ner') @@ -165,7 +166,7 @@ def test_load_use_serialized_tagger(): loaded_model.predict([sentence_empty]) -@pytest.mark.slow +@pytest.mark.integration def test_train_load_use_classifier(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, base_path=tasks_base_path) @@ -177,7 +178,7 @@ def test_train_load_use_classifier(results_base_path, tasks_base_path): model = TextClassifier(document_embeddings, label_dict, False) - trainer = TextClassifierTrainer(model, corpus, label_dict, False) + trainer = TextClassifierTrainer(model, corpus, label_dict, test_mode=True) trainer.train(str(results_base_path), max_epochs=2) sentence = Sentence("Berlin is a really nice city.") @@ -201,7 +202,7 @@ def test_train_load_use_classifier(results_base_path, tasks_base_path): shutil.rmtree(results_base_path) -@pytest.mark.slow +@pytest.mark.integration def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, base_path=tasks_base_path) @@ -213,7 +214,7 @@ def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path): model = TextClassifier(document_embeddings, label_dict, False) - trainer = TextClassifierTrainer(model, corpus, label_dict, False) + trainer = TextClassifierTrainer(model, corpus, label_dict, test_mode=True) trainer.train(str(results_base_path), max_epochs=2) sentence = Sentence("Berlin is a really nice city.") @@ -237,7 +238,7 @@ def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path): shutil.rmtree(results_base_path) -@pytest.mark.slow +@pytest.mark.integration def test_train_charlm_nocache_load_use_classifier(results_base_path, tasks_base_path): corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, base_path=tasks_base_path) @@ -250,7 +251,7 @@ def test_train_charlm_nocache_load_use_classifier(results_base_path, tasks_base_ model = TextClassifier(document_embeddings, label_dict, False) - trainer = TextClassifierTrainer(model, corpus, label_dict, False) + trainer = TextClassifierTrainer(model, corpus, label_dict, test_mode=True) trainer.train(str(results_base_path), max_epochs=2) sentence = Sentence("Berlin is a really nice city.") @@ -274,7 +275,7 @@ def test_train_charlm_nocache_load_use_classifier(results_base_path, tasks_base_ shutil.rmtree(results_base_path) -@pytest.mark.slow +@pytest.mark.integration def test_train_language_model(results_base_path, resources_path): # get default dictionary dictionary: Dictionary = Dictionary.load('chars') @@ -289,8 +290,8 @@ def test_train_language_model(results_base_path, resources_path): character_level=True) # train the language model - trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus) - trainer.train(str(results_base_path), sequence_length=10, mini_batch_size=10, max_epochs=5) + trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus, test_mode=True) + trainer.train(str(results_base_path), sequence_length=10, mini_batch_size=10, max_epochs=2) # use the character LM as embeddings to embed the example sentence 'I love Berlin' char_lm_embeddings = CharLMEmbeddings(str(results_base_path / 'best-lm.pt')) From c8f93d1550959ec8b70a05730a4c7e044de4e9ba Mon Sep 17 00:00:00 2001 From: tabergma Date: Tue, 6 Nov 2018 13:47:07 +0100 Subject: [PATCH 11/11] GH-177: Run integration test only at PR --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 099372cca2..573d4a4369 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,5 @@ install: - pip install -r requirements.txt -q before_script: cd tests script: - - pytest --runintegration + - 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then pytest --runintegration; fi' + - 'if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then pytest; fi' \ No newline at end of file