Skip to content

Commit

Permalink
Merge pull request #183 from zalandoresearch/GH-177-tests
Browse files Browse the repository at this point in the history
GH-177: Update tests
  • Loading branch information
Alan Akbik authored Nov 6, 2018
2 parents 1086751 + c8f93d1 commit 97a6c4c
Show file tree
Hide file tree
Showing 14 changed files with 196 additions and 250 deletions.
5 changes: 2 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
env:
- TRAVIS=true
language: python
sudo: false
python:
Expand All @@ -8,4 +6,5 @@ install:
- pip install -r requirements.txt -q
before_script: cd tests
script:
- pytest
- 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then pytest --runintegration; fi'
- 'if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then pytest; fi'
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,23 @@ the code should hopefully be easy.

You need [Pipenv](https://pipenv.readthedocs.io/) for this:

```
```bash
pipenv install --dev && pipenv shell
TRAVIS=true pytest
pytest
```

To run the integration tests, execute:
```bash
pytest --runintegration
```
Each integration test trains a small model and then loads the trained model back for prediction.

To also run slow tests, such as loading and using the embeddings provided by flair, you should execute:
```bash
pytest --runslow
```



## [License](/LICENSE)
Expand Down
8 changes: 4 additions & 4 deletions flair/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,10 @@ def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
word_embedding = self.precomputed_word_embeddings[token.text]
elif token.text.lower() in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[token.text.lower()]
elif re.sub('\d', '#', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub('\d', '#', token.text.lower())]
elif re.sub('\d', '0', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub('\d', '0', token.text.lower())]
elif re.sub(r'\d', '#', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub(r'\d', '#', token.text.lower())]
elif re.sub(r'\d', '0', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub(r'\d', '0', token.text.lower())]
else:
word_embedding = np.zeros(self.embedding_length, dtype='float')

Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
import pytest
from pathlib import Path


@pytest.fixture(scope="module")
def resources_path():
    """Return the path of the test resources directory (tests/resources)."""
    tests_dir = Path(__file__).parent
    return tests_dir / 'resources'


@pytest.fixture(scope="module")
def tasks_base_path(resources_path):
    """Return the directory holding the bundled task datasets."""
    return resources_path.joinpath('tasks')


@pytest.fixture(scope="module")
def results_base_path(resources_path):
    """Return the directory where test runs write their results."""
    return resources_path.joinpath('results')


def pytest_addoption(parser):
    """Register the --runslow and --runintegration command-line flags.

    Both are boolean flags defaulting to False; they gate the tests marked
    'slow' and 'integration' respectively (see pytest_collection_modifyitems).
    """
    for flag, description in (
        ("--runslow", "run slow tests"),
        ("--runintegration", "run integration tests"),
    ):
        parser.addoption(flag, action="store_true", default=False, help=description)


def pytest_collection_modifyitems(config, items):
    """Skip 'slow'/'integration'-marked tests unless their flag was passed.

    When both --runslow and --runintegration are given, nothing is skipped.
    Otherwise each collected item carrying a gated marker gets a skip marker
    naming the flag that would enable it.
    """
    run_slow = config.getoption("--runslow")
    run_integration = config.getoption("--runintegration")

    # Both gates open: run everything untouched.
    if run_slow and run_integration:
        return

    if not run_slow:
        skip_marker = pytest.mark.skip(reason="need --runslow option to run")
        for test_item in items:
            if "slow" in test_item.keywords:
                test_item.add_marker(skip_marker)

    if not run_integration:
        skip_marker = pytest.mark.skip(reason="need --runintegration option to run")
        for test_item in items:
            if "integration" in test_item.keywords:
                test_item.add_marker(skip_marker)
3 changes: 0 additions & 3 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ def test_label_set_confidence():

assert (0.2 == label.score)

# with pytest.raises(ValueError):
# label.name = ''


def test_tagged_corpus_make_label_dictionary():
sentence_1 = Sentence('sentence 1', labels=[Label('class_1')])
Expand Down
72 changes: 35 additions & 37 deletions tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import pytest

from flair.embeddings import WordEmbeddings, TokenEmbeddings, CharLMEmbeddings, StackedEmbeddings, \
Expand All @@ -7,77 +6,77 @@
from flair.data import Sentence


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_glove():
    # marked slow: loads the full pretrained 'en-glove' word embeddings
    embedding = 'en-glove'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_numberbatch():
    # marked slow: loads the full pretrained 'en-numberbatch' word embeddings
    embedding = 'en-numberbatch'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_extvec():
    # marked slow: loads the full pretrained 'en-extvec' word embeddings
    embedding = 'en-extvec'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_crawl():
    # marked slow: loads the full pretrained 'en-crawl' word embeddings
    embedding = 'en-crawl'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_news():
    # marked slow: loads the full pretrained 'en-news' word embeddings
    embedding = 'en-news'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_de_fasttext():
    # marked slow: loads the full pretrained 'de-fasttext' word embeddings
    embedding = 'de-fasttext'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_de_numberbatch():
    # marked slow: loads the full pretrained 'de-numberbatch' word embeddings
    embedding = 'de-numberbatch'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_sv_fasttext():
    # marked slow: loads the full pretrained 'sv-fasttext' word embeddings
    embedding = 'sv-fasttext'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_news_forward():
    # marked slow: loads the full pretrained 'news-forward' character LM
    model = 'news-forward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_news_backward():
    # marked slow: loads the full pretrained 'news-backward' character LM
    model = 'news-backward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_mix_forward():
    # marked slow: loads the full pretrained 'mix-forward' character LM
    model = 'mix-forward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_mix_backward():
    # marked slow: loads the full pretrained 'mix-backward' character LM
    model = 'mix-backward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_german_forward():
    # marked slow: loads the full pretrained 'german-forward' character LM
    model = 'german-forward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_german_backward():
    # marked slow: loads the full pretrained 'german-backward' character LM
    model = 'german-backward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_stacked_embeddings():
text = 'I love Berlin.'
sentence: Sentence = Sentence(text)
Expand All @@ -98,23 +97,12 @@ def test_stacked_embeddings():
assert(len(token.get_embedding()) == 0)


@pytest.fixture
def init_document_embeddings():
text = 'I love Berlin. Berlin is a great place to live.'
sentence: Sentence = Sentence(text)

glove: TokenEmbeddings = WordEmbeddings('en-glove')
charlm: TokenEmbeddings = CharLMEmbeddings('mix-backward')

return sentence, glove, charlm


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_lstm_embeddings():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=False, use_first_representation=False)
bidirectional=False)

embeddings.embed(sentence)

Expand All @@ -126,12 +114,12 @@ def test_document_lstm_embeddings():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_bidirectional_lstm_embeddings():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=True, use_first_representation=False)
bidirectional=True)

embeddings.embed(sentence)

Expand All @@ -143,12 +131,12 @@ def test_document_bidirectional_lstm_embeddings():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_bidirectional_lstm_embeddings_using_first_representation():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=True, use_first_representation=True)
bidirectional=True)

embeddings.embed(sentence)

Expand All @@ -160,12 +148,12 @@ def test_document_bidirectional_lstm_embeddings_using_first_representation():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_lstm_embeddings_using_first_representation():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=False, use_first_representation=True)
bidirectional=False)

embeddings.embed(sentence)

Expand All @@ -177,7 +165,7 @@ def test_document_lstm_embeddings_using_first_representation():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_mean_embeddings():
text = 'I love Berlin. Berlin is a great place to live.'
sentence: Sentence = Sentence(text)
Expand All @@ -196,6 +184,16 @@ def test_document_mean_embeddings():
assert (len(sentence.get_embedding()) == 0)


def init_document_embeddings():
    """Return a test sentence plus the glove and char-LM token embeddings
    shared by the document-embedding tests."""
    text = 'I love Berlin. Berlin is a great place to live.'
    glove: TokenEmbeddings = WordEmbeddings('en-glove')
    charlm: TokenEmbeddings = CharLMEmbeddings('mix-backward')
    return Sentence(text), glove, charlm


def load_and_apply_word_embeddings(emb_type: str):
text = 'I love Berlin.'
sentence: Sentence = Sentence(text)
Expand All @@ -221,4 +219,4 @@ def load_and_apply_char_lm_embeddings(emb_type: str):

token.clear_embeddings()

assert(len(token.get_embedding()) == 0)
assert(len(token.get_embedding()) == 0)
36 changes: 0 additions & 36 deletions tests/test_language_model_trainer.py

This file was deleted.

Loading

0 comments on commit 97a6c4c

Please sign in to comment.