Skip to content

Commit

Permalink
Merge pull request #183 from zalandoresearch/GH-177-tests
Browse files Browse the repository at this point in the history
GH-177: Update tests
  • Loading branch information
Alan Akbik authored Nov 6, 2018
2 parents 1086751 + c8f93d1 commit 97a6c4c
Show file tree
Hide file tree
Showing 14 changed files with 196 additions and 250 deletions.
5 changes: 2 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
env:
- TRAVIS=true
language: python
sudo: false
python:
Expand All @@ -8,4 +6,5 @@ install:
- pip install -r requirements.txt -q
before_script: cd tests
script:
- pytest
- 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then pytest --runintegration; fi'
- 'if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then pytest; fi'
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,23 @@ the code should hopefully be easy.

You need [Pipenv](https://pipenv.readthedocs.io/) for this:

```
```bash
pipenv install --dev && pipenv shell
TRAVIS=true pytest
pytest
```

To run the integration tests, execute:
```bash
pytest --runintegration
```
Each integration test trains a small model and then loads the trained model back for prediction.

To also run slow tests, such as loading and using the embeddings provided by flair, you should execute:
```bash
pytest --runslow
```



## [License](/LICENSE)
Expand Down
8 changes: 4 additions & 4 deletions flair/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,10 @@ def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
word_embedding = self.precomputed_word_embeddings[token.text]
elif token.text.lower() in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[token.text.lower()]
elif re.sub('\d', '#', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub('\d', '#', token.text.lower())]
elif re.sub('\d', '0', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub('\d', '0', token.text.lower())]
elif re.sub(r'\d', '#', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub(r'\d', '#', token.text.lower())]
elif re.sub(r'\d', '0', token.text.lower()) in self.precomputed_word_embeddings:
word_embedding = self.precomputed_word_embeddings[re.sub(r'\d', '0', token.text.lower())]
else:
word_embedding = np.zeros(self.embedding_length, dtype='float')

Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
import pytest
from pathlib import Path


@pytest.fixture(scope="module")
def resources_path():
    """Return the path of the test resources directory (tests/resources)."""
    tests_dir = Path(__file__).parent
    return tests_dir / 'resources'


@pytest.fixture(scope="module")
def tasks_base_path(resources_path):
    """Return the directory holding the bundled task datasets."""
    return resources_path.joinpath('tasks')


@pytest.fixture(scope="module")
def results_base_path(resources_path):
    """Return the directory where test runs write their results."""
    return resources_path.joinpath('results')


def pytest_addoption(parser):
    """Register the --runslow and --runintegration command-line flags.

    Both are boolean flags defaulting to False; they gate the tests marked
    'slow' and 'integration' respectively (see pytest_collection_modifyitems).
    """
    for flag, description in (
        ("--runslow", "run slow tests"),
        ("--runintegration", "run integration tests"),
    ):
        parser.addoption(flag, action="store_true", default=False, help=description)


def pytest_collection_modifyitems(config, items):
    """Skip 'slow'/'integration'-marked tests unless their flag was passed.

    When both --runslow and --runintegration are given, nothing is skipped.
    Otherwise each collected item carrying a gated marker gets a skip marker
    naming the flag that would enable it.
    """
    run_slow = config.getoption("--runslow")
    run_integration = config.getoption("--runintegration")

    # Both gates open: run everything untouched.
    if run_slow and run_integration:
        return

    if not run_slow:
        skip_marker = pytest.mark.skip(reason="need --runslow option to run")
        for test_item in items:
            if "slow" in test_item.keywords:
                test_item.add_marker(skip_marker)

    if not run_integration:
        skip_marker = pytest.mark.skip(reason="need --runintegration option to run")
        for test_item in items:
            if "integration" in test_item.keywords:
                test_item.add_marker(skip_marker)
3 changes: 0 additions & 3 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ def test_label_set_confidence():

assert (0.2 == label.score)

# with pytest.raises(ValueError):
# label.name = ''


def test_tagged_corpus_make_label_dictionary():
sentence_1 = Sentence('sentence 1', labels=[Label('class_1')])
Expand Down
72 changes: 35 additions & 37 deletions tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import pytest

from flair.embeddings import WordEmbeddings, TokenEmbeddings, CharLMEmbeddings, StackedEmbeddings, \
Expand All @@ -7,77 +6,77 @@
from flair.data import Sentence


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_glove():
    # marked slow: loads the full pretrained 'en-glove' word embeddings
    embedding = 'en-glove'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_numberbatch():
    # marked slow: loads the full pretrained 'en-numberbatch' word embeddings
    embedding = 'en-numberbatch'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_extvec():
    # marked slow: loads the full pretrained 'en-extvec' word embeddings
    embedding = 'en-extvec'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_crawl():
    # marked slow: loads the full pretrained 'en-crawl' word embeddings
    embedding = 'en-crawl'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_en_news():
    # marked slow: loads the full pretrained 'en-news' word embeddings
    embedding = 'en-news'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_de_fasttext():
    # marked slow: loads the full pretrained 'de-fasttext' word embeddings
    embedding = 'de-fasttext'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_de_numberbatch():
    # marked slow: loads the full pretrained 'de-numberbatch' word embeddings
    embedding = 'de-numberbatch'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_sv_fasttext():
    # marked slow: loads the full pretrained 'sv-fasttext' word embeddings
    embedding = 'sv-fasttext'
    load_and_apply_word_embeddings(embedding)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_news_forward():
    # marked slow: loads the full pretrained 'news-forward' character LM
    model = 'news-forward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_news_backward():
    # marked slow: loads the full pretrained 'news-backward' character LM
    model = 'news-backward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_mix_forward():
    # marked slow: loads the full pretrained 'mix-forward' character LM
    model = 'mix-forward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_mix_backward():
    # marked slow: loads the full pretrained 'mix-backward' character LM
    model = 'mix-backward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_german_forward():
    # marked slow: loads the full pretrained 'german-forward' character LM
    model = 'german-forward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_german_backward():
    # marked slow: loads the full pretrained 'german-backward' character LM
    model = 'german-backward'
    load_and_apply_char_lm_embeddings(model)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_stacked_embeddings():
text = 'I love Berlin.'
sentence: Sentence = Sentence(text)
Expand All @@ -98,23 +97,12 @@ def test_stacked_embeddings():
assert(len(token.get_embedding()) == 0)


@pytest.fixture
def init_document_embeddings():
text = 'I love Berlin. Berlin is a great place to live.'
sentence: Sentence = Sentence(text)

glove: TokenEmbeddings = WordEmbeddings('en-glove')
charlm: TokenEmbeddings = CharLMEmbeddings('mix-backward')

return sentence, glove, charlm


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_lstm_embeddings():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=False, use_first_representation=False)
bidirectional=False)

embeddings.embed(sentence)

Expand All @@ -126,12 +114,12 @@ def test_document_lstm_embeddings():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_bidirectional_lstm_embeddings():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=True, use_first_representation=False)
bidirectional=True)

embeddings.embed(sentence)

Expand All @@ -143,12 +131,12 @@ def test_document_bidirectional_lstm_embeddings():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_bidirectional_lstm_embeddings_using_first_representation():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=True, use_first_representation=True)
bidirectional=True)

embeddings.embed(sentence)

Expand All @@ -160,12 +148,12 @@ def test_document_bidirectional_lstm_embeddings_using_first_representation():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_lstm_embeddings_using_first_representation():
sentence, glove, charlm = init_document_embeddings()

embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove, charlm], hidden_states=128,
bidirectional=False, use_first_representation=True)
bidirectional=False)

embeddings.embed(sentence)

Expand All @@ -177,7 +165,7 @@ def test_document_lstm_embeddings_using_first_representation():
assert (len(sentence.get_embedding()) == 0)


@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason="Skipping this test on Travis CI.")
@pytest.mark.slow
def test_document_mean_embeddings():
text = 'I love Berlin. Berlin is a great place to live.'
sentence: Sentence = Sentence(text)
Expand All @@ -196,6 +184,16 @@ def test_document_mean_embeddings():
assert (len(sentence.get_embedding()) == 0)


def init_document_embeddings():
    """Return a test sentence plus the glove and char-LM token embeddings
    shared by the document-embedding tests."""
    text = 'I love Berlin. Berlin is a great place to live.'
    glove: TokenEmbeddings = WordEmbeddings('en-glove')
    charlm: TokenEmbeddings = CharLMEmbeddings('mix-backward')
    return Sentence(text), glove, charlm


def load_and_apply_word_embeddings(emb_type: str):
text = 'I love Berlin.'
sentence: Sentence = Sentence(text)
Expand All @@ -221,4 +219,4 @@ def load_and_apply_char_lm_embeddings(emb_type: str):

token.clear_embeddings()

assert(len(token.get_embedding()) == 0)
assert(len(token.get_embedding()) == 0)
36 changes: 0 additions & 36 deletions tests/test_language_model_trainer.py

This file was deleted.

Loading

0 comments on commit 97a6c4c

Please sign in to comment.