Pytest fix memory leak and put pytest marker on slow tests #520

Merged (3 commits) on Oct 26, 2020
9 changes: 7 additions & 2 deletions test/conftest.py
@@ -152,12 +152,17 @@ def no_answer_prediction(no_answer_reader, test_docs_xs):
 def document_store_with_docs(request, test_docs_xs, elasticsearch_fixture):
     document_store = get_document_store(request.param)
     document_store.write_documents(test_docs_xs)
-    return document_store
+    yield document_store
+    if isinstance(document_store, FAISSDocumentStore):
+        document_store.faiss_index.reset()


 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])
 def document_store(request, test_docs_xs, elasticsearch_fixture):
-    return get_document_store(request.param)
+    document_store = get_document_store(request.param)
+    yield document_store
+    if isinstance(document_store, FAISSDocumentStore):
+        document_store.faiss_index.reset()


 @pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"])
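The fix above works because pytest treats everything after `yield` as fixture teardown: it runs once the requesting test has finished, so the FAISS index can be emptied before the next parametrized test builds a new store. A minimal, self-contained sketch of the pattern, using a stand-in class instead of haystack's real document stores:

import pytest


class FakeVectorStore:
    """Stand-in for a document store that keeps memory-hungry state (e.g. a FAISS index)."""

    def __init__(self):
        self.vectors = []

    def reset(self):
        self.vectors.clear()


@pytest.fixture(params=["faiss", "memory"])
def document_store(request):
    store = FakeVectorStore()   # the real fixture calls get_document_store(request.param)
    yield store                 # the test body runs while the fixture is suspended here
    store.reset()               # teardown: the equivalent of document_store.faiss_index.reset()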
4 changes: 1 addition & 3 deletions test/test_dpr_retriever.py
@@ -5,6 +5,7 @@
 from haystack.document_store.elasticsearch import ElasticsearchDocumentStore


+@pytest.mark.slow
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory"], indirect=True)
 @pytest.mark.parametrize("retriever", ["dpr"], indirect=True)
 def test_dpr_inmemory_retrieval(document_store, retriever):
@@ -31,7 +32,6 @@ def test_dpr_inmemory_retrieval(document_store, retriever):
         )
     ]

-    document_store.delete_all_documents(index="test_dpr")
     document_store.write_documents(documents, index="test_dpr")
     document_store.update_embeddings(retriever=retriever, index="test_dpr")
     time.sleep(2)
@@ -49,5 +49,3 @@ def test_dpr_inmemory_retrieval(document_store, retriever):
     res = retriever.retrieve(query="Which philosopher attacked Schopenhauer?", index="test_dpr")
     assert res[0].meta["name"] == "1"

-    # clean up
-    document_store.delete_all_documents(index="test_dpr")
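The `@pytest.mark.slow` markers added throughout this PR only pay off if the marker is registered and then selected or deselected at collection time. The registration itself is not shown in this diff; one common way to do it, sketched here as an assumption rather than the project's actual setup, is a `pytest_configure` hook in conftest.py:

def pytest_configure(config):
    # Register the custom marker so pytest does not warn about unknown marks.
    config.addinivalue_line(
        "markers", "slow: marks tests that load large models or need external services"
    )

With the marker registered, a quick local run can skip the heavy tests with `pytest -m "not slow"`, while CI can still run the full suite with a plain `pytest`, or only the marked tests with `pytest -m slow`.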
1 change: 1 addition & 0 deletions test/test_embedding_retriever.py
@@ -2,6 +2,7 @@
 from haystack import Finder


+@pytest.mark.slow
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory"], indirect=True)
 @pytest.mark.parametrize("retriever", ["embedding"], indirect=True)
 def test_embedding_retriever(retriever, document_store):
1 change: 1 addition & 0 deletions test/test_faiss.py
@@ -76,6 +76,7 @@ def test_faiss_write_docs(document_store, index_buffer_size, batch_size):
     check_data_correctness(documents_indexed, DOCUMENTS)


+@pytest.mark.slow
 @pytest.mark.parametrize("document_store", ["faiss"], indirect=True)
 @pytest.mark.parametrize("retriever", ["dpr"], indirect=True)
 @pytest.mark.parametrize("index_buffer_size", [10_000, 2])
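The FAISS-specific teardown added in conftest.py matters for the tests above because a FAISS index keeps every added vector in process memory until it is explicitly cleared. A standalone illustration using the faiss library directly; the dimensionality and data here are arbitrary, not taken from the test suite:

import numpy as np
import faiss  # pip install faiss-cpu

index = faiss.IndexFlatIP(768)                           # flat inner-product index for 768-dim vectors
index.add(np.random.rand(1_000, 768).astype("float32"))
print(index.ntotal)                                      # 1000 vectors now held in RAM
index.reset()                                            # drops all stored vectors and frees that memory
print(index.ntotal)                                      # 0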
2 changes: 2 additions & 0 deletions test/test_finder.py
@@ -2,6 +2,7 @@
 import pytest


+@pytest.mark.slow
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 def test_finder_get_answers(reader, retriever_with_docs, document_store_with_docs):
     finder = Finder(reader, retriever_with_docs)
@@ -31,6 +32,7 @@ def test_finder_offsets(reader, retriever_with_docs, document_store_with_docs):
     assert prediction["answers"][0]["context"][start:end] == prediction["answers"][0]["answer"]


+@pytest.mark.slow
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 def test_finder_get_answers_single_result(reader, retriever_with_docs, document_store_with_docs):
     finder = Finder(reader, retriever_with_docs)
64 changes: 37 additions & 27 deletions test/test_reader.py
@@ -1,11 +1,12 @@
 import math

+import pytest
+
 from haystack import Document
 from haystack.reader.base import BaseReader
 from haystack.reader.farm import FARMReader
-


 def test_reader_basic(reader):
     assert reader is not None
     assert isinstance(reader, BaseReader)
@@ -23,6 +24,7 @@ def test_output(prediction):
     assert len(prediction["answers"]) == 5


+@pytest.mark.slow
 def test_no_answer_output(no_answer_prediction):
     assert no_answer_prediction is not None
     assert no_answer_prediction["question"] == "What is the meaning of life?"
@@ -38,9 +40,12 @@ def test_no_answer_output(no_answer_prediction):
     assert answers.count(None) == 1
     assert len(no_answer_prediction["answers"]) == 5

+
 # TODO Directly compare farm and transformers reader outputs
 # TODO checks to see that model is responsive to input arguments e.g. context_window_size - topk

+
+@pytest.mark.slow
 def test_prediction_attributes(prediction):
     # TODO FARM's prediction also has no_ans_gap
     attributes_gold = ["question", "answers"]
@@ -57,37 +62,42 @@ def test_answer_attributes(prediction):
     assert ag in answer


-def test_context_window_size(test_docs_xs):
-    # TODO parametrize window_size and farm/transformers reader using pytest
+@pytest.mark.slow
+@pytest.mark.parametrize("reader", ["farm"], indirect=True)
+@pytest.mark.parametrize("window_size", [10, 15, 20])
+def test_context_window_size(reader, test_docs_xs, window_size):
     docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs]
-    for window_size in [10, 15, 20]:
-        farm_reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", num_processes=0,
-                                 use_gpu=False, top_k_per_sample=5, no_ans_boost=None, context_window_size=window_size)
-        prediction = farm_reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5)
-        for answer in prediction["answers"]:
-            # If the extracted answer is larger than the context window, the context window is expanded.
-            # If the extracted answer is odd in length, the resulting context window is one less than context_window_size
-            # due to rounding (See FARM's QACandidate)
-            # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different
-            if len(answer["answer"]) <= window_size:
-                assert len(answer["context"]) in [window_size, window_size-1]
-            else:
-                assert len(answer["answer"]) == len(answer["context"])
-
-    # TODO Need to test transformers reader
+
+    if isinstance(reader, FARMReader):
+        reader.inferencer.model.prediction_heads[0].context_window_size = window_size
+
+    prediction = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5)
+    for answer in prediction["answers"]:
+        # If the extracted answer is larger than the context window, the context window is expanded.
+        # If the extracted answer is odd in length, the resulting context window is one less than context_window_size
+        # due to rounding (See FARM's QACandidate)
+        # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different
+        if len(answer["answer"]) <= window_size:
+            assert len(answer["context"]) in [window_size, window_size - 1]
+        else:
+            assert len(answer["answer"]) == len(answer["context"])
+
+    # TODO Need to test transformers reader
+    # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different


-def test_top_k(test_docs_xs):
-    # TODO parametrize top_k and farm/transformers reader using pytest
+# TODO transformers reader was crashing when tested on this
+
+@pytest.mark.parametrize("reader", ["farm"], indirect=True)
+@pytest.mark.parametrize("top_k", [2, 5, 10])
+def test_top_k(reader, test_docs_xs, top_k):
     docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs]
-    farm_reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", num_processes=0,
-                             use_gpu=False, top_k_per_sample=4, no_ans_boost=None, top_k_per_candidate=4)
-    for top_k in [2, 5, 10]:
-        prediction = farm_reader.predict(question="Who lives in Berlin?", documents=docs, top_k=top_k)
-        assert len(prediction["answers"]) == top_k
+
+    reader.top_k_per_candidate = 4
+    if isinstance(reader, FARMReader):
+        reader.inferencer.model.prediction_heads[0].n_best = reader.top_k_per_candidate + 1
+        try:
+            reader.inferencer.model.prediction_heads[0].n_best_per_sample = 4
+        except:
+            print("WARNING: Could not set `top_k_per_sample` in FARM. Please update FARM version.")
+
+    prediction = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=top_k)
+    assert len(prediction["answers"]) == top_k
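The rewritten reader tests lean on `indirect=True`, which routes each parametrize value through the fixture of the same name, so the test receives the constructed reader object instead of a raw string and the expensive model setup stays in one place. A minimal, self-contained sketch of that mechanism; the string-to-object mapping is invented for illustration, the real one lives in test/conftest.py:

import pytest


@pytest.fixture
def reader(request):
    # With indirect=True, the parametrize value arrives here as request.param.
    readers = {"farm": "farm reader object", "transformers": "transformers reader object"}
    return readers[request.param]


@pytest.mark.parametrize("reader", ["farm"], indirect=True)
@pytest.mark.parametrize("top_k", [2, 5, 10])
def test_top_k(reader, top_k):
    # The test sees the object built by the fixture, not the string "farm".
    assert reader == "farm reader object"
    assert top_k in (2, 5, 10)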
1 change: 1 addition & 0 deletions test/test_rest_api.py
@@ -18,6 +18,7 @@ def get_test_client_and_override_dependencies(reader, document_store_with_docs):
     return TestClient(app)


+@pytest.mark.slow
 @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
 @pytest.mark.parametrize("reader", ["farm"], indirect=True)
 def test_query_api(reader, document_store_with_docs):
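test_rest_api.py drives the REST layer in process: the helper named in the hunk header returns a `TestClient(app)` after swapping the API's dependencies for the test fixtures. The general FastAPI pattern this suggests looks roughly like the sketch below; the helper, endpoint, and dependency names here are placeholders, not the ones defined in haystack's rest_api module:

from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient

app = FastAPI()


def get_document_store():
    # Placeholder for the real dependency that builds a document store.
    raise NotImplementedError


@app.get("/doc-count")
def doc_count(store=Depends(get_document_store)):
    return {"count": store["count"]}


def make_test_client_with_overrides(fake_store):
    # Replace the real dependency with a test double, then talk to the app in process.
    app.dependency_overrides[get_document_store] = lambda: fake_store
    return TestClient(app)


client = make_test_client_with_overrides({"count": 3})
assert client.get("/doc-count").json() == {"count": 3}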