diff --git a/test/conftest.py b/test/conftest.py index e5903500b7..1374482487 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -152,12 +152,17 @@ def no_answer_prediction(no_answer_reader, test_docs_xs): def document_store_with_docs(request, test_docs_xs, elasticsearch_fixture): document_store = get_document_store(request.param) document_store.write_documents(test_docs_xs) - return document_store + yield document_store + if isinstance(document_store, FAISSDocumentStore): + document_store.faiss_index.reset() @pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"]) def document_store(request, test_docs_xs, elasticsearch_fixture): - return get_document_store(request.param) + document_store = get_document_store(request.param) + yield document_store + if isinstance(document_store, FAISSDocumentStore): + document_store.faiss_index.reset() @pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"]) diff --git a/test/test_dpr_retriever.py b/test/test_dpr_retriever.py index 1c72b213c2..6819d3ddce 100644 --- a/test/test_dpr_retriever.py +++ b/test/test_dpr_retriever.py @@ -5,6 +5,7 @@ from haystack.document_store.elasticsearch import ElasticsearchDocumentStore +@pytest.mark.slow @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory"], indirect=True) @pytest.mark.parametrize("retriever", ["dpr"], indirect=True) def test_dpr_inmemory_retrieval(document_store, retriever): @@ -31,7 +32,6 @@ def test_dpr_inmemory_retrieval(document_store, retriever): ) ] - document_store.delete_all_documents(index="test_dpr") document_store.write_documents(documents, index="test_dpr") document_store.update_embeddings(retriever=retriever, index="test_dpr") time.sleep(2) @@ -49,5 +49,3 @@ def test_dpr_inmemory_retrieval(document_store, retriever): res = retriever.retrieve(query="Which philosopher attacked Schopenhauer?", index="test_dpr") assert res[0].meta["name"] == "1" - # clean up - document_store.delete_all_documents(index="test_dpr") diff --git a/test/test_embedding_retriever.py b/test/test_embedding_retriever.py index dd52587a5a..71a4f2b02e 100644 --- a/test/test_embedding_retriever.py +++ b/test/test_embedding_retriever.py @@ -2,6 +2,7 @@ from haystack import Finder +@pytest.mark.slow @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory"], indirect=True) @pytest.mark.parametrize("retriever", ["embedding"], indirect=True) def test_embedding_retriever(retriever, document_store): diff --git a/test/test_faiss.py b/test/test_faiss.py index 49a375191d..9df0c93be2 100644 --- a/test/test_faiss.py +++ b/test/test_faiss.py @@ -76,6 +76,7 @@ def test_faiss_write_docs(document_store, index_buffer_size, batch_size): check_data_correctness(documents_indexed, DOCUMENTS) +@pytest.mark.slow @pytest.mark.parametrize("document_store", ["faiss"], indirect=True) @pytest.mark.parametrize("retriever", ["dpr"], indirect=True) @pytest.mark.parametrize("index_buffer_size", [10_000, 2]) diff --git a/test/test_finder.py b/test/test_finder.py index 3210240ddc..da9d672655 100644 --- a/test/test_finder.py +++ b/test/test_finder.py @@ -2,6 +2,7 @@ import pytest +@pytest.mark.slow @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True) def test_finder_get_answers(reader, retriever_with_docs, document_store_with_docs): finder = Finder(reader, retriever_with_docs) @@ -31,6 +32,7 @@ def test_finder_offsets(reader, retriever_with_docs, document_store_with_docs): assert prediction["answers"][0]["context"][start:end] == prediction["answers"][0]["answer"] +@pytest.mark.slow @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True) def test_finder_get_answers_single_result(reader, retriever_with_docs, document_store_with_docs): finder = Finder(reader, retriever_with_docs) diff --git a/test/test_reader.py b/test/test_reader.py index a6e84bb3e5..00b22389e4 100644 --- a/test/test_reader.py +++ b/test/test_reader.py @@ -1,11 +1,12 @@ import math +import pytest + from haystack import Document from haystack.reader.base import BaseReader from haystack.reader.farm import FARMReader - def test_reader_basic(reader): assert reader is not None assert isinstance(reader, BaseReader) @@ -23,6 +24,7 @@ def test_output(prediction): assert len(prediction["answers"]) == 5 +@pytest.mark.slow def test_no_answer_output(no_answer_prediction): assert no_answer_prediction is not None assert no_answer_prediction["question"] == "What is the meaning of life?" @@ -38,9 +40,12 @@ def test_no_answer_output(no_answer_prediction): assert answers.count(None) == 1 assert len(no_answer_prediction["answers"]) == 5 + # TODO Directly compare farm and transformers reader outputs # TODO checks to see that model is responsive to input arguments e.g. context_window_size - topk + +@pytest.mark.slow def test_prediction_attributes(prediction): # TODO FARM's prediction also has no_ans_gap attributes_gold = ["question", "answers"] @@ -57,37 +62,42 @@ def test_answer_attributes(prediction): assert ag in answer -def test_context_window_size(test_docs_xs): - # TODO parametrize window_size and farm/transformers reader using pytest +@pytest.mark.slow +@pytest.mark.parametrize("reader", ["farm"], indirect=True) +@pytest.mark.parametrize("window_size", [10, 15, 20]) +def test_context_window_size(reader, test_docs_xs, window_size): docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs] - for window_size in [10, 15, 20]: - farm_reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", num_processes=0, - use_gpu=False, top_k_per_sample=5, no_ans_boost=None, context_window_size=window_size) - prediction = farm_reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5) - for answer in prediction["answers"]: - # If the extracted answer is larger than the context window, the context window is expanded. - # If the extracted answer is odd in length, the resulting context window is one less than context_window_size - # due to rounding (See FARM's QACandidate) - # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different - if len(answer["answer"]) <= window_size: - assert len(answer["context"]) in [window_size, window_size-1] - else: - assert len(answer["answer"]) == len(answer["context"]) - - # TODO Need to test transformers reader + + if isinstance(reader, FARMReader): + reader.inferencer.model.prediction_heads[0].context_window_size = window_size + + prediction = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5) + for answer in prediction["answers"]: + # If the extracted answer is larger than the context window, the context window is expanded. + # If the extracted answer is odd in length, the resulting context window is one less than context_window_size + # due to rounding (See FARM's QACandidate) # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different + if len(answer["answer"]) <= window_size: + assert len(answer["context"]) in [window_size, window_size - 1] + else: + assert len(answer["answer"]) == len(answer["context"]) + # TODO Need to test transformers reader + # TODO Currently the behaviour of context_window_size in FARMReader and TransformerReader is different -def test_top_k(test_docs_xs): - # TODO parametrize top_k and farm/transformers reader using pytest - # TODO transformers reader was crashing when tested on this +@pytest.mark.parametrize("reader", ["farm"], indirect=True) +@pytest.mark.parametrize("top_k", [2, 5, 10]) +def test_top_k(reader, test_docs_xs, top_k): docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs] - farm_reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", num_processes=0, - use_gpu=False, top_k_per_sample=4, no_ans_boost=None, top_k_per_candidate=4) - for top_k in [2, 5, 10]: - prediction = farm_reader.predict(question="Who lives in Berlin?", documents=docs, top_k=top_k) - assert len(prediction["answers"]) == top_k - + reader.top_k_per_candidate = 4 + if isinstance(reader, FARMReader): + reader.inferencer.model.prediction_heads[0].n_best = reader.top_k_per_candidate + 1 + try: + reader.inferencer.model.prediction_heads[0].n_best_per_sample = 4 + except: + print("WARNING: Could not set `top_k_per_sample` in FARM. Please update FARM version.") + prediction = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=top_k) + assert len(prediction["answers"]) == top_k diff --git a/test/test_rest_api.py b/test/test_rest_api.py index 19927acea5..16893031c4 100644 --- a/test/test_rest_api.py +++ b/test/test_rest_api.py @@ -18,6 +18,7 @@ def get_test_client_and_override_dependencies(reader, document_store_with_docs): return TestClient(app) +@pytest.mark.slow @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True) @pytest.mark.parametrize("reader", ["farm"], indirect=True) def test_query_api(reader, document_store_with_docs):