From 56832d816ac01c08aba89d84f35b3ca404f5cd7e Mon Sep 17 00:00:00 2001
From: Quentin Pradet
Date: Mon, 28 Aug 2023 20:35:28 +0400
Subject: [PATCH] Support Elasticsearch 8 (#1664)

---
 .github/workflows/ci.yml                  |   4 +-
 elasticsearch_dsl/connections.py          |   5 +-
 elasticsearch_dsl/search.py               |  10 +-
 examples/alias_migration.py               |   5 +-
 setup.cfg                                 |   4 +-
 setup.py                                  |   2 +-
 tests/conftest.py                         | 103 +++++++++---------
 tests/test_connections.py                 |  52 ++++++---
 tests/test_integration/test_document.py   |  10 +-
 .../test_examples/test_composite_aggs.py  |   5 +-
 tests/test_integration/test_search.py     |   4 +-
 tests/test_result.py                      |   2 +-
 tests/test_utils.py                       |   2 +-
 13 files changed, 118 insertions(+), 90 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5cd05d38f..18302977b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -67,7 +67,7 @@ jobs:
           "3.10",
           "3.11",
         ]
-        es-version: [7.0.0, 7.10.0]
+        es-version: [8.0.0, 8.9.0]
 
     steps:
       - name: Checkout Repository
@@ -76,7 +76,7 @@
         run: |
           mkdir /tmp/elasticsearch
           wget -O - https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${{ matrix.es-version }}-linux-x86_64.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1
-          /tmp/elasticsearch/bin/elasticsearch -d
+          /tmp/elasticsearch/bin/elasticsearch -E xpack.security.enabled=false -E discovery.type=single-node -d
       - name: Setup Python - ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
diff --git a/elasticsearch_dsl/connections.py b/elasticsearch_dsl/connections.py
index 04b9ba43d..7bfabeb0e 100644
--- a/elasticsearch_dsl/connections.py
+++ b/elasticsearch_dsl/connections.py
@@ -26,9 +26,10 @@ class Connections:
     singleton in this module.
     """
 
-    def __init__(self):
+    def __init__(self, *, elasticsearch_class=Elasticsearch):
         self._kwargs = {}
         self._conns = {}
+        self.elasticsearch_class = elasticsearch_class
 
     def configure(self, **kwargs):
         """
@@ -80,7 +81,7 @@ def create_connection(self, alias="default", **kwargs):
         it under given alias.
         """
         kwargs.setdefault("serializer", serializer)
-        conn = self._conns[alias] = Elasticsearch(**kwargs)
+        conn = self._conns[alias] = self.elasticsearch_class(**kwargs)
         return conn
 
     def get_connection(self, alias="default"):
diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py
index c07fe7e71..724c06a52 100644
--- a/elasticsearch_dsl/search.py
+++ b/elasticsearch_dsl/search.py
@@ -18,7 +18,7 @@
 import collections.abc
 import copy
 
-from elasticsearch.exceptions import TransportError
+from elasticsearch.exceptions import ApiError
 from elasticsearch.helpers import scan
 
 from .aggs import A, AggBase
@@ -693,7 +693,8 @@ def count(self):
 
         d = self.to_dict(count=True)
         # TODO: failed shards detection
-        return es.count(index=self._index, body=d, **self._params)["count"]
+        resp = es.count(index=self._index, query=d.get("query", None), **self._params)
+        return resp["count"]
 
     def execute(self, ignore_cache=False):
         """
@@ -707,7 +708,8 @@ def execute(self, ignore_cache=False):
             es = get_connection(self._using)
 
             self._response = self._response_class(
-                self, es.search(index=self._index, body=self.to_dict(), **self._params)
+                self,
+                es.search(index=self._index, body=self.to_dict(), **self._params).body,
             )
 
         return self._response
@@ -799,7 +801,7 @@ def execute(self, ignore_cache=False, raise_on_error=True):
             for s, r in zip(self._searches, responses["responses"]):
                 if r.get("error", False):
                     if raise_on_error:
-                        raise TransportError("N/A", r["error"]["type"], r["error"])
+                        raise ApiError("N/A", meta=responses.meta, body=r)
                     r = None
                 else:
                     r = Response(s, r)
diff --git a/examples/alias_migration.py b/examples/alias_migration.py
index e0a44da30..c56627b9a 100644
--- a/examples/alias_migration.py
+++ b/examples/alias_migration.py
@@ -106,9 +106,8 @@ def migrate(move_data=True, update_alias=True):
 
     if move_data:
         # move data from current alias to the new index
-        es.reindex(
-            body={"source": {"index": ALIAS}, "dest": {"index": next_index}},
-            request_timeout=3600,
+        es.options(request_timeout=3600).reindex(
+            body={"source": {"index": ALIAS}, "dest": {"index": next_index}}
         )
         # refresh the index to make the changes visible
         es.indices.refresh(index=next_index)
diff --git a/setup.cfg b/setup.cfg
index 798d7e9b5..673fe8bbc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -12,6 +12,4 @@ filterwarnings =
     # The body parameter is no longer deprecated, see
     # https://github.com/elastic/elasticsearch-py/issues/2181#issuecomment-1490932964
     ignore:The 'body' parameter is deprecated .*:DeprecationWarning
-    # calendar_interval was only added in Elasticsearch 7.2 and we still support Elasticsearch 7.0
-    # using `default` instead of `ignore` to show it in the output as a reminder to remove it for Elasticsearch 8
-    default:\[interval\] on \[date_histogram\] is deprecated, use \[fixed_interval\] or \[calendar_interval\] in the future.:elasticsearch.exceptions.ElasticsearchWarning
+    ignore:Legacy index templates are deprecated in favor of composable templates.:elasticsearch.exceptions.ElasticsearchWarning
diff --git a/setup.py b/setup.py
index 36e0fb224..9dc52a2e0 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 
 install_requires = [
     "python-dateutil",
-    "elasticsearch>=7.0.0,<8.0.0",
+    "elasticsearch>=8.0.0,<9.0.0",
 ]
 
 develop_requires = [
diff --git a/tests/conftest.py b/tests/conftest.py
index 7146cb056..0e5e082de 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,6 +23,7 @@
 from unittest import SkipTest, TestCase
 from unittest.mock import Mock
 
+from elastic_transport import ObjectApiResponse
 from elasticsearch import Elasticsearch
 from elasticsearch.exceptions import ConnectionError
 from elasticsearch.helpers import bulk
@@ -47,7 +48,7 @@
 
 def get_test_client(wait=True, **kwargs):
     # construct kwargs from the environment
-    kw = {"timeout": 30}
+    kw = {"request_timeout": 30}
 
     if "PYTHON_CONNECTION_CLASS" in os.environ:
         from elasticsearch import connection
@@ -131,8 +132,9 @@ def es_version(client):
 @fixture
 def write_client(client):
     yield client
-    client.indices.delete(index="test-*", ignore=404)
-    client.indices.delete_template(name="test-template", ignore=404)
+    for index_name in client.indices.get(index="test-*", expand_wildcards="all"):
+        client.indices.delete(index=index_name)
+    client.options(ignore_status=404).indices.delete_template(name="test-template")
 
 
 @fixture
@@ -160,55 +162,58 @@ def data_client(client):
 
 @fixture
 def dummy_response():
-    return {
-        "_shards": {"failed": 0, "successful": 10, "total": 10},
-        "hits": {
-            "hits": [
-                {
-                    "_index": "test-index",
-                    "_type": "company",
-                    "_id": "elasticsearch",
-                    "_score": 12.0,
-                    "_source": {"city": "Amsterdam", "name": "Elasticsearch"},
-                },
-                {
-                    "_index": "test-index",
-                    "_type": "employee",
-                    "_id": "42",
-                    "_score": 11.123,
-                    "_routing": "elasticsearch",
-                    "_source": {
-                        "name": {"first": "Shay", "last": "Bannon"},
-                        "lang": "java",
-                        "twitter": "kimchy",
+    return ObjectApiResponse(
+        meta=None,
+        body={
+            "_shards": {"failed": 0, "successful": 10, "total": 10},
+            "hits": {
+                "hits": [
+                    {
+                        "_index": "test-index",
+                        "_type": "company",
+                        "_id": "elasticsearch",
+                        "_score": 12.0,
+                        "_source": {"city": "Amsterdam", "name": "Elasticsearch"},
+                    },
+                    {
+                        "_index": "test-index",
+                        "_type": "employee",
+                        "_id": "42",
+                        "_score": 11.123,
+                        "_routing": "elasticsearch",
+                        "_source": {
+                            "name": {"first": "Shay", "last": "Bannon"},
+                            "lang": "java",
+                            "twitter": "kimchy",
+                        },
+                    },
+                    {
+                        "_index": "test-index",
+                        "_type": "employee",
+                        "_id": "47",
+                        "_score": 1,
+                        "_routing": "elasticsearch",
+                        "_source": {
+                            "name": {"first": "Honza", "last": "Král"},
+                            "lang": "python",
+                            "twitter": "honzakral",
+                        },
                     },
-                },
-                {
-                    "_index": "test-index",
-                    "_type": "employee",
-                    "_id": "47",
-                    "_score": 1,
-                    "_routing": "elasticsearch",
-                    "_source": {
-                        "name": {"first": "Honza", "last": "Král"},
-                        "lang": "python",
-                        "twitter": "honzakral",
+                    {
+                        "_index": "test-index",
+                        "_type": "employee",
+                        "_id": "53",
+                        "_score": 16.0,
+                        "_routing": "elasticsearch",
                     },
-                },
-                {
-                    "_index": "test-index",
-                    "_type": "employee",
-                    "_id": "53",
-                    "_score": 16.0,
-                    "_routing": "elasticsearch",
-                },
-            ],
-            "max_score": 12.0,
-            "total": 123,
+                ],
+                "max_score": 12.0,
+                "total": 123,
+            },
+            "timed_out": False,
+            "took": 123,
         },
-        "timed_out": False,
-        "took": 123,
-    }
+    )
 
 
 @fixture
diff --git a/tests/test_connections.py b/tests/test_connections.py
index 278760cc3..b22e6eba6 100644
--- a/tests/test_connections.py
+++ b/tests/test_connections.py
@@ -21,6 +21,11 @@
 from elasticsearch_dsl import connections, serializer
 
 
+class DummyElasticsearch:
+    def __init__(self, *args, hosts, **kwargs):
+        self.hosts = hosts
+
+
 def test_default_connection_is_returned_by_default():
     c = connections.Connections()
 
@@ -33,27 +38,36 @@ def test_default_connection_is_returned_by_default():
 
 
 def test_get_connection_created_connection_if_needed():
-    c = connections.Connections()
-    c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]})
+    c = connections.Connections(elasticsearch_class=DummyElasticsearch)
+    c.configure(
+        default={"hosts": ["https://es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
 
     default = c.get_connection()
     local = c.get_connection("local")
 
-    assert isinstance(default, Elasticsearch)
-    assert isinstance(local, Elasticsearch)
+    assert isinstance(default, DummyElasticsearch)
+    assert isinstance(local, DummyElasticsearch)
 
-    assert [{"host": "es.com"}] == default.transport.hosts
-    assert [{"host": "localhost"}] == local.transport.hosts
+    assert default.hosts == ["https://es.com:9200"]
+    assert local.hosts == ["https://localhost:9200"]
 
 
 def test_configure_preserves_unchanged_connections():
-    c = connections.Connections()
+    c = connections.Connections(elasticsearch_class=DummyElasticsearch)
 
-    c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]})
+    c.configure(
+        default={"hosts": ["https://es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
     default = c.get_connection()
     local = c.get_connection("local")
 
-    c.configure(default={"hosts": ["not-es.com"]}, local={"hosts": ["localhost"]})
+    c.configure(
+        default={"hosts": ["https://not-es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
     new_default = c.get_connection()
     new_local = c.get_connection("local")
 
@@ -62,9 +76,12 @@ def test_configure_preserves_unchanged_connections():
 
 
 def test_remove_connection_removes_both_conn_and_conf():
-    c = connections.Connections()
+    c = connections.Connections(elasticsearch_class=DummyElasticsearch)
 
-    c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]})
+    c.configure(
+        default={"hosts": ["https://es.com:9200"]},
+        local={"hosts": ["https://localhost:9200"]},
+    )
     c.add_connection("local2", object())
 
     c.remove_connection("default")
@@ -77,15 +94,16 @@ def test_remove_connection_removes_both_conn_and_conf():
 
 
 def test_create_connection_constructs_client():
-    c = connections.Connections()
-    c.create_connection("testing", hosts=["es.com"])
+    c = connections.Connections(elasticsearch_class=DummyElasticsearch)
+    c.create_connection("testing", hosts=["https://es.com:9200"])
 
     con = c.get_connection("testing")
-    assert [{"host": "es.com"}] == con.transport.hosts
+    assert con.hosts == ["https://es.com:9200"]
 
 
 def test_create_connection_adds_our_serializer():
-    c = connections.Connections()
-    c.create_connection("testing", hosts=["es.com"])
+    c = connections.Connections(elasticsearch_class=Elasticsearch)
+    c.create_connection("testing", hosts=["https://es.com:9200"])
 
-    assert c.get_connection("testing").transport.serializer is serializer.serializer
+    c_serializers = c.get_connection("testing").transport.serializers
+    assert c_serializers.serializers["application/json"] is serializer.serializer
diff --git a/tests/test_integration/test_document.py b/tests/test_integration/test_document.py
index 52600afdc..6fb2f4eaa 100644
--- a/tests/test_integration/test_document.py
+++ b/tests/test_integration/test_document.py
@@ -279,7 +279,6 @@ def test_save_and_update_return_doc_meta(write_client):
         "_primary_term",
         "_seq_no",
         "_shards",
-        "_type",
         "_version",
         "result",
     }
@@ -295,7 +294,6 @@ def test_save_and_update_return_doc_meta(write_client):
         "_primary_term",
         "_seq_no",
         "_shards",
-        "_type",
         "_version",
         "result",
     }
@@ -318,11 +316,15 @@ def test_get_raises_404_on_non_existent_id(data_client):
 
 
 def test_get_returns_none_if_404_ignored(data_client):
-    assert None is Repository.get("elasticsearch-dsl-php", ignore=404)
+    assert None is Repository.get(
+        "elasticsearch-dsl-php", using=data_client.options(ignore_status=404)
+    )
 
 
 def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client):
-    assert None is Repository.get("42", index="not-there", ignore=404)
+    assert None is Repository.get(
+        "42", index="not-there", using=data_client.options(ignore_status=404)
+    )
 
 
 def test_get(data_client):
diff --git a/tests/test_integration/test_examples/test_composite_aggs.py b/tests/test_integration/test_examples/test_composite_aggs.py
index 16ad9b17f..373696927 100644
--- a/tests/test_integration/test_examples/test_composite_aggs.py
+++ b/tests/test_integration/test_examples/test_composite_aggs.py
@@ -34,7 +34,10 @@ def test_scan_aggs_with_multiple_aggs(data_client):
         {"files": A("terms", field="files")},
         {
             "months": {
-                "date_histogram": {"field": "committed_date", "interval": "month"}
+                "date_histogram": {
+                    "field": "committed_date",
+                    "calendar_interval": "month",
+                }
             }
         },
     ]
diff --git a/tests/test_integration/test_search.py b/tests/test_integration/test_search.py
index e3ce061eb..99fb51847 100644
--- a/tests/test_integration/test_search.py
+++ b/tests/test_integration/test_search.py
@@ -16,7 +16,7 @@
 # under the License.
 
 
-from elasticsearch import TransportError
+from elasticsearch import ApiError
 from pytest import raises
 
 from elasticsearch_dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text
@@ -143,7 +143,7 @@ def test_multi_missing(data_client):
     ms = MultiSearch()
     ms = ms.add(s1).add(s2).add(s3)
 
-    with raises(TransportError):
+    with raises(ApiError):
         ms.execute()
 
     r1, r2, r3 = ms.execute(raise_on_error=False)
diff --git a/tests/test_result.py b/tests/test_result.py
index 7815bb92a..15e6ef7aa 100644
--- a/tests/test_result.py
+++ b/tests/test_result.py
@@ -40,7 +40,7 @@ def test_agg_response_is_pickleable(agg_response):
 
 
 def test_response_is_pickleable(dummy_response):
-    res = response.Response(Search(), dummy_response)
+    res = response.Response(Search(), dummy_response.body)
     res.hits
     r = pickle.loads(pickle.dumps(res))
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index df503229f..73931d433 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -93,7 +93,7 @@ class MyClass:
         def to_dict(self):
             return 42
 
-    assert serializer.serializer.dumps(MyClass()) == "42"
+    assert serializer.serializer.dumps(MyClass()) == b"42"
 
 
 def test_recursive_to_dict():
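
Note on the client API patterns this patch migrates to: elasticsearch-py 8 takes full URLs for hosts, renames the client-level "timeout" to "request_timeout", routes per-request settings such as ignored status codes through .options(), and wraps responses in ObjectApiResponse objects whose .body is the plain dict. A minimal sketch of those patterns follows; the host URL and index names are placeholders, and it assumes a local cluster with security disabled (matching the CI setup above), so it is an illustration rather than code from the patch itself.

    from elasticsearch import Elasticsearch

    # Hosts are full URLs in elasticsearch-py 8; "timeout" became "request_timeout".
    es = Elasticsearch("http://localhost:9200", request_timeout=30)

    # Per-request settings go through .options() instead of keyword arguments.
    es.options(ignore_status=404).indices.delete(index="test-index")

    # Long-running calls take their timeout the same way (as in the alias_migration change).
    es.options(request_timeout=3600).reindex(
        body={"source": {"index": "old-index"}, "dest": {"index": "new-index"}}
    )

    # Responses are ObjectApiResponse objects; .body returns the plain dict.
    resp = es.search(index="new-index", query={"match_all": {}})
    print(resp.body["hits"]["total"])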