From 19fe7713e21552ca1d91ffe9337ee84b55259ba6 Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Sun, 23 Apr 2023 20:25:05 +0300 Subject: [PATCH] Sanitize DB_STATEMENT by default for elasticsearch (#1758) --- CHANGELOG.md | 2 ++ .../instrumentation/elasticsearch/__init__.py | 11 +++------- .../instrumentation/elasticsearch/utils.py | 3 ++- .../tests/test_elasticsearch.py | 21 ++----------------- 4 files changed, 9 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4e787b8bb..85bb0e5a65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fix elasticsearch db.statement attribute to be sanitized by default + ([#1758](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1758)) - Fix `AttributeError` when AWS Lambda handler receives a list event ([#1738](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1738)) - Fix `None does not implement middleware` error when there are no middlewares registered diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py index 4fd6bc79e1..d39c172c6a 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py @@ -44,7 +44,6 @@ The instrument() method accepts the following keyword args: tracer_provider (TracerProvider) - an optional tracer provider -sanitize_query (bool) - an optional query sanitization flag request_hook (Callable) - a function with extra user-defined logic to be performed before performing the request this function signature is: def request_hook(span: Span, method: str, url: str, kwargs) @@ -138,13 +137,11 @@ def _instrument(self, **kwargs): tracer = get_tracer(__name__, __version__, tracer_provider) request_hook = kwargs.get("request_hook") response_hook = kwargs.get("response_hook") - sanitize_query = kwargs.get("sanitize_query", False) _wrap( elasticsearch, "Transport.perform_request", _wrap_perform_request( tracer, - sanitize_query, self._span_name_prefix, request_hook, response_hook, @@ -163,7 +160,6 @@ def _uninstrument(self, **kwargs): def _wrap_perform_request( tracer, - sanitize_query, span_name_prefix, request_hook=None, response_hook=None, @@ -225,10 +221,9 @@ def wrapper(wrapped, _, args, kwargs): if method: attributes["elasticsearch.method"] = method if body: - statement = str(body) - if sanitize_query: - statement = sanitize_body(body) - attributes[SpanAttributes.DB_STATEMENT] = statement + attributes[SpanAttributes.DB_STATEMENT] = sanitize_body( + body + ) if params: attributes["elasticsearch.params"] = str(params) if doc_id: diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py index 7c5d753b31..ca4a466bab 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -29,7 +29,8 @@ def _flatten_dict(d, parent_key=""): items = [] for k, v in d.items(): new_key = parent_key + "." + k if parent_key else k - if isinstance(v, dict): + # recursive call _flatten_dict for a non-empty dict value + if isinstance(v, dict) and v: items.extend(_flatten_dict(v, new_key).items()) else: items.append((new_key, v)) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index c69ba70fdd..02bf3eb591 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -58,9 +58,7 @@ class TestElasticsearchIntegration(TestBase): "elasticsearch.url": "/test-index/_search", "elasticsearch.method": helpers.dsl_search_method, "elasticsearch.target": "test-index", - SpanAttributes.DB_STATEMENT: str( - {"query": {"bool": {"filter": [{"term": {"author": "testing"}}]}}} - ), + SpanAttributes.DB_STATEMENT: str({"query": {"bool": {"filter": "?"}}}), } create_attributes = { @@ -264,18 +262,6 @@ def test_dsl_search(self, request_mock): ) def test_dsl_search_sanitized(self, request_mock): - # Reset instrumentation to use sanitized query (default) - ElasticsearchInstrumentor().uninstrument() - ElasticsearchInstrumentor().instrument(sanitize_query=True) - - # update expected attributes to match sanitized query - sanitized_search_attributes = self.search_attributes.copy() - sanitized_search_attributes.update( - { - SpanAttributes.DB_STATEMENT: "{'query': {'bool': {'filter': '?'}}}" - } - ) - request_mock.return_value = (1, {}, '{"hits": {"hits": []}}') client = Elasticsearch() search = Search(using=client, index="test-index").filter( @@ -289,7 +275,7 @@ def test_dsl_search_sanitized(self, request_mock): self.assertIsNotNone(span.end_time) self.assertEqual( span.attributes, - sanitized_search_attributes, + self.search_attributes, ) def test_dsl_create(self, request_mock): @@ -320,9 +306,6 @@ def test_dsl_create(self, request_mock): ) def test_dsl_create_sanitized(self, request_mock): - # Reset instrumentation to explicitly use sanitized query - ElasticsearchInstrumentor().uninstrument() - ElasticsearchInstrumentor().instrument(sanitize_query=True) request_mock.return_value = (1, {}, {}) client = Elasticsearch() Article.init(using=client)