From 6d37040ae2a30263d10fb9759907528b1b30dcb0 Mon Sep 17 00:00:00 2001 From: frnkvieira Date: Sun, 15 May 2022 21:28:04 -0300 Subject: [PATCH 1/4] Fixes capture_body flag not being respected and improves performance of body reader. --- elasticapm/contrib/starlette/__init__.py | 61 ++++++++++++------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/elasticapm/contrib/starlette/__init__.py b/elasticapm/contrib/starlette/__init__.py index 373b49970..c3c6c820f 100644 --- a/elasticapm/contrib/starlette/__init__.py +++ b/elasticapm/contrib/starlette/__init__.py @@ -120,7 +120,7 @@ def __init__(self, app: ASGIApp, client: Optional[Client], **kwargs): elasticapm.instrumentation.control.instrument() # If we ever make this a general-use ASGI middleware we should use - # `asgiref.conpatibility.guarantee_single_callable(app)` here + # `asgiref.compatibility.guarantee_single_callable(app)` here self.app = app async def __call__(self, scope, receive, send): @@ -131,7 +131,7 @@ async def __call__(self, scope, receive, send): send: send awaitable callable """ # we only handle the http scope, skip anything else. - if scope["type"] != "http": + if scope["type"] != "http" or (scope["type"] == "http" and self.client.should_ignore_url(scope["path"])): await self.app(scope, receive, send) return @@ -145,32 +145,36 @@ async def wrapped_send(message): elasticapm.set_transaction_result(result, override=False) await send(message) - # When we consume the body from receive, we replace the streaming - # mechanism with a mocked version -- this workaround came from - # https://github.com/encode/starlette/issues/495#issuecomment-513138055 - body = b"" - while True: - message = await receive() - if not message: - break - if message["type"] == "http.request": - b = message.get("body", b"") - if b: - body += b - if not message.get("more_body", False): + if self.client.config.capture_body != "off": + + # When we consume the body from receive, we replace the streaming + # mechanism with a mocked version -- this workaround came from + # https://github.com/encode/starlette/issues/495#issuecomment-513138055 + body = [] + while True: + message = await receive() + if not message: break - if message["type"] == "http.disconnect": - break + if message["type"] == "http.request": + b = message.get("body", b"") + if b: + body.append(b) + if not message.get("more_body", False): + break + if message["type"] == "http.disconnect": + break + + async def mocked_receive() -> Message: + await asyncio.sleep(0) + return {"type": "http.request", "body": b"".join(body)} - async def _receive() -> Message: - await asyncio.sleep(0) - return {"type": "http.request", "body": body} + receive = mocked_receive - request = Request(scope, receive=_receive) + request = Request(scope, receive=receive) await self._request_started(request) try: - await self.app(scope, _receive, wrapped_send) + await self.app(scope, receive, wrapped_send) elasticapm.set_transaction_outcome(constants.OUTCOME.SUCCESS, override=False) except Exception: await self.capture_exception( @@ -216,15 +220,12 @@ async def _request_started(self, request: Request): if self.client.config.capture_body != "off": await get_body(request) - if not self.client.should_ignore_url(request.url.path): - trace_parent = TraceParent.from_headers(dict(request.headers)) - self.client.begin_transaction("request", trace_parent=trace_parent) + trace_parent = TraceParent.from_headers(dict(request.headers)) + self.client.begin_transaction("request", trace_parent=trace_parent) - await set_context( - lambda: get_data_from_request(request, self.client.config, constants.TRANSACTION), "request" - ) - transaction_name = self.get_route_name(request) or request.url.path - elasticapm.set_transaction_name("{} {}".format(request.method, transaction_name), override=False) + await set_context(lambda: get_data_from_request(request, self.client.config, constants.TRANSACTION), "request") + transaction_name = self.get_route_name(request) or request.url.path + elasticapm.set_transaction_name("{} {}".format(request.method, transaction_name), override=False) def get_route_name(self, request: Request) -> str: app = request.app From c74aa449924bdb4c431017978bde9b8030969f7b Mon Sep 17 00:00:00 2001 From: frnkvieira Date: Sun, 15 May 2022 22:08:13 -0300 Subject: [PATCH 2/4] Adds a test to ensure non-UTF-8 data is not triggering errors in ignored paths. --- tests/contrib/asyncio/starlette_tests.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/contrib/asyncio/starlette_tests.py b/tests/contrib/asyncio/starlette_tests.py index 99b3cc1d3..b91c4bd48 100644 --- a/tests/contrib/asyncio/starlette_tests.py +++ b/tests/contrib/asyncio/starlette_tests.py @@ -426,6 +426,14 @@ def test_static_files_only(app_static_files_only, elasticapm_client): assert request["socket"] == {"remote_address": "127.0.0.1"} +def test_non_utf_8_body_in_ignored_paths_with_capture_body(app, elasticapm_client): + client = TestClient(app) + elasticapm_client.config.update(1, capture_body="all", transaction_ignore_urls="/hello") + response = client.post("/hello", data=b"b$\x19\xc2") + assert response.status_code == 200 + assert len(elasticapm_client.events[constants.TRANSACTION]) == 0 + + def test_static_files_only_file_notfound(app_static_files_only, elasticapm_client): client = TestClient(app_static_files_only) From a84a887610ff2559b6f14525e16f30abe33fe705 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 25 Aug 2022 11:47:27 -0600 Subject: [PATCH 3/4] Apply encoding.long_field to starlette body Also update encoding.long_field to handle bytes --- elasticapm/contrib/starlette/__init__.py | 3 ++- elasticapm/utils/encoding.py | 12 +++++++++--- tests/contrib/asyncio/starlette_tests.py | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/elasticapm/contrib/starlette/__init__.py b/elasticapm/contrib/starlette/__init__.py index c3c6c820f..f916d93e1 100644 --- a/elasticapm/contrib/starlette/__init__.py +++ b/elasticapm/contrib/starlette/__init__.py @@ -47,6 +47,7 @@ from elasticapm.contrib.asyncio.traces import set_context from elasticapm.contrib.starlette.utils import get_body, get_data_from_request, get_data_from_response from elasticapm.utils.disttracing import TraceParent +from elasticapm.utils.encoding import long_field from elasticapm.utils.logging import get_logger logger = get_logger("elasticapm.errors.client") @@ -166,7 +167,7 @@ async def wrapped_send(message): async def mocked_receive() -> Message: await asyncio.sleep(0) - return {"type": "http.request", "body": b"".join(body)} + return {"type": "http.request", "body": long_field(b"".join(body))} receive = mocked_receive diff --git a/elasticapm/utils/encoding.py b/elasticapm/utils/encoding.py index c98b7b00e..aa277c229 100644 --- a/elasticapm/utils/encoding.py +++ b/elasticapm/utils/encoding.py @@ -229,6 +229,9 @@ def long_field(data): If the given data, converted to string, is longer than LONG_FIELD_MAX_LENGTH, truncate it to LONG_FIELD_MAX_LENGTH-1, adding the "…" character at the end. + If data is bytes, truncate it to LONG_FIELD_MAX_LENGTH-3, adding b"..." to + the end. + Returns the original data if truncation is not required. Per https://github.com/elastic/apm/blob/main/specs/agents/field-limits.md#long_field_max_length-configuration, @@ -242,9 +245,12 @@ def long_field(data): Other fields should be truncated via `elasticapm.utils.encoding.keyword_field()` """ - string = str(data) if not isinstance(data, str) else data - if len(string) > LONG_FIELD_MAX_LENGTH: - return string[: LONG_FIELD_MAX_LENGTH - 1] + "…" + str_or_bytes = str(data) if not isinstance(data, (str, bytes)) else data + if len(str_or_bytes) > LONG_FIELD_MAX_LENGTH: + if isinstance(str_or_bytes, bytes): + return str_or_bytes[: LONG_FIELD_MAX_LENGTH - 3] + b"..." + else: + return str_or_bytes[: LONG_FIELD_MAX_LENGTH - 1] + "…" else: return data diff --git a/tests/contrib/asyncio/starlette_tests.py b/tests/contrib/asyncio/starlette_tests.py index b91c4bd48..bd171f22c 100644 --- a/tests/contrib/asyncio/starlette_tests.py +++ b/tests/contrib/asyncio/starlette_tests.py @@ -434,6 +434,23 @@ def test_non_utf_8_body_in_ignored_paths_with_capture_body(app, elasticapm_clien assert len(elasticapm_client.events[constants.TRANSACTION]) == 0 +@pytest.mark.parametrize("elasticapm_client", [{"capture_body": "all"}], indirect=True) +def test_long_body(app, elasticapm_client): + client = TestClient(app) + + response = client.post( + "/", + data={"foo": "b" * 10000}, + ) + + assert response.status_code == 200 + + assert len(elasticapm_client.events[constants.TRANSACTION]) == 1 + transaction = elasticapm_client.events[constants.TRANSACTION][0] + request = transaction["context"]["request"] + assert request["body"] == "foo=" + "b" * 9993 + "..." + + def test_static_files_only_file_notfound(app_static_files_only, elasticapm_client): client = TestClient(app_static_files_only) From 391188dccec926a1c6e079730af7a1d4d1272551 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 25 Aug 2022 11:50:26 -0600 Subject: [PATCH 4/4] CHANGELOG --- CHANGELOG.asciidoc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index d539017fe..9fef563f5 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -39,10 +39,12 @@ endif::[] [float] ===== Bug fixes -* Differentiate Lambda URLs from API Gateway in AWS Lambda integration {pull}#1609[#1609] -* Restrict the size of Django request bodies to prevent APM Server rejection {pull}#1610[#1610] -* Restrict length of `exception.message` for exceptions captured by the agent {pull}#1619[#1619] -* Fix error when using elasticsearch(sniff_on_start=True) {pull}#1618[#1618] +* Differentiate Lambda URLs from API Gateway in AWS Lambda integration {pull}1609[#1609] +* Restrict the size of Django request bodies to prevent APM Server rejection {pull}1610[#1610] +* Restrict length of `exception.message` for exceptions captured by the agent {pull}1619[#1619] +* Restrict length of Starlette request bodies {pull}1549[#1549] +* Fix error when using elasticsearch(sniff_on_start=True) {pull}1618[#1618] +* Improve handling of ignored URLs and capture_body=off for Starlette {pull}1549[#1549] @@ -55,7 +57,7 @@ endif::[] [float] ===== Features -* Added lambda support for ELB triggers {pull}#1605[#1605] +* Added lambda support for ELB triggers {pull}1605[#1605] [[release-notes-6.10.2]] ==== 6.10.2 - 2022-08-04