diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e26036875..8592e4724d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1369](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1369)) - `opentelemetry-instrumentation-system-metrics` add supports to collect system thread count. ([#1339](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1339)) - `opentelemetry-exporter-richconsole` Fixing RichConsoleExpoter to allow multiple traces, fixing duplicate spans and include resources ([#1336](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1336)) +- `opentelemetry-instrumentation-asgi` Add support for regular expression matching of HTTP headers. + ([#1333](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1333)) -## [1.13.0-0.34b0](https://github.com/open-telemetry/opentelemetry-python/releases/tag/v1.13.0-0.34b0) - 2022-09-26 +### Fixed +- `opentelemetry-instrumentation-asgi` Fix keys() in class ASGIGetter so it decodes the keys before returning them. + ([#1333](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1333)) +- `opentelemetry-instrumentation-asgi` Make ASGIGetter.get() compare all keys in a case insensitive manner. 
+ ([#1333](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1333)) +## [1.13.0-0.34b0](https://github.com/open-telemetry/opentelemetry-python/releases/tag/v1.13.0-0.34b0) - 2022-09-26 - `opentelemetry-instrumentation-asyncpg` Fix high cardinality in the span name ([#1324](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1324)) @@ -40,7 +47,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add metric instrumentation in starlette ([#1327](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1327)) - ### Fixed - `opentelemetry-instrumentation-boto3sqs` Make propagation compatible with other SQS instrumentations, add 'messaging.url' span attribute, and fix missing package dependencies. diff --git a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py index ce42c99536..88c3a16fa3 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py @@ -15,8 +15,7 @@ """ The opentelemetry-instrumentation-asgi package provides an ASGI middleware that can be used -on any ASGI framework (such as Django-channels / Quart) to track requests -timing through OpenTelemetry. +on any ASGI framework (such as Django-channels / Quart) to track request timing through OpenTelemetry. Usage (Quart) ------------- @@ -71,9 +70,14 @@ async def hello(): Request/Response hooks ********************** -Utilize request/response hooks to execute custom logic to be performed before/after performing a request. The server request hook takes in a server span and ASGI -scope object for every incoming request. 
The client request hook is called with the internal span and an ASGI scope which is sent as a dictionary for when the method receive is called. -The client response hook is called with the internal span and an ASGI event which is sent as a dictionary for when the method send is called. +This instrumentation supports request and response hooks. These are functions that get called +right after a span is created for a request and right before the span is finished for the response. + +- The server request hook is passed a server span and ASGI scope object for every incoming request. +- The client request hook is called with the internal span and an ASGI scope when the method ``receive`` is called. +- The client response hook is called with the internal span and an ASGI event when the method ``send`` is called. + +For example, .. code-block:: python @@ -93,54 +97,93 @@ def client_response_hook(span: Span, message: dict): Capture HTTP request and response headers ***************************************** -You can configure the agent to capture predefined HTTP headers as span attributes, according to the `semantic convention <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers>`_. +You can configure the agent to capture specified HTTP headers as span attributes, according to the +`semantic convention <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers>`_. Request headers *************** -To capture predefined HTTP request headers as span attributes, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` -to a comma-separated list of HTTP header names. +To capture HTTP request headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` to a comma delimited list of HTTP header names. For example, - :: export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST="content-type,custom_request_header" -will extract ``content-type`` and ``custom_request_header`` from request headers and add them as span attributes. 
+will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes. + +Request header names in ASGI are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +variable will capture the header named ``custom-header``. + +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST="Accept.*,X-.*" -It is recommended that you should give the correct names of the headers to be captured in the environment variable. -Request header names in ASGI are case insensitive. So, giving header name as ``CUStom-Header`` in environment variable will be able capture header with name ``custom-header``. +This would match all request headers that start with ``Accept`` or ``X-``. -The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` being the normalized HTTP header name (lowercase, with - characters replaced by _ ). -The value of the attribute will be single item list containing all the header values. +To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` to ``".*"``. +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" -Example of the added span attribute, +The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +single item list containing all the header values. + +For example: ``http.request.header.custom_request_header = ["<value1>,<value2>"]`` Response headers **************** -To capture predefined HTTP response headers as span attributes, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` -to a comma-separated list of HTTP header names. 
+To capture HTTP response headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` to a comma delimited list of HTTP header names. For example, - :: export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE="content-type,custom_response_header" -will extract ``content-type`` and ``custom_response_header`` from response headers and add them as span attributes. +will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes. + +Response header names in ASGI are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +variable will capture the header named ``custom-header``. + +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE="Content.*,X-.*" -It is recommended that you should give the correct names of the headers to be captured in the environment variable. -Response header names captured in ASGI are case insensitive. So, giving header name as ``CUStomHeader`` in environment variable will be able capture header with name ``customheader``. +This would match all response headers that start with ``Content`` or ``X-``. -The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` being the normalized HTTP header name (lowercase, with - characters replaced by _ ). -The value of the attribute will be single item list containing all the header values. +To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` to ``".*"``. +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" -Example of the added span attribute, +The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). 
The value of the attribute will be a +single item list containing all the header values. + +For example: ``http.response.header.custom_response_header = ["<value1>,<value2>"]`` +Sanitizing headers +****************** +In order to prevent storing sensitive data such as personally identifiable information (PII), session keys, passwords, +etc., set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS`` +to a comma delimited list of HTTP header names to be sanitized. Regexes may be used, and all header names will be +matched in a case-insensitive manner. + +For example, +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS=".*session.*,set-cookie" + +will replace the value of headers such as ``session-id`` and ``set-cookie`` with ``[REDACTED]`` in the span. + Note: - Environment variable names to capture http headers are still experimental, and thus are subject to change. + The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change. 
API --- @@ -169,8 +212,10 @@ def client_response_hook(span: Span, message: dict): from opentelemetry.trace import Span, set_span_in_context from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.http import ( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE, + SanitizeValue, _parse_active_request_count_attrs, _parse_duration_attrs, get_custom_headers, @@ -202,19 +247,19 @@ def get( if not headers: return None - # asgi header keys are in lower case + # ASGI header keys are in lower case key = key.lower() decoded = [ _value.decode("utf8") for (_key, _value) in headers - if _key.decode("utf8") == key + if _key.decode("utf8").lower() == key ] if not decoded: return None return decoded def keys(self, carrier: dict) -> typing.List[str]: - return list(carrier.keys()) + return [_key.decode("utf8") for (_key, _value) in (carrier.get("headers") or [])] asgi_getter = ASGIGetter() @@ -289,35 +334,50 @@ def collect_custom_request_headers_attributes(scope): """returns custom HTTP request headers to be added into SERVER span as span attributes Refer specification https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers""" - attributes = {} - custom_request_headers = get_custom_headers( - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST + sanitize = SanitizeValue( + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS + ) ) - for header in custom_request_headers: - values = asgi_getter.get(scope, header) - if values: - key = normalise_request_header_name(header) - attributes.setdefault(key, []).extend(values) + # Decode headers before processing. 
+ headers = { + _key.decode("utf8"): _value.decode("utf8") + for (_key, _value) in scope.get("headers") or [] + } - return attributes + return sanitize.sanitize_header_values( + headers, + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST + ), + normalise_request_header_name, + ) def collect_custom_response_headers_attributes(message): """returns custom HTTP response headers to be added into SERVER span as span attributes Refer specification https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers""" - attributes = {} - custom_response_headers = get_custom_headers( - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE + + sanitize = SanitizeValue( + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS + ) ) - for header in custom_response_headers: - values = asgi_getter.get(message, header) - if values: - key = normalise_response_header_name(header) - attributes.setdefault(key, []).extend(values) + # Decode headers before processing. + headers = { + _key.decode("utf8"): _value.decode("utf8") + for (_key, _value) in message.get("headers") or [] + } - return attributes + return sanitize.sanitize_header_values( + headers, + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE + ), + normalise_response_header_name, + ) def get_host_port_url_tuple(scope): @@ -353,7 +413,7 @@ def set_status_code(span, status_code): def get_default_span_details(scope: dict) -> Tuple[str, dict]: """Default implementation for get_default_span_details Args: - scope: the asgi scope dictionary + scope: the ASGI scope dictionary Returns: a tuple of the span name, and any attributes to attach to the span. """ @@ -427,7 +487,7 @@ async def __call__(self, scope, receive, send): """The ASGI application Args: - scope: A ASGI environment. + scope: An ASGI environment. 
receive: An awaitable callable yielding dictionaries send: An awaitable callable taking a single dictionary as argument. """ diff --git a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py index 1b00ee1279..6b824a7908 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py @@ -37,6 +37,7 @@ from opentelemetry.test.test_base import TestBase from opentelemetry.trace import SpanKind, format_span_id, format_trace_id from opentelemetry.util.http import ( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE, _active_requests_count_attrs, @@ -94,6 +95,15 @@ async def http_app_with_custom_headers(scope, receive, send): (b"Content-Type", b"text/plain"), (b"custom-test-header-1", b"test-header-value-1"), (b"custom-test-header-2", b"test-header-value-2"), + ( + b"my-custom-regex-header-1", + b"my-custom-regex-value-1,my-custom-regex-value-2", + ), + ( + b"My-Custom-Regex-Header-2", + b"my-custom-regex-value-3,my-custom-regex-value-4", + ), + (b"my-secret-header", b"my-secret-value"), ], } ) @@ -111,6 +121,15 @@ async def websocket_app_with_custom_headers(scope, receive, send): "headers": [ (b"custom-test-header-1", b"test-header-value-1"), (b"custom-test-header-2", b"test-header-value-2"), + ( + b"my-custom-regex-header-1", + b"my-custom-regex-value-1,my-custom-regex-value-2", + ), + ( + b"My-Custom-Regex-Header-2", + b"my-custom-regex-value-3,my-custom-regex-value-4", + ), + (b"my-secret-header", b"my-secret-value"), ], } ) @@ -756,8 +775,9 @@ async def wrapped_app(scope, receive, send): @mock.patch.dict( "os.environ", { - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: 
"Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3", - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: ".*my-secret.*", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*", }, ) class TestCustomHeaders(AsgiTestBase, TestBase): @@ -774,6 +794,9 @@ def test_http_custom_request_headers_in_span_attributes(self): [ (b"custom-test-header-1", b"test-header-value-1"), (b"custom-test-header-2", b"test-header-value-2"), + (b"Regex-Test-Header-1", b"Regex Test Value 1"), + (b"regex-test-header-2", b"RegexTestValue2,RegexTestValue3"), + (b"My-Secret-Header", b"My Secret Value"), ] ) self.seed_app(self.app) @@ -787,6 +810,11 @@ def test_http_custom_request_headers_in_span_attributes(self): "http.request.header.custom_test_header_2": ( "test-header-value-2", ), + "http.request.header.regex_test_header_1": ("Regex Test Value 1",), + "http.request.header.regex_test_header_2": ( + "RegexTestValue2,RegexTestValue3", + ), + "http.request.header.my_secret_header": ("[REDACTED]",), } for span in span_list: if span.kind == SpanKind.SERVER: @@ -833,6 +861,13 @@ def test_http_custom_response_headers_in_span_attributes(self): "http.response.header.custom_test_header_2": ( "test-header-value-2", ), + "http.response.header.my_custom_regex_header_1": ( + "my-custom-regex-value-1,my-custom-regex-value-2", + ), + "http.response.header.my_custom_regex_header_2": ( + "my-custom-regex-value-3,my-custom-regex-value-4", + ), + "http.response.header.my_secret_header": ("[REDACTED]",), } for span in span_list: if span.kind == 
SpanKind.SERVER: @@ -866,6 +901,9 @@ def test_websocket_custom_request_headers_in_span_attributes(self): "headers": [ (b"custom-test-header-1", b"test-header-value-1"), (b"custom-test-header-2", b"test-header-value-2"), + (b"Regex-Test-Header-1", b"Regex Test Value 1"), + (b"regex-test-header-2", b"RegexTestValue2,RegexTestValue3"), + (b"My-Secret-Header", b"My Secret Value"), ], "client": ("127.0.0.1", 32767), "server": ("127.0.0.1", 80), @@ -884,6 +922,11 @@ def test_websocket_custom_request_headers_in_span_attributes(self): "http.request.header.custom_test_header_2": ( "test-header-value-2", ), + "http.request.header.regex_test_header_1": ("Regex Test Value 1",), + "http.request.header.regex_test_header_2": ( + "RegexTestValue2,RegexTestValue3", + ), + "http.request.header.my_secret_header": ("[REDACTED]",), } for span in span_list: if span.kind == SpanKind.SERVER: @@ -948,6 +991,13 @@ def test_websocket_custom_response_headers_in_span_attributes(self): "http.response.header.custom_test_header_2": ( "test-header-value-2", ), + "http.response.header.my_custom_regex_header_1": ( + "my-custom-regex-value-1,my-custom-regex-value-2", + ), + "http.response.header.my_custom_regex_header_2": ( + "my-custom-regex-value-3,my-custom-regex-value-4", + ), + "http.response.header.my_secret_header": ("[REDACTED]",), } for span in span_list: if span.kind == SpanKind.SERVER: diff --git a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py index 22ff9dda9a..f3d39ab02f 100644 --- a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py +++ b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py @@ -79,6 +79,34 @@ def sanitize_header_value(self, header: str, value: str) -> str: else value ) + def sanitize_header_values( + self, headers: dict, header_regexes: list, normalize_function: callable + ) -> dict: + values = {} + + if header_regexes: + header_regexes_compiled 
= re_compile( + "|".join(header_regexes), + RE_IGNORECASE, + ) + + for header_name in list( + filter( + header_regexes_compiled.fullmatch, + headers.keys(), + ) + ): + header_values = headers.get(header_name) + if header_values: + key = normalize_function(header_name.lower()) + values[key] = [ + self.sanitize_header_value( + header=header_name, value=header_values + ) + ] + + return values + _root = r"OTEL_PYTHON_{}"