Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metrics instrumentation flask #1186

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e91b396
add metric instrumentation
TheAnshul756 Jul 5, 2022
139a1cc
add metrics tests
TheAnshul756 Jul 7, 2022
02d7f29
Merge branch 'open-telemetry:main' into metrics-instrumentation-flask
TheAnshul756 Jul 7, 2022
528efb0
add changlog and fix lint
TheAnshul756 Jul 8, 2022
8278b7a
Merge branch 'main' into metrics-instrumentation-flask
TheAnshul756 Jul 8, 2022
615bf12
Merge branch 'main' into metrics-instrumentation-flask
srikanthccv Jul 11, 2022
aea88d4
use wsgi functions instaed of flask's own
TheAnshul756 Jul 13, 2022
754e062
Merge branch 'metrics-instrumentation-flask' of https://github.com/Th…
TheAnshul756 Jul 13, 2022
cade131
fix lint
TheAnshul756 Jul 13, 2022
d6026f8
change starttime and duration time
TheAnshul756 Jul 14, 2022
41ab66f
fix merge conflict
TheAnshul756 Jul 18, 2022
ea828be
add metric instrumentation in call instead of before request and tear…
TheAnshul756 Jul 18, 2022
4273177
change functions to private and remove unused vaiables
TheAnshul756 Jul 18, 2022
6bbfea2
fix typo
TheAnshul756 Jul 18, 2022
ab37e05
fix typo
TheAnshul756 Jul 18, 2022
22b11a5
Merge branch 'main' into metrics-instrumentation-flask
ocelotl Jul 25, 2022
9286231
add duration test
TheAnshul756 Jul 27, 2022
a758eab
Merge branch 'metrics-instrumentation-flask' of https://github.com/Th…
TheAnshul756 Jul 27, 2022
1a33b21
fix lint
TheAnshul756 Jul 27, 2022
dda7548
add attributes value test for metrics
TheAnshul756 Jul 27, 2022
9557035
remove unwanted import
TheAnshul756 Jul 28, 2022
27cdbc7
add test for uninstrument
TheAnshul756 Aug 1, 2022
536bf25
Merge branch 'main' into metrics-instrumentation-flask
srikanthccv Aug 1, 2022
bc89aa0
change request type to post for testing
TheAnshul756 Aug 1, 2022
ab2bed1
Merge branch 'metrics-instrumentation-flask' of https://github.com/Th…
TheAnshul756 Aug 1, 2022
0ba053f
add value test for post request
TheAnshul756 Aug 2, 2022
9fe8a18
Merge branch 'main' into metrics-instrumentation-flask
srikanthccv Aug 4, 2022
3516ec4
Merge branch 'main' into metrics-instrumentation-flask
srikanthccv Aug 4, 2022
f45ca51
add readme for metrics
TheAnshul756 Aug 5, 2022
7cc4a50
Merge branch 'metrics-instrumentation-flask' of https://github.com/Th…
TheAnshul756 Aug 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- `opentelemetry-instrumentation-redis` add support to instrument RedisCluster clients
([#1177](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1177))
- Add metric instumentation for flask
([#1186](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1186))

## [1.12.0rc2-0.32b0](https://github.com/open-telemetry/opentelemetry-python/releases/tag/v1.12.0rc2-0.32b0) - 2022-07-01

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def response_hook(span: Span, status: str, response_headers: List):
"""

from logging import getLogger
from timeit import default_timer
from typing import Collection

import flask
Expand All @@ -154,8 +155,8 @@ def response_hook(span: Span, status: str, response_headers: List):
get_global_response_propagator,
)
from opentelemetry.instrumentation.utils import _start_internal_or_server_span
from opentelemetry.metrics import get_meter
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.util._time import _time_ns
from opentelemetry.util.http import get_excluded_urls, parse_excluded_urls

_logger = getLogger(__name__)
Expand All @@ -164,7 +165,7 @@ def response_hook(span: Span, status: str, response_headers: List):
_ENVIRON_SPAN_KEY = "opentelemetry-flask.span_key"
_ENVIRON_ACTIVATION_KEY = "opentelemetry-flask.activation_key"
_ENVIRON_TOKEN = "opentelemetry-flask.token"

_ENVIRON_STATUS_CODE_KEY = "opentelemetry-flask.status_code"

_excluded_urls_from_env = get_excluded_urls("FLASK")

Expand All @@ -184,7 +185,7 @@ def _wrapped_app(wrapped_app_environ, start_response):
# In theory, we could start the span here and use
# update_name later but that API is "highly discouraged" so
# we better avoid it.
wrapped_app_environ[_ENVIRON_STARTTIME_KEY] = _time_ns()
wrapped_app_environ[_ENVIRON_STARTTIME_KEY] = default_timer()
TheAnshul756 marked this conversation as resolved.
Show resolved Hide resolved

def _start_response(status, response_headers, *args, **kwargs):
if flask.request and (
Expand All @@ -204,6 +205,11 @@ def _start_response(status, response_headers, *args, **kwargs):
otel_wsgi.add_response_attributes(
span, status, response_headers
)
status_code = otel_wsgi.parse_status_code(status)
if status_code is not None:
flask.request.environ[
_ENVIRON_STATUS_CODE_KEY
] = status_code
if (
span.is_recording()
and span.kind == trace.SpanKind.SERVER
Expand All @@ -229,7 +235,10 @@ def _start_response(status, response_headers, *args, **kwargs):


def _wrapped_before_request(
request_hook=None, tracer=None, excluded_urls=None
active_requests_counter,
request_hook=None,
tracer=None,
excluded_urls=None,
):
def _before_request():
if excluded_urls and excluded_urls.url_disabled(flask.request.url):
Expand All @@ -252,6 +261,10 @@ def _before_request():
attributes = otel_wsgi.collect_request_attributes(
flask_request_environ
)
active_requests_count_attrs = (
otel_wsgi.parse_active_request_count_attrs(attributes)
)
active_requests_counter.add(1, active_requests_count_attrs)
if flask.request.url_rule:
# For 404 that result from no route found, etc, we
# don't have a url_rule.
Expand All @@ -278,7 +291,11 @@ def _before_request():
return _before_request


def _wrapped_teardown_request(excluded_urls=None):
def _wrapped_teardown_request(
active_requests_counter,
duration_histogram,
excluded_urls=None,
):
def _teardown_request(exc):
# pylint: disable=E1101
if excluded_urls and excluded_urls.url_disabled(flask.request.url):
Expand All @@ -290,7 +307,21 @@ def _teardown_request(exc):
# a way that doesn't run `before_request`, like when it is created
# with `app.test_request_context`.
return
start = flask.request.environ.get(_ENVIRON_STARTTIME_KEY)
TheAnshul756 marked this conversation as resolved.
Show resolved Hide resolved
duration = max(round((default_timer() - start) * 1000), 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't all this logic be in _start_response?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, that is also a possible way. But we are starting span in before_request and ending it in teardown_request so I did the same for metrics too. Also here it says that, it is discouraged, I'm also not sure what to prefer. If doing it in start_response is more acceptable then I can make the changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, please do. That comment is relevant to only tracing API and they are referring to update_name call on span once it is started. It is discouraged because sampling decision would have already been made based on it and such edge cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

attributes = otel_wsgi.collect_request_attributes(
flask.request.environ
)
active_requests_count_attrs = (
otel_wsgi.parse_active_request_count_attrs(attributes)
)
duration_attrs = otel_wsgi.parse_duration_attrs(attributes)
status_code = flask.request.environ.get(_ENVIRON_STATUS_CODE_KEY, None)
if status_code:
duration_attrs[SpanAttributes.HTTP_STATUS_CODE] = status_code

duration_histogram.record(duration, duration_attrs)
active_requests_counter.add(-1, active_requests_count_attrs)
if exc is None:
activation.__exit__(None, None, None)
else:
Expand All @@ -310,6 +341,7 @@ class _InstrumentedFlask(flask.Flask):
_tracer_provider = None
_request_hook = None
_response_hook = None
_meter_provider = None

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand All @@ -327,7 +359,22 @@ def __init__(self, *args, **kwargs):
__name__, __version__, _InstrumentedFlask._tracer_provider
)

meter = get_meter(
__name__, __version__, _InstrumentedFlask._meter_provider
)
duration_histogram = meter.create_histogram(
name="http.server.duration",
unit="ms",
description="measures the duration of the inbound HTTP request",
)
active_requests_counter = meter.create_up_down_counter(
lzchen marked this conversation as resolved.
Show resolved Hide resolved
name="http.server.active_requests",
unit="requests",
description="measures the number of concurrent HTTP requests that are currently in-flight",
)

_before_request = _wrapped_before_request(
active_requests_counter,
_InstrumentedFlask._request_hook,
tracer,
excluded_urls=_InstrumentedFlask._excluded_urls,
Expand All @@ -336,6 +383,8 @@ def __init__(self, *args, **kwargs):
self.before_request(_before_request)

_teardown_request = _wrapped_teardown_request(
active_requests_counter,
duration_histogram,
excluded_urls=_InstrumentedFlask._excluded_urls,
)
self.teardown_request(_teardown_request)
Expand Down Expand Up @@ -367,6 +416,8 @@ def _instrument(self, **kwargs):
if excluded_urls is None
else parse_excluded_urls(excluded_urls)
)
meter_provider = kwargs.get("meter_provider")
_InstrumentedFlask._meter_provider = meter_provider
flask.Flask = _InstrumentedFlask

def _uninstrument(self, **kwargs):
Expand All @@ -379,6 +430,7 @@ def instrument_app(
response_hook=None,
tracer_provider=None,
excluded_urls=None,
meter_provider=None,
):
if not hasattr(app, "_is_instrumented_by_opentelemetry"):
app._is_instrumented_by_opentelemetry = False
Expand All @@ -395,8 +447,20 @@ def instrument_app(
)

tracer = trace.get_tracer(__name__, __version__, tracer_provider)
meter = get_meter(__name__, __version__, meter_provider)
duration_histogram = meter.create_histogram(
name="http.server.duration",
unit="ms",
description="measures the duration of the inbound HTTP request",
)
active_requests_counter = meter.create_up_down_counter(
name="http.server.active_requests",
unit="requests",
description="measures the number of concurrent HTTP requests that are currently in-flight",
)

_before_request = _wrapped_before_request(
active_requests_counter,
request_hook,
tracer,
excluded_urls=excluded_urls,
Expand All @@ -405,6 +469,8 @@ def instrument_app(
app.before_request(_before_request)

_teardown_request = _wrapped_teardown_request(
active_requests_counter,
duration_histogram,
excluded_urls=excluded_urls,
)
app._teardown_request = _teardown_request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@
get_global_response_propagator,
set_global_response_propagator,
)
from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware
from opentelemetry.instrumentation.wsgi import (
OpenTelemetryMiddleware,
_active_requests_count_attrs,
_duration_attrs,
)
from opentelemetry.sdk.metrics.export import (
HistogramDataPoint,
NumberDataPoint,
)
from opentelemetry.sdk.resources import Resource
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.test.wsgitestutil import WsgiTestBase
Expand All @@ -49,6 +57,16 @@ def expected_attributes(override_attributes):
return default_attributes


_expected_metric_names = [
"http.server.active_requests",
"http.server.duration",
]
_recommended_attrs = {
"http.server.active_requests": _active_requests_count_attrs,
"http.server.duration": _duration_attrs,
}


class TestProgrammatic(InstrumentationTest, WsgiTestBase):
def setUp(self):
super().setUp()
Expand Down Expand Up @@ -250,6 +268,34 @@ def test_exclude_lists_from_explicit(self):
span_list = self.memory_exporter.get_finished_spans()
self.assertEqual(len(span_list), 1)

def test_wsgi_metrics(self):
TheAnshul756 marked this conversation as resolved.
Show resolved Hide resolved
self.client.get("/hello/123")
self.client.get("/hello/321")
self.client.get("/hello/756")
metrics_list = self.memory_metrics_reader.get_metrics_data()
number_data_point_seen = False
histogram_data_point_seen = False
self.assertTrue(len(metrics_list.resource_metrics) != 0)
for resource_metric in metrics_list.resource_metrics:
self.assertTrue(len(resource_metric.scope_metrics) != 0)
for scope_metric in resource_metric.scope_metrics:
self.assertTrue(len(scope_metric.metrics) != 0)
for metric in scope_metric.metrics:
self.assertIn(metric.name, _expected_metric_names)
data_points = list(metric.data.data_points)
self.assertEqual(len(data_points), 1)
for point in data_points:
if isinstance(point, HistogramDataPoint):
self.assertEqual(point.count, 3)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have a test for the actual values in the histogram? This will test that what we are recording for duration value is actually accurate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sorry, I do not understand how can we get the value of time duration to test. Can you please give me an example?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

point.sum gives you the total duration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we can get the actual value, but what will be the expected value? For three get requests point.sum value will be random time taken. We can not compare it to some hardcoded value. One possible solution I can think of is to mock the default_timer() function that we use to get the timestamp, and with that, we test the time duration captured. But I'm not sure if it is the correct way to test?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You do not have to make it exact hardcoded value. It can be assertGreaterEqual or assertAlmostEqual with delta based on how much time a view/endpoint takes to respond back.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trigger a request -> operation takes some time -> expected duration should be within x+/-∆

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added the test. Please have a look.

histogram_data_point_seen = True
if isinstance(point, NumberDataPoint):
number_data_point_seen = True
for attr in point.attributes:
self.assertIn(
attr, _recommended_attrs[metric.name]
)
self.assertTrue(number_data_point_seen and histogram_data_point_seen)


class TestProgrammaticHooks(InstrumentationTest, WsgiTestBase):
def setUp(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,14 +326,30 @@ def collect_custom_response_headers_attributes(response_headers):
return attributes


def _parse_status_code(resp_status):
def parse_status_code(resp_status):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would still keep this and other functions private. They are not intended to be user accessible. If there is really need we can always change from private to public but not vice versa.

status_code, _ = resp_status.split(" ", 1)
try:
return int(status_code)
except ValueError:
return None


def parse_active_request_count_attrs(req_attrs):
TheAnshul756 marked this conversation as resolved.
Show resolved Hide resolved
active_requests_count_attrs = {}
for attr_key in _active_requests_count_attrs:
if req_attrs.get(attr_key) is not None:
active_requests_count_attrs[attr_key] = req_attrs[attr_key]
return active_requests_count_attrs


def parse_duration_attrs(req_attrs):
duration_attrs = {}
for attr_key in _duration_attrs:
if req_attrs.get(attr_key) is not None:
duration_attrs[attr_key] = req_attrs[attr_key]
return duration_attrs


def add_response_attributes(
span, start_response_status, response_headers
): # pylint: disable=unused-argument
Expand Down Expand Up @@ -412,7 +428,7 @@ def _create_start_response(
@functools.wraps(start_response)
def _start_response(status, response_headers, *args, **kwargs):
add_response_attributes(span, status, response_headers)
status_code = _parse_status_code(status)
status_code = parse_status_code(status)
if status_code is not None:
duration_attrs[SpanAttributes.HTTP_STATUS_CODE] = status_code
if span.is_recording() and span.kind == trace.SpanKind.SERVER:
Expand All @@ -436,15 +452,10 @@ def __call__(self, environ, start_response):
start_response: The WSGI start_response callable.
"""
req_attrs = collect_request_attributes(environ)
active_requests_count_attrs = {}
for attr_key in _active_requests_count_attrs:
if req_attrs.get(attr_key) is not None:
active_requests_count_attrs[attr_key] = req_attrs[attr_key]

duration_attrs = {}
for attr_key in _duration_attrs:
if req_attrs.get(attr_key) is not None:
duration_attrs[attr_key] = req_attrs[attr_key]
active_requests_count_attrs = parse_active_request_count_attrs(
TheAnshul756 marked this conversation as resolved.
Show resolved Hide resolved
req_attrs
)
duration_attrs = parse_duration_attrs(req_attrs)

span, token = _start_internal_or_server_span(
tracer=self.tracer,
Expand Down