From bb42e0417b9a92612954f342630b5c0cc660d89e Mon Sep 17 00:00:00 2001 From: Allen Kim Date: Tue, 10 Oct 2023 02:34:02 +0900 Subject: [PATCH 1/2] Feature/add new process metrics (#1948) --- CHANGELOG.md | 6 ++ .../system_metrics/__init__.py | 65 +++++++++++++++++++ .../tests/test_system_metrics.py | 60 ++++++++++++++--- 3 files changed, 123 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f672d7652..d671707c12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased + +### Added +- `opentelemetry-instrumentation-system-metrics` Add support for collecting process metrics + ([#1948](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1948)) + ### Fixed - Fix version of Flask dependency `werkzeug` ([#1980](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1980)) + ## Version 1.20.0/0.41b0 (2023-09-01) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index b7bd38907e..3d6a0c6775 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -36,6 +36,10 @@ "system.thread_count": None "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], + "process.runtime.gc_count": None, + "process.runtime.thread_count": None, + "process.runtime.cpu.utilization": None, + "process.runtime.context_switches": ["involuntary", "voluntary"], } Usage @@ -63,6 +67,7 @@ "system.network.io": ["transmit", "receive"], "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], + "process.runtime.context_switches": ["involuntary", "voluntary"], } SystemMetricsInstrumentor(config=configuration).instrument() @@ -105,6 +110,9 @@ "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], "process.runtime.gc_count": None, + "process.runtime.thread_count": None, + "process.runtime.cpu.utilization": None, + "process.runtime.context_switches": ["involuntary", "voluntary"], } @@ -150,6 +158,9 @@ def __init__( self._runtime_memory_labels = self._labels.copy() self._runtime_cpu_time_labels = self._labels.copy() self._runtime_gc_count_labels = self._labels.copy() + self._runtime_thread_count_labels = self._labels.copy() + self._runtime_cpu_utilization_labels = self._labels.copy() + self._runtime_context_switches_labels = self._labels.copy() def instrumentation_dependencies(self) -> Collection[str]: return _instruments @@ -347,6 +358,29 @@ def _instrument(self, **kwargs): unit="bytes", ) + if "process.runtime.thread_count" in self._config: + self._meter.create_observable_up_down_counter( + name=f"process.runtime.{self._python_implementation}.thread_count", + callbacks=[self._get_runtime_thread_count], + description="Runtime active threads count", + ) + + if "process.runtime.cpu.utilization" in self._config: + self._meter.create_observable_gauge( + name=f"process.runtime.{self._python_implementation}.cpu.utilization", + callbacks=[self._get_runtime_cpu_utilization], + description="Runtime CPU utilization", + unit="1", + ) + + if "process.runtime.context_switches" in self._config: + self._meter.create_observable_counter( + name=f"process.runtime.{self._python_implementation}.context_switches", + callbacks=[self._get_runtime_context_switches], + description="Runtime context switches", + unit="switches", + ) + def _uninstrument(self, **__): pass @@ -646,3 +680,34 @@ def _get_runtime_gc_count( for index, count in enumerate(gc.get_count()): self._runtime_gc_count_labels["count"] = str(index) yield Observation(count, self._runtime_gc_count_labels.copy()) + + def _get_runtime_thread_count( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for runtime active thread count""" + yield Observation( + self._proc.num_threads(), self._runtime_thread_count_labels.copy() + ) + + def _get_runtime_cpu_utilization( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for runtime CPU utilization""" + proc_cpu_percent = self._proc.cpu_percent() + yield Observation( + proc_cpu_percent, + self._runtime_cpu_utilization_labels.copy(), + ) + + def _get_runtime_context_switches( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for runtime context switches""" + ctx_switches = self._proc.num_ctx_switches() + for metric in self._config["process.runtime.context_switches"]: + if hasattr(ctx_switches, metric): + self._runtime_context_switches_labels["type"] = metric + yield Observation( + getattr(ctx_switches, metric), + self._runtime_context_switches_labels.copy(), + ) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py index f6dbd6c9a1..e28c437009 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py @@ -18,13 +18,14 @@ from platform import python_implementation from unittest import mock -from opentelemetry.instrumentation.system_metrics import ( - SystemMetricsInstrumentor, -) from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.metrics.export import InMemoryMetricReader from opentelemetry.test.test_base import TestBase +from opentelemetry.instrumentation.system_metrics import ( + SystemMetricsInstrumentor, +) + def _mock_netconnection(): NetConnection = namedtuple( @@ -96,7 +97,7 @@ def test_system_metrics_instrument(self): for scope_metrics in resource_metrics.scope_metrics: for metric in scope_metrics.metrics: metric_names.append(metric.name) - self.assertEqual(len(metric_names), 18) + self.assertEqual(len(metric_names), 21) observer_names = [ "system.cpu.time", @@ -117,6 +118,9 @@ def test_system_metrics_instrument(self): f"process.runtime.{self.implementation}.memory", f"process.runtime.{self.implementation}.cpu_time", f"process.runtime.{self.implementation}.gc_count", + f"process.runtime.{self.implementation}.thread_count", + f"process.runtime.{self.implementation}.context_switches", + f"process.runtime.{self.implementation}.cpu.utilization", ] for observer in metric_names: @@ -128,6 +132,9 @@ def test_runtime_metrics_instrument(self): "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], "process.runtime.gc_count": None, + "process.runtime.thread_count": None, + "process.runtime.cpu.utilization": None, + "process.runtime.context_switches": ["involuntary", "voluntary"], } reader = InMemoryMetricReader() @@ -140,12 +147,15 @@ def test_runtime_metrics_instrument(self): for scope_metrics in resource_metrics.scope_metrics: for metric in scope_metrics.metrics: metric_names.append(metric.name) - self.assertEqual(len(metric_names), 3) + self.assertEqual(len(metric_names), 6) observer_names = [ f"process.runtime.{self.implementation}.memory", f"process.runtime.{self.implementation}.cpu_time", f"process.runtime.{self.implementation}.gc_count", + f"process.runtime.{self.implementation}.thread_count", + f"process.runtime.{self.implementation}.context_switches", + f"process.runtime.{self.implementation}.cpu.utilization", ] for observer in metric_names: @@ -161,9 +171,9 @@ def _assert_metrics(self, observer_name, reader, expected): for data_point in metric.data.data_points: for expect in expected: if ( - dict(data_point.attributes) - == expect.attributes - and metric.name == observer_name + dict(data_point.attributes) + == expect.attributes + and metric.name == observer_name ): self.assertEqual( data_point.value, @@ -782,3 +792,37 @@ def test_runtime_get_count(self, mock_gc_get_count): self._test_metrics( f"process.runtime.{self.implementation}.gc_count", expected ) + + @mock.patch("psutil.Process.num_ctx_switches") + def test_runtime_context_switches(self, mock_process_num_ctx_switches): + PCtxSwitches = namedtuple("PCtxSwitches", ["voluntary", "involuntary"]) + + mock_process_num_ctx_switches.configure_mock( + **{"return_value": PCtxSwitches(voluntary=1, involuntary=2)} + ) + + expected = [ + _SystemMetricsResult({"type": "voluntary"}, 1), + _SystemMetricsResult({"type": "involuntary"}, 2), + ] + self._test_metrics( + f"process.runtime.{self.implementation}.context_switches", expected + ) + + @mock.patch("psutil.Process.num_threads") + def test_runtime_thread_num(self, mock_process_thread_num): + mock_process_thread_num.configure_mock(**{"return_value": 42}) + + expected = [_SystemMetricsResult({}, 42)] + self._test_metrics( + f"process.runtime.{self.implementation}.thread_count", expected + ) + + @mock.patch("psutil.Process.cpu_percent") + def test_runtime_cpu_percent(self, mock_process_cpu_percent): + mock_process_cpu_percent.configure_mock(**{"return_value": 42}) + + expected = [_SystemMetricsResult({}, 42)] + self._test_metrics( + f"process.runtime.{self.implementation}.cpu.utilization", expected + ) From 3478831838ce54a972f7434ae6098f9bb28dc494 Mon Sep 17 00:00:00 2001 From: Jeremy Voss Date: Mon, 9 Oct 2023 12:02:52 -0700 Subject: [PATCH 2/2] Using new cloud resource id attribute (#1976) --- CHANGELOG.md | 2 ++ .../resource/detector/azure/app_service.py | 4 +--- .../src/opentelemetry/resource/detector/azure/vm.py | 11 +++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d671707c12..2923ebc791 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix version of Flask dependency `werkzeug` ([#1980](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1980)) +- `opentelemetry-resource-detector-azure` Using new Cloud Resource ID attribute. + ([#1976](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1976)) ## Version 1.20.0/0.41b0 (2023-09-01) diff --git a/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/app_service.py b/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/app_service.py index ea0959cb93..823aac30fd 100644 --- a/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/app_service.py +++ b/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/app_service.py @@ -18,8 +18,6 @@ from opentelemetry.semconv.resource import ResourceAttributes, CloudPlatformValues, CloudProviderValues _AZURE_APP_SERVICE_STAMP_RESOURCE_ATTRIBUTE = "azure.app.service.stamp" -# TODO: Remove once this resource attribute is no longer missing from SDK -_CLOUD_RESOURCE_ID_RESOURCE_ATTRIBUTE = "cloud.resource_id" _REGION_NAME = "REGION_NAME" _WEBSITE_HOME_STAMPNAME = "WEBSITE_HOME_STAMPNAME" _WEBSITE_HOSTNAME = "WEBSITE_HOSTNAME" @@ -49,7 +47,7 @@ def detect(self) -> Resource: azure_resource_uri = _get_azure_resource_uri(website_site_name) if azure_resource_uri: - attributes[_CLOUD_RESOURCE_ID_RESOURCE_ATTRIBUTE] = azure_resource_uri + attributes[ResourceAttributes.CLOUD_RESOURCE_ID] = azure_resource_uri for (key, env_var) in _APP_SERVICE_ATTRIBUTE_ENV_VARS.items(): value = environ.get(env_var) if value: diff --git a/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/vm.py b/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/vm.py index 02f8ea537f..11b04ebff3 100644 --- a/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/vm.py +++ b/resource/opentelemetry-resource-detector-azure/src/opentelemetry/resource/detector/azure/vm.py @@ -19,11 +19,14 @@ from urllib.error import URLError from opentelemetry.sdk.resources import ResourceDetector, Resource -from opentelemetry.semconv.resource import ResourceAttributes, CloudPlatformValues, CloudProviderValues +from opentelemetry.semconv.resource import ( + ResourceAttributes, + CloudPlatformValues, + CloudProviderValues, +) # TODO: Remove when cloud resource id is no longer missing in Resource Attributes -_CLOUD_RESOURCE_ID_RESOURCE_ATTRIBUTE = "cloud.resource_id" _AZURE_VM_METADATA_ENDPOINT = "http://169.254.169.254/metadata/instance/compute?api-version=2021-12-13&format=json" _AZURE_VM_SCALE_SET_NAME_ATTRIBUTE = "azure.vm.scaleset.name" _AZURE_VM_SKU_ATTRIBUTE = "azure.vm.sku" @@ -35,7 +38,7 @@ ResourceAttributes.CLOUD_PLATFORM, ResourceAttributes.CLOUD_PROVIDER, ResourceAttributes.CLOUD_REGION, - _CLOUD_RESOURCE_ID_RESOURCE_ATTRIBUTE, + ResourceAttributes.CLOUD_RESOURCE_ID, ResourceAttributes.HOST_ID, ResourceAttributes.HOST_NAME, ResourceAttributes.HOST_TYPE, @@ -81,7 +84,7 @@ def get_attribute_from_metadata(self, metadata_json, attribute_key): ams_value = CloudProviderValues.AZURE.value elif attribute_key == ResourceAttributes.CLOUD_REGION: ams_value = metadata_json["location"] - elif attribute_key == _CLOUD_RESOURCE_ID_RESOURCE_ATTRIBUTE: + elif attribute_key == ResourceAttributes.CLOUD_RESOURCE_ID: ams_value = metadata_json["resourceId"] elif attribute_key == ResourceAttributes.HOST_ID or \ attribute_key == ResourceAttributes.SERVICE_INSTANCE_ID: