From 4934181350a579f66d9f87d783325283fe22dee9 Mon Sep 17 00:00:00 2001
From: Noctua
Date: Thu, 13 Feb 2025 12:06:52 +0000
Subject: [PATCH] chore: update charm libraries

---
 .../grafana_k8s/v0/grafana_dashboard.py       |  11 +-
 .../tempo_coordinator_k8s/v0/charm_tracing.py | 123 ++++++++++++------
 2 files changed, 92 insertions(+), 42 deletions(-)

diff --git a/lib/charms/grafana_k8s/v0/grafana_dashboard.py b/lib/charms/grafana_k8s/v0/grafana_dashboard.py
index 9071e466f..d618c7991 100644
--- a/lib/charms/grafana_k8s/v0/grafana_dashboard.py
+++ b/lib/charms/grafana_k8s/v0/grafana_dashboard.py
@@ -219,7 +219,7 @@ def __init__(self, *args):
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 41
+LIBPATCH = 42
 
 PYDEPS = ["cosl >= 0.0.50"]
 
@@ -964,6 +964,13 @@ def _replace_uid(
             "Processed dashboard '%s': kept original uid '%s'", dashboard_path, original_uid
         )
 
+    @classmethod
+    def _add_tags(cls, dashboard_dict: dict, charm_name: str):
+        tags: List[str] = dashboard_dict.get("tags", [])
+        if not any(tag.startswith("charm: ") for tag in tags):
+            tags.append(f"charm: {charm_name}")
+        dashboard_dict["tags"] = tags
+
     @classmethod
     def load_dashboards_from_dir(
         cls,
@@ -1006,6 +1013,8 @@ def _is_dashboard(p: Path) -> bool:
                 charm_name=charm_name,
             )
 
+            cls._add_tags(dashboard_dict=dashboard_dict, charm_name=charm_name)
+
             id = "file:{}".format(path.stem)
             dashboard_templates[id] = cls._content_to_dashboard_object(
                 charm_name=charm_name,
diff --git a/lib/charms/tempo_coordinator_k8s/v0/charm_tracing.py b/lib/charms/tempo_coordinator_k8s/v0/charm_tracing.py
index a9b6deeb6..ebf80ede2 100644
--- a/lib/charms/tempo_coordinator_k8s/v0/charm_tracing.py
+++ b/lib/charms/tempo_coordinator_k8s/v0/charm_tracing.py
@@ -19,15 +19,19 @@
 
 ```python
 # import the necessary charm libs
-from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer, charm_tracing_config
+from charms.tempo_coordinator_k8s.v0.tracing import (
+    TracingEndpointRequirer,
+    charm_tracing_config,
+)
 from charms.tempo_coordinator_k8s.v0.charm_tracing import charm_tracing
+
 # decorate your charm class with charm_tracing:
 @charm_tracing(
     # forward-declare the instance attributes that the instrumentor will look up to obtain the
     # tempo endpoint and server certificate
     tracing_endpoint="tracing_endpoint",
-    server_cert="server_cert"
+    server_cert="server_cert",
 )
 class MyCharm(CharmBase):
     _path_to_cert = "/path/to/cert.crt"
 
@@ -37,10 +41,12 @@ class MyCharm(CharmBase):
     # If you do support TLS, you'll need to make sure that the server cert is copied to this location
     # and kept up to date so the instrumentor can use it.
 
-    def __init__(self, ...):
-        ...
-        self.tracing = TracingEndpointRequirer(self, ...)
-        self.tracing_endpoint, self.server_cert = charm_tracing_config(self.tracing, self._path_to_cert)
+    def __init__(self, framework):
+        # ...
+        self.tracing = TracingEndpointRequirer(self)
+        self.tracing_endpoint, self.server_cert = charm_tracing_config(
+            self.tracing, self._path_to_cert
+        )
 ```
 
 # Detailed usage
@@ -226,12 +232,6 @@ def my_tracing_endpoint(self) -> Optional[str]:
 3) If you were passing a certificate (str) using `server_cert`, you need to change it to
 provide an *absolute* path to the certificate file instead.
""" -import typing - -from opentelemetry.exporter.otlp.proto.common._internal.trace_encoder import ( - encode_spans, -) -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter def _remove_stale_otel_sdk_packages(): @@ -286,12 +286,15 @@ def _remove_stale_otel_sdk_packages(): # apply hacky patch to remove stale opentelemetry sdk packages on upgrade-charm. # it could be trouble if someone ever decides to implement their own tracer parallel to # ours and before the charm has inited. We assume they won't. +# !!IMPORTANT!! keep all otlp imports UNDER this call. _remove_stale_otel_sdk_packages() import functools import inspect import logging import os +import typing +from collections import deque from contextlib import contextmanager from contextvars import Context, ContextVar, copy_context from pathlib import Path @@ -310,6 +313,9 @@ def _remove_stale_otel_sdk_packages(): import opentelemetry import ops +from opentelemetry.exporter.otlp.proto.common._internal.trace_encoder import ( + encode_spans, +) from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import ReadableSpan, Span, TracerProvider @@ -318,6 +324,7 @@ def _remove_stale_otel_sdk_packages(): SpanExporter, SpanExportResult, ) +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.trace import INVALID_SPAN, Tracer from opentelemetry.trace import get_current_span as otlp_get_current_span from opentelemetry.trace import ( @@ -338,7 +345,7 @@ def _remove_stale_otel_sdk_packages(): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 5 +LIBPATCH = 6 PYDEPS = ["opentelemetry-exporter-otlp-proto-http==1.21.0"] @@ -366,7 +373,9 @@ def _remove_stale_otel_sdk_packages(): BUFFER_DEFAULT_MAX_EVENT_HISTORY_LENGTH = 100 _MiB_TO_B = 2**20 # megabyte to byte conversion rate _OTLP_SPAN_EXPORTER_TIMEOUT = 1 -"""Timeout in seconds that the OTLP span exporter has to push traces to the backend.""" + + +# Timeout in seconds that the OTLP span exporter has to push traces to the backend. 
 class _Buffer:
@@ -398,45 +407,75 @@ def save(self, spans: typing.Sequence[ReadableSpan]):
         if self._max_event_history_length < 1:
             dev_logger.debug("buffer disabled: max history length < 1")
             return
-
-        current_history_length = len(self.load())
-        new_history_length = current_history_length + len(spans)
-        if (diff := self._max_event_history_length - new_history_length) < 0:
-            self.drop(diff)
         self._save(spans)
 
     def _serialize(self, spans: Sequence[ReadableSpan]) -> bytes:
         # encode because otherwise we can't json-dump them
         return encode_spans(spans).SerializeToString()
 
+    def _prune(self, queue: Sequence[bytes]) -> Sequence[bytes]:
+        """Prune the queue until it fits in our constraints."""
+        n_dropped_spans = 0
+        # drop older events if we are past the max history length
+        overflow = len(queue) - self._max_event_history_length
+        if overflow > 0:
+            n_dropped_spans += overflow
+            logger.warning(
+                f"charm tracing buffer exceeds max history length ({self._max_event_history_length} events)"
+            )
+
+        new_spans = deque(queue[-self._max_event_history_length :])
+
+        # drop older events if the buffer is too big; all units are bytes
+        logged_drop = False
+        target_size = self._max_buffer_size_mib * _MiB_TO_B
+        current_size = sum(len(span) for span in new_spans)
+        while current_size > target_size:
+            current_size -= len(new_spans.popleft())
+            n_dropped_spans += 1
+
+            # only do this once
+            if not logged_drop:
+                logger.warning(
+                    f"charm tracing buffer exceeds size limit ({self._max_buffer_size_mib}MiB)."
+                )
+                logged_drop = True
+
+        if n_dropped_spans > 0:
+            dev_logger.debug(
+                f"charm tracing buffer overflow: dropped {n_dropped_spans} older spans. "
+                f"Please increase the buffer limits, or ensure the spans can be flushed."
+            )
+        return new_spans
+
     def _save(self, spans: Sequence[ReadableSpan], replace: bool = False):
         dev_logger.debug(f"saving {len(spans)} new spans to buffer")
         old = [] if replace else self.load()
-        new = self._serialize(spans)
+        queue = old + [self._serialize(spans)]
+        new_buffer = self._prune(queue)
 
-        try:
-            # if the buffer exceeds the size limit, we start dropping old spans until it does
-
-            while len((new + self._SPANSEP.join(old))) > (self._max_buffer_size_mib * _MiB_TO_B):
-                if not old:
-                    # if we've already dropped all spans and still we can't get under the
-                    # size limit, we can't save this span
-                    logger.error(
-                        f"span exceeds total buffer size limit ({self._max_buffer_size_mib}MiB); "
-                        f"buffering FAILED"
-                    )
-                    return
-
-                old = old[1:]
-                logger.warning(
-                    f"buffer size exceeds {self._max_buffer_size_mib}MiB; dropping older spans... "
-                    f"Please increase the buffer size, disable buffering, or ensure the spans can be flushed."
-                )
+        if queue and not new_buffer:
+            # this means that, given our constraints, we are pruning so much that there are no events left.
+            logger.error(
+                "No charm events could be buffered into charm traces buffer. Please increase the memory or history size limits."
+            )
+            return
 
-            self._db_file.write_bytes(new + self._SPANSEP.join(old))
+        try:
+            self._write(new_buffer)
         except Exception:
             logger.exception("error buffering spans")
 
+    def _write(self, spans: Sequence[bytes]):
+        """Write the spans to the db file."""
+        # ensure the destination folder exists
+        db_file_dir = self._db_file.parent
+        if not db_file_dir.exists():
+            dev_logger.info(f"creating buffer dir: {db_file_dir}")
+            db_file_dir.mkdir(parents=True)
+
+        self._db_file.write_bytes(self._SPANSEP.join(spans))
+
     def load(self) -> List[bytes]:
         """Load currently buffered spans from the cache file.
 
@@ -461,8 +500,10 @@ def drop(self, n_spans: Optional[int] = None):
         else:
             dev_logger.debug("emptying buffer")
             new = []
-
-        self._db_file.write_bytes(self._SPANSEP.join(new))
+        try:
+            self._write(new)
+        except Exception:
+            logger.exception("error writing charm traces buffer")
 
     def flush(self) -> Optional[bool]:
         """Export all buffered spans to the given exporter, then clear the buffer.
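
Reviewer note on the grafana_dashboard.py change: every dashboard loaded from disk now gets a `charm: <charm name>` tag unless it already carries one. A minimal standalone sketch of that logic; the dashboard dicts and the `my-charm` name below are illustrative, not fixtures from the library:

```python
# Sketch of the tag-injection logic added by this patch.
# The dashboard dicts and "my-charm" are made-up examples.
from typing import List


def _add_tags(dashboard_dict: dict, charm_name: str):
    tags: List[str] = dashboard_dict.get("tags", [])
    if not any(tag.startswith("charm: ") for tag in tags):
        tags.append(f"charm: {charm_name}")
    dashboard_dict["tags"] = tags


dash = {"title": "Node exporter", "tags": ["node"]}
_add_tags(dash, "my-charm")
assert dash["tags"] == ["node", "charm: my-charm"]

# a dashboard that already has a "charm: ..." tag is left untouched,
# so re-processing it on every event is idempotent
_add_tags(dash, "my-charm")
assert dash["tags"] == ["node", "charm: my-charm"]
```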
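Reviewer note on the charm_tracing.py change: all dropping now funnels through `_Buffer._prune`, which enforces two limits in order, an event-count cap and then a byte-size cap, discarding the oldest entries first. A minimal sketch of those two constraints; `MAX_EVENTS` and `MAX_BYTES` are made-up stand-ins for `_max_event_history_length` and `_max_buffer_size_mib * _MiB_TO_B`:

```python
# Sketch of the two pruning constraints; the limits are illustrative, not the defaults.
from collections import deque
from typing import List, Sequence

MAX_EVENTS = 3  # stands in for _max_event_history_length
MAX_BYTES = 10  # stands in for _max_buffer_size_mib * _MiB_TO_B


def prune(queue: Sequence[bytes]) -> List[bytes]:
    # constraint 1: keep only the newest MAX_EVENTS entries
    new_spans = deque(queue[-MAX_EVENTS:])
    # constraint 2: drop the oldest entries until the total size fits
    current_size = sum(len(span) for span in new_spans)
    while current_size > MAX_BYTES:
        current_size -= len(new_spans.popleft())
    return list(new_spans)


assert prune([b"a", b"b", b"c", b"d"]) == [b"b", b"c", b"d"]  # count cap drops the oldest
assert prune([b"aaaaaa", b"bbbbbb"]) == [b"bbbbbb"]  # size cap drops the oldest
assert prune([b"x" * 99]) == []  # one oversized span empties the buffer entirely
```

The last case is the one the new `if queue and not new_buffer` guard in `_save` reports as an error instead of silently writing an empty buffer.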