Skip to content

Commit 85ddf8b

Browse files
committed
Introduce DD traces & metrics
1 parent bcfb9e5 commit 85ddf8b

File tree

8 files changed

+222
-105
lines changed

8 files changed

+222
-105
lines changed

packages/opal-common/opal_common/git/bundle_maker.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pathlib import Path
33
from typing import List, Optional, Set
44

5+
from ddtrace import tracer
56
from git import Repo
67
from git.objects import Commit
78
from opal_common.engine import get_rego_package, is_data_module, is_policy_module
@@ -249,7 +250,10 @@ def make_bundle(self, commit: Commit) -> PolicyBundle:
249250
logger.debug(f"Explicit manifest to be used: {explicit_manifest}")
250251

251252
for source_file in viewer.files(filter):
252-
contents = source_file.read()
253+
with tracer.trace(
254+
"bundle_maker.git_file_read", resource=str(source_file.path)
255+
):
256+
contents = source_file.read()
253257
path = source_file.path
254258

255259
if is_data_module(path):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from typing import Optional
2+
from urllib.parse import urlparse
3+
import os
4+
5+
from ddtrace import Span, patch, tracer, config
6+
from ddtrace.filters import TraceFilter
7+
from loguru import logger
8+
9+
10+
def configure_apm(enable_apm: bool, service_name: str):
    """Switch Datadog APM tracing on or off for this process.

    Args:
        enable_apm: when false, the global tracer is configured with
            ``enabled=False`` and nothing else is done.
        service_name: used as the FastAPI integration's service name and as
            the prefix of its request span name (``"<service_name>.request"``).
    """
    if not enable_apm:
        logger.info("DataDog APM disabled")
        tracer.configure(enabled=False)
        return

    logger.info("Enabling DataDog APM")

    class FilterRootPathTraces(TraceFilter):
        """Trace filter that discards traces of HTTP requests to the "/" path."""

        def process_trace(self, trace: list[Span]) -> Optional[list[Span]]:
            for span in trace:
                # As soon as a span with a parent is reached, keep the trace
                # as-is; only parentless spans are inspected below.
                if span.parent_id is not None:
                    return trace

                http_url = span.get_tag("http.url")
                if http_url and urlparse(http_url).path == "/":
                    # Returning None tells the tracer to drop the whole trace.
                    return None

            return trace

    patch(fastapi=True, redis=True, asyncpg=True, aiohttp=True, celery=True)

    trace_filters = [FilterRootPathTraces()]
    tracer.configure(settings={"FILTERS": trace_filters})

    # Override service name
    config.fastapi["service_name"] = service_name
    config.fastapi["request_span_name"] = f"{service_name}.request"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import os
2+
from typing import Optional
3+
4+
import datadog
5+
from loguru import logger
6+
7+
8+
def configure_metrics(
    enable_metrics: bool, statsd_host: str, statsd_port: int, namespace: str = ""
):
    """Initialise the DogStatsD client used by the metric helpers in this module.

    Args:
        enable_metrics: when false, only a log line is emitted and the
            datadog client is left uninitialised.
        statsd_host: host of the DogStatsD agent.
        statsd_port: port of the DogStatsD agent.
        namespace: metric namespace suffix; when empty, the ``DD_SERVICE``
            environment variable is used instead. It is lower-cased and
            dashes are replaced with underscores.
    """
    if not enable_metrics:
        logger.info("DogStatsD metrics disabled")
        return

    logger.info(
        "DogStatsD metrics enabled; statsd: {host}:{port}",
        host=statsd_host,
        port=statsd_port,
    )

    if not namespace:
        namespace = os.environ.get("DD_SERVICE", "")

    namespace = namespace.lower().replace("-", "_")

    # The statsd client joins namespace and metric name with a "." itself, so
    # an empty suffix must not leave a trailing dot ("permit." would produce
    # metric names such as "permit..my_metric").
    statsd_namespace = f"permit.{namespace}" if namespace else "permit"

    datadog.initialize(
        statsd_host=statsd_host,
        statsd_port=statsd_port,
        statsd_namespace=statsd_namespace,
    )
30+
31+
32+
def _format_tags(tags: Optional[dict[str, str]]) -> Optional[list[str]]:
33+
if not tags:
34+
return None
35+
36+
return [f"{k}:{v}" for k, v in tags.items()]
37+
38+
39+
def increment(metric: str, tags: Optional[dict[str, str]] = None):
    """Forward to ``datadog.statsd.increment`` with tags as ``key:value`` strings."""
    formatted_tags = _format_tags(tags)
    datadog.statsd.increment(metric, tags=formatted_tags)
41+
42+
43+
def decrement(metric: str, tags: Optional[dict[str, str]] = None):
    """Forward to ``datadog.statsd.decrement`` with tags as ``key:value`` strings."""
    formatted_tags = _format_tags(tags)
    datadog.statsd.decrement(metric, tags=formatted_tags)
45+
46+
47+
def gauge(metric: str, value: float, tags: Optional[dict[str, str]] = None):
    """Forward to ``datadog.statsd.gauge`` with tags as ``key:value`` strings."""
    formatted_tags = _format_tags(tags)
    datadog.statsd.gauge(metric, value, tags=formatted_tags)
49+
50+
51+
def event(title: str, message: str, tags: Optional[dict[str, str]] = None):
    """Forward to ``datadog.statsd.event`` with tags as ``key:value`` strings."""
    formatted_tags = _format_tags(tags)
    datadog.statsd.event(title=title, message=message, tags=formatted_tags)

packages/opal-common/opal_common/topics/publisher.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
from typing import Any, Optional, Set
33

4+
from ddtrace import tracer
45
from fastapi_websocket_pubsub import PubSubClient, PubSubEndpoint, Topic, TopicList
56
from opal_common.logger import logger
67

@@ -131,10 +132,12 @@ def __init__(self, endpoint: PubSubEndpoint):
131132
self._endpoint = endpoint
132133
super().__init__()
133134

135+
async def _publish_impl(self, topics: TopicList, data: Any = None):
    """Publish ``data`` to ``topics`` on the wrapped endpoint inside a trace span.

    The span is named "topic_publisher.publish" and uses the string form of
    ``topics`` as its resource.
    """
    publish_span = tracer.trace("topic_publisher.publish", resource=str(topics))
    with publish_span:
        await self._endpoint.publish(topics=topics, data=data)
138+
134139
async def publish(self, topics: TopicList, data: Any = None):
    """Start the traced publish as an asyncio task and pass it to ``_add_task``."""
    publish_task = asyncio.create_task(self._publish_impl(topics, data))
    await self._add_task(publish_task)
138141

139142

140143
class ClientSideTopicPublisher(TopicPublisher):

packages/opal-server/opal_server/git_fetcher.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import aiofiles.os
1010
import pygit2
11+
from ddtrace import tracer
1112
from git import Repo
1213
from opal_common.async_utils import run_sync
1314
from opal_common.git.bundle_maker import BundleMaker
@@ -177,7 +178,7 @@ async def fetch_and_notify_on_changes(
177178
repo_lock = await self._get_repo_lock()
178179
async with repo_lock:
179180
with tracer.trace(
180-
"scopes_service.fetch_and_notify_on_changes",
181+
"git_policy_fetcher.fetch_and_notify_on_changes",
181182
resource=self._scope_id,
182183
):
183184
if self._discover_repository(self._repo_path):
@@ -335,6 +336,7 @@ def _get_current_branch_head(self) -> str:
335336
raise ValueError("Could not find current branch head")
336337
return head_commit_hash
337338

339+
@tracer.wrap("git_policy_fetcher.make_bundle")
338340
def make_bundle(self, base_hash: Optional[str] = None) -> PolicyBundle:
339341
repo = Repo(str(self._repo_path))
340342
bundle_maker = BundleMaker(

packages/opal-server/opal_server/scopes/api.py

+7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from fastapi.responses import RedirectResponse
1616
from fastapi_websocket_pubsub import PubSubEndpoint
1717
from git import InvalidGitRepositoryError
18+
from opal_common.monitoring import metrics
1819
from opal_common.async_utils import run_sync
1920
from opal_common.authentication.authz import (
2021
require_peer_type,
@@ -277,6 +278,12 @@ async def get_scope_policy(
277278
return await _generate_default_scope_bundle(scope_id)
278279

279280
async def _generate_default_scope_bundle(scope_id: str) -> PolicyBundle:
281+
metrics.event(
282+
"ScopeNotFound",
283+
message=f"Scope {scope_id} not found. Serving default scope instead",
284+
tags={"scope_id": scope_id},
285+
)
286+
280287
try:
281288
scope = await scopes.get("default")
282289
fetcher = GitPolicyFetcher(

packages/opal-server/opal_server/scopes/service.py

+91-87
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import List, Optional, Set, cast
66

77
import git
8+
from ddtrace import tracer
89
from fastapi_websocket_pubsub import PubSubEndpoint
910
from opal_common.git.commit_viewer import VersionedFile
1011
from opal_common.logger import logger
@@ -122,102 +123,105 @@ async def sync_scope(
122123
assert scope_id, ValueError("scope_id not set for sync_scope")
123124
scope = await self._scopes.get(scope_id)
124125

125-
if not isinstance(scope.policy, GitPolicyScopeSource):
126-
logger.warning("Non-git scopes are currently not supported!")
127-
return
128-
source = cast(GitPolicyScopeSource, scope.policy)
129-
130-
logger.debug(
131-
f"Sync scope: {scope.scope_id} (remote: {source.url}, branch: {source.branch}, req_time: {req_time})"
132-
)
133-
134-
callbacks = PolicyFetcherCallbacks()
135-
if notify_on_changes:
136-
callbacks = NewCommitsCallbacks(
137-
base_dir=self._base_dir,
138-
scope_id=scope.scope_id,
139-
source=source,
140-
pubsub_endpoint=self._pubsub_endpoint,
141-
)
142-
143-
fetcher = GitPolicyFetcher(
144-
self._base_dir,
145-
scope.scope_id,
146-
source,
147-
callbacks=callbacks,
148-
)
126+
with tracer.trace("scopes_service.sync_scope", resource=scope.scope_id):
127+
if not isinstance(scope.policy, GitPolicyScopeSource):
128+
logger.warning("Non-git scopes are currently not supported!")
129+
return
130+
source = cast(GitPolicyScopeSource, scope.policy)
149131

150-
try:
151-
await fetcher.fetch_and_notify_on_changes(
152-
hinted_hash=hinted_hash, force_fetch=force_fetch, req_time=req_time
153-
)
154-
except Exception as e:
155-
logger.exception(
156-
f"Could not fetch policy for scope {scope.scope_id}, got error: {e}"
132+
logger.debug(
133+
f"Sync scope: {scope.scope_id} (remote: {source.url}, branch: {source.branch}, req_time: {req_time})"
157134
)
158135

159-
async def delete_scope(self, scope_id: str):
160-
logger.info(f"Delete scope: {scope_id}")
161-
scope = await self._scopes.get(scope_id)
162-
url = scope.policy.url
163-
164-
scopes = await self._scopes.all()
165-
remove_repo_clone = True
166-
167-
for scope in scopes:
168-
if scope.scope_id != scope_id and scope.policy.url == url:
169-
logger.info(
170-
f"found another scope with same remote url ({scope.scope_id}), skipping clone deletion"
136+
callbacks = PolicyFetcherCallbacks()
137+
if notify_on_changes:
138+
callbacks = NewCommitsCallbacks(
139+
base_dir=self._base_dir,
140+
scope_id=scope.scope_id,
141+
source=source,
142+
pubsub_endpoint=self._pubsub_endpoint,
171143
)
172-
remove_repo_clone = False
173-
break
174144

175-
if remove_repo_clone:
176-
scope_dir = GitPolicyFetcher.repo_clone_path(
177-
self._base_dir, cast(GitPolicyScopeSource, scope.policy)
145+
fetcher = GitPolicyFetcher(
146+
self._base_dir,
147+
scope.scope_id,
148+
source,
149+
callbacks=callbacks,
178150
)
179-
shutil.rmtree(scope_dir, ignore_errors=True)
180-
181-
await self._scopes.delete(scope_id)
182-
183-
async def sync_scopes(self, only_poll_updates=False, notify_on_changes=True):
184-
scopes = await self._scopes.all()
185-
if only_poll_updates:
186-
# Only sync scopes that have polling enabled (in a periodic check)
187-
scopes = [scope for scope in scopes if scope.policy.poll_updates]
188-
189-
logger.info(
190-
f"OPAL Scopes: syncing {len(scopes)} scopes in the background (polling updates: {only_poll_updates})"
191-
)
192-
193-
fetched_source_ids = set()
194-
skipped_scopes = []
195-
for scope in scopes:
196-
src_id = GitPolicyFetcher.source_id(scope.policy)
197-
198-
# Give priority to scopes that have a unique url per shard (so we'll clone all repos asap)
199-
if src_id in fetched_source_ids:
200-
skipped_scopes.append(scope)
201-
continue
202151

203152
try:
204-
await self.sync_scope(
205-
scope=scope,
206-
force_fetch=True,
207-
notify_on_changes=notify_on_changes,
153+
await fetcher.fetch_and_notify_on_changes(
154+
hinted_hash=hinted_hash, force_fetch=force_fetch, req_time=req_time
208155
)
209156
except Exception as e:
210-
logger.exception(f"sync_scope failed for {scope.scope_id}")
211-
212-
fetched_source_ids.add(src_id)
157+
logger.exception(
158+
f"Could not fetch policy for scope {scope.scope_id}, got error: {e}"
159+
)
213160

214-
for scope in skipped_scopes:
215-
# No need to refetch the same repo, just check for changes
216-
try:
217-
await self.sync_scope(
218-
scope=scope,
219-
force_fetch=False,
220-
notify_on_changes=notify_on_changes,
161+
async def delete_scope(self, scope_id: str):
    """Delete a scope and, if no other scope uses its remote, its repo clone.

    The scope is looked up by ``scope_id``; every other stored scope is then
    checked for the same ``policy.url``. Only when none matches is the clone
    directory of the deleted scope removed from disk (errors ignored).
    Finally the scope itself is deleted from the scope store.

    Args:
        scope_id: identifier of the scope to delete.
    """
    with tracer.trace("scopes_service.delete_scope", resource=scope_id):
        logger.info(f"Delete scope: {scope_id}")
        scope = await self._scopes.get(scope_id)
        url = scope.policy.url

        scopes = await self._scopes.all()
        remove_repo_clone = True

        # Use a dedicated loop variable: reusing `scope` here would rebind it
        # to the last scope iterated, and the clone path computed below would
        # then belong to an unrelated scope.
        for other_scope in scopes:
            if other_scope.scope_id != scope_id and other_scope.policy.url == url:
                logger.info(
                    f"found another scope with same remote url ({other_scope.scope_id}), skipping clone deletion"
                )
                remove_repo_clone = False
                break

        if remove_repo_clone:
            scope_dir = GitPolicyFetcher.repo_clone_path(
                self._base_dir, cast(GitPolicyScopeSource, scope.policy)
            )
            # Best-effort cleanup: a missing or partially removed clone is fine.
            shutil.rmtree(scope_dir, ignore_errors=True)

        await self._scopes.delete(scope_id)
185+
186+
async def sync_scopes(self, only_poll_updates=False, notify_on_changes=True):
    """Sync every stored scope, fetching each distinct source only once.

    The first scope seen for a given source id is synced with
    ``force_fetch=True``; scopes sharing an already-fetched source are
    deferred and synced afterwards with ``force_fetch=False``. A failure in
    one scope is logged and does not stop the remaining scopes.

    Args:
        only_poll_updates: when true, restrict the run to scopes whose
            policy has ``poll_updates`` set.
        notify_on_changes: passed through to ``sync_scope``.
    """
    with tracer.trace("scopes_service.sync_scopes"):
        scopes = await self._scopes.all()
        if only_poll_updates:
            # Periodic check: keep only the scopes that asked for polling
            scopes = [scope for scope in scopes if scope.policy.poll_updates]

        logger.info(
            f"OPAL Scopes: syncing {len(scopes)} scopes in the background (polling updates: {only_poll_updates})"
        )

        seen_source_ids = set()
        deferred_scopes = []
        for scope in scopes:
            source_key = GitPolicyFetcher.source_id(scope.policy)

            if source_key in seen_source_ids:
                # Same source was already fetched in this run; handle it in
                # the second pass so distinct repos get cloned first.
                deferred_scopes.append(scope)
            else:
                try:
                    await self.sync_scope(
                        scope=scope,
                        force_fetch=True,
                        notify_on_changes=notify_on_changes,
                    )
                except Exception:
                    logger.exception(f"sync_scope failed for {scope.scope_id}")

                seen_source_ids.add(source_key)

        for scope in deferred_scopes:
            # The repo was fetched above, so only check for changes here
            try:
                await self.sync_scope(
                    scope=scope,
                    force_fetch=False,
                    notify_on_changes=notify_on_changes,
                )
            except Exception:
                logger.exception(f"sync_scope failed for {scope.scope_id}")

0 commit comments

Comments
 (0)