Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Add OTEL_EXCLUDE_METRICS #11317

Merged
merged 1 commit into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ spec:
value: "{{ .Values.storage.worldStateMapSize }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.bootNode.otelExcludeMetrics }}"
ports:
- containerPort: {{ .Values.bootNode.service.nodePort }}
- containerPort: {{ .Values.bootNode.service.p2pTcpPort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/faucet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ spec:
value: faucet
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.faucet.otelExcludeMetrics }}"
ports:
- name: http
containerPort: {{ .Values.faucet.service.nodePort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ spec:
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.proverAgent.otelExcludeMetrics }}"
resources:
{{- toYaml .Values.proverAgent.resources | nindent 12 }}
{{- end }}
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-broker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ spec:
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.proverBroker.otelExcludeMetrics }}"
resources:
{{- toYaml .Values.proverBroker.resources | nindent 12 }}
volumes:
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ spec:
value: "{{ .Values.storage.worldStateMapSize }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.proverNode.otelExcludeMetrics }}"
ports:
- containerPort: {{ .Values.proverNode.service.nodePort }}
- containerPort: {{ .Values.proverNode.service.p2pTcpPort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ spec:
value: "{{ .Values.aztec.realProofs }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.pxe.otelExcludeMetrics }}"
ports:
- name: http
containerPort: {{ .Values.pxe.service.nodePort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/transaction-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ spec:
value: "{{ .Values.bot.stopIfUnhealthy }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.bot.otelExcludeMetrics }}"
ports:
- name: http
containerPort: {{ .Values.bot.service.nodePort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ spec:
value: "{{ .Values.storage.worldStateMapSize }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.validator.otelExcludeMetrics }}"
ports:
- containerPort: {{ .Values.validator.service.nodePort }}
- containerPort: {{ .Values.validator.service.p2pTcpPort }}
Expand Down
9 changes: 9 additions & 0 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ bootNode:
stakingAssetAddress: ""
storageSize: "1Gi"
dataDir: "/data"
otelExcludeMetrics: ""

validator:
# If true, the validator will use its peers to serve as the boot node.
Expand Down Expand Up @@ -130,6 +131,7 @@ validator:
dataDir: "/data"
l1FixedPriorityFeePerGas: ""
l1GasLimitBufferPercentage: ""
otelExcludeMetrics: ""

proverNode:
proverPublisherPrivateKey: "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80"
Expand Down Expand Up @@ -166,6 +168,7 @@ proverNode:
intervalMs: 1000
maxParallelRequests: 100
failedProofStore: "gs://aztec-develop/spartan/failed-proofs"
otelExcludeMetrics: ""

pxe:
logLevel: "debug; info: aztec:simulator, json-rpc"
Expand All @@ -182,6 +185,7 @@ pxe:
requests:
memory: "4Gi"
cpu: "1"
otelExcludeMetrics: ""

bot:
enabled: true
Expand Down Expand Up @@ -211,6 +215,7 @@ bot:
requests:
memory: "4Gi"
cpu: "1"
otelExcludeMetrics: ""

ethereum:
externalHost: ""
Expand All @@ -236,6 +241,7 @@ ethereum:
cpu: "1"
storageSize: "80Gi"
deployL1ContractsPrivateKey:
otelExcludeMetrics: ""

proverAgent:
service:
Expand All @@ -254,6 +260,7 @@ proverAgent:
memory: "4Gi"
cpu: "1"
pollInterval: 200
otelExcludeMetrics: ""

proverBroker:
service:
Expand All @@ -271,6 +278,7 @@ proverBroker:
memory: "4Gi"
cpu: "1"
maxOldSpaceSize: "3584"
otelExcludeMetrics: ""

jobs:
deployL1Verifier:
Expand All @@ -288,3 +296,4 @@ faucet:
requests:
memory: "2Gi"
cpu: "200m"
otelExcludeMetrics: ""
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export type EnvVar =
| 'OTEL_EXPORTER_OTLP_LOGS_ENDPOINT'
| 'OTEL_SERVICE_NAME'
| 'OTEL_COLLECT_INTERVAL_MS'
| 'OTEL_EXCLUDE_METRICS'
| 'OTEL_EXPORT_TIMEOUT_MS'
| 'OUTBOX_CONTRACT_ADDRESS'
| 'P2P_BLOCK_CHECK_INTERVAL_MS'
Expand Down
13 changes: 13 additions & 0 deletions yarn-project/telemetry-client/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export interface TelemetryClientConfig {
k8sPodUid?: string;
k8sPodName?: string;
k8sNamespaceName?: string;
otelExcludeMetrics?: string[];
}

export const telemetryClientConfigMappings: ConfigMappingsType<TelemetryClientConfig> = {
Expand Down Expand Up @@ -57,6 +58,18 @@ export const telemetryClientConfigMappings: ConfigMappingsType<TelemetryClientCo
defaultValue: 30000, // Default extracted from otel client
parseEnv: (val: string) => parseInt(val),
},
// Mapping for the optional list of metric-name prefixes that should not be exported.
otelExcludeMetrics: {
env: 'OTEL_EXCLUDE_METRICS',
description: 'A list of metric prefixes to exclude from export',
// The raw value is split on commas; each entry is trimmed and blank entries are dropped,
// so an empty value (or one containing only commas/whitespace) yields an empty list.
parseEnv: (val: string) =>
val
? val
.split(',')
.map(s => s.trim())
.filter(s => s.length > 0)
: [],
defaultValue: [],
},
k8sPodUid: {
env: 'K8S_POD_UID',
description: 'The UID of the Kubernetes pod (injected automatically by k8s)',
Expand Down
8 changes: 6 additions & 2 deletions yarn-project/telemetry-client/src/otel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic
import { type TelemetryClientConfig } from './config.js';
import { EventLoopMonitor } from './event_loop_monitor.js';
import { linearBuckets } from './histogram_utils.js';
import { OtelFilterMetricExporter } from './otel_filter_metric_exporter.js';
import { registerOtelLoggerProvider } from './otel_logger_provider.js';
import { getOtelResource } from './otel_resource.js';
import { type Gauge, type TelemetryClient } from './telemetry.js';
Expand Down Expand Up @@ -247,7 +248,7 @@ export class OpenTelemetryClient implements TelemetryClient {
tracerProvider.register();

const meterProvider = OpenTelemetryClient.createMeterProvider(resource, {
exporter: new GoogleCloudMetricExporter(),
exporter: new OtelFilterMetricExporter(new GoogleCloudMetricExporter(), config.otelExcludeMetrics ?? []),
exportTimeoutMillis: config.otelExportTimeoutMs,
exportIntervalMillis: config.otelCollectIntervalMs,
});
Expand All @@ -269,7 +270,10 @@ export class OpenTelemetryClient implements TelemetryClient {

const meterProvider = OpenTelemetryClient.createMeterProvider(resource, {
exporter: config.metricsCollectorUrl
? new OTLPMetricExporter({ url: config.metricsCollectorUrl.href })
? new OtelFilterMetricExporter(
new OTLPMetricExporter({ url: config.metricsCollectorUrl.href }),
config.otelExcludeMetrics ?? [],
)
: undefined,
exportTimeoutMillis: config.otelExportTimeoutMs,
exportIntervalMillis: config.otelCollectIntervalMs,
Expand Down
38 changes: 38 additions & 0 deletions yarn-project/telemetry-client/src/otel_filter_metric_exporter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { type ExportResult } from '@opentelemetry/core';
import { type MetricData, type PushMetricExporter, type ResourceMetrics } from '@opentelemetry/sdk-metrics';

/**
 * A metric exporter decorator that drops metrics whose name starts with one of the
 * configured prefixes before handing the remainder to the wrapped exporter.
 *
 * Scopes left without any metrics after filtering are removed from the exported payload.
 * Aggregation selection hooks are delegated to the wrapped exporter when it defines them.
 */
export class OtelFilterMetricExporter implements PushMetricExporter {
  /**
   * @param exporter - The exporter that receives the filtered metrics.
   * @param excludeMetricPrefixes - Metric name prefixes to exclude. Empty strings are ignored.
   */
  constructor(private readonly exporter: PushMetricExporter, private readonly excludeMetricPrefixes: string[]) {
    // These hooks are optional on the interface; only expose them when the wrapped exporter has them,
    // so this wrapper reports the same capabilities as the exporter it decorates.
    if (exporter.selectAggregation) {
      (this as PushMetricExporter).selectAggregation = exporter.selectAggregation.bind(exporter);
    }
    if (exporter.selectAggregationTemporality) {
      (this as PushMetricExporter).selectAggregationTemporality = exporter.selectAggregationTemporality.bind(exporter);
    }
  }

  /**
   * Exports the given metrics through the wrapped exporter, minus any excluded metrics.
   * @param metrics - The collected resource metrics.
   * @param resultCallback - Forwarded unchanged to the wrapped exporter.
   */
  public export(metrics: ResourceMetrics, resultCallback: (result: ExportResult) => void): void {
    const filteredMetrics: ResourceMetrics = {
      resource: metrics.resource,
      scopeMetrics: metrics.scopeMetrics
        .map(({ scope, metrics }) => ({ scope, metrics: this.filterMetrics(metrics) }))
        // Do not export scopes that have nothing left to report.
        .filter(({ metrics }) => metrics.length > 0),
    };

    this.exporter.export(filteredMetrics, resultCallback);
  }

  /**
   * Returns the metrics whose names do not start with any excluded prefix.
   * @param metrics - The metrics of a single scope.
   */
  private filterMetrics(metrics: MetricData[]): MetricData[] {
    // An empty prefix would match every name via startsWith('') and silently drop all metrics,
    // so blank entries are treated as "no prefix" rather than "exclude everything".
    const prefixes = this.excludeMetricPrefixes.filter(prefix => prefix.length > 0);
    return metrics.filter(metric => !prefixes.some(prefix => metric.descriptor.name.startsWith(prefix)));
  }

  /** Flushes the wrapped exporter. */
  public forceFlush(): Promise<void> {
    return this.exporter.forceFlush();
  }

  /** Shuts down the wrapped exporter. */
  public shutdown(): Promise<void> {
    return this.exporter.shutdown();
  }
}
Loading