Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add generic prometheus endpoints #209

Merged
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c23caa9
feat: add execution and additional metrics jobs to prometheus service
cbermudez97 Sep 18, 2023
8b5bf77
Merge branch 'main' into feat/add-generic-prometheus-endpoints
cbermudez97 Sep 19, 2023
93d5e78
feat: add beacon metrics gazer to prometheus
cbermudez97 Sep 19, 2023
b859bac
fix: ignore not defined metrics info
cbermudez97 Sep 19, 2023
a407a8a
Merge branch 'main' into feat/add-generic-prometheus-endpoints
cbermudez97 Sep 20, 2023
fd0f7ff
fix: wrong name
cbermudez97 Sep 20, 2023
e65f1b4
Merge branch 'main' into feat/add-generic-prometheus-endpoints
cbermudez97 Sep 20, 2023
d3d1858
fix: error with el metrics info being a list
cbermudez97 Sep 20, 2023
6a034c7
fix: wrong cl context property name
cbermudez97 Sep 20, 2023
e61fc8d
fix: error with beacon metrics gazer endpoint
cbermudez97 Sep 20, 2023
21f57fd
fix: add constants on prometheus launcher
cbermudez97 Sep 20, 2023
0b1c0e0
fix: typo in constant name
cbermudez97 Sep 20, 2023
625ae32
fix: wrong prometheus target endpoint in metrics gazer
cbermudez97 Sep 20, 2023
9ec492a
Merge branch 'main' into feat/add-generic-prometheus-endpoints
cbermudez97 Sep 21, 2023
242d9fa
fix: indentation
cbermudez97 Sep 21, 2023
abeb2ac
fix: run kurtosis lint . --format
cbermudez97 Sep 21, 2023
087a35c
chore: merge branch 'main' into feat/add-generic-prometheus-endpoints
cbermudez97 Sep 22, 2023
7e6dc72
chore: merge branch 'main' into feat/add-generic-prometheus-endpoints
cbermudez97 Sep 27, 2023
a3dda25
fix: old imports
cbermudez97 Sep 27, 2023
81a25c0
feat: launch prometheus with beacon metrics gazer
cbermudez97 Sep 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 33 additions & 23 deletions main.star
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def run(plan, args={}):
prometheus_config_template = read_file(
static_files.PROMETHEUS_CONFIG_TEMPLATE_FILEPATH
)
prometheus_additional_metrics_jobs = []
h4ck3rk3y marked this conversation as resolved.
Show resolved Hide resolved

plan.print("Read the prometheus, grafana templates")

Expand Down Expand Up @@ -215,6 +216,7 @@ def run(plan, args={}):

if not args_with_right_defaults.launch_additional_services:
return
launch_prometheus_grafana = False
for additional_service in args_with_right_defaults.additional_services:
if additional_service == "tx_spammer":
plan.print("Launching transaction spammer")
Expand Down Expand Up @@ -269,12 +271,17 @@ def run(plan, args={}):
beacon_metrics_gazer_config_template = read_file(
static_files.BEACON_METRICS_GAZER_CONFIG_TEMPLATE_FILEPATH
)
beacon_metrics_gazer.launch_beacon_metrics_gazer(
plan,
beacon_metrics_gazer_config_template,
all_cl_client_contexts,
args_with_right_defaults.participants,
network_params,
beacon_metrics_gazer_prometheus_metrics_job = (
beacon_metrics_gazer.launch_beacon_metrics_gazer(
plan,
beacon_metrics_gazer_config_template,
all_cl_client_contexts,
args_with_right_defaults.participants,
network_params,
)
)
prometheus_additional_metrics_jobs.append(
h4ck3rk3y marked this conversation as resolved.
Show resolved Hide resolved
beacon_metrics_gazer_prometheus_metrics_job
)
plan.print("Succesfully launched beacon metrics gazer")
elif additional_service == "light_beaconchain_explorer":
Expand All @@ -287,25 +294,28 @@ def run(plan, args={}):
)
plan.print("Succesfully light-beaconchain-explorer")
elif additional_service == "prometheus_grafana":
h4ck3rk3y marked this conversation as resolved.
Show resolved Hide resolved
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_cl_client_contexts,
all_el_client_contexts,
)
plan.print("Successfully launched Prometheus")

plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")
# Launch prometheus last so that it is able to collect metrics from the other services
launch_prometheus_grafana = True
else:
fail("Invalid additional service %s" % (additional_service))
if launch_prometheus_grafana:
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_el_client_contexts,
all_cl_client_contexts,
prometheus_additional_metrics_jobs,
)

plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")

if args_with_right_defaults.wait_for_finalization:
plan.print("Waiting for the first finalized epoch")
Expand Down
18 changes: 17 additions & 1 deletion src/beacon_metrics_gazer/beacon_metrics_gazer_launcher.star
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
shared_utils = import_module(
"github.com/kurtosis-tech/eth2-package/src/shared_utils/shared_utils.star"
)
prometheus = import_module(
"github.com/kurtosis-tech/eth2-package/src/prometheus/prometheus_launcher.star"
)


SERVICE_NAME = "beacon-metrics-gazer"
Expand All @@ -9,6 +12,8 @@ IMAGE_NAME = "ethpandaops/beacon-metrics-gazer:master"
HTTP_PORT_ID = "http"
HTTP_PORT_NUMBER = 8080

METRICS_PATH = "/metrics"

BEACON_METRICS_GAZER_CONFIG_FILENAME = "validator-ranges.yaml"

BEACON_METRICS_GAZER_CONFIG_MOUNT_DIRPATH_ON_SERVICE = "/config"
Expand Down Expand Up @@ -59,7 +64,18 @@ def launch_beacon_metrics_gazer(
cl_client_contexts[0].http_port_num,
)

plan.add_service(SERVICE_NAME, config)
beacon_metrics_gazer_service = plan.add_service(SERVICE_NAME, config)

return prometheus.new_metrics_job(
job_name=SERVICE_NAME,
endpoint="{0}:{1}".format(
beacon_metrics_gazer_service.ip_address, HTTP_PORT_NUMBER
),
metrics_path=METRICS_PATH,
labels={
"service": SERVICE_NAME,
},
)


def get_config(config_files_artifact_name, ip_addr, http_port_num):
Expand Down
117 changes: 104 additions & 13 deletions src/prometheus/prometheus_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ shared_utils = import_module(

SERVICE_NAME = "prometheus"

EXECUTION_CLIENT_TYPE = "execution"
BEACON_CLIENT_TYPE = "beacon"
VALIDATOR_CLIENT_TYPE = "validator"

METRICS_INFO_NAME_KEY = "name"
METRICS_INFO_URL_KEY = "url"
METRICS_INFO_PATH_KEY = "path"

# TODO(old) I'm not sure if we should use the latest version or pin a specific version instead
IMAGE_NAME = "prom/prometheus:latest"

Expand All @@ -22,17 +30,18 @@ USED_PORTS = {
}


def launch_prometheus(plan, config_template, cl_client_contexts, el_client_contexts):
all_nodes_metrics_info = []
for client in cl_client_contexts:
all_nodes_metrics_info.extend(client.cl_nodes_metrics_info)

for client in el_client_contexts:
# ethereumjs doesn't populate metrics just yet
if client.el_metrics_info != [None]:
all_nodes_metrics_info.extend(client.el_metrics_info)

template_data = new_config_template_data(all_nodes_metrics_info)
def launch_prometheus(
plan,
config_template,
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
):
template_data = new_config_template_data(
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
)
template_and_data = shared_utils.new_template_and_data(
config_template, template_data
)
Expand Down Expand Up @@ -75,5 +84,87 @@ def get_config(config_files_artifact_name):
)


def new_config_template_data(cl_nodes_metrics_info):
return {"CLNodesMetricsInfo": cl_nodes_metrics_info}
def new_config_template_data(
    el_client_contexts,
    cl_client_contexts,
    additional_metrics_jobs,
):
    """Assemble the data used to render the Prometheus config template.

    Args:
        el_client_contexts: execution client contexts; each one is read for
            `el_metrics_info`, `service_name` and `client_name`.
        cl_client_contexts: consensus client contexts; each one is read for
            `cl_nodes_metrics_info`, `beacon_service_name`,
            `validator_service_name` and `client_name`.
        additional_metrics_jobs: extra jobs appended unchanged after the
            client jobs; `None` entries are dropped.

    Returns:
        A dict with the single key "MetricsJobs" holding the list of jobs,
        ordered as: execution clients, consensus clients (beacon job first,
        then validator job, per context), additional jobs.
    """
    jobs = []

    # Execution clients: only the first metrics-info entry is used, and a
    # missing or None entry means the client exposes nothing to scrape.
    for el_context in el_client_contexts:
        el_infos = el_context.el_metrics_info
        if len(el_infos) < 1 or el_infos[0] == None:
            continue
        el_info = el_infos[0]
        el_labels = {
            "service": el_context.service_name,
            "client_type": EXECUTION_CLIENT_TYPE,
            "client_name": el_context.client_name,
        }
        jobs.append(
            new_metrics_job(
                job_name=el_info[METRICS_INFO_NAME_KEY],
                endpoint=el_info[METRICS_INFO_URL_KEY],
                metrics_path=el_info[METRICS_INFO_PATH_KEY],
                labels=el_labels,
            )
        )

    # Consensus clients: entry 0 describes the beacon node and entry 1 the
    # validator node; each is checked and added independently of the other.
    for cl_context in cl_client_contexts:
        cl_infos = cl_context.cl_nodes_metrics_info

        if len(cl_infos) >= 1 and cl_infos[0] != None:
            beacon_info = cl_infos[0]
            beacon_labels = {
                "service": cl_context.beacon_service_name,
                "client_type": BEACON_CLIENT_TYPE,
                "client_name": cl_context.client_name,
            }
            jobs.append(
                new_metrics_job(
                    job_name=beacon_info[METRICS_INFO_NAME_KEY],
                    endpoint=beacon_info[METRICS_INFO_URL_KEY],
                    metrics_path=beacon_info[METRICS_INFO_PATH_KEY],
                    labels=beacon_labels,
                )
            )

        if len(cl_infos) >= 2 and cl_infos[1] != None:
            validator_info = cl_infos[1]
            validator_labels = {
                "service": cl_context.validator_service_name,
                "client_type": VALIDATOR_CLIENT_TYPE,
                "client_name": cl_context.client_name,
            }
            jobs.append(
                new_metrics_job(
                    job_name=validator_info[METRICS_INFO_NAME_KEY],
                    endpoint=validator_info[METRICS_INFO_URL_KEY],
                    metrics_path=validator_info[METRICS_INFO_PATH_KEY],
                    labels=validator_labels,
                )
            )

    # Additional jobs are passed through as given, skipping None placeholders.
    jobs.extend([extra for extra in additional_metrics_jobs if extra != None])

    return {
        "MetricsJobs": jobs,
    }


def new_metrics_job(
    job_name,
    endpoint,
    metrics_path,
    labels,
    scrape_interval="15s",
):
    """Build one metrics-job entry for the Prometheus config template.

    Args:
        job_name: stored under "Name".
        endpoint: stored under "Endpoint".
        metrics_path: stored under "MetricsPath".
        labels: stored under "Labels".
        scrape_interval: stored under "ScrapeInterval"; defaults to "15s".

    Returns:
        A dict with the keys "Name", "Endpoint", "MetricsPath", "Labels" and
        "ScrapeInterval", in that order.
    """
    return dict(
        Name=job_name,
        Endpoint=endpoint,
        MetricsPath=metrics_path,
        Labels=labels,
        ScrapeInterval=scrape_interval,
    )
27 changes: 13 additions & 14 deletions static_files/prometheus-config/prometheus.yml.tmpl
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_interval: 15s
scrape_configs:
{{ range $clNode := .CLNodesMetricsInfo }}
- job_name: '{{ $clNode.name }}'
metrics_path: {{ $clNode.path }}
static_configs:
- targets: ['{{ $clNode.url }}']
{{ end }}
- job_name: 'beacon-metrics-gazer'
metrics_path: '/metrics'
static_configs:
- targets: ['beacon-metrics-gazer:8080']
{{- range $job := .MetricsJobs }}
- job_name: "{{ $job.Name }}"
metrics_path: "{{ $job.MetricsPath }}"
{{- if $job.ScrapeInterval }}
h4ck3rk3y marked this conversation as resolved.
Show resolved Hide resolved
scrape_interval: {{ $job.ScrapeInterval }}
{{- end }}
static_configs:
- targets: ['{{ $job.Endpoint }}']
labels:{{ range $labelName, $labelValue := $job.Labels }}
{{ $labelName }}: "{{ $labelValue }}"
{{- end }}
{{- end }}