From 5db6611ab831a92212a21859b42a911cd12bce0c Mon Sep 17 00:00:00 2001 From: Barnabas Busa Date: Mon, 29 Jan 2024 10:06:53 +0100 Subject: [PATCH] feat: enable custom resource limit per network (#471) --- .github/tests/tolerations.yaml | 36 ++++ README.md | 76 +++++++++ main.star | 2 + src/cl/lighthouse/lighthouse_launcher.star | 43 ++++- src/cl/lodestar/lodestar_launcher.star | 41 ++++- src/cl/nimbus/nimbus_launcher.star | 51 ++++-- src/cl/prysm/prysm_launcher.star | 42 ++++- src/cl/teku/teku_launcher.star | 44 +++-- src/el/besu/besu_launcher.star | 32 +++- src/el/erigon/erigon_launcher.star | 35 +++- src/el/ethereumjs/ethereumjs_launcher.star | 31 +++- src/el/geth/geth_launcher.star | 31 +++- src/el/nethermind/nethermind_launcher.star | 34 +++- src/el/reth/reth_launcher.star | 32 +++- src/package_io/constants.star | 184 +++++++++++++++++++++ src/package_io/input_parser.star | 49 ++++++ src/participant_network.star | 12 ++ 17 files changed, 680 insertions(+), 95 deletions(-) create mode 100644 .github/tests/tolerations.yaml diff --git a/.github/tests/tolerations.yaml b/.github/tests/tolerations.yaml new file mode 100644 index 000000000..140938834 --- /dev/null +++ b/.github/tests/tolerations.yaml @@ -0,0 +1,36 @@ +participants: + - el_client_type: reth + cl_client_type: teku + cl_split_mode_enabled: true + cl_tolerations: + - key: "node-role.kubernetes.io/master1" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/master2" + operator: "Exists" + effect: "NoSchedule" + el_tolerations: + - key: "node-role.kubernetes.io/master3" + operator: "Exists" + effect: "NoSchedule" + validator_tolerations: + - key: "node-role.kubernetes.io/master4" + operator: "Exists" + effect: "NoSchedule" + - el_client_type: reth + cl_client_type: teku + cl_split_mode_enabled: true + tolerations: + - key: "node-role.kubernetes.io/master5" + operator: "Exists" + effect: "NoSchedule" + - el_client_type: reth + cl_client_type: teku + cl_split_mode_enabled: true 
+additional_services: + - dora +global_tolerations: + - key: "node-role.kubernetes.io/master6" + value: "true" + operator: "Equal" + effect: "NoSchedule" diff --git a/README.md b/README.md index c9ccd9977..85c0fd7c9 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,26 @@ To mitigate these issues, you can use the `el_client_volume_size` and `cl_client For optimal performance, we recommend using a cloud provider that allows you to provision Kubernetes clusters with fast persistent storage or self hosting your own Kubernetes cluster with fast persistent storage. +#### Taints and tolerations +It is possible to run the package on a Kubernetes cluster with taints and tolerations. This is done by adding the tolerations to one of the tolerations fields (for example `global_tolerations`) in the `network_params.yaml` file. For example: +```yaml +participants: + - el_client_type: reth + cl_client_type: teku +global_tolerations: + - key: "node-role.kubernetes.io/master6" + value: "true" + operator: "Equal" + effect: "NoSchedule" +``` + +It is possible to define tolerations globally, per participant or per container. The order of precedence is as follows: +1. Container (`el_tolerations`, `cl_tolerations`, `validator_tolerations`) +2. Participant (`tolerations`) +3. Global (`global_tolerations`) + +This feature is only available for Kubernetes. To learn more about taints and tolerations, please visit the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). + #### Tear down The testnet will reside in an [enclave][enclave] - an isolated, ephemeral environment. The enclave and its contents (e.g. running containers, files artifacts, etc) will persist until torn down.
You can remove an enclave and its contents with: @@ -147,6 +167,17 @@ participants: # Example; el_extra_labels: {"ethereum-package.partition": "1"} el_extra_labels: {} + # A list of tolerations that will be passed to the EL client container + # Only works with Kubernetes + # Example: el_tolerations: + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + # toleration_seconds: 3600 + # Defaults to empty + el_tolerations: [] + # The type of CL client that should be started # Valid values are nimbus, lighthouse, lodestar, teku, and prysm cl_client_type: lighthouse @@ -178,6 +209,40 @@ participants: # Default values can be found in /src/package_io/constants.star VOLUME_SIZE cl_client_volume_size: 0 + # A list of tolerations that will be passed to the CL client container + # Only works with Kubernetes + # Example: cl_tolerations: + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + # toleration_seconds: 3600 + # Defaults to empty + cl_tolerations: [] + + # A list of tolerations that will be passed to the validator container + # Only works with Kubernetes + # Example: validator_tolerations: + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + # toleration_seconds: 3600 + # Defaults to empty + validator_tolerations: [] + + # A list of tolerations that will be passed to the EL/CL/validator containers + # This is to be used when you don't want to specify the tolerations for each container separately + # Only works with Kubernetes + # Example: tolerations: + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + # toleration_seconds: 3600 + # Defaults to empty + tolerations: [] + # A list of optional extra params that will be passed to the CL client Beacon container for modifying its behaviour # If the client combines the Beacon & validator nodes (e.g.
Teku, Nimbus), then this list will be passed to the combined Beacon-validator node beacon_extra_params: [] @@ -495,6 +560,17 @@ xatu_sentry_params: - voluntary_exit - contribution_and_proof - blob_sidecar + +# Global tolerations that will be passed to all containers (unless overridden by a more specific toleration) +# Only works with Kubernetes +# Example: global_tolerations: +# - key: "key" +# operator: "Equal" +# value: "value" +# effect: "NoSchedule" +# toleration_seconds: 3600 +# Defaults to empty +global_tolerations: [] ``` #### Example configurations diff --git a/main.star b/main.star index 1a24190c4..4a219d12d 100644 --- a/main.star +++ b/main.star @@ -62,6 +62,7 @@ def run(plan, args={}): parallel_keystore_generation = args_with_right_defaults.parallel_keystore_generation persistent = args_with_right_defaults.persistent xatu_sentry_params = args_with_right_defaults.xatu_sentry_params + global_tolerations = args_with_right_defaults.global_tolerations grafana_datasource_config_template = read_file( static_files.GRAFANA_DATASOURCE_CONFIG_TEMPLATE_FILEPATH @@ -96,6 +97,7 @@ def run(plan, args={}): jwt_file, persistent, xatu_sentry_params, + global_tolerations, parallel_keystore_generation, ) diff --git a/src/cl/lighthouse/lighthouse_launcher.star b/src/cl/lighthouse/lighthouse_launcher.star index 28b5bc15a..ed7096e84 100644 --- a/src/cl/lighthouse/lighthouse_launcher.star +++ b/src/cl/lighthouse/lighthouse_launcher.star @@ -28,9 +28,7 @@ BEACON_METRICS_PORT_NUM = 5054 # The min/max CPU/memory that the beacon node can use BEACON_MIN_CPU = 50 -BEACON_MAX_CPU = 1000 BEACON_MIN_MEMORY = 256 -BEACON_MAX_MEMORY = 1024 # ---------------------------------- Validator client ------------------------------------- VALIDATOR_KEYS_MOUNTPOINT_ON_CLIENTS = "/data/lighthouse/validator-keys" @@ -84,7 +82,7 @@ VALIDATOR_USED_PORTS = { ), } -LIGHTHOUSE_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "error", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "warn",
constants.GLOBAL_CLIENT_LOG_LEVEL.info: "info", @@ -121,6 +119,10 @@ def launch( extra_validator_labels, persistent, cl_volume_size, + cl_tolerations, + validator_tolerations, + participant_tolerations, + global_tolerations, split_mode_enabled=False, ): beacon_service_name = "{0}".format(service_name) @@ -129,19 +131,34 @@ def launch( ) log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, LIGHTHOUSE_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS + ) + + tolerations = input_parser.get_client_tolerations( + cl_tolerations, participant_tolerations, global_tolerations ) - bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU - bn_max_cpu = int(bn_max_cpu) if int(bn_max_cpu) > 0 else BEACON_MAX_CPU - bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY - bn_max_mem = int(bn_max_mem) if int(bn_max_mem) > 0 else BEACON_MAX_MEMORY network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU + bn_max_cpu = ( + int(bn_max_cpu) + if int(bn_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["lighthouse_max_cpu"] + ) + bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY + bn_max_mem = ( + int(bn_max_mem) + if int(bn_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["lighthouse_max_mem"] + ) + cl_volume_size = ( int(cl_volume_size) if int(cl_volume_size) > 0 @@ -169,6 +186,7 @@ def launch( extra_beacon_labels, persistent, cl_volume_size, + tolerations, ) beacon_service = plan.add_service(beacon_service_name, beacon_config) @@ -203,7 +221,9 @@ def launch( v_max_cpu = int(v_max_cpu) if int(v_max_cpu) > 0 else VALIDATOR_MAX_CPU v_min_mem = int(v_min_mem) if int(v_min_mem) > 0 else VALIDATOR_MIN_MEMORY v_max_mem = int(v_max_mem) if 
int(v_max_mem) > 0 else VALIDATOR_MAX_MEMORY - + tolerations = input_parser.get_client_tolerations( + validator_tolerations, participant_tolerations, global_tolerations + ) validator_config = get_validator_config( launcher.el_cl_genesis_data, image, @@ -219,6 +239,7 @@ def launch( extra_validator_params, extra_validator_labels, persistent, + tolerations, ) validator_service = plan.add_service(validator_service_name, validator_config) @@ -297,6 +318,7 @@ def get_beacon_config( extra_labels, persistent, cl_volume_size, + tolerations, ): # If snooper is enabled use the snooper engine context, otherwise use the execution client context if snooper_enabled: @@ -445,6 +467,7 @@ def get_beacon_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) @@ -463,6 +486,7 @@ def get_validator_config( extra_params, extra_labels, persistent, + tolerations, ): validator_keys_dirpath = shared_utils.path_join( VALIDATOR_KEYS_MOUNTPOINT_ON_CLIENTS, @@ -528,6 +552,7 @@ def get_validator_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/cl/lodestar/lodestar_launcher.star b/src/cl/lodestar/lodestar_launcher.star index 21fbe2e8f..243218f4b 100644 --- a/src/cl/lodestar/lodestar_launcher.star +++ b/src/cl/lodestar/lodestar_launcher.star @@ -22,9 +22,7 @@ METRICS_PORT_NUM = 8008 # The min/max CPU/memory that the beacon node can use BEACON_MIN_CPU = 50 -BEACON_MAX_CPU = 1000 BEACON_MIN_MEMORY = 256 -BEACON_MAX_MEMORY = 1024 # ---------------------------------- Validator client ------------------------------------- VALIDATOR_KEYS_MOUNT_DIRPATH_ON_SERVICE_CONTAINER = "/validator-keys" @@ -63,7 +61,7 @@ VALIDATOR_USED_PORTS = { } -LODESTAR_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "error", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "warn", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "info", @@ -100,6 +98,10 @@ def launch( extra_validator_labels, persistent, cl_volume_size, + 
cl_tolerations, + validator_tolerations, + participant_tolerations, + global_tolerations, split_mode_enabled=False, ): beacon_service_name = "{0}".format(service_name) @@ -107,20 +109,34 @@ def launch( service_name, VALIDATOR_SUFFIX_SERVICE_NAME ) log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, LODESTAR_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS ) - bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU - bn_max_cpu = int(bn_max_cpu) if int(bn_max_cpu) > 0 else BEACON_MAX_CPU - bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY - bn_max_mem = int(bn_max_mem) if int(bn_max_mem) > 0 else BEACON_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + cl_tolerations, participant_tolerations, global_tolerations + ) network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU + bn_max_cpu = ( + int(bn_max_cpu) + if int(bn_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["lodestar_max_cpu"] + ) + bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY + bn_max_mem = ( + int(bn_max_mem) + if int(bn_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["lodestar_max_mem"] + ) + cl_volume_size = ( int(cl_volume_size) if int(cl_volume_size) > 0 @@ -148,6 +164,7 @@ def launch( extra_beacon_labels, persistent, cl_volume_size, + tolerations, ) beacon_service = plan.add_service(beacon_service_name, beacon_config) @@ -183,6 +200,9 @@ def launch( v_max_cpu = int(v_max_cpu) if int(v_max_cpu) > 0 else VALIDATOR_MAX_CPU v_min_mem = int(v_min_mem) if int(v_min_mem) > 0 else VALIDATOR_MIN_MEMORY v_max_mem = int(v_max_mem) if int(v_max_mem) > 0 else VALIDATOR_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + 
validator_tolerations, participant_tolerations, global_tolerations + ) validator_config = get_validator_config( launcher.el_cl_genesis_data, image, @@ -198,6 +218,7 @@ def launch( extra_validator_params, extra_validator_labels, persistent, + tolerations, ) plan.add_service(validator_service_name, validator_config) @@ -268,6 +289,7 @@ def get_beacon_config( extra_labels, persistent, cl_volume_size, + tolerations, ): el_client_rpc_url_str = "http://{0}:{1}".format( el_client_context.ip_addr, @@ -397,6 +419,7 @@ def get_beacon_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) @@ -415,6 +438,7 @@ def get_validator_config( extra_params, extra_labels, persistent, + tolerations, ): root_dirpath = shared_utils.path_join( VALIDATOR_DATA_DIRPATH_ON_SERVICE_CONTAINER, service_name @@ -478,6 +502,7 @@ def get_validator_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/cl/nimbus/nimbus_launcher.star b/src/cl/nimbus/nimbus_launcher.star index d8f9072b8..8148725c2 100644 --- a/src/cl/nimbus/nimbus_launcher.star +++ b/src/cl/nimbus/nimbus_launcher.star @@ -23,9 +23,7 @@ BEACON_METRICS_PORT_NUM = 8008 # The min/max CPU/memory that the beacon node can use BEACON_MIN_CPU = 50 -BEACON_MAX_CPU = 1000 BEACON_MIN_MEMORY = 256 -BEACON_MAX_MEMORY = 1024 DEFAULT_BEACON_IMAGE_ENTRYPOINT = ["nimbus_beacon_node"] @@ -96,7 +94,7 @@ VALIDATOR_USED_PORTS = { ), } -NIMBUS_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "ERROR", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "WARN", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "INFO", @@ -135,6 +133,10 @@ def launch( extra_validator_labels, persistent, cl_volume_size, + cl_tolerations, + validator_tolerations, + participant_tolerations, + global_tolerations, split_mode_enabled, ): beacon_service_name = "{0}".format(service_name) @@ -143,27 +145,34 @@ def launch( ) log_level = input_parser.get_client_log_level_or_default( - participant_log_level, 
global_log_level, NIMBUS_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS ) - # Holesky has a bigger memory footprint, so it needs more memory - if launcher.network == constants.NETWORK_NAME.holesky: - holesky_beacon_memory_limit = 4096 - bn_max_mem = ( - int(bn_max_mem) if int(bn_max_mem) > 0 else holesky_beacon_memory_limit - ) - - bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU - bn_max_cpu = int(bn_max_cpu) if int(bn_max_cpu) > 0 else BEACON_MAX_CPU - bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY - bn_max_mem = int(bn_max_mem) if int(bn_max_mem) > 0 else BEACON_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + cl_tolerations, participant_tolerations, global_tolerations + ) network_name = ( "devnets" - if launcher.network != constants.NETWORK_NAME.kurtosis + if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU + bn_max_cpu = ( + int(bn_max_cpu) + if int(bn_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["nimbus_max_cpu"] + ) + bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY + bn_max_mem = ( + int(bn_max_mem) + if int(bn_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["nimbus_max_mem"] + ) + cl_volume_size = ( int(cl_volume_size) if int(cl_volume_size) > 0 @@ -192,6 +201,7 @@ def launch( split_mode_enabled, persistent, cl_volume_size, + tolerations, ) beacon_service = plan.add_service(beacon_service_name, beacon_config) @@ -232,7 +242,9 @@ def launch( v_max_cpu = int(v_max_cpu) if int(v_max_cpu) > 0 else VALIDATOR_MAX_CPU v_min_mem = int(v_min_mem) if int(v_min_mem) > 0 else VALIDATOR_MIN_MEMORY v_max_mem = int(v_max_mem) if int(v_max_mem) > 0 else VALIDATOR_MAX_MEMORY - + tolerations = input_parser.get_client_tolerations( + validator_tolerations, 
participant_tolerations, global_tolerations + ) validator_config = get_validator_config( launcher.el_cl_genesis_data, image, @@ -248,6 +260,7 @@ def launch( extra_validator_params, extra_validator_labels, persistent, + tolerations, ) validator_service = plan.add_service(validator_service_name, validator_config) @@ -302,6 +315,7 @@ def get_beacon_config( split_mode_enabled, persistent, cl_volume_size, + tolerations, ): validator_keys_dirpath = "" validator_secrets_dirpath = "" @@ -427,6 +441,7 @@ def get_beacon_config( extra_labels, ), user=User(uid=0, gid=0), + tolerations=tolerations, ) @@ -445,6 +460,7 @@ def get_validator_config( extra_params, extra_labels, persistent, + tolerations, ): validator_keys_dirpath = "" validator_secrets_dirpath = "" @@ -498,6 +514,7 @@ def get_validator_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/cl/prysm/prysm_launcher.star b/src/cl/prysm/prysm_launcher.star index ff4cdf7fa..90b95cd65 100644 --- a/src/cl/prysm/prysm_launcher.star +++ b/src/cl/prysm/prysm_launcher.star @@ -26,9 +26,7 @@ BEACON_MONITORING_PORT_NUM = 8080 # The min/max CPU/memory that the beacon node can use BEACON_MIN_CPU = 100 -BEACON_MAX_CPU = 2000 BEACON_MIN_MEMORY = 256 -BEACON_MAX_MEMORY = 1024 # ---------------------------------- Validator client ------------------------------------- VALIDATOR_DATA_DIRPATH_ON_SERVICE_CONTAINER = "/data/prysm/validator-data/" @@ -75,7 +73,7 @@ VALIDATOR_NODE_USED_PORTS = { ), } -PRYSM_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "error", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "warn", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "info", @@ -112,6 +110,10 @@ def launch( extra_validator_labels, persistent, cl_volume_size, + cl_tolerations, + validator_tolerations, + participant_tolerations, + global_tolerations, split_mode_enabled=False, ): split_images = images.split(IMAGE_SEPARATOR_DELIMITER) @@ -134,19 +136,34 @@ def launch( service_name, 
VALIDATOR_SUFFIX_SERVICE_NAME ) log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, PRYSM_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS + ) + + tolerations = input_parser.get_client_tolerations( + cl_tolerations, participant_tolerations, global_tolerations ) - bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU - bn_max_cpu = int(bn_max_cpu) if int(bn_max_cpu) > 0 else BEACON_MAX_CPU - bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY - bn_max_mem = int(bn_max_mem) if int(bn_max_mem) > 0 else BEACON_MAX_MEMORY network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU + bn_max_cpu = ( + int(bn_max_cpu) + if int(bn_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["prysm_max_cpu"] + ) + bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY + bn_max_mem = ( + int(bn_max_mem) + if int(bn_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["prysm_max_mem"] + ) + cl_volume_size = ( int(cl_volume_size) if int(cl_volume_size) > 0 @@ -173,6 +190,7 @@ def launch( extra_beacon_labels, persistent, cl_volume_size, + tolerations, ) beacon_service = plan.add_service(beacon_service_name, beacon_config) @@ -189,6 +207,9 @@ def launch( v_max_cpu = int(v_max_cpu) if int(v_max_cpu) > 0 else VALIDATOR_MAX_CPU v_min_mem = int(v_min_mem) if int(v_min_mem) > 0 else VALIDATOR_MIN_MEMORY v_max_mem = int(v_max_mem) if int(v_max_mem) > 0 else VALIDATOR_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + validator_tolerations, participant_tolerations, global_tolerations + ) validator_config = get_validator_config( launcher.el_cl_genesis_data, validator_image, @@ -207,6 +228,7 @@ def launch( 
launcher.prysm_password_relative_filepath, launcher.prysm_password_artifact_uuid, persistent, + tolerations, ) validator_service = plan.add_service(validator_service_name, validator_config) @@ -285,6 +307,7 @@ def get_beacon_config( extra_labels, persistent, cl_volume_size, + tolerations, ): # If snooper is enabled use the snooper engine context, otherwise use the execution client context if snooper_enabled: @@ -412,6 +435,7 @@ def get_beacon_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) @@ -433,6 +457,7 @@ def get_validator_config( prysm_password_relative_filepath, prysm_password_artifact_uuid, persistent, + tolerations, ): validator_keys_dirpath = shared_utils.path_join( VALIDATOR_KEYS_MOUNT_DIRPATH_ON_SERVICE_CONTAINER, @@ -497,6 +522,7 @@ def get_validator_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/cl/teku/teku_launcher.star b/src/cl/teku/teku_launcher.star index d2dfd47e6..24d6d95d0 100644 --- a/src/cl/teku/teku_launcher.star +++ b/src/cl/teku/teku_launcher.star @@ -23,9 +23,7 @@ BEACON_METRICS_PORT_NUM = 8008 # The min/max CPU/memory that the beacon node can use BEACON_MIN_CPU = 50 -BEACON_MAX_CPU = 1000 BEACON_MIN_MEMORY = 1024 -BEACON_MAX_MEMORY = 2048 BEACON_METRICS_PATH = "/metrics" # ---------------------------------- Validator client ------------------------------------- @@ -88,7 +86,7 @@ VALIDATOR_USED_PORTS = { ENTRYPOINT_ARGS = ["sh", "-c"] -TEKU_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "ERROR", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "WARN", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "INFO", @@ -125,6 +123,10 @@ def launch( extra_validator_labels, persistent, cl_volume_size, + cl_tolerations, + validator_tolerations, + participant_tolerations, + global_tolerations, split_mode_enabled, ): beacon_service_name = "{0}".format(service_name) @@ -132,7 +134,11 @@ def launch( service_name, VALIDATOR_SUFFIX_SERVICE_NAME ) 
log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, TEKU_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS + ) + + tolerations = input_parser.get_client_tolerations( + cl_tolerations, participant_tolerations, global_tolerations ) extra_params = [param for param in extra_beacon_params] + [ @@ -146,17 +152,27 @@ def launch( int(bn_max_mem) if int(bn_max_mem) > 0 else holesky_beacon_memory_limit ) - bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU - bn_max_cpu = int(bn_max_cpu) if int(bn_max_cpu) > 0 else BEACON_MAX_CPU - bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY - bn_max_mem = int(bn_max_mem) if int(bn_max_mem) > 0 else BEACON_MAX_MEMORY - network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + bn_min_cpu = int(bn_min_cpu) if int(bn_min_cpu) > 0 else BEACON_MIN_CPU + bn_max_cpu = ( + int(bn_max_cpu) + if int(bn_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["teku_max_cpu"] + ) + bn_min_mem = int(bn_min_mem) if int(bn_min_mem) > 0 else BEACON_MIN_MEMORY + bn_max_mem = ( + int(bn_max_mem) + if int(bn_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["teku_max_mem"] + ) + cl_volume_size = ( int(cl_volume_size) if int(cl_volume_size) > 0 @@ -185,6 +201,7 @@ def launch( split_mode_enabled, persistent, cl_volume_size, + tolerations, ) beacon_service = plan.add_service(service_name, config) @@ -227,7 +244,9 @@ def launch( v_max_cpu = int(v_max_cpu) if int(v_max_cpu) > 0 else VALIDATOR_MAX_CPU v_min_mem = int(v_min_mem) if int(v_min_mem) > 0 else VALIDATOR_MIN_MEMORY v_max_mem = int(v_max_mem) if int(v_max_mem) > 0 else VALIDATOR_MAX_MEMORY - + tolerations = input_parser.get_client_tolerations( + validator_tolerations, participant_tolerations, global_tolerations + ) validator_config = 
get_validator_config( launcher.el_cl_genesis_data, image, @@ -244,6 +263,7 @@ def launch( extra_validator_params, extra_validator_labels, persistent, + tolerations, ) validator_service = plan.add_service(validator_service_name, validator_config) @@ -298,6 +318,7 @@ def get_beacon_config( split_mode_enabled, persistent, cl_volume_size, + tolerations, ): validator_keys_dirpath = "" validator_secrets_dirpath = "" @@ -466,6 +487,7 @@ def get_beacon_config( extra_labels, ), user=User(uid=0, gid=0), + tolerations=tolerations, ) @@ -485,6 +507,7 @@ def get_validator_config( extra_params, extra_labels, persistent, + tolerations, ): validator_keys_dirpath = "" validator_secrets_dirpath = "" @@ -549,6 +572,7 @@ def get_validator_config( el_client_context.client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/el/besu/besu_launcher.star b/src/el/besu/besu_launcher.star index fe3c8b3a4..12a282b27 100644 --- a/src/el/besu/besu_launcher.star +++ b/src/el/besu/besu_launcher.star @@ -50,7 +50,7 @@ USED_PORTS = { ENTRYPOINT_ARGS = ["sh", "-c"] -BESU_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "ERROR", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "WARN", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "INFO", @@ -76,21 +76,38 @@ def launch( extra_labels, persistent, el_volume_size, + el_tolerations, + participant_tolerations, + global_tolerations, ): log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, BESU_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS + ) + tolerations = input_parser.get_client_tolerations( + el_tolerations, participant_tolerations, global_tolerations ) - el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU - el_max_cpu = int(el_max_cpu) if int(el_max_cpu) > 0 else EXECUTION_MAX_CPU - el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY - el_max_mem = int(el_max_mem) if int(el_max_mem) > 0 else 
EXECUTION_MAX_MEMORY network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU + el_max_cpu = ( + int(el_max_cpu) + if int(el_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["besu_max_cpu"] + ) + el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY + el_max_mem = ( + int(el_max_mem) + if int(el_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["besu_max_mem"] + ) + el_volume_size = ( el_volume_size if int(el_volume_size) > 0 @@ -118,6 +135,7 @@ def launch( extra_labels, persistent, el_volume_size, + tolerations, ) service = plan.add_service(service_name, config) @@ -161,6 +179,7 @@ def get_config( extra_labels, persistent, el_volume_size, + tolerations, ): cmd = [ "besu", @@ -255,6 +274,7 @@ def get_config( extra_labels, ), user=User(uid=0, gid=0), + tolerations=tolerations, ) diff --git a/src/el/erigon/erigon_launcher.star b/src/el/erigon/erigon_launcher.star index 2306ac3c5..cc503ec2e 100644 --- a/src/el/erigon/erigon_launcher.star +++ b/src/el/erigon/erigon_launcher.star @@ -18,9 +18,7 @@ METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 100 -EXECUTION_MAX_CPU = 1000 EXECUTION_MIN_MEMORY = 512 -EXECUTION_MAX_MEMORY = 2048 # Port IDs WS_RPC_PORT_ID = "ws-rpc" @@ -52,7 +50,7 @@ USED_PORTS = { ENTRYPOINT_ARGS = ["sh", "-c"] -ERIGON_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "1", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "2", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "3", @@ -78,22 +76,38 @@ def launch( extra_labels, persistent, el_volume_size, + el_tolerations, + participant_tolerations, + global_tolerations, ): log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, ERIGON_LOG_LEVELS + 
participant_log_level, global_log_level, VERBOSITY_LEVELS + ) + tolerations = input_parser.get_client_tolerations( + el_tolerations, participant_tolerations, global_tolerations ) - - el_min_cpu = el_min_cpu if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU - el_max_cpu = el_max_cpu if int(el_max_cpu) > 0 else EXECUTION_MAX_CPU - el_min_mem = el_min_mem if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY - el_max_mem = el_max_mem if int(el_max_mem) > 0 else EXECUTION_MAX_MEMORY network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU + el_max_cpu = ( + int(el_max_cpu) + if int(el_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["erigon_max_cpu"] + ) + el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY + el_max_mem = ( + int(el_max_mem) + if int(el_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["erigon_max_mem"] + ) + el_volume_size = ( el_volume_size if int(el_volume_size) > 0 @@ -122,6 +136,7 @@ def launch( extra_labels, persistent, el_volume_size, + tolerations, ) service = plan.add_service(service_name, config) @@ -168,6 +183,7 @@ def get_config( extra_labels, persistent, el_volume_size, + tolerations, ): init_datadir_cmd_str = "erigon init --datadir={0} {1}".format( EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -276,6 +292,7 @@ def get_config( extra_labels, ), user=User(uid=0, gid=0), + tolerations=tolerations, ) diff --git a/src/el/ethereumjs/ethereumjs_launcher.star b/src/el/ethereumjs/ethereumjs_launcher.star index a0a0901f1..55a9a926e 100644 --- a/src/el/ethereumjs/ethereumjs_launcher.star +++ b/src/el/ethereumjs/ethereumjs_launcher.star @@ -15,9 +15,7 @@ METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 100 -EXECUTION_MAX_CPU = 2000 EXECUTION_MIN_MEMORY = 256 
-EXECUTION_MAX_MEMORY = 1024 # Port IDs RPC_PORT_ID = "rpc" @@ -80,22 +78,38 @@ def launch( extra_labels, persistent, el_volume_size, + el_tolerations, + participant_tolerations, + global_tolerations, ): log_level = input_parser.get_client_log_level_or_default( participant_log_level, global_log_level, VERBOSITY_LEVELS ) - - el_min_cpu = el_min_cpu if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU - el_max_cpu = el_max_cpu if int(el_max_cpu) > 0 else EXECUTION_MAX_CPU - el_min_mem = el_min_mem if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY - el_max_mem = el_max_mem if int(el_max_mem) > 0 else EXECUTION_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + el_tolerations, participant_tolerations, global_tolerations + ) network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU + el_max_cpu = ( + int(el_max_cpu) + if int(el_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["ethereumjs_max_cpu"] + ) + el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY + el_max_mem = ( + int(el_max_mem) + if int(el_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["ethereumjs_max_mem"] + ) + el_volume_size = ( el_volume_size if int(el_volume_size) > 0 @@ -123,6 +137,7 @@ def launch( extra_labels, persistent, el_volume_size, + tolerations, ) service = plan.add_service(service_name, config) @@ -165,6 +180,7 @@ def get_config( extra_labels, persistent, el_volume_size, + tolerations, ): cmd = [ "--dataDir=" + EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -249,6 +265,7 @@ def get_config( cl_client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/el/geth/geth_launcher.star b/src/el/geth/geth_launcher.star index 61e47160b..727818f7c 100644 --- a/src/el/geth/geth_launcher.star +++ b/src/el/geth/geth_launcher.star @@ -17,9 
+17,7 @@ METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 300 -EXECUTION_MAX_CPU = 2000 EXECUTION_MIN_MEMORY = 512 -EXECUTION_MAX_MEMORY = 2048 # Port IDs RPC_PORT_ID = "rpc" @@ -88,20 +86,38 @@ def launch( extra_labels, persistent, el_volume_size, + el_tolerations, + participant_tolerations, + global_tolerations, ): log_level = input_parser.get_client_log_level_or_default( participant_log_level, global_log_level, VERBOSITY_LEVELS ) - el_min_cpu = el_min_cpu if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU - el_max_cpu = el_max_cpu if int(el_max_cpu) > 0 else EXECUTION_MAX_CPU - el_min_mem = el_min_mem if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY - el_max_mem = el_max_mem if int(el_max_mem) > 0 else EXECUTION_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + el_tolerations, participant_tolerations, global_tolerations + ) + network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU + el_max_cpu = ( + int(el_max_cpu) + if int(el_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["geth_max_cpu"] + ) + el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY + el_max_mem = ( + int(el_max_mem) + if int(el_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["geth_max_mem"] + ) + el_volume_size = ( el_volume_size if int(el_volume_size) > 0 @@ -133,6 +149,7 @@ def launch( launcher.final_genesis_timestamp, persistent, el_volume_size, + tolerations, ) service = plan.add_service(service_name, config) @@ -182,6 +199,7 @@ def get_config( final_genesis_timestamp, persistent, el_volume_size, + tolerations, ): # TODO: Remove this once electra fork has path based storage scheme implemented if electra_fork_epoch != None or constants.NETWORK_NAME.verkle in network: @@ -336,6 +354,7 
@@ def get_config( cl_client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/el/nethermind/nethermind_launcher.star b/src/el/nethermind/nethermind_launcher.star index 0cfcef765..e59bfcf1e 100644 --- a/src/el/nethermind/nethermind_launcher.star +++ b/src/el/nethermind/nethermind_launcher.star @@ -19,9 +19,7 @@ METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 100 -EXECUTION_MAX_CPU = 1000 EXECUTION_MIN_MEMORY = 512 -EXECUTION_MAX_MEMORY = 2048 # Port IDs RPC_PORT_ID = "rpc" @@ -50,7 +48,7 @@ USED_PORTS = { ), } -NETHERMIND_LOG_LEVELS = { +VERBOSITY_LEVELS = { constants.GLOBAL_CLIENT_LOG_LEVEL.error: "ERROR", constants.GLOBAL_CLIENT_LOG_LEVEL.warn: "WARN", constants.GLOBAL_CLIENT_LOG_LEVEL.info: "INFO", @@ -76,21 +74,38 @@ def launch( extra_labels, persistent, el_volume_size, + el_tolerations, + participant_tolerations, + global_tolerations, ): log_level = input_parser.get_client_log_level_or_default( - participant_log_level, global_log_level, NETHERMIND_LOG_LEVELS + participant_log_level, global_log_level, VERBOSITY_LEVELS + ) + tolerations = input_parser.get_client_tolerations( + el_tolerations, participant_tolerations, global_tolerations ) - el_min_cpu = el_min_cpu if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU - el_max_cpu = el_max_cpu if int(el_max_cpu) > 0 else EXECUTION_MAX_CPU - el_min_mem = el_min_mem if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY - el_max_mem = el_max_mem if int(el_max_mem) > 0 else EXECUTION_MAX_MEMORY network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU + el_max_cpu = ( + int(el_max_cpu) + if int(el_max_cpu) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["nethermind_max_cpu"] + ) + el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY + 
el_max_mem = ( + int(el_max_mem) + if int(el_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["nethermind_max_mem"] + ) + el_volume_size = ( el_volume_size if int(el_volume_size) > 0 @@ -118,6 +133,7 @@ def launch( extra_labels, persistent, el_volume_size, + tolerations, ) service = plan.add_service(service_name, config) @@ -162,6 +178,7 @@ def get_config( extra_labels, persistent, el_volume_size, + tolerations, ): cmd = [ "--log=" + log_level, @@ -246,6 +263,7 @@ def get_config( cl_client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/el/reth/reth_launcher.star b/src/el/reth/reth_launcher.star index cadca6348..411f8ee15 100644 --- a/src/el/reth/reth_launcher.star +++ b/src/el/reth/reth_launcher.star @@ -13,9 +13,7 @@ METRICS_PORT_NUM = 9001 # The min/max CPU/memory that the execution node can use EXECUTION_MIN_CPU = 100 -EXECUTION_MAX_CPU = 1000 EXECUTION_MIN_MEMORY = 256 -EXECUTION_MAX_MEMORY = 1024 # Port IDs RPC_PORT_ID = "rpc" @@ -79,21 +77,37 @@ def launch( extra_labels, persistent, el_volume_size, + el_tolerations, + participant_tolerations, + global_tolerations, ): log_level = input_parser.get_client_log_level_or_default( participant_log_level, global_log_level, VERBOSITY_LEVELS ) - - el_min_cpu = el_min_cpu if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU - el_max_cpu = el_max_cpu if int(el_max_cpu) > 0 else EXECUTION_MAX_CPU - el_min_mem = el_min_mem if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY - el_max_mem = el_max_mem if int(el_max_mem) > 0 else EXECUTION_MAX_MEMORY + tolerations = input_parser.get_client_tolerations( + el_tolerations, participant_tolerations, global_tolerations + ) network_name = ( "devnets" if launcher.network != "kurtosis" + and launcher.network != "ephemery" and launcher.network not in constants.PUBLIC_NETWORKS else launcher.network ) + + el_min_cpu = int(el_min_cpu) if int(el_min_cpu) > 0 else EXECUTION_MIN_CPU + el_max_cpu = ( + int(el_max_cpu) + if int(el_max_cpu) > 0 + else 
constants.RAM_CPU_OVERRIDES[network_name]["reth_max_cpu"] + ) + el_min_mem = int(el_min_mem) if int(el_min_mem) > 0 else EXECUTION_MIN_MEMORY + el_max_mem = ( + int(el_max_mem) + if int(el_max_mem) > 0 + else constants.RAM_CPU_OVERRIDES[network_name]["reth_max_mem"] + ) + el_volume_size = ( el_volume_size if int(el_volume_size) > 0 @@ -121,6 +135,7 @@ def launch( extra_labels, persistent, el_volume_size, + tolerations, ) service = plan.add_service(service_name, config) @@ -164,6 +179,7 @@ def get_config( extra_labels, persistent, el_volume_size, + tolerations, ): init_datadir_cmd_str = "reth init --datadir={0} --chain={1}".format( EXECUTION_DATA_DIRPATH_ON_CLIENT_CONTAINER, @@ -242,6 +258,7 @@ def get_config( persistent_key="data-{0}".format(service_name), size=el_volume_size, ) + return ServiceConfig( image=image, ports=USED_PORTS, @@ -261,6 +278,7 @@ def get_config( cl_client_name, extra_labels, ), + tolerations=tolerations, ) diff --git a/src/package_io/constants.star b/src/package_io/constants.star index a0055a3cc..150e89d6d 100644 --- a/src/package_io/constants.star +++ b/src/package_io/constants.star @@ -165,6 +165,19 @@ VOLUME_SIZE = { "nimbus_volume_size": 100000, # 100GB "lodestar_volume_size": 100000, # 100GB }, + "ephemery": { + "geth_volume_size": 5000, # 5GB + "erigon_volume_size": 3000, # 3GB + "nethermind_volume_size": 3000, # 3GB + "besu_volume_size": 3000, # 3GB + "reth_volume_size": 3000, # 3GB + "ethereumjs_volume_size": 3000, # 3GB + "prysm_volume_size": 1000, # 1GB + "lighthouse_volume_size": 1000, # 1GB + "teku_volume_size": 1000, # 1GB + "nimbus_volume_size": 1000, # 1GB + "lodestar_volume_size": 1000, # 1GB + }, "kurtosis": { "geth_volume_size": 5000, # 5GB "erigon_volume_size": 3000, # 3GB @@ -179,3 +192,174 @@ VOLUME_SIZE = { "lodestar_volume_size": 1000, # 1GB }, } + +RAM_CPU_OVERRIDES = { + "mainnet": { + "geth_max_mem": 16384, # 16GB + "geth_max_cpu": 4000, # 4 cores + "erigon_max_mem": 16384, # 16GB + "erigon_max_cpu": 4000, # 4 cores 
+ "nethermind_max_mem": 16384, # 16GB + "nethermind_max_cpu": 4000, # 4 cores + "besu_max_mem": 16384, # 16GB + "besu_max_cpu": 4000, # 4 cores + "reth_max_mem": 16384, # 16GB + "reth_max_cpu": 4000, # 4 cores + "ethereumjs_max_mem": 16384, # 16GB + "ethereumjs_max_cpu": 4000, # 4 cores + "prysm_max_mem": 16384, # 16GB + "prysm_max_cpu": 4000, # 4 cores + "lighthouse_max_mem": 16384, # 16GB + "lighthouse_max_cpu": 4000, # 4 cores + "teku_max_mem": 16384, # 16GB + "teku_max_cpu": 4000, # 4 cores + "nimbus_max_mem": 16384, # 16GB + "nimbus_max_cpu": 4000, # 4 cores + "lodestar_max_mem": 16384, # 16GB + "lodestar_max_cpu": 4000, # 4 cores + }, + "goerli": { + "geth_max_mem": 8192, # 8GB + "geth_max_cpu": 2000, # 2 cores + "erigon_max_mem": 8192, # 8GB + "erigon_max_cpu": 2000, # 2 cores + "nethermind_max_mem": 8192, # 8GB + "nethermind_max_cpu": 2000, # 2 cores + "besu_max_mem": 8192, # 8GB + "besu_max_cpu": 2000, # 2 cores + "reth_max_mem": 8192, # 8GB + "reth_max_cpu": 2000, # 2 cores + "ethereumjs_max_mem": 8192, # 8GB + "ethereumjs_max_cpu": 2000, # 2 cores + "prysm_max_mem": 8192, # 8GB + "prysm_max_cpu": 2000, # 2 cores + "lighthouse_max_mem": 8192, # 8GB + "lighthouse_max_cpu": 2000, # 2 cores + "teku_max_mem": 8192, # 8GB + "teku_max_cpu": 2000, # 2 cores + "nimbus_max_mem": 8192, # 8GB + "nimbus_max_cpu": 2000, # 2 cores + "lodestar_max_mem": 8192, # 8GB + "lodestar_max_cpu": 2000, # 2 cores + }, + "sepolia": { + "geth_max_mem": 4096, # 4GB + "geth_max_cpu": 1000, # 1 core + "erigon_max_mem": 4096, # 4GB + "erigon_max_cpu": 1000, # 1 core + "nethermind_max_mem": 4096, # 4GB + "nethermind_max_cpu": 1000, # 1 core + "besu_max_mem": 4096, # 4GB + "besu_max_cpu": 1000, # 1 core + "reth_max_mem": 4096, # 4GB + "reth_max_cpu": 1000, # 1 core + "ethereumjs_max_mem": 4096, # 4GB + "ethereumjs_max_cpu": 1000, # 1 core + "prysm_max_mem": 4096, # 4GB + "prysm_max_cpu": 1000, # 1 core + "lighthouse_max_mem": 4096, # 4GB + "lighthouse_max_cpu": 1000, # 1 core + 
"teku_max_mem": 4096, # 4GB + "teku_max_cpu": 1000, # 1 core + "nimbus_max_mem": 4096, # 4GB + "nimbus_max_cpu": 1000, # 1 core + "lodestar_max_mem": 4096, # 4GB + "lodestar_max_cpu": 1000, # 1 core + }, + "holesky": { + "geth_max_mem": 8192, # 8GB + "geth_max_cpu": 2000, # 2 cores + "erigon_max_mem": 8192, # 8GB + "erigon_max_cpu": 2000, # 2 cores + "nethermind_max_mem": 8192, # 8GB + "nethermind_max_cpu": 2000, # 2 cores + "besu_max_mem": 8192, # 8GB + "besu_max_cpu": 2000, # 2 cores + "reth_max_mem": 8192, # 8GB + "reth_max_cpu": 2000, # 2 cores + "ethereumjs_max_mem": 8192, # 8GB + "ethereumjs_max_cpu": 2000, # 2 cores + "prysm_max_mem": 8192, # 8GB + "prysm_max_cpu": 2000, # 2 cores + "lighthouse_max_mem": 8192, # 8GB + "lighthouse_max_cpu": 2000, # 2 cores + "teku_max_mem": 8192, # 8GB + "teku_max_cpu": 2000, # 2 cores + "nimbus_max_mem": 8192, # 8GB + "nimbus_max_cpu": 2000, # 2 cores + "lodestar_max_mem": 8192, # 8GB + "lodestar_max_cpu": 2000, # 2 cores + }, + "devnets": { + "geth_max_mem": 4096, # 4GB + "geth_max_cpu": 1000, # 1 core + "erigon_max_mem": 4096, # 4GB + "erigon_max_cpu": 1000, # 1 core + "nethermind_max_mem": 4096, # 4GB + "nethermind_max_cpu": 1000, # 1 core + "besu_max_mem": 4096, # 4GB + "besu_max_cpu": 1000, # 1 core + "reth_max_mem": 4096, # 4GB + "reth_max_cpu": 1000, # 1 core + "ethereumjs_max_mem": 4096, # 4GB + "ethereumjs_max_cpu": 1000, # 1 core + "prysm_max_mem": 4096, # 4GB + "prysm_max_cpu": 1000, # 1 core + "lighthouse_max_mem": 4096, # 4GB + "lighthouse_max_cpu": 1000, # 1 core + "teku_max_mem": 4096, # 4GB + "teku_max_cpu": 1000, # 1 core + "nimbus_max_mem": 4096, # 4GB + "nimbus_max_cpu": 1000, # 1 core + "lodestar_max_mem": 4096, # 4GB + "lodestar_max_cpu": 1000, # 1 core + }, + "ephemery": { + "geth_max_mem": 1024, # 1GB + "geth_max_cpu": 1000, # 1 core + "erigon_max_mem": 1024, # 1GB + "erigon_max_cpu": 1000, # 1 core + "nethermind_max_mem": 1024, # 1GB + "nethermind_max_cpu": 1000, # 1 core + "besu_max_mem": 1024, # 1GB 
+ "besu_max_cpu": 1000, # 1 core + "reth_max_mem": 1024, # 1GB + "reth_max_cpu": 1000, # 1 core + "ethereumjs_max_mem": 1024, # 1GB + "ethereumjs_max_cpu": 1000, # 1 core + "prysm_max_mem": 1024, # 1GB + "prysm_max_cpu": 1000, # 1 core + "lighthouse_max_mem": 1024, # 1GB + "lighthouse_max_cpu": 1000, # 1 core + "teku_max_mem": 1024, # 1GB + "teku_max_cpu": 1000, # 1 core + "nimbus_max_mem": 1024, # 1GB + "nimbus_max_cpu": 1000, # 1 core + "lodestar_max_mem": 1024, # 1GB + "lodestar_max_cpu": 1000, # 1 core + }, + "kurtosis": { + "geth_max_mem": 1024, # 1GB + "geth_max_cpu": 1000, # 1 core + "erigon_max_mem": 1024, # 1GB + "erigon_max_cpu": 1000, # 1 core + "nethermind_max_mem": 1024, # 1GB + "nethermind_max_cpu": 1000, # 1 core + "besu_max_mem": 1024, # 1GB + "besu_max_cpu": 1000, # 1 core + "reth_max_mem": 1024, # 1GB + "reth_max_cpu": 1000, # 1 core + "ethereumjs_max_mem": 1024, # 1GB + "ethereumjs_max_cpu": 1000, # 1 core + "prysm_max_mem": 1024, # 1GB + "prysm_max_cpu": 1000, # 1 core + "lighthouse_max_mem": 1024, # 1GB + "lighthouse_max_cpu": 1000, # 1 core + "teku_max_mem": 1024, # 1GB + "teku_max_cpu": 1000, # 1 core + "nimbus_max_mem": 1024, # 1GB + "nimbus_max_cpu": 1000, # 1 core + "lodestar_max_mem": 1024, # 1GB + "lodestar_max_cpu": 1000, # 1 core + }, +} diff --git a/src/package_io/input_parser.star b/src/package_io/input_parser.star index b55d9fbd5..cc0cb91e0 100644 --- a/src/package_io/input_parser.star +++ b/src/package_io/input_parser.star @@ -78,6 +78,7 @@ def input_parser(plan, input_args): result["assertoor_params"] = get_default_assertoor_params() result["xatu_sentry_params"] = get_default_xatu_sentry_params() result["persistent"] = False + result["global_tolerations"] = [] for attr in input_args: value = input_args[attr] @@ -143,11 +144,15 @@ def input_parser(plan, input_args): el_extra_params=participant["el_extra_params"], el_extra_env_vars=participant["el_extra_env_vars"], el_extra_labels=participant["el_extra_labels"], + 
el_tolerations=participant["el_tolerations"], cl_client_type=participant["cl_client_type"], cl_client_image=participant["cl_client_image"], cl_client_log_level=participant["cl_client_log_level"], cl_client_volume_size=participant["cl_client_volume_size"], cl_split_mode_enabled=participant["cl_split_mode_enabled"], + cl_tolerations=participant["cl_tolerations"], + tolerations=participant["tolerations"], + validator_tolerations=participant["validator_tolerations"], beacon_extra_params=participant["beacon_extra_params"], beacon_extra_labels=participant["beacon_extra_labels"], validator_extra_params=participant["validator_extra_params"], @@ -268,6 +273,7 @@ def input_parser(plan, input_args): beacon_subscriptions=result["xatu_sentry_params"]["beacon_subscriptions"], xatu_server_tls=result["xatu_sentry_params"]["xatu_server_tls"], ), + global_tolerations=result["global_tolerations"], ) @@ -454,6 +460,44 @@ def get_client_log_level_or_default( return log_level +def get_client_tolerations( + specific_container_toleration, participant_tolerations, global_tolerations +): + toleration_list = [] + tolerations = [] + tolerations = specific_container_toleration if specific_container_toleration else [] + if not tolerations: + tolerations = participant_tolerations if participant_tolerations else [] + if not tolerations: + tolerations = global_tolerations if global_tolerations else [] + + if tolerations != []: + for toleration_data in tolerations: + if toleration_data.get("toleration_seconds"): + toleration_list.append( + Toleration( + key=toleration_data.get("key", ""), + value=toleration_data.get("value", ""), + operator=toleration_data.get("operator", ""), + effect=toleration_data.get("effect", ""), + toleration_seconds=toleration_data.get("toleration_seconds"), + ) + ) + # Gyani has to fix this in the future + # https://github.com/kurtosis-tech/kurtosis/issues/2093 + else: + toleration_list.append( + Toleration( + key=toleration_data.get("key", ""), + 
value=toleration_data.get("value", ""), + operator=toleration_data.get("operator", ""), + effect=toleration_data.get("effect", ""), + ) + ) + + return toleration_list + + def default_input_args(): network_params = default_network_params() participants = [default_participant()] @@ -467,6 +511,7 @@ def default_input_args(): "xatu_sentry_enabled": False, "parallel_keystore_generation": False, "disable_peer_scoring": False, + "global_tolerations": [], } @@ -499,11 +544,15 @@ def default_participant(): "el_extra_params": [], "el_extra_env_vars": {}, "el_extra_labels": {}, + "el_tolerations": [], "cl_client_type": "lighthouse", "cl_client_image": "", "cl_client_log_level": "", "cl_client_volume_size": 0, "cl_split_mode_enabled": False, + "cl_tolerations": [], + "validator_tolerations": [], + "tolerations": [], "beacon_extra_params": [], "beacon_extra_labels": {}, "validator_extra_params": [], diff --git a/src/participant_network.star b/src/participant_network.star index c52410ab9..f722732c3 100644 --- a/src/participant_network.star +++ b/src/participant_network.star @@ -64,6 +64,7 @@ def launch_participant_network( jwt_file, persistent, xatu_sentry_params, + global_tolerations, parallel_keystore_generation=False, ): num_participants = len(participants) @@ -327,6 +328,9 @@ def launch_participant_network( participant.el_extra_labels, persistent, participant.el_client_volume_size, + participant.el_tolerations, + participant.tolerations, + global_tolerations, ) # Add participant el additional prometheus metrics @@ -474,6 +478,10 @@ def launch_participant_network( participant.validator_extra_labels, persistent, participant.cl_client_volume_size, + participant.cl_tolerations, + participant.validator_tolerations, + participant.tolerations, + global_tolerations, participant.cl_split_mode_enabled, ) else: @@ -506,6 +514,10 @@ def launch_participant_network( participant.validator_extra_labels, persistent, participant.cl_client_volume_size, + participant.cl_tolerations, + 
participant.validator_tolerations, + participant.tolerations, + global_tolerations, participant.cl_split_mode_enabled, )