Skip to content

Commit

Permalink
Merge pull request #3003 from annuay-google/annuay/add-machine-type-a…
Browse files Browse the repository at this point in the history
…vailability-checks

Add machine type availability checks
  • Loading branch information
annuay-google authored Sep 11, 2024
2 parents b827380 + 62acb6a commit dd5647e
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,15 @@ modules. For support with the underlying modules, see the instructions in the

| Name | Version |
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3 |
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.4 |
| <a name="requirement_google"></a> [google](#requirement\_google) | >= 5.11 |

## Providers

| Name | Version |
|------|---------|
| <a name="provider_google"></a> [google](#provider\_google) | >= 5.11 |
| <a name="provider_terraform"></a> [terraform](#provider\_terraform) | n/a |

## Modules

Expand All @@ -147,7 +148,9 @@ No modules.

| Name | Type |
|------|------|
| [terraform_data.machine_type_zone_validation](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource |
| [google_compute_image.slurm](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source |
| [google_compute_machine_types.machine_types_by_zone](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_machine_types) | data source |
| [google_compute_reservation.reservation](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_reservation) | data source |
| [google_compute_zones.available](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_zones) | data source |

Expand Down
27 changes: 26 additions & 1 deletion community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ locals {
gpu = one(local.guest_accelerator)

labels = local.labels
machine_type = var.machine_type
machine_type = terraform_data.machine_type_zone_validation.output
metadata = local.metadata
min_cpu_platform = var.min_cpu_platform

Expand Down Expand Up @@ -171,3 +171,28 @@ data "google_compute_reservation" "reservation" {
# Add a validation that if reservation.project != var.project_id it should be a shared reservation
}
}

# Query the machine types named var.machine_type once per element of
# local.zones; each element's value is used as the zone of its query.
data "google_compute_machine_types" "machine_types_by_zone" {
  for_each = local.zones

  zone   = each.value
  filter = "name = \"${var.machine_type}\""
}

locals {
  # Short alias for the per-zone machine type queries.
  machine_types_by_zone = data.google_compute_machine_types.machine_types_by_zone

  # Keys of the queries whose machine_types result is non-empty.
  zones_with_machine_type = [
    for zone_key, lookup in local.machine_types_by_zone : zone_key
    if length(lookup.machine_types) > 0
  ]
}

# Validation gate for the machine type. `input` carries var.machine_type, and
# the precondition fails when local.zones_with_machine_type is empty, i.e. no
# zone reported the requested machine type.
resource "terraform_data" "machine_type_zone_validation" {
  input = var.machine_type

  lifecycle {
    precondition {
      condition     = length(local.zones_with_machine_type) > 0
      # `<<-EOT` strips the common leading indentation, so the message text
      # starts at column 0 when rendered.
      error_message = <<-EOT
        machine type ${var.machine_type} is not available in any of the zones ${jsonencode(local.zones)}. To list zones in which it is available, run:
        gcloud compute machine-types list --filter="name=${var.machine_type}"
        EOT
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

terraform {
required_version = ">= 1.3"
required_version = ">= 1.4"

required_providers {
google = {
Expand Down
2 changes: 1 addition & 1 deletion tools/cloud-build/daily-tests/tests/hcls.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ deployment_name: "hcls-v6-{{ build }}"
# No non-alphanumerical characters in the slurm cluster name - they will be
# removed by Cluster Toolkit slurm wrappers, which will break the playbook
slurm_cluster_name: "hclsv6{{ build[0:4] }}"
zone: europe-west1-d
zone: europe-west1-c
workspace: /workspace
blueprint_yaml: "{{ workspace }}/examples/hcls-blueprint.yaml"
network: "{{ test_name }}-net"
Expand Down
6 changes: 3 additions & 3 deletions tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ deployment_name: "enter-{{ build }}"
# Manually adding the slurm_cluster_name for use in node names, which filters
# non-alphanumeric chars and is capped at 10 chars.
slurm_cluster_name: "enter{{ build[0:5] }}"
zone: europe-west1-d
zone: europe-west4-c
cli_deployment_vars:
network_name: "{{ network }}"
region: europe-west1
region: europe-west4
zone: "{{ zone }}"
gpu_zones: "[europe-west1-b,europe-west1-c,europe-west1-d]"
gpu_zones: "[europe-west4-a,europe-west4-b,europe-west4-c]"
workspace: /workspace
blueprint_yaml: "{{ workspace }}/examples/hpc-enterprise-slurm.yaml"
network: "{{ test_name }}-net"
Expand Down
6 changes: 3 additions & 3 deletions tools/cloud-build/daily-tests/tests/slurm-v6-rocky8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ slurm_cluster_name: "rock8{{ build[0:5] }}"

cli_deployment_vars:
network_name: "{{ network }}"
region: us-west4
zone: us-west4-c
region: us-central1
zone: us-central1-a

zone: us-west4-c
zone: us-central1-a
workspace: /workspace
blueprint_yaml: "{{ workspace }}/examples/hpc-slurm.yaml"
network: "{{ test_name }}-net"
Expand Down
4 changes: 2 additions & 2 deletions tools/cloud-build/provision/reservations.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ resource "google_compute_reservation" "c2standard60_us_west4_c" {
}

resource "google_compute_reservation" "n1standard8_with_tesla_t4_europe_west1_d" {
name = "n1standard8-with-tesla-t4-europe-west1-d"
zone = "europe-west1-d"
name = "n1standard8-with-tesla-t4-europe-west1-c"
zone = "europe-west1-c"
description = local.reservation_description

specific_reservation {
Expand Down

0 comments on commit dd5647e

Please sign in to comment.