From b380f18ec28e9e17d3b2207e5e974c672ef7afe5 Mon Sep 17 00:00:00 2001 From: Vidit Bhat Date: Tue, 21 May 2024 01:45:26 +0530 Subject: [PATCH] roachtest: surface cloud cluster spec info in artifacts for aws This is a follow-up PR to https://github.com/cockroachdb/cockroach/pull/124243. It implements the surfacing of cluster specs for AWS and refactors logic in `cluster.go` to make it cloud-agnostic. Epic: none Release note: None --- pkg/cmd/roachtest/cluster.go | 12 ++---- pkg/roachprod/vm/aws/aws.go | 58 ++++++++++++++++++++++++++- pkg/roachprod/vm/azure/azure.go | 4 +- pkg/roachprod/vm/flagstub/flagstub.go | 4 +- pkg/roachprod/vm/gce/gcloud.go | 17 +++++--- pkg/roachprod/vm/local/local.go | 4 +- pkg/roachprod/vm/vm.go | 4 +- 7 files changed, 82 insertions(+), 21 deletions(-) diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index 9a354760a35a..788f9af46981 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -1394,7 +1394,7 @@ func (c *clusterImpl) FetchDebugZip( }) } -// FetchVMSpecs downloads the VM specs from the cluster using `roachprod get`. +// FetchVMSpecs saves the VM specs for each VM in the cluster. // The logs will be placed in the test's artifacts dir.
func (c *clusterImpl) FetchVMSpecs(ctx context.Context, l *logger.Logger) error { if c.IsLocal() { @@ -1418,18 +1418,12 @@ func (c *clusterImpl) FetchVMSpecs(ctx context.Context, l *logger.Logger) error for provider, vms := range providerToVMs { p := vm.Providers[provider] - vmSpecs, err := p.GetVMSpecs(vms) + vmSpecs, err := p.GetVMSpecs(l, vms) if err != nil { l.Errorf("failed to get VM spec for provider %s: %s", provider, err) continue } - for _, vmSpec := range vmSpecs { - name, ok := vmSpec["name"].(string) - if !ok { - l.Errorf("failed to create spec files for VM\n%v", vmSpec) - continue - } - + for name, vmSpec := range vmSpecs { dest := filepath.Join(vmSpecsFolder, name+".json") specJSON, err := json.MarshalIndent(vmSpec, "", " ") if err != nil { diff --git a/pkg/roachprod/vm/aws/aws.go b/pkg/roachprod/vm/aws/aws.go index b82f58b4e7b1..d3a9e1cc7156 100644 --- a/pkg/roachprod/vm/aws/aws.go +++ b/pkg/roachprod/vm/aws/aws.go @@ -18,6 +18,7 @@ import ( "math/rand" "os" "os/exec" + "slices" "strconv" "strings" "time" @@ -278,8 +279,61 @@ func (p *Provider) GetHostErrorVMs( return nil, nil } -func (p *Provider) GetVMSpecs(vms vm.List) ([]map[string]interface{}, error) { - return nil, nil +// GetVMSpecs returns a map from VM.Name to a map of VM attributes, provided by AWS +func (p *Provider) GetVMSpecs( + l *logger.Logger, vms vm.List, +) (map[string]map[string]interface{}, error) { + if vms == nil { + return nil, errors.New("vms cannot be nil") + } + + byRegion, err := regionMap(vms) + if err != nil { + return nil, err + } + + // Extract the spec of all VMs and create a map from VM name to spec. + vmSpecs := make(map[string]map[string]interface{}) + for region, list := range byRegion { + args := []string{ + "ec2", "describe-instances", + "--region", region, + "--instance-ids", + } + args = append(args, list.ProviderIDs()...) 
+ var describeInstancesResponse DescribeInstancesOutput + err := p.runJSONCommand(l, args, &describeInstancesResponse) + if err != nil { + return nil, errors.Wrapf(err, "error describing instances in region %s: ", region) + } + if len(describeInstancesResponse.Reservations) == 0 { + l.Errorf("failed to create spec files for instances in region %s: no Reservations found", region) + continue + } + + for _, r := range describeInstancesResponse.Reservations { + for _, instance := range r.Instances { + i := slices.IndexFunc(instance.Tags, func(tag Tag) bool { + return tag.Key == "Name" + }) + if i != -1 { + instanceRecord, err := json.MarshalIndent(instance, "", " ") + if err != nil { + l.Errorf("Failed to marshal JSON: %v for instance \n%v", err, instance) + continue + } + var vmSpec map[string]interface{} + err = json.Unmarshal(instanceRecord, &vmSpec) + if err != nil { + l.Errorf("Failed to unmarshal JSON: %v for instance record \n%v", err, instanceRecord) + continue + } + vmSpecs[instance.Tags[i].Value] = vmSpec + } + } + } + } + return vmSpecs, nil } const ( diff --git a/pkg/roachprod/vm/azure/azure.go b/pkg/roachprod/vm/azure/azure.go index 763f81d23bea..67ed92d4f0f9 100644 --- a/pkg/roachprod/vm/azure/azure.go +++ b/pkg/roachprod/vm/azure/azure.go @@ -113,7 +113,9 @@ func (p *Provider) GetHostErrorVMs( return nil, nil } -func (p *Provider) GetVMSpecs(vms vm.List) ([]map[string]interface{}, error) { +func (p *Provider) GetVMSpecs( + l *logger.Logger, vms vm.List, +) (map[string]map[string]interface{}, error) { return nil, nil } diff --git a/pkg/roachprod/vm/flagstub/flagstub.go b/pkg/roachprod/vm/flagstub/flagstub.go index 6f1b66e8a5be..e36d62a87f17 100644 --- a/pkg/roachprod/vm/flagstub/flagstub.go +++ b/pkg/roachprod/vm/flagstub/flagstub.go @@ -48,7 +48,9 @@ func (p *provider) GetHostErrorVMs( return nil, nil } -func (p *provider) GetVMSpecs(vms vm.List) ([]map[string]interface{}, error) { +func (p *provider) GetVMSpecs( + l *logger.Logger, vms vm.List, +) 
(map[string]map[string]interface{}, error) { return nil, nil } diff --git a/pkg/roachprod/vm/gce/gcloud.go b/pkg/roachprod/vm/gce/gcloud.go index e3f5fc0c25ec..974dbc8471c1 100644 --- a/pkg/roachprod/vm/gce/gcloud.go +++ b/pkg/roachprod/vm/gce/gcloud.go @@ -400,16 +400,18 @@ func (p *Provider) GetHostErrorVMs( return hostErrorVMs, nil } -// GetVMSpecs returns a json list of VM specs, provided by GCE -func (p *Provider) GetVMSpecs(vms vm.List) ([]map[string]interface{}, error) { +// GetVMSpecs returns a map from VM.Name to a map of VM attributes, provided by GCE +func (p *Provider) GetVMSpecs( + l *logger.Logger, vms vm.List, +) (map[string]map[string]interface{}, error) { if p.GetProject() == "" { return nil, errors.New("project name cannot be empty") } if vms == nil { return nil, errors.New("vms cannot be nil") } - // Extract the spec of all VMs. - var vmSpecs []map[string]interface{} + // Extract the spec of all VMs and create a map from VM name to spec. + vmSpecs := make(map[string]map[string]interface{}) for _, vmInstance := range vms { var vmSpec map[string]interface{} vmFullResourceName := "projects/" + p.GetProject() + "/zones/" + vmInstance.Zone + "/instances/" + vmInstance.Name @@ -418,7 +420,12 @@ func (p *Provider) GetVMSpecs(vms vm.List) ([]map[string]interface{}, error) { if err := runJSONCommand(args, &vmSpec); err != nil { return nil, errors.Wrapf(err, "error describing instance %s in zone %s", vmInstance.Name, vmInstance.Zone) } - vmSpecs = append(vmSpecs, vmSpec) + name, ok := vmSpec["name"].(string) + if !ok { + l.Errorf("failed to create spec files for VM\n%v", vmSpec) + continue + } + vmSpecs[name] = vmSpec } return vmSpecs, nil } diff --git a/pkg/roachprod/vm/local/local.go b/pkg/roachprod/vm/local/local.go index 7334c74b3631..30b21412a9fa 100644 --- a/pkg/roachprod/vm/local/local.go +++ b/pkg/roachprod/vm/local/local.go @@ -141,7 +141,9 @@ func (p *Provider) GetHostErrorVMs( return nil, nil } -func (p *Provider) GetVMSpecs(vms vm.List) 
([]map[string]interface{}, error) { +func (p *Provider) GetVMSpecs( + l *logger.Logger, vms vm.List, +) (map[string]map[string]interface{}, error) { return nil, nil } diff --git a/pkg/roachprod/vm/vm.go b/pkg/roachprod/vm/vm.go index 66bb7d814e83..67c780a20ad2 100644 --- a/pkg/roachprod/vm/vm.go +++ b/pkg/roachprod/vm/vm.go @@ -510,8 +510,8 @@ type Provider interface { GetPreemptedSpotVMs(l *logger.Logger, vms List, since time.Time) ([]PreemptedVM, error) // GetHostErrorVMs returns a list of VMs that had host error since the time specified. GetHostErrorVMs(l *logger.Logger, vms List, since time.Time) ([]string, error) - // GetVMSpecs returns a json list of VM specs, according to a specific cloud provider. - GetVMSpecs(vms List) ([]map[string]interface{}, error) + // GetVMSpecs returns a map from VM.Name to a map of VM attributes, according to a specific cloud provider. + GetVMSpecs(l *logger.Logger, vms List) (map[string]map[string]interface{}, error) // CreateLoadBalancer creates a load balancer, for a specific port, that // delegates to the given cluster.