add supported gpu types

kubernetes-sigs · Jul 15, 2022 · 0ec3e07 · 0ec3e07
1 parent 763acc0
commit 0ec3e07
Show file tree

Hide file tree

Showing 5 changed files with 36 additions and 9 deletions.
diff --git a/api/v1beta1/gcpmachine_types.go b/api/v1beta1/gcpmachine_types.go
@@ -66,16 +66,16 @@ const (
 	IPForwardingDisabled IPForwarding = "Disabled"
 )
 
-// GCPAccleratorConfig represents the GPU accelerator configuration for the GCP machine.
-type GCPAcceleratorConfig struct {
-	// AccerleratorType is the type of the GPU accelerator to be used for the GCP machine. 
+// AcceleratorConfig is the GPU accelerator configuration for the GCP machine.
+type AcceleratorConfig struct {
+	// AcceleratorType is the type of the GPU accelerator to be used for the GCP machine.
 	// +required
 	AcceleratorType string `json:"acceleratorType"`
 
 	// AcceleratorCount is the number of accelerators to be used for the GCP machine.
 	// Defaults to 1.
+	// NOTE(review): no defaulting marker or code is visible in this hunk — confirm where the default of 1 is applied.
 	// +optional
-	AcceleratorCount int64 `json:"acceleratorCount,omitempty"`
+	AcceleratorCount int `json:"acceleratorCount,omitempty"`
 }
 
 // GCPMachineSpec defines the desired state of GCPMachine.
@@ -102,18 +102,18 @@ type GCPMachineSpec struct {
 	Image *string `json:"image,omitempty"`
 
 	// AcceleratorConfig is the accelerator configuration for the GCP machine.
-	AcceleratorConfig *GCPAcceleratorConfig `json:"acceleratorConfig,omitempty"`
+	AcceleratorConfig *AcceleratorConfig `json:"acceleratorConfig,omitempty"`
 
 	// OnHostMaintenance is the action to take when the host machine is being upgraded.
 	// It is either "TERMINATE" or "MIGRATE" depending on the machine type and preemptibility.
 	// Default=TERMINATE
 	// +optional
 	OnHostMaintenance string `json:"onHostMaintenance,omitempty"`
 
-	// AutomaticRestart is whether the instance should be automatically restarted if it is terminated by GCP. 
+	// AutomaticRestart is whether the instance should be automatically restarted if it is terminated by GCP.
 	// Default=true
 	// +optional
-	AutomaticRestart bool `json:"automaticRestart,omitempty"`
+	AutomaticRestart *bool `json:"automaticRestart,omitempty"`
 
 	// AdditionalLabels is an optional set of tags to add to an instance, in addition to the ones added by default by the
 	// GCP provider. If both the GCPCluster and the GCPMachine specify the same tag name with different values, the
@@ -128,7 +128,7 @@ type GCPMachineSpec struct {
 	// +optional
 	AdditionalMetadata []MetadataItem `json:"additionalMetadata,omitempty"`
 
-	// BuildName is the name of the build to use for the GCP instance. 
+	// BuildName is the name of the build to use for the GCP instance.
 	// +optional
 	BuildName string `json:"buildName,omitempty"`
 

diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go
@@ -115,6 +115,16 @@ func (m *MachineScope) Name() string {
 	return m.GCPMachine.Name
 }
 
+// AcceleratorType returns the accelerator type configured on the GCPMachine.
+// It returns the empty string when the machine has no accelerator
+// configuration.
+func (m *MachineScope) AcceleratorType() string {
+	// Spec.AcceleratorConfig is an optional pointer field, so it is nil for
+	// machines that do not request a GPU; dereferencing it would panic.
+	cfg := m.GCPMachine.Spec.AcceleratorConfig
+	if cfg == nil {
+		return ""
+	}
+	return cfg.AcceleratorType
+}
+
+// AcceleratorCount returns the number of accelerators configured on the
+// GCPMachine. It returns 0 when the machine has no accelerator configuration.
+// The value is returned as stored in the spec; no defaulting is applied here.
+func (m *MachineScope) AcceleratorCount() int {
+	// Same nil guard as AcceleratorType: the accelerator config is optional.
+	cfg := m.GCPMachine.Spec.AcceleratorConfig
+	if cfg == nil {
+		return 0
+	}
+	return cfg.AcceleratorCount
+}
+
 // Namespace returns the namespace name.
 func (m *MachineScope) Namespace() string {
 	return m.GCPMachine.Namespace

diff --git a/cloud/services/compute/instances/reconcile.go b/cloud/services/compute/instances/reconcile.go
@@ -59,6 +59,8 @@ func (s *Service) Reconcile(ctx context.Context) error {
 	machineName := s.scope.Name()
 	zone := s.scope.Zone()
 	project := s.scope.Project()
+	acceleratorType := s.scope.AcceleratorType()
+	acceleratorCount := s.scope.AcceleratorCount()
 
 	// Since we don't know when the project was created, we must account for
 	// both types of internal-dns:

diff --git a/cloud/services/compute/instances/service.go b/cloud/services/compute/instances/service.go
@@ -43,6 +43,8 @@ type Scope interface {
 	InstanceSpec() *compute.Instance
 	InstanceImageSpec() *compute.AttachedDisk
 	InstanceAdditionalDiskSpec() []*compute.AttachedDisk
+	AcceleratorType() string
+	AcceleratorCount() int
 }
 
 // Service implements instances reconciler.
@@ -61,4 +63,4 @@ func New(scope Scope) *Service {
 		instances:      scope.Cloud().Instances(),
 		instancegroups: scope.Cloud().InstanceGroups(),
 	}
-}
+}
diff --git a/controllers/gcpmachine_controller.go b/controllers/gcpmachine_controller.go
@@ -47,8 +47,21 @@ type GCPMachineReconciler struct {
 	client.Client
 	ReconcileTimeout time.Duration
 	WatchFilterValue string
+	AccleratorConfig   *infrav1.AcceleratorConfig
 }
 
+// gpuTypes lists the supported GPU accelerator types, keyed by accelerator
+// type name; each value is the identifier string associated with that type.
+// NOTE(review): the values look like GCP quota metric names — confirm.
+var gpuTypes = map[string]string{
+	"nvidia-tesla-a100": "NVIDIA_A100_GPUS",
+	"nvidia-tesla-k80":  "NVIDIA_K80_GPUS",
+	"nvidia-tesla-p100": "NVIDIA_P100_GPUS",
+	"nvidia-tesla-p4":   "NVIDIA_P4_GPUS",
+	"nvidia-tesla-t4":   "NVIDIA_T4_GPUS",
+	"nvidia-tesla-v100": "NVIDIA_V100_GPUS",
+}
+
 // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
 // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch
 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch