From 15960ac9d5451d976c5084bc44f61f24017aa34d Mon Sep 17 00:00:00 2001 From: cwiklik Date: Tue, 28 May 2024 13:11:23 -0400 Subject: [PATCH 1/4] added kueue integration code Signed-off-by: cwiklik --- .../cluster-metrics/templates/operator.yaml | 2 +- charts/kueue-ks/.helmignore | 23 + charts/kueue-ks/Chart.yaml | 24 + charts/kueue-ks/resources.yaml | 0 charts/kueue-ks/templates/_helpers.tpl | 62 ++ charts/kueue-ks/templates/operator.yaml | 543 ++++++++++++++++++ charts/kueue-ks/templates/resources.yaml | 0 .../templates/tests/test-connection.yaml | 15 + charts/kueue-ks/values.yaml | 68 +++ clustermetrics/Makefile | 9 + .../config/manager/kustomization.yaml | 2 +- kueue-ks/.dockerignore | 4 + kueue-ks/.gitignore | 25 + kueue-ks/Dockerfile | 27 + kueue-ks/Makefile | 172 ++++++ kueue-ks/PROJECT | 22 + kueue-ks/README.md | 94 +++ kueue-ks/cmd/kueue-ks/main.go | 196 +++++++ kueue-ks/config/default/kustomization.yaml | 74 +++ .../default/manager_auth_proxy_patch.yaml | 39 ++ .../config/default/manager_config_patch.yaml | 20 + .../manager/controller_manager_config.yaml | 21 + kueue-ks/config/manager/kustomization.yaml | 10 + kueue-ks/config/manager/manager.yaml | 70 +++ kueue-ks/config/prometheus/kustomization.yaml | 2 + kueue-ks/config/prometheus/monitor.yaml | 20 + .../rbac/auth_proxy_client_clusterrole.yaml | 9 + kueue-ks/config/rbac/auth_proxy_role.yaml | 17 + .../config/rbac/auth_proxy_role_binding.yaml | 12 + kueue-ks/config/rbac/auth_proxy_service.yaml | 15 + kueue-ks/config/rbac/kustomization.yaml | 18 + .../config/rbac/leader_election_role.yaml | 37 ++ .../rbac/leader_election_role_binding.yaml | 12 + kueue-ks/config/rbac/role_binding.yaml | 12 + kueue-ks/config/rbac/service_account.yaml | 5 + .../controllers/admissioncheck_controller.go | 129 +++++ .../controllers/clustermetrics_controller.go | 159 +++++ kueue-ks/controllers/workload_controller.go | 395 +++++++++++++ kueue-ks/examples/batch-job-spot.yaml | 29 + kueue-ks/examples/batch-job.yaml | 27 + kueue-ks/examples/pytorch-simple-job.yaml | 35 ++ kueue-ks/go.mod | 79 +++ kueue-ks/go.sum | 292 ++++++++++ kueue-ks/hack/boilerplate.go.txt | 15 + scripts/common/install-ks-k3d.sh | 123 ++++ scripts/kueue/install-all.sh | 24 + scripts/kueue/install-kueue-ks.sh | 274 +++++++++ .../kueue/templates/admissioncheck-ks.yaml | 6 + .../templates/binding-policy-cluster1.yaml | 14 + .../templates/binding-policy-cluster2.yaml | 14 + scripts/kueue/templates/default-flavor.yaml | 4 + .../kueue/templates/spot-resource-flavor.yaml | 7 + .../templates/transform-pytorch-job.yaml | 9 + scripts/kueue/templates/user-queue-ks.yaml | 7 + .../templates/zero-cluster-queue-ks.yaml | 26 + 55 files changed, 3347 insertions(+), 2 deletions(-) create mode 100644 charts/kueue-ks/.helmignore create mode 100644 charts/kueue-ks/Chart.yaml create mode 100644 charts/kueue-ks/resources.yaml create mode 100644 charts/kueue-ks/templates/_helpers.tpl create mode 100644 charts/kueue-ks/templates/operator.yaml create mode 100644 charts/kueue-ks/templates/resources.yaml create mode 100644 charts/kueue-ks/templates/tests/test-connection.yaml create mode 100644 charts/kueue-ks/values.yaml create mode 100644 kueue-ks/.dockerignore create mode 100644 kueue-ks/.gitignore create mode 100644 kueue-ks/Dockerfile create mode 100644 kueue-ks/Makefile create mode 100644 kueue-ks/PROJECT create mode 100644 kueue-ks/README.md create mode 100644 kueue-ks/cmd/kueue-ks/main.go create mode 100644 kueue-ks/config/default/kustomization.yaml create mode 100644 
kueue-ks/config/default/manager_auth_proxy_patch.yaml create mode 100644 kueue-ks/config/default/manager_config_patch.yaml create mode 100644 kueue-ks/config/manager/controller_manager_config.yaml create mode 100644 kueue-ks/config/manager/kustomization.yaml create mode 100644 kueue-ks/config/manager/manager.yaml create mode 100644 kueue-ks/config/prometheus/kustomization.yaml create mode 100644 kueue-ks/config/prometheus/monitor.yaml create mode 100644 kueue-ks/config/rbac/auth_proxy_client_clusterrole.yaml create mode 100644 kueue-ks/config/rbac/auth_proxy_role.yaml create mode 100644 kueue-ks/config/rbac/auth_proxy_role_binding.yaml create mode 100644 kueue-ks/config/rbac/auth_proxy_service.yaml create mode 100644 kueue-ks/config/rbac/kustomization.yaml create mode 100644 kueue-ks/config/rbac/leader_election_role.yaml create mode 100644 kueue-ks/config/rbac/leader_election_role_binding.yaml create mode 100644 kueue-ks/config/rbac/role_binding.yaml create mode 100644 kueue-ks/config/rbac/service_account.yaml create mode 100644 kueue-ks/controllers/admissioncheck_controller.go create mode 100644 kueue-ks/controllers/clustermetrics_controller.go create mode 100644 kueue-ks/controllers/workload_controller.go create mode 100644 kueue-ks/examples/batch-job-spot.yaml create mode 100644 kueue-ks/examples/batch-job.yaml create mode 100644 kueue-ks/examples/pytorch-simple-job.yaml create mode 100644 kueue-ks/go.mod create mode 100644 kueue-ks/go.sum create mode 100644 kueue-ks/hack/boilerplate.go.txt create mode 100755 scripts/common/install-ks-k3d.sh create mode 100644 scripts/kueue/install-all.sh create mode 100755 scripts/kueue/install-kueue-ks.sh create mode 100644 scripts/kueue/templates/admissioncheck-ks.yaml create mode 100644 scripts/kueue/templates/binding-policy-cluster1.yaml create mode 100644 scripts/kueue/templates/binding-policy-cluster2.yaml create mode 100644 scripts/kueue/templates/default-flavor.yaml create mode 100644 scripts/kueue/templates/spot-resource-flavor.yaml create mode 100644 scripts/kueue/templates/transform-pytorch-job.yaml create mode 100644 scripts/kueue/templates/user-queue-ks.yaml create mode 100644 scripts/kueue/templates/zero-cluster-queue-ks.yaml diff --git a/charts/cluster-metrics/templates/operator.yaml b/charts/cluster-metrics/templates/operator.yaml index eeed366..407e5bf 100644 --- a/charts/cluster-metrics/templates/operator.yaml +++ b/charts/cluster-metrics/templates/operator.yaml @@ -297,7 +297,7 @@ spec: - --metrics-bind-address=127.0.0.1:8080 - --leader-elect - --metrics-name={{.Values.clusterName}} - image: ko.local/cluster-metrics:439831d + image: ko.local/cluster-metrics:102f815 livenessProbe: httpGet: path: /healthz diff --git a/charts/kueue-ks/.helmignore b/charts/kueue-ks/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/kueue-ks/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/kueue-ks/Chart.yaml b/charts/kueue-ks/Chart.yaml new file mode 100644 index 0000000..2dd0bbd --- /dev/null +++ b/charts/kueue-ks/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: kueue-ks +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/charts/kueue-ks/resources.yaml b/charts/kueue-ks/resources.yaml new file mode 100644 index 0000000..e69de29 diff --git a/charts/kueue-ks/templates/_helpers.tpl b/charts/kueue-ks/templates/_helpers.tpl new file mode 100644 index 0000000..fc2e70f --- /dev/null +++ b/charts/kueue-ks/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "kueue-ks.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "kueue-ks.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "kueue-ks.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "kueue-ks.labels" -}} +helm.sh/chart: {{ include "kueue-ks.chart" . }} +{{ include "kueue-ks.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "kueue-ks.selectorLabels" -}} +app.kubernetes.io/name: {{ include "kueue-ks.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "kueue-ks.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "kueue-ks.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/kueue-ks/templates/operator.yaml b/charts/kueue-ks/templates/operator.yaml new file mode 100644 index 0000000..023f4b2 --- /dev/null +++ b/charts/kueue-ks/templates/operator.yaml @@ -0,0 +1,543 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-controller-manager + namespace: kueue-ks-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-leader-election-role + namespace: kueue-ks-system +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-kueue-ks-editor-role +rules: +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics/status + verbs: + - get +- apiGroups: + - control.kubestellar.io + resources: + - bindingpolicies + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks + - workloads + - clusterqueues + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/status + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs + verbs: + - delete + - create + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + - pytorchjobs/status + verbs: + - delete + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-kueue-ks-viewer-role +rules: +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics + verbs: + - get + - list + - watch +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics/status + verbs: + - get +- apiGroups: + - control.kubestellar.io + resources: + - bindingpolicies + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks + - workloads + - clusterqueues + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/status + verbs: + - delete + - 
get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs + verbs: + - delete + - create + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + - pytorchjobs/status + verbs: + - delete + - get + - list + - patch + - update + - watch +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kueue-ks-manager-role +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics/finalizers + verbs: + - update +- apiGroups: + - galaxy.kubestellar.io + resources: + - clustermetrics/status + verbs: + - get + - patch + - update +- apiGroups: + - control.kubestellar.io + resources: + - bindingpolicies + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks + - workloads + - clusterqueues + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/status + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs + verbs: + - delete + - create + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + - pytorchjobs/status + verbs: + - delete + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-proxy-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-leader-election-rolebinding + namespace: kueue-ks-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kueue-ks-leader-election-role +subjects: +- kind: ServiceAccount + name: kueue-ks-controller-manager + namespace: kueue-ks-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kueue-ks-manager-role +subjects: +- kind: ServiceAccount + name: kueue-ks-controller-manager + namespace: kueue-ks-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: 
+ app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + name: kueue-ks-proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kueue-ks-proxy-role +subjects: +- kind: ServiceAccount + name: kueue-ks-controller-manager + namespace: kueue-ks-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + control-plane: controller-manager + name: kueue-ks-controller-manager-metrics-service + namespace: kueue-ks-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: https + selector: + control-plane: controller-manager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: kueue-ks + control-plane: controller-manager + name: kueue-ks-controller-manager + namespace: kueue-ks-system +spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + containers: + - args: + - --secure-listen-address=0.0.0.0:8443 + - --upstream=http://127.0.0.1:8080/ + - --logtostderr=true + - --v=0 + image: gcr.io/kubebuilder/kube-rbac-proxy:v0.16.0 + name: kube-rbac-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + - args: + - --health-probe-bind-address=:8081 + - --metrics-bind-address=127.0.0.1:8080 + - --leader-elect + image: ko.local/kueue-ks:102f815 + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + securityContext: + runAsNonRoot: true + serviceAccountName: kueue-ks-controller-manager + terminationGracePeriodSeconds: 10 diff --git a/charts/kueue-ks/templates/resources.yaml b/charts/kueue-ks/templates/resources.yaml new file mode 100644 index 0000000..e69de29 diff --git a/charts/kueue-ks/templates/tests/test-connection.yaml b/charts/kueue-ks/templates/tests/test-connection.yaml new file mode 100644 index 0000000..ad0504c --- /dev/null +++ b/charts/kueue-ks/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "kueue-ks.fullname" . }}-test-connection" + labels: + {{- include "kueue-ks.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "kueue-ks.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/charts/kueue-ks/values.yaml b/charts/kueue-ks/values.yaml new file mode 100644 index 0000000..1dc457e --- /dev/null +++ b/charts/kueue-ks/values.yaml @@ -0,0 +1,68 @@ +# Default values for kueue-ks. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: nginx + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + port: 80 + + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: / + port: http +readinessProbe: + httpGet: + path: / + port: http + + diff --git a/clustermetrics/Makefile b/clustermetrics/Makefile index 1eb71c3..e5bb796 100644 --- a/clustermetrics/Makefile +++ b/clustermetrics/Makefile @@ -184,6 +184,15 @@ kind-load-image: install-local-chart: chart kind-load-image if [ ! -n "$(CONTEXT)" ] ; then helm upgrade --install --create-namespace -n clustermetrics-system cluster-metrics ../charts/cluster-metrics ${HELM_OPTS} ; else helm --kube-context ${CONTEXT} upgrade --install --create-namespace -n clustermetrics-system cluster-metrics ../charts/cluster-metrics ${HELM_OPTS} ; fi +# this is used for local testing on k3d +.PHONY: k3d-load-image +k3d-load-image: + k3d image import ${IMG} -c ${CLUSTER} + +.PHONY: k3d-install-local-chart +k3d-install-local-chart: chart k3d-load-image + if [ ! -n "$(CONTEXT)" ] ; then helm upgrade --install --create-namespace -n clustermetrics-system cluster-metrics ../charts/cluster-metrics ${HELM_OPTS} ; else helm --kube-context ${CONTEXT} upgrade --install --create-namespace -n clustermetrics-system cluster-metrics ../charts/cluster-metrics ${HELM_OPTS} ; fi + ##@ Dependencies ## Location to install dependencies to diff --git a/clustermetrics/config/manager/kustomization.yaml b/clustermetrics/config/manager/kustomization.yaml index 8ce8dd4..9f8cb89 100644 --- a/clustermetrics/config/manager/kustomization.yaml +++ b/clustermetrics/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: ko.local/cluster-metrics - newTag: 439831d + newTag: 102f815 diff --git a/kueue-ks/.dockerignore b/kueue-ks/.dockerignore new file mode 100644 index 0000000..0f04682 --- /dev/null +++ b/kueue-ks/.dockerignore @@ -0,0 +1,4 @@ +# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file +# Ignore build and test binaries. 
+bin/ +testbin/ diff --git a/kueue-ks/.gitignore b/kueue-ks/.gitignore new file mode 100644 index 0000000..c0a7a54 --- /dev/null +++ b/kueue-ks/.gitignore @@ -0,0 +1,25 @@ + +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +bin +testbin/* + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Kubernetes Generated files - skip generated files, except for vendored files + +!vendor/**/zz_generated.* + +# editor and IDE paraphernalia +.idea +*.swp +*.swo +*~ diff --git a/kueue-ks/Dockerfile b/kueue-ks/Dockerfile new file mode 100644 index 0000000..126ff49 --- /dev/null +++ b/kueue-ks/Dockerfile @@ -0,0 +1,27 @@ +# Build the manager binary +FROM golang:1.18 as builder + +WORKDIR /workspace +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY cmd/kueue-ks/main.go main.go +COPY api/ api/ +COPY controllers/ controllers/ + +# Build +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o manager main.go + +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/manager . +USER 65532:65532 + +ENTRYPOINT ["/manager"] diff --git a/kueue-ks/Makefile b/kueue-ks/Makefile new file mode 100644 index 0000000..28bd9ff --- /dev/null +++ b/kueue-ks/Makefile @@ -0,0 +1,172 @@ + +# Image URL to use all building/pushing image targets +KO_DOCKER_REPO ?= ko.local +IMAGE_TAG ?= $(shell git rev-parse --short HEAD) +#IMAGE_TAG ?= 0.0.1 +CMD_NAME ?= kueue-ks +IMG ?= ${KO_DOCKER_REPO}/${CMD_NAME}:${IMAGE_TAG} +# Image URL to use all building/pushing image targets +#IMG ?= controller:latest +# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. +ENVTEST_K8S_VERSION = 1.24.1 + +ARCH := $(shell go env GOARCH) +OS := $(shell go env GOOS) + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# CONTAINER_TOOL defines the container tool to be used for building images. +# Be aware that the target commands are only tested with Docker which is +# scaffolded by default. However, you might want to replace it to use other +# tools. (i.e. podman) +CONTAINER_TOOL ?= docker + +# Setting SHELL to bash allows bash commands to be executed by recipes. +# This is a requirement for 'setup-envtest.sh' in the test target. +# Options are set to exit when a recipe line exits non-zero or a piped command fails. +SHELL = /usr/bin/env bash -o pipefail +.SHELLFLAGS = -ec + +.PHONY: all +all: build + +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk commands is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. 
+# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Development + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." + +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: test +test: manifests generate fmt vet envtest ## Run tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./... -coverprofile cover.out + +##@ Build + +.PHONY: build +build: generate fmt vet ## Build manager binary. + go build -o bin/manager main.go + +.PHONY: run +run: manifests generate fmt vet ## Run a controller from your host. + go run ./main.go + +.PHONY: docker-build +docker-build: test ## Build docker image with the manager. + docker build -t ${IMG} . + +.PHONY: docker-push +docker-push: ## Push docker image with the manager. + docker push ${IMG} + + +# this is used for local testing +#.PHONY: k3d-load-image +#k3d-load-image: +# k3d image import ${IMG} -c ${CLUSTER} + +#.PHONY: k3d-install-local-chart +#k3d-install-local-chart: chart k3d-load-image +# if [ ! -n "$(CONTEXT)" ] ; then helm upgrade --install --create-namespace -n clustermetrics-system cluster-m +#etrics ../charts/cluster-metrics ${HELM_OPTS} ; else helm --kube-context ${CONTEXT} upgrade --install --create-names +#pace -n clustermetrics-system cluster-metrics ../charts/cluster-metrics ${HELM_OPTS} ; fi + +##@ Deployment + +ifndef ignore-not-found + ignore-not-found = false +endif + +.PHONY: install +install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. + $(KUSTOMIZE) build config/crd | kubectl apply -f - + +.PHONY: uninstall +uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/crd | kubectl delete --ignore-not-found=$(ignore-not-found) -f - + +.PHONY: deploy +deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default | kubectl apply -f - + +.PHONY: undeploy +undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. 
+ $(KUSTOMIZE) build config/default | kubectl delete --ignore-not-found=$(ignore-not-found) -f - + +.PHONY: ko-local-build +ko-local-build: + KO_DOCKER_REPO=${KO_DOCKER_REPO} ko build -B ./cmd/${CMD_NAME} -t ${IMAGE_TAG} --platform linux/${ARCH} + +.PHONY: k3d-load-image +k3d-load-image: + k3d image import ${IMG} -c ${CLUSTER} + +.PHONY: k3d-install-local-chart +k3d-install-local-chart: k3d-load-image + if [ ! -n "$(CONTEXT)" ] ; then helm upgrade --install --create-namespace -n kueue-ks-system kueue-ks ../charts/kueue-ks ${HELM_OPTS} ; else helm --kube-context ${CONTEXT} upgrade --install --create-namespace -n kueue-ks-system kueue-ks ../charts/kueue-ks ${HELM_OPTS} ; fi + +##@ Build Dependencies + +## Location to install dependencies to +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) + +## Tool Binaries +KUSTOMIZE ?= $(LOCALBIN)/kustomize +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen +ENVTEST ?= $(LOCALBIN)/setup-envtest + +## Tool Versions +KUSTOMIZE_VERSION ?= v3.8.7 +CONTROLLER_TOOLS_VERSION ?= v0.9.0 + +KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" +.PHONY: kustomize +kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. +$(KUSTOMIZE): $(LOCALBIN) + curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN) + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. +$(CONTROLLER_GEN): $(LOCALBIN) + GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION) + +.PHONY: envtest +envtest: $(ENVTEST) ## Download envtest-setup locally if necessary. +$(ENVTEST): $(LOCALBIN) + GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest diff --git a/kueue-ks/PROJECT b/kueue-ks/PROJECT new file mode 100644 index 0000000..e4abdf8 --- /dev/null +++ b/kueue-ks/PROJECT @@ -0,0 +1,22 @@ +domain: galaxy.kubestellar.io +layout: +- go.kubebuilder.io/v3 +projectName: kueue-ks +repo: kubestellar/galaxy/kueue-ks +resources: +- controller: true + domain: galaxy.kubestellar.io + group: kueue.x-k8s.io + kind: Workload + version: v1beta1 +- controller: true + domain: galaxy.kubestellar.io + group: galaxy.kubestellar.io + kind: ClusterMetrics + version: v1alpha1 +- controller: true + domain: galaxy.kubestellar.io + group: kueue.x-k8s.io + kind: AdmissionCheck + version: v1beta1 +version: "3" diff --git a/kueue-ks/README.md b/kueue-ks/README.md new file mode 100644 index 0000000..e6eddcd --- /dev/null +++ b/kueue-ks/README.md @@ -0,0 +1,94 @@ +# kueue-ks +// TODO(user): Add simple overview of use/purpose + +## Description +// TODO(user): An in-depth paragraph about your project and overview of use + +## Getting Started +You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) to get a local cluster for testing, or run against a remote cluster. +**Note:** Your controller will automatically use the current context in your kubeconfig file (i.e. whatever cluster `kubectl cluster-info` shows). + +### Running on the cluster +1. Install Instances of Custom Resources: + +```sh +kubectl apply -f config/samples/ +``` + +2. Build and push your image to the location specified by `IMG`: + +```sh +make docker-build docker-push IMG=/kueue-ks:tag +``` + +3. 
Deploy the controller to the cluster with the image specified by `IMG`: + +```sh +make deploy IMG=/kueue-ks:tag +``` + +### Uninstall CRDs +To delete the CRDs from the cluster: + +```sh +make uninstall +``` + +### Undeploy controller +Undeploy the controller from the cluster: + +```sh +make undeploy +``` + +## Contributing +// TODO(user): Add detailed information on how you would like others to contribute to this project + +### How it works +This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) + +It uses [Controllers](https://kubernetes.io/docs/concepts/architecture/controller/) +which provide a reconcile function responsible for synchronizing resources until the desired state is reached on the cluster + +### Test It Out +1. Install the CRDs into the cluster: + +```sh +make install +``` + +2. Run your controller (this will run in the foreground, so switch to a new terminal if you want to leave it running): + +```sh +make run +``` + +**NOTE:** You can also run this in one step by running: `make install run` + +### Modifying the API definitions +If you are editing the API definitions, generate the manifests such as CRs or CRDs using: + +```sh +make manifests +``` + +**NOTE:** Run `make --help` for more information on all potential `make` targets + +More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html) + +## License + +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/kueue-ks/cmd/kueue-ks/main.go b/kueue-ks/cmd/kueue-ks/main.go new file mode 100644 index 0000000..d1bc35d --- /dev/null +++ b/kueue-ks/cmd/kueue-ks/main.go @@ -0,0 +1,196 @@ +/* +Copyright 2024 The KubeStellar Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "crypto/tls" + "flag" + "os" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them.
+ "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + _ "k8s.io/client-go/plugin/pkg/client/auth" + "k8s.io/client-go/restmapper" + + "kubestellar/galaxy/kueue-ks/controllers" + + metricsv1alpha1 "kubestellar/galaxy/clustermetrics/api/v1alpha1" + scheduler "kubestellar/galaxy/mc-scheduling/pkg/scheduler" + ksv1alpha1 "github.com/kubestellar/kubestellar/api/control/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" + kueueClient "sigs.k8s.io/kueue/client-go/clientset/versioned" + + //+kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(kueue.AddToScheme(scheme)) + utilruntime.Must(ksv1alpha1.AddToScheme(scheme)) + utilruntime.Must(metricsv1alpha1.AddToScheme(scheme)) + //+kubebuilder:scaffold:scheme +} + +func main() { + var secureMetrics bool + var metricsAddr string + var enableLeaderElection bool + var probeAddr string + var enableHTTP2 bool + var clusterQueue string + + flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", false, + "If set the metrics endpoint is served securely") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + flag.StringVar(&clusterQueue, "clusterQueue-name", "cluster-queue-ks", "cluster queue name") + + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + // if the enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + tlsOpts := []func(*tls.Config){} + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: tlsOpts, + }) + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + TLSOpts: tlsOpts, + }, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "423ebda8.kubestellar.io", + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. 
This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly + // speeds up voluntary leader transitions as the new leader doesn't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so it would be fine to enable this option. However, + // if you are doing or intend to do any operation such as performing cleanups + // after the manager stops then its usage might be unsafe. + // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + kClient, err := kueueClient.NewForConfig(ctrl.GetConfigOrDie()) + if err != nil { + setupLog.Error(err, "unable to create Kueue clientset") + os.Exit(1) + } + + // in reality, you would only construct these once + clientset, err := kubernetes.NewForConfig(ctrl.GetConfigOrDie()) + if err != nil { + setupLog.Error(err, "unable to create Kubernetes clientset") + os.Exit(1) + } + + groupResources, err := restmapper.GetAPIGroupResources(clientset.Discovery()) + if err != nil { + setupLog.Error(err, "unable to get API group resources") + os.Exit(1) + } + rm := restmapper.NewDiscoveryRESTMapper(groupResources) + if err = (&controllers.WorkloadReconciler{ + Client: mgr.GetClient(), + KueueClient: kClient, + DynamicClient: dynamic.NewForConfigOrDie(ctrl.GetConfigOrDie()), + RestMapper: rm, + Scheduler: scheduler.NewDefaultScheduler(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Workload") + os.Exit(1) + } + if err = (&controllers.ClusterMetricsReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + WorkerClusters: make(map[string]metricsv1alpha1.ClusterMetrics), + ClusterQueue: clusterQueue, + + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ClusterMetrics") + os.Exit(1) + } + if err = (&controllers.AdmissionCheckReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "AdmissionCheck") + os.Exit(1) + } + //+kubebuilder:scaffold:builder + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/kueue-ks/config/default/kustomization.yaml b/kueue-ks/config/default/kustomization.yaml new file mode 100644 index 0000000..2ae8e04 --- /dev/null +++ b/kueue-ks/config/default/kustomization.yaml @@ -0,0 +1,74 @@ +# Adds namespace to all resources. +namespace: kueue-ks-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: kueue-ks- + +# Labels to add to all resources and selectors.
+#commonLabels: +# someName: someValue + +bases: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. +#- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus + +patchesStrategicMerge: +# Protect the /metrics endpoint by putting it behind auth. +# If you want your controller-manager to expose the /metrics +# endpoint w/o any authn/z, please comment the following line. +- manager_auth_proxy_patch.yaml + +# Mount the controller config file for loading manager configurations +# through a ComponentConfig type +#- manager_config_patch.yaml + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- manager_webhook_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. +# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. +# 'CERTMANAGER' needs to be enabled to use ca injection +#- webhookcainjection_patch.yaml + +# the following config is for teaching kustomize how to do var substitution +vars: +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. +#- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR +# objref: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # this name should match the one in certificate.yaml +# fieldref: +# fieldpath: metadata.namespace +#- name: CERTIFICATE_NAME +# objref: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # this name should match the one in certificate.yaml +#- name: SERVICE_NAMESPACE # namespace of the service +# objref: +# kind: Service +# version: v1 +# name: webhook-service +# fieldref: +# fieldpath: metadata.namespace +#- name: SERVICE_NAME +# objref: +# kind: Service +# version: v1 +# name: webhook-service diff --git a/kueue-ks/config/default/manager_auth_proxy_patch.yaml b/kueue-ks/config/default/manager_auth_proxy_patch.yaml new file mode 100644 index 0000000..28a6ef7 --- /dev/null +++ b/kueue-ks/config/default/manager_auth_proxy_patch.yaml @@ -0,0 +1,39 @@ +# This patch inject a sidecar container which is a HTTP proxy for the +# controller manager, it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: kube-rbac-proxy + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + image: gcr.io/kubebuilder/kube-rbac-proxy:v0.12.0 + args: + - "--secure-listen-address=0.0.0.0:8443" + - "--upstream=http://127.0.0.1:8080/" + - "--logtostderr=true" + - "--v=0" + ports: + - containerPort: 8443 + protocol: TCP + name: https + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 64Mi + - name: manager + args: + - "--health-probe-bind-address=:8081" + - "--metrics-bind-address=127.0.0.1:8080" + - "--leader-elect" diff --git a/kueue-ks/config/default/manager_config_patch.yaml b/kueue-ks/config/default/manager_config_patch.yaml new file mode 100644 index 0000000..6c40015 --- /dev/null +++ b/kueue-ks/config/default/manager_config_patch.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + args: + - "--config=controller_manager_config.yaml" + volumeMounts: + - name: manager-config + mountPath: /controller_manager_config.yaml + subPath: controller_manager_config.yaml + volumes: + - name: manager-config + configMap: + name: manager-config diff --git a/kueue-ks/config/manager/controller_manager_config.yaml b/kueue-ks/config/manager/controller_manager_config.yaml new file mode 100644 index 0000000..30ae78a --- /dev/null +++ b/kueue-ks/config/manager/controller_manager_config.yaml @@ -0,0 +1,21 @@ +apiVersion: controller-runtime.sigs.k8s.io/v1alpha1 +kind: ControllerManagerConfig +health: + healthProbeBindAddress: :8081 +metrics: + bindAddress: 127.0.0.1:8080 +webhook: + port: 9443 +leaderElection: + leaderElect: true + resourceName: 132661b5.galaxy.kubestellar.io +# leaderElectionReleaseOnCancel defines if the leader should step down voluntarily +# when the Manager ends. This requires the binary to immediately end when the +# Manager is stopped, otherwise, this setting is unsafe. Setting this significantly +# speeds up voluntary leader transitions as the new leader doesn't have to wait +# LeaseDuration time first. +# In the default scaffold provided, the program ends immediately after +# the manager stops, so it would be fine to enable this option. However, +# if you are doing or intend to do any operation such as performing cleanups +# after the manager stops then its usage might be unsafe.
+# leaderElectionReleaseOnCancel: true diff --git a/kueue-ks/config/manager/kustomization.yaml b/kueue-ks/config/manager/kustomization.yaml new file mode 100644 index 0000000..2bcd3ee --- /dev/null +++ b/kueue-ks/config/manager/kustomization.yaml @@ -0,0 +1,10 @@ +resources: +- manager.yaml + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: +- name: manager-config + files: + - controller_manager_config.yaml diff --git a/kueue-ks/config/manager/manager.yaml b/kueue-ks/config/manager/manager.yaml new file mode 100644 index 0000000..878ad48 --- /dev/null +++ b/kueue-ks/config/manager/manager.yaml @@ -0,0 +1,70 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + name: system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager +spec: + selector: + matchLabels: + control-plane: controller-manager + replicas: 1 + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + securityContext: + runAsNonRoot: true + # TODO(user): For common cases that do not require escalating privileges + # it is recommended to ensure that all your Pods/Containers are restrictive. + # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + # Please uncomment the following code if your project does NOT have to work on old Kubernetes + # versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ). + # seccompProfile: + # type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + image: controller:latest + name: manager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + # TODO(user): Configure the resources accordingly based on the project requirements. 
+ # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 diff --git a/kueue-ks/config/prometheus/kustomization.yaml b/kueue-ks/config/prometheus/kustomization.yaml new file mode 100644 index 0000000..ed13716 --- /dev/null +++ b/kueue-ks/config/prometheus/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- monitor.yaml diff --git a/kueue-ks/config/prometheus/monitor.yaml b/kueue-ks/config/prometheus/monitor.yaml new file mode 100644 index 0000000..d19136a --- /dev/null +++ b/kueue-ks/config/prometheus/monitor.yaml @@ -0,0 +1,20 @@ + +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager diff --git a/kueue-ks/config/rbac/auth_proxy_client_clusterrole.yaml b/kueue-ks/config/rbac/auth_proxy_client_clusterrole.yaml new file mode 100644 index 0000000..51a75db --- /dev/null +++ b/kueue-ks/config/rbac/auth_proxy_client_clusterrole.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/kueue-ks/config/rbac/auth_proxy_role.yaml b/kueue-ks/config/rbac/auth_proxy_role.yaml new file mode 100644 index 0000000..80e1857 --- /dev/null +++ b/kueue-ks/config/rbac/auth_proxy_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: proxy-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/kueue-ks/config/rbac/auth_proxy_role_binding.yaml b/kueue-ks/config/rbac/auth_proxy_role_binding.yaml new file mode 100644 index 0000000..ec7acc0 --- /dev/null +++ b/kueue-ks/config/rbac/auth_proxy_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: proxy-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/kueue-ks/config/rbac/auth_proxy_service.yaml b/kueue-ks/config/rbac/auth_proxy_service.yaml new file mode 100644 index 0000000..71f1797 --- /dev/null +++ b/kueue-ks/config/rbac/auth_proxy_service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: https + selector: + control-plane: controller-manager diff --git a/kueue-ks/config/rbac/kustomization.yaml b/kueue-ks/config/rbac/kustomization.yaml new file mode 100644 index 0000000..731832a --- /dev/null +++ b/kueue-ks/config/rbac/kustomization.yaml @@ -0,0 +1,18 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. 
You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# Comment the following 4 lines if you want to disable +# the auth proxy (https://github.com/brancz/kube-rbac-proxy) +# which protects your /metrics endpoint. +- auth_proxy_service.yaml +- auth_proxy_role.yaml +- auth_proxy_role_binding.yaml +- auth_proxy_client_clusterrole.yaml diff --git a/kueue-ks/config/rbac/leader_election_role.yaml b/kueue-ks/config/rbac/leader_election_role.yaml new file mode 100644 index 0000000..4190ec8 --- /dev/null +++ b/kueue-ks/config/rbac/leader_election_role.yaml @@ -0,0 +1,37 @@ +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git a/kueue-ks/config/rbac/leader_election_role_binding.yaml b/kueue-ks/config/rbac/leader_election_role_binding.yaml new file mode 100644 index 0000000..1d1321e --- /dev/null +++ b/kueue-ks/config/rbac/leader_election_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/kueue-ks/config/rbac/role_binding.yaml b/kueue-ks/config/rbac/role_binding.yaml new file mode 100644 index 0000000..2070ede --- /dev/null +++ b/kueue-ks/config/rbac/role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/kueue-ks/config/rbac/service_account.yaml b/kueue-ks/config/rbac/service_account.yaml new file mode 100644 index 0000000..7cd6025 --- /dev/null +++ b/kueue-ks/config/rbac/service_account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: controller-manager + namespace: system diff --git a/kueue-ks/controllers/admissioncheck_controller.go b/kueue-ks/controllers/admissioncheck_controller.go new file mode 100644 index 0000000..95e2b54 --- /dev/null +++ b/kueue-ks/controllers/admissioncheck_controller.go @@ -0,0 +1,129 @@ +/* +Copyright 2024 The KubeStellar Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controllers + +import ( + "context" + "fmt" + + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" +) + +// AdmissionCheckReconciler reconciles a AdmissionCheck object +type AdmissionCheckReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +const ( + // AdmissionCheckActive indicates that the controller of the admission check is + // ready to evaluate the checks states + AdmissionCheckActive string = "Active" + + // AdmissionChecksSingleInstanceInClusterQueue indicates if the AdmissionCheck should be the only + // one managed by the same controller (as determined by the controllerName field) in a ClusterQueue. + // Having multiple AdmissionChecks managed by the same controller where at least one has this condition + // set to true will cause the ClusterQueue to be marked as Inactive. + AdmissionChecksSingleInstanceInClusterQueue string = "SingleInstanceInClusterQueue" +) +const ( + ControllerName = "kubestellar.io/ks-kueue" + SingleInstanceReason = "KubestellarKueue" + SingleInstanceMessage = "only one KubestellarKueue managed admission check can be used in one ClusterQueue" +) + +//+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=admissionchecks,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=admissionchecks/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=admissionchecks/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the AdmissionCheck object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. 
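+// As a rough, illustrative sketch (not copied from the templates added elsewhere in
+// this patch), an AdmissionCheck managed by this controller points at ControllerName
+// through spec.controllerName; the metadata.name "kubestellar" below is only a
+// placeholder:
+//
+//	apiVersion: kueue.x-k8s.io/v1beta1
+//	kind: AdmissionCheck
+//	metadata:
+//	  name: kubestellar
+//	spec:
+//	  controllerName: kubestellar.io/ks-kueue
+//
+// Checks whose spec.controllerName differs are ignored by the Reconcile below.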
+// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.1/pkg/reconcile +func (r *AdmissionCheckReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + ac := &kueue.AdmissionCheck{} + if err := r.Client.Get(ctx, req.NamespacedName, ac); err != nil || ac.Spec.ControllerName != ControllerName { + return reconcile.Result{}, client.IgnoreNotFound(err) + } + + log.Info("Reconcile AdmissionCheck - AC Name:" + ac.Name) + for key, value := range ac.GetLabels() { + fmt.Printf("Label %s:%s\n", key, value) + } + for key1, value1 := range ac.GetAnnotations() { + fmt.Printf("Annotation %s:%s\n", key1, value1) + } + newCondition := metav1.Condition{ + Type: kueue.AdmissionCheckActive, + Status: metav1.ConditionTrue, + Reason: "Active", + Message: "The admission check is active", + } + needsUpdate := false + oldCondition := apimeta.FindStatusCondition(ac.Status.Conditions, kueue.AdmissionCheckActive) + if !cmpConditionState(oldCondition, &newCondition) { + apimeta.SetStatusCondition(&ac.Status.Conditions, newCondition) + needsUpdate = true + } + if !apimeta.IsStatusConditionTrue(ac.Status.Conditions, AdmissionChecksSingleInstanceInClusterQueue) { + apimeta.SetStatusCondition(&ac.Status.Conditions, metav1.Condition{ + Type: AdmissionChecksSingleInstanceInClusterQueue, + Status: metav1.ConditionTrue, + Reason: SingleInstanceReason, + Message: SingleInstanceMessage, + }) + needsUpdate = true + } + + if needsUpdate { + err := r.Client.Status().Update(ctx, ac) + if err != nil { + log.V(2).Error(err, "Updating check condition", "newCondition", newCondition) + } + return reconcile.Result{}, err + } + return ctrl.Result{}, nil +} +func cmpConditionState(a, b *metav1.Condition) bool { + if a == b { + return true + } + if a == nil || b == nil { + return false + } + return a.Status == b.Status && a.Reason == b.Reason && a.Message == b.Message +} + +// SetupWithManager sets up the controller with the Manager. +func (r *AdmissionCheckReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&kueue.AdmissionCheck{}). + Complete(r) +} diff --git a/kueue-ks/controllers/clustermetrics_controller.go b/kueue-ks/controllers/clustermetrics_controller.go new file mode 100644 index 0000000..b783754 --- /dev/null +++ b/kueue-ks/controllers/clustermetrics_controller.go @@ -0,0 +1,159 @@ +/* +Copyright 2024 The KubeStellar Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controllers + +import ( + "context" + "fmt" + + clustermetrics "kubestellar/galaxy/clustermetrics/api/v1alpha1" + + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + //"k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + v1beta1 "sigs.k8s.io/kueue/apis/kueue/v1beta1" +) + +// ClusterMetricsReconciler reconciles a ClusterMetrics object +type ClusterMetricsReconciler struct { + client.Client + Scheme *runtime.Scheme + WorkerClusters map[string]clustermetrics.ClusterMetrics + ClusterQueue string +} +//+kubebuilder:rbac:groups=galaxy.kubestellar.io.galaxy.kubestellar.io,resources=clustermetrics,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=galaxy.kubestellar.io.galaxy.kubestellar.io,resources=clustermetrics/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=galaxy.kubestellar.io.galaxy.kubestellar.io,resources=clustermetrics/finalizers,verbs=update + +/* +func NewClusterMetricsReconciler(c client.Client, s *runtime.Scheme, q string, cfg *rest.Config) *ClusterMetricsReconciler { + return &ClusterMetricsReconciler{ + Client: c, + Scheme: s, + WorkerClusters: make(map[string]clustermetrics.ClusterMetrics), + ClusterQueue: q, + } +} +*/ +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the ClusterMetrics object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.1/pkg/reconcile +func (r *ClusterMetricsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + clusterMetrics := clustermetrics.ClusterMetrics{} + + if err := r.Client.Get(ctx, req.NamespacedName, &clusterMetrics); err != nil { + fmt.Printf("unable to get clusterinfo: %s", err) + return ctrl.Result{}, err + } + log.Info("%%%%%%%%%% ", "Cluster", clusterMetrics.Name) + cachedClusterMetrics, found := r.WorkerClusters[clusterMetrics.Name] + if !found { + r.WorkerClusters[clusterMetrics.Name] = clusterMetrics + } else { + cachedClusterMetrics.Status.Nodes = clusterMetrics.Status.Nodes + } + + available := map[string]*resource.Quantity{} + for _, cm := range r.WorkerClusters { + for _, node := range cm.Status.Nodes { + //log.Info("%%%%%%%%%% ", "Cluster", cm.Name) + if available["cpu"] == nil { + available["cpu"] = resource.NewQuantity(0, resource.BinarySI) + } + available["cpu"].Add(*node.AllocatableResources.Cpu()) + + if available["memory"] == nil { + available["memory"] = resource.NewQuantity(0, resource.BinarySI) + } + available["memory"].Add(*node.AllocatableResources.Memory()) + } + } + clusterQueue := v1beta1.ClusterQueue{} + qq := types.NamespacedName{ + Name: r.ClusterQueue, + } + if err := r.Client.Get(ctx, qq, &clusterQueue); err != nil { + fmt.Printf("unable to get clusterqueue: %s", err) + return ctrl.Result{}, err + } + Default := 0 + update := false + //log.Info("Clusterqueue :::::::", "Resources", clusterQueue.Spec.ResourceGroups[0].Flavors[Default]) + queueNominalCpuCount := clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].NominalQuota + if 
clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].Name == "cpu" { + // log.Info("Clusterqueue nominal ---- CPU") + if available["cpu"] != nil { + // log.Info("Clusterqueue nominal cpus ----", + // "", queueNominalCpuCount, + // "", queueNominalCpuCount.Format) + if available["cpu"].Value() > queueNominalCpuCount.Value() { + update = true + delta := available["cpu"].DeepCopy() + delta.Sub(queueNominalCpuCount) + queueNominalCpuCount.Add(delta) + // log.Info("ClusterQueue New CPU Quota ----", "", queueNominalCpuCount.Value()) + clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].NominalQuota = queueNominalCpuCount + } + } + } + if clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].Name == "memory" { + // log.Info("Clusterqueue nominal ---- MEMORY") + queueNominalMemoryQuota := clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].NominalQuota //.ScaledValue(resource.Giga) + + if available["memory"] != nil { + // log.Info("Clusterqueue nominal memory ----", + // "", queueNominalMemoryQuota, + // "", queueNominalMemoryQuota.Format) + if available["memory"].ScaledValue(resource.Kilo) > queueNominalMemoryQuota.ScaledValue(resource.Kilo) { + update = true + delta := available["memory"].DeepCopy() + delta.Sub(queueNominalMemoryQuota) + queueNominalMemoryQuota.Add(delta) + // log.Info("ClusterQueue New Memory Quota ----", "", queueNominalMemoryQuota) + clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].NominalQuota = queueNominalMemoryQuota + } + } + } + if update { + log.Info("Updating ClusterQueue") + if err := r.Client.Update(ctx, &clusterQueue); err != nil { + fmt.Printf("unable to Update clusterqueue: %s", err) + return ctrl.Result{}, nil + } + + } + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *ClusterMetricsReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&clustermetrics.ClusterMetrics{}). + Complete(r) +} diff --git a/kueue-ks/controllers/workload_controller.go b/kueue-ks/controllers/workload_controller.go new file mode 100644 index 0000000..b64604c --- /dev/null +++ b/kueue-ks/controllers/workload_controller.go @@ -0,0 +1,395 @@ +/* +Copyright 2024 The KubeStellar Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controllers + +import ( + "context" + "errors" + "fmt" + "time" + + metricsv1alpha1 "kubestellar/galaxy/clustermetrics/api/v1alpha1" + scheduler "kubestellar/galaxy/mc-scheduling/pkg/scheduler" + + ksv1alpha1 "github.com/kubestellar/kubestellar/api/control/v1alpha1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" + kueueClient "sigs.k8s.io/kueue/client-go/clientset/versioned" + "sigs.k8s.io/kueue/pkg/util/admissioncheck" + "sigs.k8s.io/kueue/pkg/workload" +) + +// WorkloadReconciler reconciles a Workload object +type WorkloadReconciler struct { + Client client.Client + RestMapper meta.RESTMapper + KueueClient *kueueClient.Clientset + DynamicClient *dynamic.DynamicClient + Scheduler scheduler.MultiClusterScheduler +} + +const ( + KsLabelLocationGroupKey = "location-group" + KsLabelLocationGroup = "edge" + KsLabelClusterNameKey = "name" + AssignedClusterLabel = "mcc.kubestellar.io/cluster" +) + + +//+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=workloads,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=workloads/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=workloads/finalizers,verbs=update + + +func NewWorkloadReconciler(c client.Client, kueueClient *kueueClient.Clientset, cfg *rest.Config, rm meta.RESTMapper) *WorkloadReconciler { + return &WorkloadReconciler{ + Client: c, + RestMapper: rm, + DynamicClient: dynamic.NewForConfigOrDie(cfg), + KueueClient: kueueClient, + Scheduler: scheduler.NewDefaultScheduler(), + } +} + + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the Workload object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. 
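+// At a high level the loop below works as follows: a workload that has lost its quota
+// reservation has its BindingPolicy deleted (evictJob); finished or already admitted
+// workloads are left alone; otherwise the owning job object is fetched through its
+// owner reference, the multi-cluster scheduler picks a target cluster from the
+// collected ClusterMetrics, the job is labeled with AssignedClusterLabel, and on a
+// subsequent pass the KubeStellar admission check is marked Ready and a BindingPolicy
+// is created so the job is synced to the selected work cluster.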
+// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.1/pkg/reconcile +func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconcile Workload ----") + + wl := &kueue.Workload{} + if err := r.Client.Get(ctx, req.NamespacedName, wl); err != nil { + log.Error(err, "Error when fetching Workload object ") + return reconcile.Result{}, client.IgnoreNotFound(err) + } + if !workload.HasQuotaReservation(wl) { + //1.2 workload has no reservation + log.Info("workload with no reservation, delete owned requests") + return reconcile.Result{}, r.evictJob(ctx, wl) + } + + if apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) { + //1.2 workload has no reservation or is finished + log.Info("remote workload has completed") + return reconcile.Result{}, nil + } + + if IsAdmitted(wl) { + // check the state of the request, eventually toggle the checks to false + // otherwise there is nothing to here + log.Info("workload admitted, sync checks") + return reconcile.Result{}, nil + } + + jobObject, mapping, err := r.getJobObject(ctx, wl.GetObjectMeta()) + if err != nil { + log.Error(err, "Unable to fetch JobObject") + return ctrl.Result{}, err + } + + log.Info("Found jobObject ----" + jobObject.GetName()) + if jobObject.GetLabels() == nil { + jobObject.SetLabels(map[string]string{}) + } + // jobs with assigned cluster have already been scheduled to run + if _, exists := jobObject.GetLabels()[AssignedClusterLabel]; exists { + log.Info("............ Cluster Assignment Present") + + if workload.HasAllChecksReady(wl) { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + wl := &kueue.Workload{} + if err := r.Client.Get(ctx, req.NamespacedName, wl); err != nil { + log.Error(err, "Error when fetching Workload object ") + return err + //return reconcile.Result{}, client.IgnoreNotFound(err) + } + log.Info("............ All Checks Ready") + newCondition := metav1.Condition{ + Type: string(kueue.CheckStateReady), + Status: metav1.ConditionTrue, + Reason: "ClusterAssigned", + Message: fmt.Sprintf("Job Ready for Sync to the Work Cluster %q", jobObject.GetLabels()[AssignedClusterLabel]), + } + apimeta.SetStatusCondition(&wl.Status.Conditions, newCondition) + return r.Client.Status().Update(ctx, wl) + }) + if err != nil { + return reconcile.Result{}, err + } + + meta := &metav1.ObjectMeta{ + Name: jobObject.GetName(), + Namespace: jobObject.GetNamespace(), + Labels: jobObject.GetLabels(), + } + + if err = r.createBindingPolicy(ctx, meta); err != nil { + log.Error(err, "Error creating BindingPolicy object ") + return reconcile.Result{}, err + } + log.Info("New BindingPolicy created for object", "Name", meta.Name) + + } else { + log.Info("............ 
Not All Checks Ready") + relevantChecks, err := admissioncheck.FilterForController(ctx, r.Client, wl.Status.AdmissionChecks, ControllerName) + if err != nil { + return reconcile.Result{}, err + } + + if len(relevantChecks) == 0 { + return reconcile.Result{}, nil + } + for check := range relevantChecks { + log.Info(">>>>>>>>>>>>>> relevant check", "", check) + } + acs := workload.FindAdmissionCheck(wl.Status.AdmissionChecks, relevantChecks[0]) + + if acs == nil { + log.Info(">>>>>>>>>>>>>> admissioncheck is null") + } else { + log.Info(">>>>>>>>>>>>>> ", "ACS", acs) + acs.State = kueue.CheckStateReady + acs.Message = fmt.Sprintf("The workload got reservation on %q", jobObject.GetLabels()[AssignedClusterLabel]) + // update the transition time since is used to detect the lost worker state. + acs.LastTransitionTime = metav1.NewTime(time.Now()) + wlPatch := workload.BaseSSAWorkload(wl) + workload.SetAdmissionCheckState(&wlPatch.Status.AdmissionChecks, *acs) + err := r.Client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership) + if err != nil { + return reconcile.Result{}, err + } + } + } + return ctrl.Result{}, nil + } + podSpecs, err := extractPodSpecList(wl) + if err != nil { + return ctrl.Result{}, err + } + + clusterMetricsList := &metricsv1alpha1.ClusterMetricsList{} + err = r.Client.List(ctx, clusterMetricsList, &client.ListOptions{}) + if err != nil { + return ctrl.Result{}, err + } + requeAfter := time.Duration(10 * float64(time.Second)) + + cluster := r.Scheduler.SelectCluster(podSpecs, clusterMetricsList) + + if cluster == "" { + log.Info("------------- Scheduler did not find suitable cluster for a Job to run") + return reconcile.Result{RequeueAfter: requeAfter}, nil + } + fmt.Printf("Selected cluster: %s\n", cluster) + labels := jobObject.GetLabels() + labels[AssignedClusterLabel] = cluster + + jobObject.SetLabels(labels) + + _, err = r.DynamicClient.Resource(mapping.Resource).Namespace(wl.Namespace).Update(ctx, jobObject, metav1.UpdateOptions{}) + if err != nil { + log.Error(err, "Error when Updating object ") + return ctrl.Result{}, err + } + log.Info("Updated jobObject with target cluster label ----" + jobObject.GetName()) + + return ctrl.Result{RequeueAfter: time.Duration(1 * float64(time.Second))}, nil + + +} + +func (r *WorkloadReconciler) RemoteFinishedCondition(wl kueue.Workload) *metav1.Condition { + var bestMatch *metav1.Condition + if c := apimeta.FindStatusCondition(wl.Status.Conditions, kueue.WorkloadFinished); c != nil && c.Status == metav1.ConditionTrue && (bestMatch == nil || c.LastTransitionTime.Before(&bestMatch.LastTransitionTime)) { + bestMatch = c + } + + return bestMatch +} +func extractPodSpecList(workload *kueue.Workload) ([]*corev1.PodSpec, error) { + podSpecList := []*corev1.PodSpec{} + for _, podSet := range workload.Spec.PodSets { + podSpecList = append(podSpecList, podSet.Template.Spec.DeepCopy()) + } + return podSpecList, nil +} + +/* +Evicts job from a WEC by deleting its BindingPolicy on the controlling host (HUB) +*/ +func (r *WorkloadReconciler) evictJob(ctx context.Context, workload *kueue.Workload) error { + log := log.FromContext(ctx) + log.Info("evictJob() ----") + wlOwners := workload.GetObjectMeta().GetOwnerReferences() + + meta := &metav1.ObjectMeta{ + Name: wlOwners[0].Name, + Namespace: workload.Namespace, + } + namespacedName := types.NamespacedName{ + Name: bindingPolicyName(meta), + } + bindingPolicy := ksv1alpha1.BindingPolicy{} + if err := r.Client.Get(ctx, namespacedName, 
&bindingPolicy); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return err + } + if err := r.Client.Delete(ctx, &bindingPolicy); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + log.Error(err, "evictJob() ---- Error deleting BindingPolicy object ") + return err + } + log.Info("Deleted BindingPolicy ----", "", bindingPolicyName(meta)) + return nil +} +func WorkloadKey(req ctrl.Request) string { + return fmt.Sprintf("%s/%s", req.Namespace, req.Name) +} + +// IsAdmitted returns true if the workload is admitted. +func IsAdmitted(w *kueue.Workload) bool { + return apimeta.IsStatusConditionTrue(w.Status.Conditions, kueue.WorkloadAdmitted) +} +func (r *WorkloadReconciler) getJobObject(ctx context.Context, meta metav1.Object) (*unstructured.Unstructured, *apimeta.RESTMapping, error) { + log := log.FromContext(ctx) + log.Info("getJobObject() ----") + + wlOwners := meta.GetOwnerReferences() + log.Info("workload ", "", meta.GetName()) + gvk := schema.FromAPIVersionAndKind(wlOwners[0].APIVersion, wlOwners[0].Kind) + gk := schema.GroupKind{Group: gvk.Group, Kind: gvk.Kind} + mapping, err := r.RestMapper.RESTMapping(gk, gvk.Version) + if err != nil { + log.Error(err, "Error when RESTMapping object ") + return nil, nil, err + } + jobObject, err := r.DynamicClient.Resource(mapping.Resource).Namespace(meta.GetNamespace()).Get(ctx, wlOwners[0].Name, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + log.Error(err, "Job object not found") + return nil, nil, err + } + log.Error(err, "Error when RESTMapping object ") + return nil, nil, err + } + + return jobObject.DeepCopy(), mapping, nil +} +func (*WorkloadReconciler) GetWorkloadKey(o runtime.Object) (types.NamespacedName, error) { + wl, isWl := o.(*kueue.Workload) + if !isWl { + return types.NamespacedName{}, errors.New("not a workload") + } + return client.ObjectKeyFromObject(wl), nil +} +func (r *WorkloadReconciler) createBindingPolicy(ctx context.Context, meta metav1.Object) error { + bindingPolicy := ksv1alpha1.BindingPolicy{} + namespacedName := types.NamespacedName{ + Name: bindingPolicyName(meta), + } + if err := r.Client.Get(ctx, namespacedName, &bindingPolicy); err != nil { + // create BindingPolicy object per appwrapper + if apierrors.IsNotFound(err) { + + if meta.GetLabels() != nil && meta.GetLabels()[AssignedClusterLabel] != "" { + + bindingPolicy = ksv1alpha1.BindingPolicy{ + Spec: ksv1alpha1.BindingPolicySpec{ + ClusterSelectors: []metav1.LabelSelector{ + { + MatchLabels: map[string]string{ + KsLabelLocationGroupKey: KsLabelLocationGroup, + KsLabelClusterNameKey: meta.GetLabels()[AssignedClusterLabel], + }, + }, + }, + Downsync: []ksv1alpha1.DownsyncObjectTest{ + { + Namespaces: []string{ + meta.GetNamespace(), + }, + ObjectNames: []string{ + meta.GetName(), + }, + + ObjectSelectors: []metav1.LabelSelector{ + { + MatchLabels: map[string]string{ + AssignedClusterLabel: meta.GetLabels()[AssignedClusterLabel], + }, + }, + }, + }, + }, + // turn the flag on so that kubestellar updates status of the appwrapper + WantSingletonReportedState: true, + }, + } + bindingPolicy.Name = bindingPolicyName(meta) + if err := r.Client.Create(ctx, &bindingPolicy); err != nil { + if apierrors.IsAlreadyExists(err) { + return nil + } + return err + } + // BindingPolicy created + return nil + } else { + return errors.New("cluster assignment label is missing from the Object - Add label " + AssignedClusterLabel) + } + } else { + return err + } + } + return nil +} +func bindingPolicyName(meta metav1.Object) string 
{ + return meta.GetName() + "-" + meta.GetNamespace() +} +// SetupWithManager sets up the controller with the Manager. +func (r *WorkloadReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&kueue.Workload{}). + Complete(r) +} diff --git a/kueue-ks/examples/batch-job-spot.yaml b/kueue-ks/examples/batch-job-spot.yaml new file mode 100644 index 0000000..14282ec --- /dev/null +++ b/kueue-ks/examples/batch-job-spot.yaml @@ -0,0 +1,29 @@ +apiVersion: batch/v1 +kind: Job +metadata: + generateName: sample-job- + namespace: default + labels: + kueue.x-k8s.io/queue-name: user-queue-ks +spec: + parallelism: 3 + completions: 3 + + template: + spec: +# nodeSelector: +# instance-type: spot + containers: + - name: dummy-job + image: gcr.io/k8s-staging-perf-tests/sleep:v0.1.0 + args: ["30s"] + resources: + requests: + cpu: 1 + memory: "200Mi" +# nvidia.com/gpu: 1 + limits: + memory: "200Mi" + cpu: 1 +# nvidia.com/gpu: 1 + restartPolicy: Never diff --git a/kueue-ks/examples/batch-job.yaml b/kueue-ks/examples/batch-job.yaml new file mode 100644 index 0000000..2e93df0 --- /dev/null +++ b/kueue-ks/examples/batch-job.yaml @@ -0,0 +1,27 @@ +apiVersion: batch/v1 +kind: Job +metadata: + generateName: sample-job- + namespace: default + labels: + kueue.x-k8s.io/queue-name: user-queue-ks +spec: + parallelism: 3 + completions: 3 + + template: + spec: + containers: + - name: dummy-job + image: gcr.io/k8s-staging-perf-tests/sleep:v0.1.0 + args: ["30s"] + resources: + requests: + cpu: 1 + memory: "200Mi" +# nvidia.com/gpu: 1 + limits: + memory: "200Mi" + cpu: 1 +# nvidia.com/gpu: 1 + restartPolicy: Never diff --git a/kueue-ks/examples/pytorch-simple-job.yaml b/kueue-ks/examples/pytorch-simple-job.yaml new file mode 100644 index 0000000..c36bfda --- /dev/null +++ b/kueue-ks/examples/pytorch-simple-job.yaml @@ -0,0 +1,35 @@ +apiVersion: "kubeflow.org/v1" +kind: PyTorchJob +metadata: + name: pytorch-simple + labels: + kueue.x-k8s.io/queue-name: user-queue-ks + +spec: + pytorchReplicaSpecs: + Master: + replicas: 1 + restartPolicy: OnFailure + template: + spec: + containers: + - name: pytorch + image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727 + imagePullPolicy: Always + command: + - "python3" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + Worker: + replicas: 1 + restartPolicy: OnFailure + template: + spec: + containers: + - name: pytorch + image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727 + imagePullPolicy: Always + command: + - "python3" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" \ No newline at end of file diff --git a/kueue-ks/go.mod b/kueue-ks/go.mod new file mode 100644 index 0000000..a342995 --- /dev/null +++ b/kueue-ks/go.mod @@ -0,0 +1,79 @@ +module kubestellar/galaxy/kueue-ks + +go 1.21 + +require ( + k8s.io/api v0.29.2 + k8s.io/apimachinery v0.29.2 + k8s.io/client-go v0.29.2 + kubestellar/galaxy/mc-scheduling v0.0.0-20240523005204-102f815f55d7 + sigs.k8s.io/controller-runtime v0.17.3 + sigs.k8s.io/kueue v0.6.2 +) + +require ( + github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch/v5 v5.8.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + 
github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.20.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.4 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/imdario/mergo v0.3.16 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kubestellar/kubestellar v0.22.0 + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.18.0 // indirect + github.com/prometheus/client_model v0.5.0 // indirect + github.com/prometheus/common v0.45.0 // indirect + github.com/prometheus/procfs v0.12.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.26.0 // indirect + golang.org/x/net v0.22.0 // indirect + golang.org/x/oauth2 v0.13.0 // indirect + golang.org/x/sys v0.19.0 // indirect + golang.org/x/term v0.19.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.4.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.29.2 // indirect + k8s.io/component-base v0.29.2 // indirect + k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + kubestellar/galaxy/clustermetrics v0.0.0-20240523005204-102f815f55d7 + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) + +replace kubestellar/galaxy/clustermetrics => ../clustermetrics + +replace kubestellar/galaxy/mc-scheduling => ../mc-scheduling diff --git a/kueue-ks/go.sum b/kueue-ks/go.sum new file mode 100644 index 0000000..913f8bd --- /dev/null +++ b/kueue-ks/go.sum @@ -0,0 +1,292 @@ +github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= +github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= +github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230321174746-8dcc6526cfb1 h1:X8MJ0fnN5FPdcGF5Ij2/OW+HgiJrRg3AfHAx1PJtIzM= +github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230321174746-8dcc6526cfb1/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod 
h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= +github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= +github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= +github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1u0KQro= +github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= +github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.20.0 h1:ESKJdU9ASRfaPNOPRx12IUyA1vn3R9GiE3KYD14BXdQ= +github.com/go-openapi/jsonpointer v0.20.0/go.mod h1:6PGzBjjIIumbLYysB73Klnms1mwnU4G3YHOECG3CedA= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= 
+github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= +github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/cel-go v0.17.7 h1:6ebJFzu1xO2n7TLtN+UBqShGBhlD85bhvglh5DpcfqQ= +github.com/google/cel-go v0.17.7/go.mod h1:HXZKzB0LXqer5lHHgfWAnlYwJaQBDKMjxjulNQzhwhY= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk= +github.com/google/pprof v0.0.0-20230323073829-e72429f035bd/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod 
h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kubestellar/kubestellar v0.22.0 h1:+87rrP2K+NU6ZmHsl3AnY5AJnrQ5kuv7uBS+wL4lNoQ= +github.com/kubestellar/kubestellar v0.22.0/go.mod h1:3NNHDXqev8W3mxS0YBIofjqh1yvIM8H2cKZJh+TOr9A= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= +github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.16.0 h1:7q1w9frJDzninhXxjZd+Y/x54XNjG/UlRLIYPZafsPM= +github.com/onsi/ginkgo/v2 v2.16.0/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= +github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= +github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= +github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= +github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= +github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= +github.com/prometheus/common v0.45.0/go.mod 
h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= +github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.etcd.io/etcd/api/v3 v3.5.10 h1:szRajuUUbLyppkhs9K6BRtjY37l66XQQmw7oZRANE4k= +go.etcd.io/etcd/api/v3 v3.5.10/go.mod h1:TidfmT4Uycad3NM/o25fG3J07odo4GBB9hoxaodFCtI= +go.etcd.io/etcd/client/pkg/v3 v3.5.10 h1:kfYIdQftBnbAq8pUWFXfpuuxFSKzlmM5cSn76JByiT0= +go.etcd.io/etcd/client/pkg/v3 v3.5.10/go.mod h1:DYivfIviIuQ8+/lCq4vcxuseg2P2XbHygkKwFo9fc8U= +go.etcd.io/etcd/client/v3 v3.5.10 h1:W9TXNZ+oB3MCd/8UjxHTWK5J9Nquw9fQBLJd5ne5/Ao= +go.etcd.io/etcd/client/v3 v3.5.10/go.mod h1:RVeBnDz2PUEZqTpgqwAtUd8nAPf5kjyFyND7P1VkOKc= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0 h1:ZOLJc06r4CB42laIXg/7udr0pbZyuAihN10A/XuiQRY= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.42.0/go.mod h1:5z+/ZWJQKXa9YT34fQNx5K8Hd1EoIhvtUygUQPqEOgQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 h1:KfYpVmrjI7JuToy5k8XV3nkapjWx48k4E4JOtVstzQI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0/go.mod h1:SeQhzAEccGVZVEy7aH87Nh0km+utSpo1pTv6eMMop48= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 h1:3d+S281UTjM+AbF31XSOYn1qXn3BgIdWl8HNEpx08Jk= 
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0/go.mod h1:0+KuTDyKL4gjKCF75pHOX4wuzYDUZYfAQdSu43o+Z2I= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= +go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= +go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY= +golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.4.0 h1:Z81tqI5ddIoXDPvVQ7/7CC9TnLM7ubaFG2qXYd5BbYY= +golang.org/x/time v0.4.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 
h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= +google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b h1:+YaDE2r2OG8t/z5qmsh7Y+XXwCbvadxxZ0YY6mTdrVA= +google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:CgAqfJo+Xmu0GwA0411Ht3OU3OntXwsGmrmjI8ioGXI= +google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b h1:CIC2YMXmIhYw6evmhPxBKJ4fmLbOFtXQN/GV3XOZR8k= +google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:IBQ646DjkDkvUIsVq/cc03FUFQ9wbZu7yE396YcL870= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405 h1:AB/lmRny7e2pLhFEYIbl5qkDAUt2h0ZRO4wGPhZf+ik= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405/go.mod h1:67X1fPuzjcrkymZzZV1vvkFeTn2Rvc6lYF9MYFGCcwE= +google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= +google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= +k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= +k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg= +k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8= +k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= +k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/apiserver v0.29.2 h1:+Z9S0dSNr+CjnVXQePG8TcBWHr3Q7BmAr7NraHvsMiQ= +k8s.io/apiserver v0.29.2/go.mod h1:B0LieKVoyU7ykQvPFm7XSdIHaCHSzCzQWPFa5bqbeMQ= +k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= +k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= +k8s.io/component-base v0.29.2 h1:lpiLyuvPA9yV1aQwGLENYyK7n/8t6l3nn3zAtFTJYe8= +k8s.io/component-base v0.29.2/go.mod h1:BfB3SLrefbZXiBfbM+2H1dlat21Uewg/5qtKOl8degM= +k8s.io/klog/v2 v2.110.1 
h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/metrics v0.29.1 h1:qutc3aIPMCniMuEApuLaeYX47rdCn8eycVDx7R6wMlQ= +k8s.io/metrics v0.29.1/go.mod h1:JrbV2U71+v7d/9qb90UVKL8r0uJ6Z2Hy4V7mDm05cKs= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0 h1:TgtAeesdhpm2SGwkQasmbeqDo8th5wOBA5h/AjTKA4I= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0/go.mod h1:VHVDI/KrK4fjnV61bE2g3sA7tiETLn8sooImelsCx3Y= +sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= +sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/kueue v0.6.2 h1:wSX6DY/BNCIaza9R8TyhRxwzSrY02EkyxfXXeHbV758= +sigs.k8s.io/kueue v0.6.2/go.mod h1:TQ1lIc+2rdvObH7SszaxdCGTMYbp9XupKnCj0ZZDgEw= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/kueue-ks/hack/boilerplate.go.txt b/kueue-ks/hack/boilerplate.go.txt new file mode 100644 index 0000000..094f1c1 --- /dev/null +++ b/kueue-ks/hack/boilerplate.go.txt @@ -0,0 +1,15 @@ +/* +Copyright 2024 The KubeStellar Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ \ No newline at end of file diff --git a/scripts/common/install-ks-k3d.sh b/scripts/common/install-ks-k3d.sh new file mode 100755 index 0000000..04e19ee --- /dev/null +++ b/scripts/common/install-ks-k3d.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +# IMPORTANT WHEN RUNNING WITH RANCHER DESKTOP +# +# During the install documented below you may run out of quota for open files/inotify +# To prevent the problem from happening create this file: +# +# touch ~/Library/Application\ Support/rancher-desktop/lima/_config/override.yaml +# +# and add contents from https://github.com/kubestellar/galaxy/tree/main/scripts/kfp#preparation +# +# Restart Rancher Desktop for changes to take effect +# +set -x # echo so that users can understand what is happening +set -e # exit on error + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) +source "${SCRIPT_DIR}/setup-shell.sh" +source "${SCRIPT_DIR}/config.sh" + +export KUBESTELLAR_VERSION=0.22.0 +export OCM_STATUS_ADDON_VERSION=0.2.0-rc8 +export OCM_TRANSPORT_PLUGIN=0.1.7 + +WORK_DIR=$(mktemp -d -p /tmp) +echo "using ${WORK_DIR} to clone repos" + +# Cleanup function to delete the temp directory +function cleanup { + rm -rf "$WORK_DIR" + echo "Deleted temp working directory $WORK_DIR" +} + +# Register the cleanup function to be called on EXIT signal +trap cleanup EXIT + +: -------------------------------------------------------------- + +: clean up all + +k3d cluster delete kubeflex cluster1 cluster2 +cp ~/.kube/config ~/.kube/config.bak || true +rm ~/.kube/config || true +touch ~/.kube/config || true + +k3d cluster create -p "9443:443@loadbalancer" --k3s-arg "--disable=traefik@server:*" kubeflex + +helm install ingress-nginx ingress-nginx --set "controller.extraArgs.enable-ssl-passthrough=" --repo https://kubernetes.github.io/ingress-nginx --version 4.6.1 --namespace ingress-nginx --create-namespace + +docker stop k3d-kubeflex-server-0 +docker rename k3d-kubeflex-server-0 kubeflex-control-plane +docker start kubeflex-control-plane + + +: Wait for all pods to be restarted in k3d-kubeflex +wait-for-cmd '(($(kubectl --context k3d-kubeflex get po -A 2>/dev/null | grep -c Running) >= 5))' + +: pods above first get into Running state but then Fail and Restart. 
Wait and watch again +sleep 20 + +: Wait for all pods to be restarted in k3d-kubeflex +wait-for-cmd '(($(kubectl --context k3d-kubeflex get po -A 2>/dev/null | grep -c Running) >= 5))' + +: initialize kubeflex and create the its1 space with OCM running in it: +kflex init +kubectl apply -f https://raw.githubusercontent.com/kubestellar/kubestellar/v${KUBESTELLAR_VERSION}/config/postcreate-hooks/kubestellar.yaml +kubectl apply -f https://raw.githubusercontent.com/kubestellar/kubestellar/v${KUBESTELLAR_VERSION}/config/postcreate-hooks/ocm.yaml +kubectl config rename-context k3d-kubeflex kubeflex +kflex create its1 --type vcluster -p ocm + +: wait for OCM cluster manager up + +wait-for-cmd '(($(wrap-cmd kubectl --context its1 get deployments.apps -n open-cluster-management -o jsonpath='{.status.readyReplicas}' cluster-manager 2>/dev/null || echo 0) >= 1))' + + +: Install OCM status addon +helm --kube-context its1 upgrade --install status-addon -n open-cluster-management --create-namespace oci://ghcr.io/kubestellar/ocm-status-addon-chart --version v${OCM_STATUS_ADDON_VERSION} + +: Create a Workload Description Space wds1 in KubeFlex +kflex create wds1 -p kubestellar + +: Deploy the OCM based transport controller +helm --kube-context kubeflex upgrade --install ocm-transport-plugin oci://ghcr.io/kubestellar/ocm-transport-plugin/chart/ocm-transport-plugin --version ${OCM_TRANSPORT_PLUGIN} \ + --set transport_cp_name=its1 \ + --set wds_cp_name=wds1 \ + -n wds1-system + +: create clusters and register + +flags="--force-internal-endpoint-lookup" +cluster_port=31080 +for cluster in "${clusters[@]}"; do + k3d cluster create -p "${cluster_port}:80@loadbalancer" --network k3d-kubeflex ${cluster} +: Renaming new cluster as ${cluster} + kubectl config rename-context k3d-${cluster} ${cluster} +: Registering ${cluster} + clusteradm --context its1 get token | grep '^clusteradm join' | sed "s//${cluster}/" | awk '{print $0 " --context '${cluster}' --singleton '${flags}'"}' | sh +: Done + (( cluster_port += 100)) +done + +: Waiting for clusters csr to reach Pending state + +: Wait for csrs in its1 +wait-for-cmd '(($(kubectl --context its1 get csr 2>/dev/null | grep -c Pending) >= 2))' + +: accept csr + +clusteradm --context its1 accept --clusters cluster1 + +: Confirm cluster1 is accepted and label it for the BindingPolicy + +kubectl --context its1 get managedclusters +kubectl --context its1 label managedcluster cluster1 location-group=edge name=cluster1 + +clusteradm --context its1 accept --clusters cluster2 + +: Confirm cluster2 is accepted and label it for the BindingPolicy + +kubectl --context its1 get managedclusters +kubectl --context its1 label managedcluster cluster2 location-group=edge name=cluster2 + +: Kubestellar has been installed on your K3D diff --git a/scripts/kueue/install-all.sh b/scripts/kueue/install-all.sh new file mode 100644 index 0000000..0403174 --- /dev/null +++ b/scripts/kueue/install-all.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Copyright 2024 The KubeStellar Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x # echo so that users can understand what is happening +set -e # exit on error + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + +${SCRIPT_DIR}/../common/install-ks-k3d.sh +${SCRIPT_DIR}/install-kueue.sh + diff --git a/scripts/kueue/install-kueue-ks.sh b/scripts/kueue/install-kueue-ks.sh new file mode 100755 index 0000000..dca0498 --- /dev/null +++ b/scripts/kueue/install-kueue-ks.sh @@ -0,0 +1,274 @@ +#!/bin/bash + +# Copyright 2024 The KubeStellar Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# IMPORTANT WHEN RUNNING WITH RANCHER DESKTOP +# +# During the install documented below you may run out of quota for open files/inotify. +# This problem manifests itself by all pods in the cluster being in Pending state. +# +# To prevent the problem, create this file: +# +# touch ~/Library/Application\ Support/rancher-desktop/lima/_config/override.yaml +# +# and add the following content: +# +# provision: +# - mode: system +# script: | +# #!/bin/sh +# cat <<'EOF' > /etc/security/limits.d/rancher-desktop.conf +# * soft nofile 82920 +# * hard nofile 82920 +# EOF +# sysctl -w vm.max_map_count=262144 +# sysctl -w fs.inotify.max_user_instances=8192 +# sysctl -w fs.inotify.max_user_watches=1048576 +# +# +# Restart Rancher Desktop for changes to take effect +# +set -x # echo so that users can understand what is happening +set -e # exit on error + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) +source "${SCRIPT_DIR}/../common/setup-shell.sh" +source "${SCRIPT_DIR}/../common/config.sh" + +export KUBESTELLAR_VERSION=0.22.0 +export OCM_STATUS_ADDON_VERSION=0.2.0-rc8 +export OCM_TRANSPORT_PLUGIN=0.1.7 +export KUEUE_VERSION=v0.6.2 + +WORK_DIR=$(mktemp -d -p /tmp) +echo "using ${WORK_DIR} to clone repos" + +# Cleanup function to delete the temp directory +function cleanup { + rm -rf "$WORK_DIR" + echo "Deleted temp working directory $WORK_DIR" +} + +# Register the cleanup function to be called on EXIT signal +trap cleanup EXIT + +: -------------------------------------------------------------- + +: clean up all + +kubectl config use-context kubeflex + +if ! output=$(kubectl get deployment kueue-controller-manager -n kueue-system --no-headers 2>&1); then + printf "kueue deployment does not exist, no need to delete it\n" >&2 +else + printf "Deleting kueue \n" >&2 + kubectl delete --ignore-not-found -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml + echo "Deleted!" +fi + +: Deleting bindingpolicies, appwrappers, jobs, clustermetrics + +if ! output=$(kubectl --context kubeflex delete appwrappers --all 2>&1); then + echo "" >&2 +fi + +kubectl --context kubeflex delete jobs --all --ignore-not-found +kubectl --context kubeflex delete bindingpolicies --all --ignore-not-found + +: Deleting kueue-ks controllers +if !
output=$( helm delete kueue-ks -n kueue-ks-system --ignore-not-found 2>&1); then + echo "" >&2 +fi +kubectl delete serviceaccount kueue-ks-controller-manager -n kueue-ks-system --ignore-not-found +kubectl delete clusterrole kueue-ks-kueue-ks-editor-role --ignore-not-found +kubectl delete clusterrole kueue-ks-manager-role --ignore-not-found +kubectl delete clusterrole kueue-ks-metrics-reader --ignore-not-found +kubectl delete role kueue-ks-leader-election-role -n kueue-ks-system --ignore-not-found +kubectl delete clusterrolebinding kueue-ks-manager-rolebinding --ignore-not-found +kubectl delete clusterrolebinding kueue-ks-proxy-rolebinding --ignore-not-found +kubectl delete service kueue-ks-controller-manager-metrics-service -n kueue-ks-system --ignore-not-found +kubectl delete clusterrole kueue-ks-proxy-role --ignore-not-found +kubectl delete ns kueue-ks-system --ignore-not-found + +: Deleting cluster metrics controller +if ! output=$( helm --kube-context wds2 delete cluster-metrics --ignore-not-found 2>&1); then + echo "" >&2 +fi + + +#sleep 5 + +for cluster in "${clusters[@]}"; do + kubectl --context $cluster delete clusterroles appwrappers-access --ignore-not-found + kubectl --context $cluster delete clusterrolebindings klusterlet-appwrappers-access --ignore-not-found + kubectl --context ${cluster} delete -f https://raw.githubusercontent.com/project-codeflare/appwrapper/main/config/crd/bases/workload.codeflare.dev_appwrappers.yaml --ignore-not-found + kubectl --context ${cluster} delete -f https://raw.githubusercontent.com/kubestellar/galaxy/main/clustermetrics/config/crd/bases/galaxy.kubestellar.io_clustermetrics.yaml --ignore-not-found + kubectl --context ${cluster} delete -k github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.5.0 --ignore-not-found + + kubectl --context ${cluster} delete ns clustermetrics-system --ignore-not-found + +done +# 11 kubectl --context kubeflex delete -f ${SCRIPT_DIR}/../kueue/templates/transform-pytorch-job.yaml --ignore-not-found +kubectl --context kubeflex delete -f ${SCRIPT_DIR}/templates/transform-pytorch-job.yaml --ignore-not-found + +kubectl --context kubeflex delete -f https://raw.githubusercontent.com/project-codeflare/appwrapper/main/config/crd/bases/workload.codeflare.dev_appwrappers.yaml --ignore-not-found +kubectl --context kubeflex delete -f https://raw.githubusercontent.com/kubestellar/galaxy/main/clustermetrics/config/crd/bases/galaxy.kubestellar.io_clustermetrics.yaml --ignore-not-found +kubectl --context kubeflex delete -f https://raw.githubusercontent.com/kubeflow/training-operator/855e0960668b34992ba4e1fd5914a08a3362cfb1/manifests/base/crds/kubeflow.org_pytorchjobs.yaml --ignore-not-found +kubectl --context kubeflex delete ns clustermetrics-system --ignore-not-found + +kubectl --context kubeflex delete clusterrolebinding kubeflex-manager-cluster-admin-rolebinding --ignore-not-found + +helm --kube-context kubeflex uninstall -n wds2-system kubestellar --ignore-not-found + + +if ! 
output=$( kflex delete wds2 2>&1); then + echo "" >&2 +fi + +sleep 15 + +: ----------------- INSTALLING ----------------- + +kubectl --context kubeflex apply -f https://raw.githubusercontent.com/kubeflow/training-operator/855e0960668b34992ba4e1fd5914a08a3362cfb1/manifests/base/crds/kubeflow.org_pytorchjobs.yaml + +: Installing kueue controller +kubectl --context kubeflex apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/$KUEUE_VERSION/manifests.yaml + +: waiting for kueue controller to come up +wait-for-cmd '(($(wrap-cmd kubectl --context kubeflex get deployments -n kueue-system -o jsonpath='{.status.readyReplicas}' kueue-controller-manager 2>/dev/null || echo 0) >= 1))' + +kubectl --context kubeflex create -f ${SCRIPT_DIR}/templates/admissioncheck-ks.yaml +kubectl --context kubeflex create -f ${SCRIPT_DIR}/templates/user-queue-ks.yaml +kubectl --context kubeflex create -f ${SCRIPT_DIR}/templates/spot-resource-flavor.yaml +kubectl --context kubeflex create -f ${SCRIPT_DIR}/templates/default-flavor.yaml +kubectl --context kubeflex create -f ${SCRIPT_DIR}/templates/zero-cluster-queue-ks.yaml + + +: Installing ClusterRoleBinding + +kubectl --context kubeflex apply -f - < Date: Wed, 29 May 2024 09:40:24 -0400 Subject: [PATCH 2/4] cleanup, added README's, renamed clustermetrics-controller to quotamanager-controller Signed-off-by: cwiklik --- README.md | 7 +- .../cluster-metrics/templates/operator.yaml | 2 +- .../config/manager/kustomization.yaml | 2 +- kueue-ks/README.md | 47 +++++-------- .../controllers/admissioncheck_controller.go | 10 +-- ...ntroller.go => quotamanager_controller.go} | 67 ++++++++----------- kueue-ks/controllers/workload_controller.go | 15 +---- scripts/kueue/README.md | 40 +++++++++++ 8 files changed, 98 insertions(+), 92 deletions(-) rename kueue-ks/controllers/{clustermetrics_controller.go => quotamanager_controller.go} (70%) create mode 100644 scripts/kueue/README.md diff --git a/README.md b/README.md index f2ff4d1..57bf62c 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ Additional modules, tools and documentation to facilitate KubeStellar integration with other community projects. -This project includes bash-based scripts to replicate demos and PoCs such as KFP + KubeStellar integration -and Argo Workflows + KubeStellar integration. +This project includes bash-based scripts to replicate demos and PoCs such as KFP + KubeStellar integration, Argo Workflows + KubeStellar, and Kueue + KubeStellar integrations. - [suspend-webook](./suspend-webhook/) webook used to suspend argo workflows (and in the future other types of workloads supporting the suspend flag) @@ -16,6 +15,7 @@ sync/status sycn mechanisms. - [mc-scheduling](./mc-scheduling/) -A Multi-cluster scheduling framework supporting pluggable schedulers. +- [kueue-ks](./kueue-ks/) -Set of controllers enabling integration of Kueue with KubeStellar. 
## KubeFlow Pipelines v2 @@ -25,3 +25,6 @@ Check out this [instructions](./scripts/kfp/) ## Argo Workflows Check out this [instructions](./scripts/argo-wf/) + +## kueue-ks +Check out this [instructions](./scripts/kueue/) diff --git a/charts/cluster-metrics/templates/operator.yaml b/charts/cluster-metrics/templates/operator.yaml index 407e5bf..95b8e66 100644 --- a/charts/cluster-metrics/templates/operator.yaml +++ b/charts/cluster-metrics/templates/operator.yaml @@ -297,7 +297,7 @@ spec: - --metrics-bind-address=127.0.0.1:8080 - --leader-elect - --metrics-name={{.Values.clusterName}} - image: ko.local/cluster-metrics:102f815 + image: ko.local/cluster-metrics:15960ac livenessProbe: httpGet: path: /healthz diff --git a/clustermetrics/config/manager/kustomization.yaml b/clustermetrics/config/manager/kustomization.yaml index 9f8cb89..d521993 100644 --- a/clustermetrics/config/manager/kustomization.yaml +++ b/clustermetrics/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: ko.local/cluster-metrics - newTag: 102f815 + newTag: 15960ac diff --git a/kueue-ks/README.md b/kueue-ks/README.md index e6eddcd..efa83b8 100644 --- a/kueue-ks/README.md +++ b/kueue-ks/README.md @@ -1,44 +1,33 @@ -# kueue-ks -// TODO(user): Add simple overview of use/purpose +# Multi-cluster Job Workload Management with Kueue and KubeStellar +This project aims to simplify the deployment and management of batch workloads across multiple Kubernetes clusters using [Kueue](https://kueue.sigs.k8s.io) for job queueing and [KubeStellar](https://docs.kubestellar.io) for multi-cluster configuration management. -## Description -// TODO(user): An in-depth paragraph about your project and overview of use -## Getting Started -You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) to get a local cluster for testing, or run against a remote cluster. -**Note:** Your controller will automatically use the current context in your kubeconfig file (i.e. whatever cluster `kubectl cluster-info` shows). +## Overview +This repository contains two core controllers: -### Running on the cluster -1. Install Instances of Custom Resources: +- *WorkloadController:* watches for Kueue `Workload` objects and orchestrates the downsync and deployment of corresponding jobs to worker clusters managed by KubeStellar +- *QuotaManagerController:* monitors [ClusterMetrics](https://github.com/kubestellar/galaxy/tree/main/clustermetrics) from each worker cluster and dynamically updates Kueue's global resource quotas as needed -```sh -kubectl apply -f config/samples/ -``` +## Description +In multi-cluster Kubernetes environments, managing batch workloads and ensuring efficient resource utilization across clusters can be a complex challenge. Organizations often face issues such as resource contention, over-provisioning, and inefficient workload distribution, leading to suboptimal resource utilization and increased costs. -2. Build and push your image to the location specified by `IMG`: - -```sh -make docker-build docker-push IMG=/kueue-ks:tag -``` - -3. Deploy the controller to the cluster with the image specified by `IMG`: +The kueue-ks project goal is to address these challenges by leveraging Kueue's quota management capabilities and integrating with KubeStellar for multi-cluster configuration management. The primary goal is to enable centralized management and intelligent distribution of batch workloads across multiple clusters based on available resource quotas. 
-```sh -make deploy IMG=/kueue-ks:tag -``` -### Uninstall CRDs -To delete the CRDs from the cluster: +## Getting Started +You’ll need a Kubernetes cluster to run against. You can use [K3D](https://k3d.io) to get a local cluster for testing. + +### Running on the K3D cluster +1. Check out these [instructions](./scripts/kueue/) + +2. Run job examples: ```sh -make uninstall +kubectl create -f examples/batch-job.yaml ``` -### Undeploy controller -UnDeploy the controller to the cluster: ```sh -make undeploy +kubectl create -f examples/pytorch-simple-job.yaml ``` ## Contributing diff --git a/kueue-ks/controllers/admissioncheck_controller.go b/kueue-ks/controllers/admissioncheck_controller.go index 95e2b54..5367f24 100644 --- a/kueue-ks/controllers/admissioncheck_controller.go +++ b/kueue-ks/controllers/admissioncheck_controller.go @@ -57,13 +57,9 @@ const ( //+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=admissionchecks/status,verbs=get;update;patch //+kubebuilder:rbac:groups=kueue.x-k8s.io.galaxy.kubestellar.io,resources=admissionchecks/finalizers,verbs=update -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the AdmissionCheck object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// + +// Reconciles kueue AdmissionCheck + // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.1/pkg/reconcile func (r *AdmissionCheckReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { diff --git a/kueue-ks/controllers/clustermetrics_controller.go b/kueue-ks/controllers/quotamanager_controller.go similarity index 70% rename from kueue-ks/controllers/clustermetrics_controller.go rename to kueue-ks/controllers/quotamanager_controller.go index b783754..3d2fad1 100644 --- a/kueue-ks/controllers/clustermetrics_controller.go +++ b/kueue-ks/controllers/quotamanager_controller.go @@ -25,34 +25,35 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - //"k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" v1beta1 "sigs.k8s.io/kueue/apis/kueue/v1beta1" ) -// ClusterMetricsReconciler reconciles a ClusterMetrics object +const ( + CPU = "cpu" + Memory = "memory" +) + +// ClusterMetricsReconciler reconciles ClusterMetrics objects from each cluster and updates +// global quota managed by kueue in a cluster queue. As new clusters join or new nodes +// are added, this controller increases quota accordingly. Quota decreasing is not so +// straightforward. Although technically the decrease can be done, kueue will not preempt +// any jobs even if it is required due to reduced quota. The only way the decrease can +// work is to stop accepting new jobs, drain, decrease quota and open the gate for new +// jobs.
type ClusterMetricsReconciler struct { client.Client Scheme *runtime.Scheme WorkerClusters map[string]clustermetrics.ClusterMetrics ClusterQueue string } + //+kubebuilder:rbac:groups=galaxy.kubestellar.io.galaxy.kubestellar.io,resources=clustermetrics,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=galaxy.kubestellar.io.galaxy.kubestellar.io,resources=clustermetrics/status,verbs=get;update;patch //+kubebuilder:rbac:groups=galaxy.kubestellar.io.galaxy.kubestellar.io,resources=clustermetrics/finalizers,verbs=update -/* -func NewClusterMetricsReconciler(c client.Client, s *runtime.Scheme, q string, cfg *rest.Config) *ClusterMetricsReconciler { - return &ClusterMetricsReconciler{ - Client: c, - Scheme: s, - WorkerClusters: make(map[string]clustermetrics.ClusterMetrics), - ClusterQueue: q, - } -} -*/ // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. // TODO(user): Modify the Reconcile function to compare the state specified by @@ -82,16 +83,15 @@ func (r *ClusterMetricsReconciler) Reconcile(ctx context.Context, req ctrl.Reque available := map[string]*resource.Quantity{} for _, cm := range r.WorkerClusters { for _, node := range cm.Status.Nodes { - //log.Info("%%%%%%%%%% ", "Cluster", cm.Name) - if available["cpu"] == nil { - available["cpu"] = resource.NewQuantity(0, resource.BinarySI) + if available[CPU] == nil { + available[CPU] = resource.NewQuantity(0, resource.BinarySI) } - available["cpu"].Add(*node.AllocatableResources.Cpu()) + available[CPU].Add(*node.AllocatableResources.Cpu()) - if available["memory"] == nil { - available["memory"] = resource.NewQuantity(0, resource.BinarySI) + if available[Memory] == nil { + available[Memory] = resource.NewQuantity(0, resource.BinarySI) } - available["memory"].Add(*node.AllocatableResources.Memory()) + available[Memory].Add(*node.AllocatableResources.Memory()) } } clusterQueue := v1beta1.ClusterQueue{} @@ -104,38 +104,27 @@ func (r *ClusterMetricsReconciler) Reconcile(ctx context.Context, req ctrl.Reque } Default := 0 update := false - //log.Info("Clusterqueue :::::::", "Resources", clusterQueue.Spec.ResourceGroups[0].Flavors[Default]) queueNominalCpuCount := clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].NominalQuota - if clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].Name == "cpu" { - // log.Info("Clusterqueue nominal ---- CPU") - if available["cpu"] != nil { - // log.Info("Clusterqueue nominal cpus ----", - // "", queueNominalCpuCount, - // "", queueNominalCpuCount.Format) - if available["cpu"].Value() > queueNominalCpuCount.Value() { + if clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].Name == CPU { + if available[CPU] != nil { + if available[CPU].Value() > queueNominalCpuCount.Value() { update = true - delta := available["cpu"].DeepCopy() + delta := available[CPU].DeepCopy() delta.Sub(queueNominalCpuCount) queueNominalCpuCount.Add(delta) - // log.Info("ClusterQueue New CPU Quota ----", "", queueNominalCpuCount.Value()) clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[0].NominalQuota = queueNominalCpuCount } } } - if clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].Name == "memory" { - // log.Info("Clusterqueue nominal ---- MEMORY") + if clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].Name == Memory { queueNominalMemoryQuota := clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].NominalQuota 
//.ScaledValue(resource.Giga) - if available["memory"] != nil { - // log.Info("Clusterqueue nominal memory ----", - // "", queueNominalMemoryQuota, - // "", queueNominalMemoryQuota.Format) - if available["memory"].ScaledValue(resource.Kilo) > queueNominalMemoryQuota.ScaledValue(resource.Kilo) { + if available[Memory] != nil { + if available[Memory].ScaledValue(resource.Kilo) > queueNominalMemoryQuota.ScaledValue(resource.Kilo) { update = true - delta := available["memory"].DeepCopy() + delta := available[Memory].DeepCopy() delta.Sub(queueNominalMemoryQuota) queueNominalMemoryQuota.Add(delta) - // log.Info("ClusterQueue New Memory Quota ----", "", queueNominalMemoryQuota) clusterQueue.Spec.ResourceGroups[0].Flavors[Default].Resources[1].NominalQuota = queueNominalMemoryQuota } } @@ -154,6 +143,6 @@ func (r *ClusterMetricsReconciler) Reconcile(ctx context.Context, req ctrl.Reque // SetupWithManager sets up the controller with the Manager. func (r *ClusterMetricsReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&clustermetrics.ClusterMetrics{}). + For(&clustermetrics.ClusterMetrics{}). Complete(r) } diff --git a/kueue-ks/controllers/workload_controller.go b/kueue-ks/controllers/workload_controller.go index b64604c..1f9eb0b 100644 --- a/kueue-ks/controllers/workload_controller.go +++ b/kueue-ks/controllers/workload_controller.go @@ -81,13 +81,8 @@ func NewWorkloadReconciler(c client.Client, kueueClient *kueueClient.Clientset, } -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the Workload object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// +// Reconciles kueue Workload object and if quota exists it downsyncs a job to a worker cluster. + // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.1/pkg/reconcile func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -100,13 +95,11 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return reconcile.Result{}, client.IgnoreNotFound(err) } if !workload.HasQuotaReservation(wl) { - //1.2 workload has no reservation log.Info("workload with no reservation, delete owned requests") return reconcile.Result{}, r.evictJob(ctx, wl) } if apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) { - //1.2 workload has no reservation or is finished log.Info("remote workload has completed") return reconcile.Result{}, nil } @@ -130,15 +123,12 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c } // jobs with assigned cluster have already been scheduled to run if _, exists := jobObject.GetLabels()[AssignedClusterLabel]; exists { - log.Info("............ Cluster Assignment Present") - if workload.HasAllChecksReady(wl) { err := retry.RetryOnConflict(retry.DefaultRetry, func() error { wl := &kueue.Workload{} if err := r.Client.Get(ctx, req.NamespacedName, wl); err != nil { log.Error(err, "Error when fetching Workload object ") return err - //return reconcile.Result{}, client.IgnoreNotFound(err) } log.Info("............ 
All Checks Ready") newCondition := metav1.Condition{ @@ -167,7 +157,6 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c log.Info("New BindingPolicy created for object", "Name", meta.Name) } else { - log.Info("............ Not All Checks Ready") relevantChecks, err := admissioncheck.FilterForController(ctx, r.Client, wl.Status.AdmissionChecks, ControllerName) if err != nil { return reconcile.Result{}, err diff --git a/scripts/kueue/README.md b/scripts/kueue/README.md new file mode 100644 index 0000000..ec900df --- /dev/null +++ b/scripts/kueue/README.md @@ -0,0 +1,40 @@ +# README + +These scripts are currently supporting deployment of KubeStellar, Kueue, and integration controllers on K3D Kubernetes only. + +## Preparation + +Before runnning the scripts, make sure to increase your ulimits and +fs.inotify.max_user_watches and fs.inotify.max_user_instances for the machine +running the kind clusters. See [these instructions](https://kind.sigs.k8s.io/docs/user/known-issues/#pod-errors-due-to-too-many-open-files) for more info. + +For Rancher Desktop follow [these instructions](https://docs.rancherdesktop.io/how-to-guides/increasing-open-file-limit). +and use the following config: + +```yaml +provision: +- mode: system + script: | + #!/bin/sh + cat <<'EOF' > /etc/security/limits.d/rancher-desktop.conf + * soft nofile 82920 + * hard nofile 82920 + EOF + sysctl -w vm.max_map_count=262144 + sysctl -w fs.inotify.max_user_instances=8192 + sysctl -w fs.inotify.max_user_watches=1048576 +``` + +Before running the script, make sure to run `go mod tidy` + + +## Running the scripts + +Note: at the start each script deletes yor current kubeflex, cluster1 and cluster2 clusters, and +backs up and delete your default kubeconfig in ~/.kube/config. + +To install, run the `install-all` script: + +```shell +./instal-all.sh +``` \ No newline at end of file From 8a85ff5de8d8bf0d99f6b58f57e91d96c46c871c Mon Sep 17 00:00:00 2001 From: cwiklik Date: Wed, 29 May 2024 10:25:02 -0400 Subject: [PATCH 3/4] cleanup Signed-off-by: cwiklik --- charts/kueue-ks/values.yaml | 65 ------------------------------------- kueue-ks/README.md | 2 +- 2 files changed, 1 insertion(+), 66 deletions(-) diff --git a/charts/kueue-ks/values.yaml b/charts/kueue-ks/values.yaml index 1dc457e..b28b04f 100644 --- a/charts/kueue-ks/values.yaml +++ b/charts/kueue-ks/values.yaml @@ -1,68 +1,3 @@ -# Default values for kueue-ks. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. -replicaCount: 1 - -image: - repository: nginx - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - tag: "" - -imagePullSecrets: [] -nameOverride: "" -fullnameOverride: "" - -serviceAccount: - # Specifies whether a service account should be created - create: true - # Automatically mount a ServiceAccount's API credentials? - automount: true - # Annotations to add to the service account - annotations: {} - # The name of the service account to use. - # If not set and create is true, a name is generated using the fullname template - name: "" - -podAnnotations: {} -podLabels: {} - -podSecurityContext: {} - # fsGroup: 2000 - -securityContext: {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - -service: - type: ClusterIP - port: 80 - - -resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. 
This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - -livenessProbe: - httpGet: - path: / - port: http -readinessProbe: - httpGet: - path: / - port: http diff --git a/kueue-ks/README.md b/kueue-ks/README.md index efa83b8..554f686 100644 --- a/kueue-ks/README.md +++ b/kueue-ks/README.md @@ -67,7 +67,7 @@ More information can be found via the [Kubebuilder Documentation](https://book.k ## License -Copyright 2024. +Copyright 2024 The KubeStellar Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From b2330ffc36b06f9e2061161c09379e4728ddaec8 Mon Sep 17 00:00:00 2001 From: cwiklik Date: Wed, 29 May 2024 10:42:26 -0400 Subject: [PATCH 4/4] cleaned up kueue chart values Signed-off-by: cwiklik --- charts/cluster-metrics/templates/operator.yaml | 2 +- charts/kueue-ks/values.yaml | 3 +++ clustermetrics/config/manager/kustomization.yaml | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/charts/cluster-metrics/templates/operator.yaml b/charts/cluster-metrics/templates/operator.yaml index 95b8e66..c7ab5b9 100644 --- a/charts/cluster-metrics/templates/operator.yaml +++ b/charts/cluster-metrics/templates/operator.yaml @@ -297,7 +297,7 @@ spec: - --metrics-bind-address=127.0.0.1:8080 - --leader-elect - --metrics-name={{.Values.clusterName}} - image: ko.local/cluster-metrics:15960ac + image: ko.local/cluster-metrics:8a85ff5 livenessProbe: httpGet: path: /healthz diff --git a/charts/kueue-ks/values.yaml b/charts/kueue-ks/values.yaml index b28b04f..36a3929 100644 --- a/charts/kueue-ks/values.yaml +++ b/charts/kueue-ks/values.yaml @@ -1,3 +1,6 @@ +service: + type: ClusterIP + port: 80 diff --git a/clustermetrics/config/manager/kustomization.yaml b/clustermetrics/config/manager/kustomization.yaml index d521993..b768052 100644 --- a/clustermetrics/config/manager/kustomization.yaml +++ b/clustermetrics/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: ko.local/cluster-metrics - newTag: 15960ac + newTag: 8a85ff5