Skip to content

Commit

Permalink
Merge pull request #4 from lvyanru8200/main
Browse files Browse the repository at this point in the history
Feat: Overall process completion with some test data included
  • Loading branch information
lvyanru8200 authored Nov 17, 2023
2 parents 409e84b + edbe186 commit a9dd771
Show file tree
Hide file tree
Showing 7 changed files with 597 additions and 37 deletions.
2 changes: 2 additions & 0 deletions cmd/controller-manager/app/controller_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
finetunev1beta1 "github.com/DataTunerX/meta-server/api/finetune/v1beta1"
"github.com/go-logr/zapr"
"github.com/operator-framework/operator-lib/leader"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand All @@ -35,6 +36,7 @@ func init() {
utilruntime.Must(finetunev1beta1.AddToScheme(scheme))
utilruntime.Must(corev1beta1.AddToScheme(scheme))
utilruntime.Must(extensionv1beta1.AddToScheme(scheme))
utilruntime.Must(rayv1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

Expand Down
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ module github.com/DataTunerX/finetune-experiment-controller
go 1.19

require (
github.com/DataTunerX/meta-server v0.0.0-20231109015709-57812268ad17
github.com/DataTunerX/meta-server v0.0.0-20231116102108-24bd83a6be89
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2
github.com/go-logr/zapr v1.2.3
github.com/operator-framework/operator-lib v0.11.0
github.com/ray-project/kuberay/ray-operator v1.0.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.17.0
k8s.io/api v0.26.0
Expand All @@ -32,7 +33,7 @@ require (
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/uuid v1.1.2 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/josharian/intern v1.0.0 // indirect
Expand All @@ -59,10 +60,10 @@ require (
go.uber.org/multierr v1.10.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/net v0.15.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
golang.org/x/sys v0.12.0 // indirect
golang.org/x/term v0.12.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
Expand Down
27 changes: 18 additions & 9 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3f
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DataTunerX/meta-server v0.0.0-20231109015709-57812268ad17 h1:szsQx64N0bTO6qpCmD4V7Ne1AuF+y/KcRyyh7UE2SRQ=
github.com/DataTunerX/meta-server v0.0.0-20231109015709-57812268ad17/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231113032938-bf87d14956b1 h1:WxEyoS9Dlkm2Yfcpn0sL0Gz/xfXdN0fdxb/dGYAQIqQ=
github.com/DataTunerX/meta-server v0.0.0-20231113032938-bf87d14956b1/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231116063244-4b1d018072c0 h1:BJ6OqFz1ROHizgQ9eNWpWSCzMEe4PFLhCloBUsLrYa0=
github.com/DataTunerX/meta-server v0.0.0-20231116063244-4b1d018072c0/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231116064242-ea7bb845394f h1:ivD0gAMQ0gWtJ1/xWeUqkOce0PEO2LXWfjAAGiPwTvw=
github.com/DataTunerX/meta-server v0.0.0-20231116064242-ea7bb845394f/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/meta-server v0.0.0-20231116102108-24bd83a6be89 h1:czoBDPd42BBGiCREjfnaxG5BNcHk+9MnkemXAnG/bEw=
github.com/DataTunerX/meta-server v0.0.0-20231116102108-24bd83a6be89/go.mod h1:MrA+U+PYANBfU8B43hrkJQ3WOIFPzUqowUO7s+KafvU=
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2 h1:3mBAWDqYrWtDk9xvIHDG/dN5zGcliwJnyvpWHFHcC+A=
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2/go.mod h1:qL3DYjQa7av0QkZoFrycHbpXHGQfBNEDke8uv+FdDn4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
Expand Down Expand Up @@ -177,8 +183,9 @@ github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLe
github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g=
Expand Down Expand Up @@ -281,6 +288,8 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1
github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo=
github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4=
github.com/ray-project/kuberay/ray-operator v1.0.0 h1:i69nvbV7az2FG41VHQgxrmhD+SUl8ca+ek4RPbSE2Q0=
github.com/ray-project/kuberay/ray-operator v1.0.0/go.mod h1:7C7ebIkxtkmOX8w1iiLrKM1j4hkZs/Guzm3WdePk/yg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/sagikazarmark/locafero v0.3.0 h1:zT7VEGWC2DTflmccN/5T1etyKvxSxpHsjb9cJvm4SvQ=
Expand Down Expand Up @@ -416,8 +425,8 @@ golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qx
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down Expand Up @@ -487,12 +496,12 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU=
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
77 changes: 68 additions & 9 deletions internal/controller/finetune/finetuneexperiment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ package finetune

import (
"context"
"fmt"
"time"

"github.com/DataTunerX/utility-server/logging"
"k8s.io/apimachinery/pkg/types"

"github.com/DataTunerX/finetune-experiment-controller/pkg/util/handlererr"
finetunev1beta1 "github.com/DataTunerX/meta-server/api/finetune/v1beta1"
Expand All @@ -39,10 +41,6 @@ type FinetuneExperimentReconciler struct {
Log logging.Logger
}

const (
finetuneFinalizer = "finetune.datatunerx.io/finalizer"
)

//+kubebuilder:rbac:groups=finetune.datatunerx.io,resources=finetuneexperiments,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=finetune.datatunerx.io,resources=finetuneexperiments/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=finetune.datatunerx.io,resources=finetuneexperiments/finalizers,verbs=update
Expand All @@ -60,25 +58,86 @@ func (r *FinetuneExperimentReconciler) Reconcile(ctx context.Context, req ctrl.R
}

if finetuneExperiment.GetDeletionTimestamp() != nil {
if controllerutil.ContainsFinalizer(finetuneExperiment, finetuneFinalizer) {
if controllerutil.ContainsFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer) {
// TODO: run cleanup logic here before the finalizer is removed.
controllerutil.RemoveFinalizer(finetuneExperiment, finetuneFinalizer)
controllerutil.RemoveFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer)
if err := r.Update(ctx, finetuneExperiment); err != nil {
r.Log.Errorf("Remove finalizer failed: %s/%s, Err: %v", req.Name, req.Namespace, err)
return handlererr.HandlerErr(err)
}
}
return handlererr.HandlerErr(nil)
}
if !controllerutil.ContainsFinalizer(finetuneExperiment, finetuneFinalizer) {
controllerutil.AddFinalizer(finetuneExperiment, finetuneFinalizer)
if !controllerutil.ContainsFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer) {
controllerutil.AddFinalizer(finetuneExperiment, finetunev1beta1.FinetuneGroupFinalizer)
err := r.Update(ctx, finetuneExperiment)
if err != nil {
r.Log.Errorf("Add finalizer failed: %s/%s, %v", req.Name, req.Namespace, err)
return handlererr.HandlerErr(err)
}
}
return ctrl.Result{}, nil

if finetuneExperiment.Spec.Pending {
finetuneExperiment.Status.State = finetunev1beta1.FinetuneExperimentPending
if err := r.Client.Status().Update(ctx, finetuneExperiment); err != nil {
r.Log.Errorf("Update fineExperiment %s/%s status failed", finetuneExperiment.Name, finetuneExperiment.Namespace)
return handlererr.HandlerErr(err)
}
return handlererr.HandlerErr(nil)
}

for i := range finetuneExperiment.Spec.FinetuneJobs {
finetuneJob := finetuneExperiment.Spec.FinetuneJobs[i]
if finetuneJob.Name == nil {
name := fmt.Sprintf("%s-%s", finetuneExperiment.Name, "finetunejob")
finetuneJob.Name = &name
}
finetuneJobInstance := &finetunev1beta1.FinetuneJob{}
finetuneJobInstance.Spec = finetuneJob.Spec
finetuneJobInstance.Name = *finetuneJob.Name
finetuneJobInstance.Namespace = finetuneExperiment.Namespace
if err := ctrl.SetControllerReference(finetuneExperiment, finetuneJobInstance, r.Scheme); err != nil {
r.Log.Errorf("SetControllerReference failed finetuneJob: %s/%s, owner finetuneExperiment: %s/%s, err: %v",
finetuneJobInstance.Name, finetuneJobInstance.Namespace, finetuneExperiment.Name, finetuneExperiment.Namespace, err)
return handlererr.HandlerErr(err)
}
if err := r.Client.Create(ctx, finetuneJobInstance); err != nil {
if !errors.IsAlreadyExists(err) {
r.Log.Errorf("Create finetuneJob %s/%s failed: %v", finetuneJobInstance.Name, finetuneJobInstance.Namespace, err)
return handlererr.HandlerErr(err)
}
}
existFinetuneJob := &finetunev1beta1.FinetuneJob{}
if err := r.Client.Get(ctx, types.NamespacedName{
Name: *finetuneJob.Name,
Namespace: finetuneExperiment.Namespace,
}, existFinetuneJob); err != nil {
r.Log.Errorf("Get finetuneJob failed: %v", err)
return handlererr.HandlerErr(err)
}
alreadyExists := false

// Check whether JobsStatus already contains an entry named existFinetuneJob.Name.
for _, jobStatus := range finetuneExperiment.Status.JobsStatus {
if jobStatus.Name == existFinetuneJob.Name {
alreadyExists = true
break
}
}
if !alreadyExists {
finetuneExperiment.Status.JobsStatus = append(finetuneExperiment.Status.JobsStatus, finetunev1beta1.FinetuneJobStatusSetting{
Name: existFinetuneJob.Name,
FinetuneJobStatus: existFinetuneJob.Status,
})
}

}
finetuneExperiment.Status.State = finetunev1beta1.FinetuneExperimentProcessing
if err := r.Client.Status().Update(ctx, finetuneExperiment); err != nil {
r.Log.Errorf("Update fineExperiment %s/%s status failed", finetuneExperiment.Name, finetuneExperiment.Namespace)
return handlererr.HandlerErr(err)
}
return handlererr.HandlerErr(nil)
}

// SetupWithManager sets up the controller with the Manager.
Expand Down
Loading

0 comments on commit a9dd771

Please sign in to comment.