Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sidecar terminator ignore the exit code of the sidecar container #1303

Merged
merged 1 commit into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 63 additions & 17 deletions pkg/controller/sidecarterminator/sidecar_terminator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,17 @@

import (
"context"
"encoding/json"
"flag"
"fmt"
"strings"
"time"

appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/ratelimiter"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
Expand All @@ -39,14 +37,22 @@
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
"github.com/openkruise/kruise/pkg/features"
"github.com/openkruise/kruise/pkg/util"
utilclient "github.com/openkruise/kruise/pkg/util/client"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/ratelimiter"
)

// init registers the "sidecarterminator-workers" command-line flag, binding it
// to concurrentReconciles. The value concurrentReconciles holds at registration
// time is used as the flag's default.
func init() {
	const (
		workersFlag  = "sidecarterminator-workers"
		workersUsage = "Max concurrent workers for SidecarTerminator controller."
	)
	flag.IntVar(&concurrentReconciles, workersFlag, concurrentReconciles, workersUsage)
}

var (
concurrentReconciles = 3
concurrentReconciles = 3
SidecarTerminated corev1.PodConditionType = "SidecarTerminated"
)

/**
Expand Down Expand Up @@ -131,17 +137,8 @@
return reconcile.Result{}, nil
}

if containersCompleted(pod, getSidecar(pod)) {
klog.V(3).Infof("SidecarTerminator -- all sidecars of pod(%v/%v) have been completed, no need to process", pod.Namespace, pod.Name)
return reconcile.Result{}, nil
}

if pod.Spec.RestartPolicy == corev1.RestartPolicyOnFailure && !containersSucceeded(pod, getMain(pod)) {
klog.V(3).Infof("SidecarTerminator -- pod(%v/%v) is trying to restart, no need to process", pod.Namespace, pod.Name)
return reconcile.Result{}, nil
}

sidecarNeedToExecuteKillContainer, sidecarNeedToExecuteInPlaceUpdate, err := r.groupSidecars(pod)

if err != nil {
return reconcile.Result{}, err
}
Expand All @@ -150,13 +147,62 @@
return reconcile.Result{}, err
}

if err := r.markJobPodTerminated(pod); err != nil {
return reconcile.Result{}, err
}

Check warning on line 152 in pkg/controller/sidecarterminator/sidecar_terminator_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/sidecarterminator/sidecar_terminator_controller.go#L151-L152

Added lines #L151 - L152 were not covered by tests

if err := r.executeKillContainerAction(pod, sidecarNeedToExecuteKillContainer); err != nil {
return reconcile.Result{}, err
}

return reconcile.Result{}, nil
}

// markJobPodTerminated terminate the job pod and skip the state of the sidecar containers
// This method should only be called before the executeKillContainerAction
func (r *ReconcileSidecarTerminator) markJobPodTerminated(pod *corev1.Pod) error {
if pod.Status.Phase == corev1.PodFailed || pod.Status.Phase == corev1.PodSucceeded {
return nil
}

Check warning on line 166 in pkg/controller/sidecarterminator/sidecar_terminator_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/sidecarterminator/sidecar_terminator_controller.go#L165-L166

Added lines #L165 - L166 were not covered by tests

// after the pod is terminated by the sidecar terminator, kubelet will kill the containers that are not in the terminal phase
// 1. sidecar container terminate with non-zero exit code
// 2. sidecar container is not in a terminal phase (still running or waiting)
klog.V(3).Infof("all of the main containers are completed, will terminate the job pod %s/%s", pod.Namespace, pod.Name)
// terminate the pod, ignore the status of the sidecar containers.
// in kubelet,pods are not allowed to transition out of terminal phases.

// patch pod condition
status := corev1.PodStatus{
Conditions: []corev1.PodCondition{
{
Type: SidecarTerminated,
Status: corev1.ConditionTrue,
LastTransitionTime: metav1.Now(),
Message: "Terminated by Sidecar Terminator",
},
},
}

// patch pod phase
if containersSucceeded(pod, getMain(pod)) {
status.Phase = corev1.PodSucceeded
} else {
status.Phase = corev1.PodFailed
}
klog.V(3).Infof("terminate the job pod %s/%s phase=%s", pod.Namespace, pod.Name, status.Phase)

by, _ := json.Marshal(status)
patchCondition := fmt.Sprintf(`{"status":%s}`, string(by))
rcvObject := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name}}

if err := r.Status().Patch(context.TODO(), rcvObject, client.RawPatch(types.StrategicMergePatchType, []byte(patchCondition))); err != nil {
return fmt.Errorf("failed to patch pod status: %v", err)
}

Check warning on line 201 in pkg/controller/sidecarterminator/sidecar_terminator_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/sidecarterminator/sidecar_terminator_controller.go#L200-L201

Added lines #L200 - L201 were not covered by tests

return nil
}

func (r *ReconcileSidecarTerminator) groupSidecars(pod *corev1.Pod) (sets.String, sets.String, error) {
runningOnVK, err := IsPodRunningOnVirtualKubelet(pod, r.Client)
if err != nil {
Expand Down
Loading
Loading