Skip to content

Commit

Permalink
Merge pull request #84123 from smarterclayton/terminating_cause
Browse files Browse the repository at this point in the history
Handle namespace deletion more gracefully in built-in controllers
  • Loading branch information
k8s-ci-robot authored Nov 4, 2019
2 parents 7b6369c + bd92607 commit 6a19261
Show file tree
Hide file tree
Showing 17 changed files with 131 additions and 33 deletions.
5 changes: 4 additions & 1 deletion pkg/controller/controller_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,10 @@ func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodT
}
newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(pod)
if err != nil {
r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
// only send an event if the namespace isn't terminating
if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
}
return err
}
accessor, err := meta.Accessor(object)
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/cronjob/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ go_library(
"//staging/src/k8s.io/api/batch/v1:go_default_library",
"//staging/src/k8s.io/api/batch/v1beta1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
Expand Down
7 changes: 6 additions & 1 deletion pkg/controller/cronjob/cronjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
batchv1 "k8s.io/api/batch/v1"
batchv1beta1 "k8s.io/api/batch/v1beta1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -333,7 +334,11 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo
}
jobResp, err := jc.CreateJob(sj.Namespace, jobReq)
if err != nil {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
// If the namespace is being torn down, we can safely ignore
// this error since all subsequent creations will fail.
if !errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
}
return
}
klog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog)
Expand Down
25 changes: 16 additions & 9 deletions pkg/controller/daemon/daemon_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -951,15 +951,22 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
err := dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate,
ds, metav1.NewControllerRef(ds, controllerKind))

if err != nil && errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
if err != nil {
if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
// If the namespace is being torn down, we can safely ignore
// this error since all subsequent creations will fail.
return
}
if errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
}
}
if err != nil {
klog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/deployment/deployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ func (dc *DeploymentController) processNextWorkItem() bool {
}

func (dc *DeploymentController) handleErr(err error, key interface{}) {
if err == nil {
if err == nil || errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
dc.queue.Forget(key)
return
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/deployment/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ func (dc *DeploymentController) getNewReplicaSet(d *apps.Deployment, rsList, old
klog.V(2).Infof("Found a hash collision for deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount)
}
return nil, err
case errors.HasStatusCause(err, v1.NamespaceTerminatingCause):
// if the namespace is terminating, all subsequent creates will fail and we can safely do nothing
return nil, err
case err != nil:
msg := fmt.Sprintf("Failed to create new replica set %q: %v", newRS.Name, err)
if deploymentutil.HasProgressDeadline(d) {
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/endpoint/endpoints_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,11 @@ func (e *EndpointController) syncService(key string) error {
// 2. policy is misconfigured, in which case no service would function anywhere.
// Given the frequency of 1, we log at a lower level.
klog.V(5).Infof("Forbidden from creating endpoints: %v", err)

// If the namespace is terminating, creates will continue to fail. Simply drop the item.
if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
return nil
}
}

if createEndpoints {
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/endpointslice/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

corev1 "k8s.io/api/core/v1"
discovery "k8s.io/api/discovery/v1alpha1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -150,6 +151,10 @@ func (r *reconciler) finalize(
addTriggerTimeAnnotation(endpointSlice, triggerTime)
_, err := r.client.DiscoveryV1alpha1().EndpointSlices(service.Namespace).Create(endpointSlice)
if err != nil {
// If the namespace is terminating, creates will continue to fail. Simply drop the item.
if errors.HasStatusCause(err, corev1.NamespaceTerminatingCause) {
return nil
}
errs = append(errs, fmt.Errorf("Error creating EndpointSlice for Service %s/%s: %v", service.Namespace, service.Name, err))
}
}
Expand Down
25 changes: 16 additions & 9 deletions pkg/controller/job/job_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,15 +771,22 @@ func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *b
go func() {
defer wait.Done()
err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
if err != nil && errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
if err != nil {
if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
// If the namespace is being torn down, we can safely ignore
// this error since all subsequent creations will fail.
return
}
if errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
}
}
if err != nil {
defer utilruntime.HandleError(err)
Expand Down
25 changes: 16 additions & 9 deletions pkg/controller/replicaset/replica_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -523,15 +523,22 @@ func (rsc *ReplicaSetController) manageReplicas(filteredPods []*v1.Pod, rs *apps
// event spam that those failures would generate.
successfulCreations, err := slowStartBatch(diff, controller.SlowStartInitialBatchSize, func() error {
err := rsc.podControl.CreatePodsWithControllerRef(rs.Namespace, &rs.Spec.Template, rs, metav1.NewControllerRef(rs, rsc.GroupVersionKind))
if err != nil && errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return nil
if err != nil {
if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
// if the namespace is being terminated, we don't have to do
// anything because any creation will fail
return nil
}
if errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return nil
}
}
return err
})
Expand Down
5 changes: 4 additions & 1 deletion pkg/controller/serviceaccount/serviceaccounts_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ func (c *ServiceAccountsController) syncNamespace(key string) error {
sa.Namespace = ns.Name

if _, err := c.client.CoreV1().ServiceAccounts(ns.Name).Create(&sa); err != nil && !apierrs.IsAlreadyExists(err) {
createFailures = append(createFailures, err)
// we can safely ignore terminating namespace errors
if !apierrs.HasStatusCause(err, v1.NamespaceTerminatingCause) {
createFailures = append(createFailures, err)
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/serviceaccount/tokens_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,10 @@ func (e *TokensController) ensureReferencedToken(serviceAccount *v1.ServiceAccou
// Save the secret
createdToken, err := e.client.CoreV1().Secrets(serviceAccount.Namespace).Create(secret)
if err != nil {
// if the namespace is being terminated, create will fail no matter what
if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
return false, err
}
// retriable error
return true, err
}
Expand Down
6 changes: 6 additions & 0 deletions staging/src/k8s.io/api/core/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -4659,6 +4659,12 @@ const (
NamespaceTerminating NamespacePhase = "Terminating"
)

const (
// NamespaceTerminatingCause is returned as a defaults.cause item when a change is
// forbidden due to the namespace being terminated.
NamespaceTerminatingCause metav1.CauseType = "NamespaceTerminating"
)

type NamespaceConditionType string

// These are valid conditions of a namespace.
Expand Down
22 changes: 22 additions & 0 deletions staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,28 @@ func (e *StatusError) DebugError() (string, []interface{}) {
return "server response object: %#v", []interface{}{e.ErrStatus}
}

// HasStatusCause returns true if the provided error has a details cause
// with the provided type name.
func HasStatusCause(err error, name metav1.CauseType) bool {
_, ok := StatusCause(err, name)
return ok
}

// StatusCause returns the named cause from the provided error if it exists and
// the error is of the type APIStatus. Otherwise it returns false.
func StatusCause(err error, name metav1.CauseType) (metav1.StatusCause, bool) {
apierr, ok := err.(APIStatus)
if !ok || apierr == nil || apierr.Status().Details == nil {
return metav1.StatusCause{}, false
}
for _, cause := range apierr.Status().Details.Causes {
if cause.Type == name {
return cause, true
}
}
return metav1.StatusCause{}, false
}

// UnexpectedObjectError can be returned by FromObject if it's passed a non-status object.
type UnexpectedObjectError struct {
Object runtime.Object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ go_test(
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,15 @@ func (l *Lifecycle) Admit(ctx context.Context, a admission.Attributes, o admissi
return nil
}

// TODO: This should probably not be a 403
return admission.NewForbidden(a, fmt.Errorf("unable to create new content in namespace %s because it is being terminated", a.GetNamespace()))
err := admission.NewForbidden(a, fmt.Errorf("unable to create new content in namespace %s because it is being terminated", a.GetNamespace()))
if apierr, ok := err.(*errors.StatusError); ok {
apierr.ErrStatus.Details.Causes = append(apierr.ErrStatus.Details.Causes, metav1.StatusCause{
Type: v1.NamespaceTerminatingCause,
Message: fmt.Sprintf("namespace %s is being terminated", a.GetNamespace()),
Field: "metadata.namespace",
})
}
return err
}

return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ package lifecycle
import (
"context"
"fmt"
"reflect"
"testing"
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/admission"
Expand Down Expand Up @@ -192,6 +195,14 @@ func TestAdmissionNamespaceTerminating(t *testing.T) {
if err == nil {
t.Errorf("Expected error rejecting creates in a namespace when it is terminating")
}
expectedCause := metav1.StatusCause{
Type: v1.NamespaceTerminatingCause,
Message: fmt.Sprintf("namespace %s is being terminated", namespace),
Field: "metadata.namespace",
}
if cause, ok := errors.StatusCause(err, v1.NamespaceTerminatingCause); !ok || !reflect.DeepEqual(expectedCause, cause) {
t.Errorf("Expected status cause indicating the namespace is terminating: %t %s", ok, diff.ObjectReflectDiff(expectedCause, cause))
}

// verify update operations in the namespace can proceed
err = handler.Admit(context.TODO(), admission.NewAttributesRecord(&pod, nil, v1.SchemeGroupVersion.WithKind("Pod").GroupKind().WithVersion("version"), pod.Namespace, pod.Name, v1.Resource("pods").WithVersion("version"), "", admission.Update, &metav1.UpdateOptions{}, false, nil), nil)
Expand Down

0 comments on commit 6a19261

Please sign in to comment.