From 90b04bb285d7b7e18a6c37a9fc9d6c061af78fc4 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Wed, 19 Feb 2025 20:26:50 -0500 Subject: [PATCH 01/12] check restartCounts and idle crashlooping pods --- controllers/idler/idler_controller.go | 105 ++++++++++++++------- controllers/idler/idler_controller_test.go | 41 ++++++-- 2 files changed, 103 insertions(+), 43 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index eb4bdb75..d4d7a82d 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -34,6 +34,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) +const ( + RestartThreshold = 50 + RequeueTimeThreshold = 300 +) + var SupportedScaleResources = map[schema.GroupVersionKind]schema.GroupVersionResource{ schema.GroupVersion{Group: "camel.apache.org", Version: "v1"}.WithKind("Integration"): schema.GroupVersion{Group: "camel.apache.org", Version: "v1"}.WithResource("integrations"), schema.GroupVersion{Group: "camel.apache.org", Version: "v1alpha1"}.WithKind("KameletBinding"): schema.GroupVersion{Group: "camel.apache.org", Version: "v1alpha1"}.WithResource("kameletbindings"), @@ -110,7 +115,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. // Find the earlier pod to kill and requeue. Otherwise, use the idler timeoutSeconds to requeue. nextTime := nextPodToBeKilledAfter(logger, idler) if nextTime == nil { - after := time.Duration(idler.Spec.TimeoutSeconds) * time.Second + after := time.Duration(RequeueTimeThreshold) * time.Second logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) return reconcile.Result{ Requeue: true, @@ -133,9 +138,19 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. newStatusPods := make([]toolchainv1alpha1.Pod, 0, 10) for _, pod := range podList.Items { pod := pod // TODO We won't need it after upgrading to go 1.22: https://go.dev/blog/loopvar-preview - logger := log.FromContext(ctx) - podLogger := logger.WithValues("pod_name", pod.Name, "pod_phase", pod.Status.Phase) + podLogger := log.FromContext(ctx).WithValues("pod_name", pod.Name, "pod_phase", pod.Status.Phase) if trackedPod := findPodByName(idler, pod.Name); trackedPod != nil { + // check the restart count for the trackedPod + crashlooping, restartCount := isRestartingOften(pod.Status) + if crashlooping { + podLogger.Info("Pod is restarting too often. Killing the pod", "restart_count", restartCount) + // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. + err := deletePodsAndCreateNotification(ctx, podLogger, pod, true, r, idler) + if err != nil { + return err + } + continue + } timeoutSeconds := idler.Spec.TimeoutSeconds if isOwnedByVM(pod.ObjectMeta) { // use 1/12th of the timeout for VMs @@ -144,43 +159,11 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Already tracking this pod. Check the timeout. if time.Now().After(trackedPod.StartTime.Add(time.Duration(timeoutSeconds) * time.Second)) { podLogger.Info("Pod running for too long. Killing the pod.", "start_time", trackedPod.StartTime.Format("2006-01-02T15:04:05Z"), "timeout_seconds", timeoutSeconds) - var podreason string - podCondition := pod.Status.Conditions - for _, podCond := range podCondition { - if podCond.Type == "Ready" { - podreason = podCond.Reason - } - } - // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. 
- lctx := log.IntoContext(ctx, podLogger) - appType, appName, deletedByController, err := r.scaleControllerToZero(lctx, pod.ObjectMeta) + err := deletePodsAndCreateNotification(ctx, podLogger, pod, false, r, idler) if err != nil { return err } - if !deletedByController { // Pod not managed by a controller. We can just delete the pod. - logger.Info("Deleting pod without controller") - if err := r.AllNamespacesClient.Delete(ctx, &pod); err != nil { - return err - } - podLogger.Info("Pod deleted") - } - - if appName == "" { - appName = pod.Name - appType = "Pod" - } - // Send notification if the deleted pod was managed by a controller or was a standalone pod that was not completed - // eg. If a build pod is in "PodCompleted" status then it was not running so there's no reason to send an idler notification - if podreason != "PodCompleted" || deletedByController { - // By now either a pod has been deleted or scaled to zero by controller, idler Triggered notification should be sent - if err := r.createNotification(ctx, idler, appName, appType); err != nil { - logger.Error(err, "failed to create Notification") - if err = r.setStatusIdlerNotificationCreationFailed(ctx, idler, err.Error()); err != nil { - logger.Error(err, "failed to set status IdlerNotificationCreationFailed") - } // not returning error to continue tracking remaining pods - } - } } else { newStatusPods = append(newStatusPods, *trackedPod) // keep tracking @@ -198,6 +181,56 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. return r.updateStatusPods(ctx, idler, newStatusPods) } +// Check if the pod belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. +// if it is a standalone pod, delete it. +// Send notification if the deleted pod was managed by a controller, was a standalone pod that was not completed or was crashlooping +func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, pod corev1.Pod, isCrashlooping bool, r *Reconciler, idler *toolchainv1alpha1.Idler) error { + lctx := log.IntoContext(ctx, podLogger) + logger := log.FromContext(ctx) + var podreason string + podCondition := pod.Status.Conditions + for _, podCond := range podCondition { + if podCond.Type == "Ready" { + podreason = podCond.Reason + } + } + appType, appName, deletedByController, err := r.scaleControllerToZero(lctx, pod.ObjectMeta) + if err != nil { + return err + } + if !deletedByController { // Pod not managed by a controller. We can just delete the pod. 
+ log.FromContext(ctx).Info("Deleting pod without controller") + if err := r.AllNamespacesClient.Delete(ctx, &pod); err != nil { + return err + } + podLogger.Info("Pod deleted") + } + if appName == "" { + appName = pod.Name + appType = "Pod" + } + + // If a build pod is in "PodCompleted" status then it was not running so there's no reason to send an idler notification + if podreason != "PodCompleted" || deletedByController || isCrashlooping { + // By now either a pod has been deleted or scaled to zero by controller, idler Triggered notification should be sent + if err := r.createNotification(ctx, idler, appName, appType); err != nil { + logger.Error(err, "failed to create Notification") + if err = r.setStatusIdlerNotificationCreationFailed(ctx, idler, err.Error()); err != nil { + logger.Error(err, "failed to set status IdlerNotificationCreationFailed") + } // not returning error to continue tracking remaining pods + } + } + return nil +} + +func isRestartingOften(podstatus corev1.PodStatus) (bool, int32) { + var restartCount int32 + for _, status := range podstatus.ContainerStatuses { + restartCount += status.RestartCount + } + return restartCount > RestartThreshold, restartCount +} + func (r *Reconciler) createNotification(ctx context.Context, idler *toolchainv1alpha1.Idler, appName string, appType string) error { log.FromContext(ctx).Info("Create Notification") //Get the HostClient diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index e0dc5a19..fbd15578 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -117,10 +117,10 @@ func TestEnsureIdling(t *testing.T) { // then require.NoError(t, err) - // no pods found - the controller will requeue after the idler's timeout + // no pods found - the controller will requeue after 5 mins assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: 30 * time.Second, + RequeueAfter: RequeueTimeThreshold * time.Second, }, res) memberoperatortest.AssertThatIdler(t, idler.Name, cl).HasConditions(memberoperatortest.Running()) }) @@ -143,8 +143,9 @@ func TestEnsureIdling(t *testing.T) { reconciler, req, cl, allCl, dynamicClient := prepareReconcile(t, idler.Name, getHostCluster, idler, nsTmplSet, mur) podsTooEarlyToKill := preparePayloads(t, reconciler, idler.Name, "", freshStartTimes(idler)) - + podCrashLooping := preparePayloadCrashloopingPod(t, reconciler, idler.Name, "restartCount-") podsRunningForTooLong := preparePayloads(t, reconciler, idler.Name, "todelete-", expiredStartTimes(idler)) + noise := preparePayloads(t, reconciler, "another-namespace", "", expiredStartTimes(idler)) t.Run("First reconcile. Start tracking.", func(t *testing.T) { @@ -158,6 +159,7 @@ func TestEnsureIdling(t *testing.T) { PodsExist(podsRunningForTooLong.standalonePods). PodsExist(podsTooEarlyToKill.standalonePods). PodsExist(noise.standalonePods). + PodsExist(podCrashLooping.standalonePods). DaemonSetExists(podsRunningForTooLong.daemonSet). DaemonSetExists(podsTooEarlyToKill.daemonSet). DaemonSetExists(noise.daemonSet). @@ -191,24 +193,26 @@ func TestEnsureIdling(t *testing.T) { // Tracked pods memberoperatortest.AssertThatIdler(t, idler.Name, cl). - TracksPods(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.allPods...)). + TracksPods(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.allPods...), podCrashLooping.allPods...)). 
HasConditions(memberoperatortest.Running()) assert.True(t, res.Requeue) assert.Equal(t, 0, int(res.RequeueAfter)) // pods running for too long should be killed immediately - t.Run("Second Reconcile. Delete long running pods.", func(t *testing.T) { + t.Run("Second Reconcile. Delete long running and crashlooping pods.", func(t *testing.T) { //when res, err := reconciler.Reconcile(context.TODO(), req) // then require.NoError(t, err) // Too long running pods are gone. All long running controllers are scaled down. + // Crashlooping pods are gone. // The rest of the pods are still there and controllers are scaled up. memberoperatortest.AssertThatInIdleableCluster(t, allCl, dynamicClient). PodsDoNotExist(podsRunningForTooLong.standalonePods). PodsExist(podsTooEarlyToKill.standalonePods). PodsExist(noise.standalonePods). + PodsDoNotExist(podCrashLooping.standalonePods). DaemonSetDoesNotExist(podsRunningForTooLong.daemonSet). DaemonSetExists(podsTooEarlyToKill.daemonSet). DaemonSetExists(noise.daemonSet). @@ -284,7 +288,7 @@ func TestEnsureIdling(t *testing.T) { // requeue after the idler timeout assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: 60 * time.Second, + RequeueAfter: RequeueTimeThreshold * time.Second, }, res) }) }) @@ -522,7 +526,7 @@ func TestEnsureIdlingFailed(t *testing.T) { require.NoError(t, err) // 'NotFound' errors are ignored! assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: 60 * time.Second, + RequeueAfter: RequeueTimeThreshold * time.Second, }, res) memberoperatortest.AssertThatIdler(t, idler.Name, cl).ContainsCondition(memberoperatortest.Running()) } @@ -1317,6 +1321,29 @@ func preparePayloadsSinglePod(t *testing.T, r *Reconciler, namespace, namePrefix } } +func preparePayloadCrashloopingPod(t *testing.T, r *Reconciler, namespace, namePrefix string) payloads { + standalonePods := make([]*corev1.Pod, 0, 1) + startTime := metav1.Now() + // Create a standalone pod with no owner which has restart count sum > 50 + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s%s-pod-fail", namePrefix, namespace), + Namespace: namespace, + }, + Status: corev1.PodStatus{StartTime: &startTime, ContainerStatuses: []corev1.ContainerStatus{ + {RestartCount: 30}, + {RestartCount: 24}, + }}, + } + err := r.AllNamespacesClient.Create(context.TODO(), pod) + require.NoError(t, err) + standalonePods = append(standalonePods, pod) + return payloads{ + standalonePods: standalonePods, + allPods: standalonePods, + } +} + func createPods(t *testing.T, r *Reconciler, owner metav1.Object, startTime metav1.Time, podsToTrack []*corev1.Pod, conditions ...corev1.PodCondition) []*corev1.Pod { for i := 0; i < 3; i++ { pod := &corev1.Pod{ From a9b27504aaa54ce3fd7aa66dad69abca3e0bf8f9 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Wed, 19 Feb 2025 21:25:56 -0500 Subject: [PATCH 02/12] rename const to RequeueTimeThresholdSeconds, max requeue time to 5 mins --- controllers/idler/idler_controller.go | 8 ++++---- controllers/idler/idler_controller_test.go | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index d4d7a82d..50ae7643 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -35,8 +35,8 @@ import ( ) const ( - RestartThreshold = 50 - RequeueTimeThreshold = 300 + RestartThreshold = 50 + RequeueTimeThresholdSeconds = 300 ) var SupportedScaleResources = map[schema.GroupVersionKind]schema.GroupVersionResource{ @@ -114,8 
+114,8 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. } // Find the earlier pod to kill and requeue. Otherwise, use the idler timeoutSeconds to requeue. nextTime := nextPodToBeKilledAfter(logger, idler) - if nextTime == nil { - after := time.Duration(RequeueTimeThreshold) * time.Second + after := time.Duration(RequeueTimeThresholdSeconds) * time.Second + if nextTime == nil || *nextTime > after { logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) return reconcile.Result{ Requeue: true, diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index fbd15578..3eb8f207 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -120,7 +120,7 @@ func TestEnsureIdling(t *testing.T) { // no pods found - the controller will requeue after 5 mins assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: RequeueTimeThreshold * time.Second, + RequeueAfter: RequeueTimeThresholdSeconds * time.Second, }, res) memberoperatortest.AssertThatIdler(t, idler.Name, cl).HasConditions(memberoperatortest.Running()) }) @@ -288,7 +288,7 @@ func TestEnsureIdling(t *testing.T) { // requeue after the idler timeout assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: RequeueTimeThreshold * time.Second, + RequeueAfter: RequeueTimeThresholdSeconds * time.Second, }, res) }) }) @@ -526,7 +526,7 @@ func TestEnsureIdlingFailed(t *testing.T) { require.NoError(t, err) // 'NotFound' errors are ignored! assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: RequeueTimeThreshold * time.Second, + RequeueAfter: RequeueTimeThresholdSeconds * time.Second, }, res) memberoperatortest.AssertThatIdler(t, idler.Name, cl).ContainsCondition(memberoperatortest.Running()) } From d40b75bd609843b384e6dd0b6397ca9cda4f15ca Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Wed, 19 Feb 2025 21:52:57 -0500 Subject: [PATCH 03/12] fix condition --- controllers/idler/idler_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index 50ae7643..b634ec69 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -115,7 +115,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. // Find the earlier pod to kill and requeue. Otherwise, use the idler timeoutSeconds to requeue. 
nextTime := nextPodToBeKilledAfter(logger, idler) after := time.Duration(RequeueTimeThresholdSeconds) * time.Second - if nextTime == nil || *nextTime > after { + if nextTime == nil || (nextTime != nil && *nextTime > after) { logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) return reconcile.Result{ Requeue: true, From 576b44c40a3fbd763c7792142a84d8a245b83b23 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Mon, 24 Feb 2025 17:09:21 -0500 Subject: [PATCH 04/12] add more scenarios for crashlooping pods but within threshold --- controllers/idler/idler_controller.go | 4 +- controllers/idler/idler_controller_test.go | 109 ++++++++++++++++----- 2 files changed, 90 insertions(+), 23 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index b634ec69..b06ade45 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -226,7 +226,9 @@ func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, func isRestartingOften(podstatus corev1.PodStatus) (bool, int32) { var restartCount int32 for _, status := range podstatus.ContainerStatuses { - restartCount += status.RestartCount + if restartCount < status.RestartCount { + restartCount = status.RestartCount + } } return restartCount > RestartThreshold, restartCount } diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index 3eb8f207..d4a84dad 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -143,7 +143,8 @@ func TestEnsureIdling(t *testing.T) { reconciler, req, cl, allCl, dynamicClient := prepareReconcile(t, idler.Name, getHostCluster, idler, nsTmplSet, mur) podsTooEarlyToKill := preparePayloads(t, reconciler, idler.Name, "", freshStartTimes(idler)) - podCrashLooping := preparePayloadCrashloopingPod(t, reconciler, idler.Name, "restartCount-") + podsCrashLoopingWithinThreshold := preparePayloadCrashloopingPodsWithinThreshold(t, reconciler, idler.Name, "inThreshRestarts-", freshStartTimes(idler)) + podsCrashLooping := preparePayloadCrashloopingPod(t, reconciler, idler.Name, "restartCount-") podsRunningForTooLong := preparePayloads(t, reconciler, idler.Name, "todelete-", expiredStartTimes(idler)) noise := preparePayloads(t, reconciler, "another-namespace", "", expiredStartTimes(idler)) @@ -159,7 +160,7 @@ func TestEnsureIdling(t *testing.T) { PodsExist(podsRunningForTooLong.standalonePods). PodsExist(podsTooEarlyToKill.standalonePods). PodsExist(noise.standalonePods). - PodsExist(podCrashLooping.standalonePods). + PodsExist(podsCrashLooping.standalonePods). DaemonSetExists(podsRunningForTooLong.daemonSet). DaemonSetExists(podsTooEarlyToKill.daemonSet). DaemonSetExists(noise.daemonSet). @@ -175,6 +176,7 @@ func TestEnsureIdling(t *testing.T) { DeploymentScaledUp(noise.deployment). DeploymentScaledUp(noise.integration). DeploymentScaledUp(noise.kameletBinding). + DeploymentScaledUp(podsCrashLooping.deployment). ReplicaSetScaledUp(podsRunningForTooLong.replicaSet). ReplicaSetScaledUp(podsTooEarlyToKill.replicaSet). ReplicaSetScaledUp(noise.replicaSet). @@ -187,13 +189,15 @@ func TestEnsureIdling(t *testing.T) { StatefulSetScaledUp(podsRunningForTooLong.statefulSet). StatefulSetScaledUp(podsTooEarlyToKill.statefulSet). StatefulSetScaledUp(noise.statefulSet). + StatefulSetScaledUp(podsCrashLoopingWithinThreshold.statefulSet). VMRunning(podsRunningForTooLong.virtualmachine). 
VMRunning(podsTooEarlyToKill.virtualmachine). VMRunning(noise.virtualmachine) + // after golang 1.22 upgrade can use slices.Concat(podsTooEarlyToKill.allPods, podsRunningForTooLong.allPods, podsCrashLooping.allPods, podsCrashLoopingWithinThreshold.allPods) // Tracked pods memberoperatortest.AssertThatIdler(t, idler.Name, cl). - TracksPods(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.allPods...), podCrashLooping.allPods...)). + TracksPods(append(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.allPods...), podsCrashLooping.allPods...), podsCrashLoopingWithinThreshold.allPods...)). HasConditions(memberoperatortest.Running()) assert.True(t, res.Requeue) @@ -212,7 +216,7 @@ func TestEnsureIdling(t *testing.T) { PodsDoNotExist(podsRunningForTooLong.standalonePods). PodsExist(podsTooEarlyToKill.standalonePods). PodsExist(noise.standalonePods). - PodsDoNotExist(podCrashLooping.standalonePods). + PodsDoNotExist(podsCrashLooping.standalonePods). DaemonSetDoesNotExist(podsRunningForTooLong.daemonSet). DaemonSetExists(podsTooEarlyToKill.daemonSet). DaemonSetExists(noise.daemonSet). @@ -222,6 +226,7 @@ func TestEnsureIdling(t *testing.T) { DeploymentScaledDown(podsRunningForTooLong.deployment). DeploymentScaledDown(podsRunningForTooLong.integration). DeploymentScaledDown(podsRunningForTooLong.kameletBinding). + DeploymentScaledDown(podsCrashLooping.deployment). DeploymentScaledUp(podsTooEarlyToKill.deployment). DeploymentScaledUp(podsTooEarlyToKill.integration). DeploymentScaledUp(podsTooEarlyToKill.kameletBinding). @@ -240,13 +245,14 @@ func TestEnsureIdling(t *testing.T) { StatefulSetScaledDown(podsRunningForTooLong.statefulSet). StatefulSetScaledUp(podsTooEarlyToKill.statefulSet). StatefulSetScaledUp(noise.statefulSet). + StatefulSetScaledUp(podsCrashLoopingWithinThreshold.statefulSet). VMStopped(podsRunningForTooLong.virtualmachine). VMRunning(podsTooEarlyToKill.virtualmachine). VMRunning(noise.virtualmachine) - // Still tracking all pods. Even deleted ones. + // Only tracks pods that have not been processed in this reconcile memberoperatortest.AssertThatIdler(t, idler.Name, cl). - TracksPods(podsTooEarlyToKill.allPods). + TracksPods(append(podsTooEarlyToKill.allPods, podsCrashLoopingWithinThreshold.allPods...)). HasConditions(memberoperatortest.Running(), memberoperatortest.IdlerNotificationCreated()) assert.True(t, res.Requeue) @@ -260,7 +266,7 @@ func TestEnsureIdling(t *testing.T) { require.NoError(t, err) // Tracking existing pods only. memberoperatortest.AssertThatIdler(t, idler.Name, cl). - TracksPods(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.controlledPods...)). + TracksPods(append(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.controlledPods...), podsCrashLoopingWithinThreshold.allPods...), podsCrashLooping.controlledPods...)). HasConditions(memberoperatortest.Running(), memberoperatortest.IdlerNotificationCreated()) assert.True(t, res.Requeue) @@ -269,7 +275,7 @@ func TestEnsureIdling(t *testing.T) { t.Run("No pods. No requeue.", func(t *testing.T) { //given // cleanup remaining pods - pods := append(podsTooEarlyToKill.allPods, podsRunningForTooLong.controlledPods...) + pods := append(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.controlledPods...), podsCrashLoopingWithinThreshold.allPods...), podsCrashLooping.controlledPods...) 
for _, pod := range pods { err := allCl.Delete(context.TODO(), pod) require.NoError(t, err) @@ -1122,7 +1128,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, require.NoError(t, err) err = r.AllNamespacesClient.Create(context.TODO(), rs) require.NoError(t, err) - controlledPods := createPods(t, r, rs, sTime, make([]*corev1.Pod, 0, 3), conditions...) + controlledPods := createPods(t, r, rs, sTime, make([]*corev1.Pod, 0, 3), false, conditions...) // Deployment with Camel K integration as an owner reference and a scale sub resource integration := &appsv1.Deployment{ @@ -1149,7 +1155,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, require.NoError(t, err) err = r.AllNamespacesClient.Create(context.TODO(), integrationRS) require.NoError(t, err) - controlledPods = createPods(t, r, integrationRS, sTime, controlledPods) + controlledPods = createPods(t, r, integrationRS, sTime, controlledPods, false) // Deployment with Camel K integration as an owner reference and a scale sub resource binding := &appsv1.Deployment{ @@ -1176,7 +1182,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, require.NoError(t, err) err = r.AllNamespacesClient.Create(context.TODO(), bindingRS) require.NoError(t, err) - controlledPods = createPods(t, r, bindingRS, sTime, controlledPods) + controlledPods = createPods(t, r, bindingRS, sTime, controlledPods, false) // Standalone ReplicaSet standaloneRs := &appsv1.ReplicaSet{ @@ -1185,7 +1191,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, } err = r.AllNamespacesClient.Create(context.TODO(), standaloneRs) require.NoError(t, err) - controlledPods = createPods(t, r, standaloneRs, sTime, controlledPods) + controlledPods = createPods(t, r, standaloneRs, sTime, controlledPods, false) // DaemonSet ds := &appsv1.DaemonSet{ @@ -1193,7 +1199,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, } err = r.AllNamespacesClient.Create(context.TODO(), ds) require.NoError(t, err) - controlledPods = createPods(t, r, ds, sTime, controlledPods) + controlledPods = createPods(t, r, ds, sTime, controlledPods, false) // Job job := &batchv1.Job{ @@ -1201,7 +1207,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, } err = r.AllNamespacesClient.Create(context.TODO(), job) require.NoError(t, err) - controlledPods = createPods(t, r, job, sTime, controlledPods) + controlledPods = createPods(t, r, job, sTime, controlledPods, false) // StatefulSet sts := &appsv1.StatefulSet{ @@ -1210,7 +1216,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, } err = r.AllNamespacesClient.Create(context.TODO(), sts) require.NoError(t, err) - controlledPods = createPods(t, r, sts, sTime, controlledPods) + controlledPods = createPods(t, r, sts, sTime, controlledPods, false) // DeploymentConfig dc := &openshiftappsv1.DeploymentConfig{ @@ -1227,7 +1233,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, require.NoError(t, err) err = r.AllNamespacesClient.Create(context.TODO(), rc) require.NoError(t, err) - controlledPods = createPods(t, r, rc, sTime, controlledPods) + controlledPods = createPods(t, r, rc, sTime, controlledPods, false) // VirtualMachine vm := &unstructured.Unstructured{} @@ -1249,7 +1255,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, require.NoError(t, err) _, err = 
r.DynamicClient.Resource(vmInstanceGVR).Namespace(namespace).Create(context.TODO(), vmi, metav1.CreateOptions{}) require.NoError(t, err) - controlledPods = createPods(t, r, vmi, vmstartTime, controlledPods) // vmi controls pod + controlledPods = createPods(t, r, vmi, vmstartTime, controlledPods, false) // vmi controls pod // Standalone ReplicationController standaloneRC := &corev1.ReplicationController{ @@ -1258,7 +1264,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, } err = r.AllNamespacesClient.Create(context.TODO(), standaloneRC) require.NoError(t, err) - controlledPods = createPods(t, r, standaloneRC, sTime, controlledPods) + controlledPods = createPods(t, r, standaloneRC, sTime, controlledPods, false) // Pods with unknown owner. They are subject of direct management by the Idler. // It doesn't have to be Idler. We just need any object as the owner of the pods @@ -1270,7 +1276,7 @@ func preparePayloads(t *testing.T, r *Reconciler, namespace, namePrefix string, }, Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: 30}, } - standalonePods := createPods(t, r, idler, sTime, make([]*corev1.Pod, 0, 3)) + standalonePods := createPods(t, r, idler, sTime, make([]*corev1.Pod, 0, 3), false) // Pods with no owner. for i := 0; i < 3; i++ { @@ -1324,6 +1330,7 @@ func preparePayloadsSinglePod(t *testing.T, r *Reconciler, namespace, namePrefix func preparePayloadCrashloopingPod(t *testing.T, r *Reconciler, namespace, namePrefix string) payloads { standalonePods := make([]*corev1.Pod, 0, 1) startTime := metav1.Now() + replicas := int32(3) // Create a standalone pod with no owner which has restart count sum > 50 pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -1331,25 +1338,83 @@ func preparePayloadCrashloopingPod(t *testing.T, r *Reconciler, namespace, nameP Namespace: namespace, }, Status: corev1.PodStatus{StartTime: &startTime, ContainerStatuses: []corev1.ContainerStatus{ - {RestartCount: 30}, + {RestartCount: 52}, {RestartCount: 24}, }}, } err := r.AllNamespacesClient.Create(context.TODO(), pod) require.NoError(t, err) standalonePods = append(standalonePods, pod) + // Deployment + d := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s%s-deployment", namePrefix, namespace), Namespace: namespace}, + Spec: appsv1.DeploymentSpec{Replicas: &replicas}, + } + err = r.AllNamespacesClient.Create(context.TODO(), d) + require.NoError(t, err) + rs := &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-replicaset", d.Name), Namespace: namespace}, + Spec: appsv1.ReplicaSetSpec{Replicas: &replicas}, + } + err = controllerutil.SetControllerReference(d, rs, r.Scheme) + require.NoError(t, err) + err = r.AllNamespacesClient.Create(context.TODO(), rs) + require.NoError(t, err) + controlledPods := createPods(t, r, rs, startTime, make([]*corev1.Pod, 0, 3), true) + + allPods := append(standalonePods, controlledPods...) 
return payloads{ standalonePods: standalonePods, - allPods: standalonePods, + allPods: allPods, + controlledPods: controlledPods, + deployment: d, } } -func createPods(t *testing.T, r *Reconciler, owner metav1.Object, startTime metav1.Time, podsToTrack []*corev1.Pod, conditions ...corev1.PodCondition) []*corev1.Pod { +func preparePayloadCrashloopingPodsWithinThreshold(t *testing.T, r *Reconciler, namespace, namePrefix string, times payloadStartTimes) payloads { + startTime := metav1.NewTime(times.defaultStartTime) + replicas := int32(3) + controlledPods := make([]*corev1.Pod, 0, 3) + // Create a StatefulSet with Crashlooping pods less than threshold + sts := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s%s-statefulset", namePrefix, namespace), Namespace: namespace}, + Spec: appsv1.StatefulSetSpec{Replicas: &replicas}, + } + err := r.AllNamespacesClient.Create(context.TODO(), sts) + require.NoError(t, err) + for i := 0; i < int(replicas); i++ { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod-%d", sts.Name, i), Namespace: sts.Namespace}, + Status: corev1.PodStatus{StartTime: &startTime, ContainerStatuses: []corev1.ContainerStatus{ + {RestartCount: 30}, + {RestartCount: 24}, + }}, + } + err := controllerutil.SetControllerReference(sts, pod, r.Scheme) + require.NoError(t, err) + controlledPods = append(controlledPods, pod) + err = r.AllNamespacesClient.Create(context.TODO(), pod) + require.NoError(t, err) + } + return payloads{ + controlledPods: controlledPods, + statefulSet: sts, + allPods: controlledPods, + } +} + +func createPods(t *testing.T, r *Reconciler, owner metav1.Object, startTime metav1.Time, podsToTrack []*corev1.Pod, isCrashlooping bool, conditions ...corev1.PodCondition) []*corev1.Pod { for i := 0; i < 3; i++ { pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod-%d", owner.GetName(), i), Namespace: owner.GetNamespace()}, Status: corev1.PodStatus{StartTime: &startTime, Conditions: conditions}, } + if isCrashlooping { + pod.Status = corev1.PodStatus{StartTime: &startTime, Conditions: conditions, ContainerStatuses: []corev1.ContainerStatus{ + {RestartCount: 52}, + {RestartCount: 24}, + }} + } err := controllerutil.SetControllerReference(owner, pod, r.Scheme) require.NoError(t, err) podsToTrack = append(podsToTrack, pod) From c2300653882c00715be041bc59c395a5db195305 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Mon, 24 Feb 2025 17:10:29 -0500 Subject: [PATCH 05/12] revert condition fix --- controllers/idler/idler_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index b06ade45..dda376c9 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -115,7 +115,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. // Find the earlier pod to kill and requeue. Otherwise, use the idler timeoutSeconds to requeue. 
nextTime := nextPodToBeKilledAfter(logger, idler) after := time.Duration(RequeueTimeThresholdSeconds) * time.Second - if nextTime == nil || (nextTime != nil && *nextTime > after) { + if nextTime == nil || *nextTime > after { logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) return reconcile.Result{ Requeue: true, From 82f8118a99f81e4c34d39486a26b46c419193af3 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Mon, 24 Feb 2025 17:52:56 -0500 Subject: [PATCH 06/12] fix comment, add comment when tracking controlled pods again, use constant for tests --- controllers/idler/idler_controller.go | 7 +++++-- controllers/idler/idler_controller_test.go | 12 ++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index dda376c9..2bca04fd 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -35,8 +35,11 @@ import ( ) const ( - RestartThreshold = 50 - RequeueTimeThresholdSeconds = 300 + RestartThreshold = 50 + RequeueTimeThresholdSeconds = 300 + RestartCountWithinThresholdContainer1 = 30 + RestartCountWithinThresholdContainer2 = 24 + RestartCountOverThreshold = 52 ) var SupportedScaleResources = map[schema.GroupVersionKind]schema.GroupVersionResource{ diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index d4a84dad..b0b8c0f6 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -250,7 +250,7 @@ func TestEnsureIdling(t *testing.T) { VMRunning(podsTooEarlyToKill.virtualmachine). VMRunning(noise.virtualmachine) - // Only tracks pods that have not been processed in this reconcile + // Only tracks pods that have not been deleted memberoperatortest.AssertThatIdler(t, idler.Name, cl). TracksPods(append(podsTooEarlyToKill.allPods, podsCrashLoopingWithinThreshold.allPods...)). HasConditions(memberoperatortest.Running(), memberoperatortest.IdlerNotificationCreated()) @@ -266,7 +266,7 @@ func TestEnsureIdling(t *testing.T) { require.NoError(t, err) // Tracking existing pods only. memberoperatortest.AssertThatIdler(t, idler.Name, cl). - TracksPods(append(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.controlledPods...), podsCrashLoopingWithinThreshold.allPods...), podsCrashLooping.controlledPods...)). + TracksPods(append(append(append(podsTooEarlyToKill.allPods, podsRunningForTooLong.controlledPods...), podsCrashLoopingWithinThreshold.allPods...), podsCrashLooping.controlledPods...)). 
// controlledPods are being tracked again because in unit tests scaling down doesn't delete pods HasConditions(memberoperatortest.Running(), memberoperatortest.IdlerNotificationCreated()) assert.True(t, res.Requeue) @@ -1338,8 +1338,8 @@ func preparePayloadCrashloopingPod(t *testing.T, r *Reconciler, namespace, nameP Namespace: namespace, }, Status: corev1.PodStatus{StartTime: &startTime, ContainerStatuses: []corev1.ContainerStatus{ - {RestartCount: 52}, - {RestartCount: 24}, + {RestartCount: RestartCountOverThreshold}, + {RestartCount: RestartCountWithinThresholdContainer2}, }}, } err := r.AllNamespacesClient.Create(context.TODO(), pod) @@ -1386,8 +1386,8 @@ func preparePayloadCrashloopingPodsWithinThreshold(t *testing.T, r *Reconciler, pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod-%d", sts.Name, i), Namespace: sts.Namespace}, Status: corev1.PodStatus{StartTime: &startTime, ContainerStatuses: []corev1.ContainerStatus{ - {RestartCount: 30}, - {RestartCount: 24}, + {RestartCount: RestartCountWithinThresholdContainer1}, + {RestartCount: RestartCountWithinThresholdContainer2}, }}, } err := controllerutil.SetControllerReference(sts, pod, r.Scheme) From b38555a279fa6b54b980b2ab98707159af6690b2 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Mon, 24 Feb 2025 18:39:56 -0500 Subject: [PATCH 07/12] move test's consts in test file --- controllers/idler/idler_controller.go | 7 ++----- controllers/idler/idler_controller_test.go | 6 ++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index 2bca04fd..dda376c9 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -35,11 +35,8 @@ import ( ) const ( - RestartThreshold = 50 - RequeueTimeThresholdSeconds = 300 - RestartCountWithinThresholdContainer1 = 30 - RestartCountWithinThresholdContainer2 = 24 - RestartCountOverThreshold = 52 + RestartThreshold = 50 + RequeueTimeThresholdSeconds = 300 ) var SupportedScaleResources = map[schema.GroupVersionKind]schema.GroupVersionResource{ diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index b0b8c0f6..fbd9a1d7 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -41,6 +41,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) +const ( + RestartCountWithinThresholdContainer1 = 30 + RestartCountWithinThresholdContainer2 = 24 + RestartCountOverThreshold = 52 +) + func TestReconcile(t *testing.T) { t.Run("No Idler resource found", func(t *testing.T) { From a147e636496395427a349188b8cd8a91bfc020ea Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Mon, 24 Feb 2025 19:22:35 -0500 Subject: [PATCH 08/12] add rajiv's suggestions from review --- controllers/idler/idler_controller.go | 10 +++++----- controllers/idler/idler_controller_test.go | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index dda376c9..5beba12b 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -112,7 +112,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. return reconcile.Result{}, r.wrapErrorWithStatusUpdate(ctx, idler, r.setStatusFailed, err, "failed to ensure idling '%s'", idler.Name) } - // Find the earlier pod to kill and requeue. Otherwise, use the idler timeoutSeconds to requeue. 
+ // Find the earlier pod to kill and requeue. Otherwise, use the RequeueTimeThresholdSeconds to requeue. nextTime := nextPodToBeKilledAfter(logger, idler) after := time.Duration(RequeueTimeThresholdSeconds) * time.Second if nextTime == nil || *nextTime > after { @@ -145,7 +145,7 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. if crashlooping { podLogger.Info("Pod is restarting too often. Killing the pod", "restart_count", restartCount) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, podLogger, pod, true, r, idler) + err := deletePodsAndCreateNotification(ctx, podLogger, pod, r, idler) if err != nil { return err } @@ -160,7 +160,7 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. if time.Now().After(trackedPod.StartTime.Add(time.Duration(timeoutSeconds) * time.Second)) { podLogger.Info("Pod running for too long. Killing the pod.", "start_time", trackedPod.StartTime.Format("2006-01-02T15:04:05Z"), "timeout_seconds", timeoutSeconds) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, podLogger, pod, false, r, idler) + err := deletePodsAndCreateNotification(ctx, podLogger, pod, r, idler) if err != nil { return err } @@ -184,7 +184,7 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Check if the pod belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. // if it is a standalone pod, delete it. // Send notification if the deleted pod was managed by a controller, was a standalone pod that was not completed or was crashlooping -func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, pod corev1.Pod, isCrashlooping bool, r *Reconciler, idler *toolchainv1alpha1.Idler) error { +func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { lctx := log.IntoContext(ctx, podLogger) logger := log.FromContext(ctx) var podreason string @@ -211,7 +211,7 @@ func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, } // If a build pod is in "PodCompleted" status then it was not running so there's no reason to send an idler notification - if podreason != "PodCompleted" || deletedByController || isCrashlooping { + if podreason != "PodCompleted" || deletedByController { // By now either a pod has been deleted or scaled to zero by controller, idler Triggered notification should be sent if err := r.createNotification(ctx, idler, appName, appType); err != nil { logger.Error(err, "failed to create Notification") diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index fbd9a1d7..5fb2342e 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -150,7 +150,7 @@ func TestEnsureIdling(t *testing.T) { podsTooEarlyToKill := preparePayloads(t, reconciler, idler.Name, "", freshStartTimes(idler)) podsCrashLoopingWithinThreshold := preparePayloadCrashloopingPodsWithinThreshold(t, reconciler, idler.Name, "inThreshRestarts-", freshStartTimes(idler)) - podsCrashLooping := preparePayloadCrashloopingPod(t, reconciler, idler.Name, "restartCount-") + podsCrashLooping := preparePayloadCrashloopingAboveThreshold(t, reconciler, idler.Name, "restartCount-") 
podsRunningForTooLong := preparePayloads(t, reconciler, idler.Name, "todelete-", expiredStartTimes(idler)) noise := preparePayloads(t, reconciler, "another-namespace", "", expiredStartTimes(idler)) @@ -1333,11 +1333,11 @@ func preparePayloadsSinglePod(t *testing.T, r *Reconciler, namespace, namePrefix } } -func preparePayloadCrashloopingPod(t *testing.T, r *Reconciler, namespace, namePrefix string) payloads { +func preparePayloadCrashloopingAboveThreshold(t *testing.T, r *Reconciler, namespace, namePrefix string) payloads { standalonePods := make([]*corev1.Pod, 0, 1) startTime := metav1.Now() replicas := int32(3) - // Create a standalone pod with no owner which has restart count sum > 50 + // Create a standalone pod with no owner which has at least one container with restart count > 50 pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf("%s%s-pod-fail", namePrefix, namespace), From 9164efed1c4569ec9780def2f3ef716fc2da1abe Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Tue, 25 Feb 2025 16:17:10 -0500 Subject: [PATCH 09/12] matous's suggestions --- controllers/idler/idler_controller.go | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index 5beba12b..60b6dcf6 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -141,11 +141,12 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. podLogger := log.FromContext(ctx).WithValues("pod_name", pod.Name, "pod_phase", pod.Status.Phase) if trackedPod := findPodByName(idler, pod.Name); trackedPod != nil { // check the restart count for the trackedPod - crashlooping, restartCount := isRestartingOften(pod.Status) - if crashlooping { + restartCount := getHighestRestartCount(pod.Status) + if restartCount > RestartThreshold { podLogger.Info("Pod is restarting too often. Killing the pod", "restart_count", restartCount) + lctx := log.IntoContext(ctx, podLogger) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, podLogger, pod, r, idler) + err := deletePodsAndCreateNotification(ctx, lctx, pod, r, idler) if err != nil { return err } @@ -159,8 +160,9 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Already tracking this pod. Check the timeout. if time.Now().After(trackedPod.StartTime.Add(time.Duration(timeoutSeconds) * time.Second)) { podLogger.Info("Pod running for too long. Killing the pod.", "start_time", trackedPod.StartTime.Format("2006-01-02T15:04:05Z"), "timeout_seconds", timeoutSeconds) + lctx := log.IntoContext(ctx, podLogger) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, podLogger, pod, r, idler) + err := deletePodsAndCreateNotification(ctx, lctx, pod, r, idler) if err != nil { return err } @@ -184,14 +186,13 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Check if the pod belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. // if it is a standalone pod, delete it. 
// Send notification if the deleted pod was managed by a controller, was a standalone pod that was not completed or was crashlooping -func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { - lctx := log.IntoContext(ctx, podLogger) +func deletePodsAndCreateNotification(ctx context.Context, lctx context.Context, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { logger := log.FromContext(ctx) - var podreason string + var podReason string podCondition := pod.Status.Conditions for _, podCond := range podCondition { if podCond.Type == "Ready" { - podreason = podCond.Reason + podReason = podCond.Reason } } appType, appName, deletedByController, err := r.scaleControllerToZero(lctx, pod.ObjectMeta) @@ -199,11 +200,11 @@ func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, return err } if !deletedByController { // Pod not managed by a controller. We can just delete the pod. - log.FromContext(ctx).Info("Deleting pod without controller") + logger.Info("Deleting pod without controller") if err := r.AllNamespacesClient.Delete(ctx, &pod); err != nil { return err } - podLogger.Info("Pod deleted") + logger.Info("Pod deleted") } if appName == "" { appName = pod.Name @@ -211,7 +212,7 @@ func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, } // If a build pod is in "PodCompleted" status then it was not running so there's no reason to send an idler notification - if podreason != "PodCompleted" || deletedByController { + if podReason != "PodCompleted" || deletedByController { // By now either a pod has been deleted or scaled to zero by controller, idler Triggered notification should be sent if err := r.createNotification(ctx, idler, appName, appType); err != nil { logger.Error(err, "failed to create Notification") @@ -223,14 +224,14 @@ func deletePodsAndCreateNotification(ctx context.Context, podLogger logr.Logger, return nil } -func isRestartingOften(podstatus corev1.PodStatus) (bool, int32) { +func getHighestRestartCount(podstatus corev1.PodStatus) int32 { var restartCount int32 for _, status := range podstatus.ContainerStatuses { if restartCount < status.RestartCount { restartCount = status.RestartCount } } - return restartCount > RestartThreshold, restartCount + return restartCount } func (r *Reconciler) createNotification(ctx context.Context, idler *toolchainv1alpha1.Idler, appName string, appType string) error { From a75d1097a8d0725376b672d66aece6c3f0eb7e5c Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Wed, 26 Feb 2025 00:09:12 -0500 Subject: [PATCH 10/12] change requeuetime to be the shortest, to fix e2e test failure --- controllers/idler/idler_controller.go | 33 ++++++++++++++-------- controllers/idler/idler_controller_test.go | 9 +++--- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index 60b6dcf6..c47255ae 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -112,20 +112,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. return reconcile.Result{}, r.wrapErrorWithStatusUpdate(ctx, idler, r.setStatusFailed, err, "failed to ensure idling '%s'", idler.Name) } - // Find the earlier pod to kill and requeue. Otherwise, use the RequeueTimeThresholdSeconds to requeue. 
- nextTime := nextPodToBeKilledAfter(logger, idler) - after := time.Duration(RequeueTimeThresholdSeconds) * time.Second - if nextTime == nil || *nextTime > after { - logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) - return reconcile.Result{ - Requeue: true, - RequeueAfter: after, - }, r.setStatusReady(ctx, idler) - } - logger.Info("requeueing for next pod to kill", "after_seconds", nextTime.Seconds()) + // Requeue in idler.Spec.TimeoutSeconds or RequeueTimeThresholdSeconds, whichever is sooner. + nextPodToKillAfter := nextPodToBeKilledAfter(logger, idler) + maxRequeDuration := time.Duration(RequeueTimeThresholdSeconds) * time.Second + idlerTimeoutDuration := time.Duration(idler.Spec.TimeoutSeconds) * time.Second + after := findShortestDuration(nextPodToKillAfter, &maxRequeDuration, &idlerTimeoutDuration) + + logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) return reconcile.Result{ Requeue: true, - RequeueAfter: *nextTime, + RequeueAfter: *after, }, r.setStatusReady(ctx, idler) } @@ -635,6 +631,19 @@ func nextPodToBeKilledAfter(log logr.Logger, idler *toolchainv1alpha1.Idler) *ti return &d } +// findShortestDuration finds the shortest duration the given durations +func findShortestDuration(durations ...*time.Duration) *time.Duration { + var shortest *time.Duration + for _, d := range durations { + if d != nil { + if shortest == nil || *d < *shortest { + shortest = d + } + } + } + return shortest +} + // updateStatusPods updates the status pods to the new ones but only if something changed. Order is ignored. func (r *Reconciler) updateStatusPods(ctx context.Context, idler *toolchainv1alpha1.Idler, newPods []toolchainv1alpha1.Pod) error { nothingChanged := len(idler.Status.Pods) == len(newPods) diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index 5fb2342e..92c4142d 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -45,6 +45,7 @@ const ( RestartCountWithinThresholdContainer1 = 30 RestartCountWithinThresholdContainer2 = 24 RestartCountOverThreshold = 52 + TestIdlerTimeOutSeconds = 540 ) func TestReconcile(t *testing.T) { @@ -112,7 +113,7 @@ func TestEnsureIdling(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "john-dev", }, - Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: 30}, + Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: TestIdlerTimeOutSeconds}, } reconciler, req, cl, _, _ := prepareReconcile(t, idler.Name, getHostCluster, idler) @@ -140,7 +141,7 @@ func TestEnsureIdling(t *testing.T) { toolchainv1alpha1.SpaceLabelKey: "alex", }, }, - Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: 60}, + Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: TestIdlerTimeOutSeconds}, } namespaces := []string{"dev", "stage"} usernames := []string{"alex"} @@ -317,7 +318,7 @@ func TestEnsureIdling(t *testing.T) { toolchainv1alpha1.SpaceLabelKey: "alex", }, }, - Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: 60}, + Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: TestIdlerTimeOutSeconds}, } namespaces := []string{"dev", "stage"} usernames := []string{"alex"} @@ -442,7 +443,7 @@ func TestEnsureIdlingFailed(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "alex-stage", }, - Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: 60}, + Spec: toolchainv1alpha1.IdlerSpec{TimeoutSeconds: TestIdlerTimeOutSeconds}, } vm := &unstructured.Unstructured{} From bfc3cdab10e2b06e72dbf287053a7f7188fad488 Mon Sep 17 00:00:00 2001 From: 
Kanika Rana Date: Wed, 26 Feb 2025 16:38:44 -0500 Subject: [PATCH 11/12] matous's suggestions second review --- controllers/idler/idler_controller.go | 41 +++++++++++----------- controllers/idler/idler_controller_test.go | 6 ++-- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index c47255ae..33685b88 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -18,7 +18,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" - "github.com/go-logr/logr" openshiftappsv1 "github.com/openshift/api/apps/v1" errs "github.com/pkg/errors" "github.com/redhat-cop/operator-utils/pkg/util" @@ -35,8 +34,8 @@ import ( ) const ( - RestartThreshold = 50 - RequeueTimeThresholdSeconds = 300 + RestartThreshold = 50 + RequeueTimeThreshold = 300 * time.Second ) var SupportedScaleResources = map[schema.GroupVersionKind]schema.GroupVersionResource{ @@ -112,16 +111,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. return reconcile.Result{}, r.wrapErrorWithStatusUpdate(ctx, idler, r.setStatusFailed, err, "failed to ensure idling '%s'", idler.Name) } - // Requeue in idler.Spec.TimeoutSeconds or RequeueTimeThresholdSeconds, whichever is sooner. - nextPodToKillAfter := nextPodToBeKilledAfter(logger, idler) - maxRequeDuration := time.Duration(RequeueTimeThresholdSeconds) * time.Second - idlerTimeoutDuration := time.Duration(idler.Spec.TimeoutSeconds) * time.Second - after := findShortestDuration(nextPodToKillAfter, &maxRequeDuration, &idlerTimeoutDuration) - + // Requeue in shortest of the following values idler.Spec.TimeoutSeconds or RequeueTimeThreshold or nextPodToBeKilledAfter + after := findShortestRequeueDuration(idler) logger.Info("requeueing for next pod to check", "after_seconds", after.Seconds()) return reconcile.Result{ Requeue: true, - RequeueAfter: *after, + RequeueAfter: after, }, r.setStatusReady(ctx, idler) } @@ -135,14 +130,14 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. for _, pod := range podList.Items { pod := pod // TODO We won't need it after upgrading to go 1.22: https://go.dev/blog/loopvar-preview podLogger := log.FromContext(ctx).WithValues("pod_name", pod.Name, "pod_phase", pod.Status.Phase) + podCtx := log.IntoContext(ctx, podLogger) if trackedPod := findPodByName(idler, pod.Name); trackedPod != nil { // check the restart count for the trackedPod restartCount := getHighestRestartCount(pod.Status) if restartCount > RestartThreshold { podLogger.Info("Pod is restarting too often. Killing the pod", "restart_count", restartCount) - lctx := log.IntoContext(ctx, podLogger) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, lctx, pod, r, idler) + err := deletePodsAndCreateNotification(ctx, podCtx, pod, r, idler) if err != nil { return err } @@ -156,9 +151,8 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Already tracking this pod. Check the timeout. if time.Now().After(trackedPod.StartTime.Add(time.Duration(timeoutSeconds) * time.Second)) { podLogger.Info("Pod running for too long. 
Killing the pod.", "start_time", trackedPod.StartTime.Format("2006-01-02T15:04:05Z"), "timeout_seconds", timeoutSeconds) - lctx := log.IntoContext(ctx, podLogger) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, lctx, pod, r, idler) + err := deletePodsAndCreateNotification(ctx, podCtx, pod, r, idler) if err != nil { return err } @@ -182,7 +176,7 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Check if the pod belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. // if it is a standalone pod, delete it. // Send notification if the deleted pod was managed by a controller, was a standalone pod that was not completed or was crashlooping -func deletePodsAndCreateNotification(ctx context.Context, lctx context.Context, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { +func deletePodsAndCreateNotification(ctx context.Context, podCtx context.Context, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { logger := log.FromContext(ctx) var podReason string podCondition := pod.Status.Conditions @@ -191,7 +185,7 @@ func deletePodsAndCreateNotification(ctx context.Context, lctx context.Context, podReason = podCond.Reason } } - appType, appName, deletedByController, err := r.scaleControllerToZero(lctx, pod.ObjectMeta) + appType, appName, deletedByController, err := r.scaleControllerToZero(podCtx, pod.ObjectMeta) if err != nil { return err } @@ -611,7 +605,7 @@ func findPodByName(idler *toolchainv1alpha1.Idler, name string) *toolchainv1alph // nextPodToBeKilledAfter checks the start times of all the tracked pods in the Idler and the timeout left // for the next pod to be killed. // If there is no pod to kill, the func returns `nil` -func nextPodToBeKilledAfter(log logr.Logger, idler *toolchainv1alpha1.Idler) *time.Duration { +func nextPodToBeKilledAfter(idler *toolchainv1alpha1.Idler) *time.Duration { if len(idler.Status.Pods) == 0 { // no pod tracked, so nothing to kill return nil @@ -627,12 +621,17 @@ func nextPodToBeKilledAfter(log logr.Logger, idler *toolchainv1alpha1.Idler) *ti if d < 0 { d = 0 } - log.Info("next pod to kill", "after", d) return &d } -// findShortestDuration finds the shortest duration the given durations -func findShortestDuration(durations ...*time.Duration) *time.Duration { +// findShortestRequeueDuration finds the shortest duration the given durations +// returns the shortest duration to requeue after for idler +func findShortestRequeueDuration(idler *toolchainv1alpha1.Idler) time.Duration { + durations := make([]*time.Duration, 0, 3) + nextPodToKillAfter := nextPodToBeKilledAfter(idler) + maxRequeueDuration := RequeueTimeThreshold + idlerTimeoutDuration := time.Duration(idler.Spec.TimeoutSeconds) * time.Second + durations = append(durations, nextPodToKillAfter, &maxRequeueDuration, &idlerTimeoutDuration) var shortest *time.Duration for _, d := range durations { if d != nil { @@ -641,7 +640,7 @@ func findShortestDuration(durations ...*time.Duration) *time.Duration { } } } - return shortest + return *shortest } // updateStatusPods updates the status pods to the new ones but only if something changed. Order is ignored. 
diff --git a/controllers/idler/idler_controller_test.go b/controllers/idler/idler_controller_test.go index 92c4142d..1072985d 100644 --- a/controllers/idler/idler_controller_test.go +++ b/controllers/idler/idler_controller_test.go @@ -127,7 +127,7 @@ func TestEnsureIdling(t *testing.T) { // no pods found - the controller will requeue after 5 mins assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: RequeueTimeThresholdSeconds * time.Second, + RequeueAfter: RequeueTimeThreshold, }, res) memberoperatortest.AssertThatIdler(t, idler.Name, cl).HasConditions(memberoperatortest.Running()) }) @@ -301,7 +301,7 @@ func TestEnsureIdling(t *testing.T) { // requeue after the idler timeout assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: RequeueTimeThresholdSeconds * time.Second, + RequeueAfter: RequeueTimeThreshold, }, res) }) }) @@ -539,7 +539,7 @@ func TestEnsureIdlingFailed(t *testing.T) { require.NoError(t, err) // 'NotFound' errors are ignored! assert.Equal(t, reconcile.Result{ Requeue: true, - RequeueAfter: RequeueTimeThresholdSeconds * time.Second, + RequeueAfter: RequeueTimeThreshold, }, res) memberoperatortest.AssertThatIdler(t, idler.Name, cl).ContainsCondition(memberoperatortest.Running()) } From 96d7089a2aa3a139721e7bd1c3e4789a559f5b95 Mon Sep 17 00:00:00 2001 From: Kanika Rana Date: Thu, 27 Feb 2025 10:23:02 -0500 Subject: [PATCH 12/12] only use podCtx in deletePodsAndCreateNotification --- controllers/idler/idler_controller.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/controllers/idler/idler_controller.go b/controllers/idler/idler_controller.go index 33685b88..963d2778 100644 --- a/controllers/idler/idler_controller.go +++ b/controllers/idler/idler_controller.go @@ -137,7 +137,7 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. if restartCount > RestartThreshold { podLogger.Info("Pod is restarting too often. Killing the pod", "restart_count", restartCount) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, podCtx, pod, r, idler) + err := deletePodsAndCreateNotification(podCtx, pod, r, idler) if err != nil { return err } @@ -152,7 +152,7 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. if time.Now().After(trackedPod.StartTime.Add(time.Duration(timeoutSeconds) * time.Second)) { podLogger.Info("Pod running for too long. Killing the pod.", "start_time", trackedPod.StartTime.Format("2006-01-02T15:04:05Z"), "timeout_seconds", timeoutSeconds) // Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. - err := deletePodsAndCreateNotification(ctx, podCtx, pod, r, idler) + err := deletePodsAndCreateNotification(podCtx, pod, r, idler) if err != nil { return err } @@ -176,8 +176,8 @@ func (r *Reconciler) ensureIdling(ctx context.Context, idler *toolchainv1alpha1. // Check if the pod belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero. // if it is a standalone pod, delete it. 
// Send notification if the deleted pod was managed by a controller, was a standalone pod that was not completed or was crashlooping -func deletePodsAndCreateNotification(ctx context.Context, podCtx context.Context, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { - logger := log.FromContext(ctx) +func deletePodsAndCreateNotification(podCtx context.Context, pod corev1.Pod, r *Reconciler, idler *toolchainv1alpha1.Idler) error { + logger := log.FromContext(podCtx) var podReason string podCondition := pod.Status.Conditions for _, podCond := range podCondition { @@ -191,7 +191,7 @@ func deletePodsAndCreateNotification(ctx context.Context, podCtx context.Context } if !deletedByController { // Pod not managed by a controller. We can just delete the pod. logger.Info("Deleting pod without controller") - if err := r.AllNamespacesClient.Delete(ctx, &pod); err != nil { + if err := r.AllNamespacesClient.Delete(podCtx, &pod); err != nil { return err } logger.Info("Pod deleted") @@ -204,9 +204,9 @@ func deletePodsAndCreateNotification(ctx context.Context, podCtx context.Context // If a build pod is in "PodCompleted" status then it was not running so there's no reason to send an idler notification if podReason != "PodCompleted" || deletedByController { // By now either a pod has been deleted or scaled to zero by controller, idler Triggered notification should be sent - if err := r.createNotification(ctx, idler, appName, appType); err != nil { + if err := r.createNotification(podCtx, idler, appName, appType); err != nil { logger.Error(err, "failed to create Notification") - if err = r.setStatusIdlerNotificationCreationFailed(ctx, idler, err.Error()); err != nil { + if err = r.setStatusIdlerNotificationCreationFailed(podCtx, idler, err.Error()); err != nil { logger.Error(err, "failed to set status IdlerNotificationCreationFailed") } // not returning error to continue tracking remaining pods }
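Editor's note: for readers skimming the series, below is a minimal, self-contained sketch of the behaviour these patches converge on. It uses simplified local stand-ins for the Kubernetes and toolchain types (the real code works with corev1.PodStatus and toolchainv1alpha1.Idler inside the Reconciler), so the type names and signatures here are illustrative assumptions, not the operator's API. The two grounded pieces of logic are: a pod counts as crashlooping when any single container's restart count exceeds RestartThreshold (50), and the controller requeues after the shortest of the next pod's kill deadline, the 300-second cap, and the idler's own timeout.

package main

import (
	"fmt"
	"time"
)

const (
	RestartThreshold     = 50
	RequeueTimeThreshold = 300 * time.Second
)

// Simplified stand-ins for corev1.ContainerStatus / corev1.PodStatus and the Idler CR.
type containerStatus struct{ RestartCount int32 }

type podStatus struct{ ContainerStatuses []containerStatus }

type trackedPod struct {
	StartTime      time.Time
	TimeoutSeconds int32
}

type idler struct {
	TimeoutSeconds int32
	Pods           []trackedPod
}

// getHighestRestartCount mirrors the helper from patch 09: the pod is treated as
// crashlooping when any single container has restarted more than RestartThreshold
// times (the highest per-container count, not the sum across containers).
func getHighestRestartCount(status podStatus) int32 {
	var highest int32
	for _, cs := range status.ContainerStatuses {
		if cs.RestartCount > highest {
			highest = cs.RestartCount
		}
	}
	return highest
}

// nextPodToBeKilledAfter returns how long until the earliest tracked pod exceeds
// its timeout, or nil when no pods are tracked.
func nextPodToBeKilledAfter(i idler) *time.Duration {
	if len(i.Pods) == 0 {
		return nil
	}
	var remaining time.Duration
	for k, p := range i.Pods {
		d := time.Until(p.StartTime.Add(time.Duration(p.TimeoutSeconds) * time.Second))
		if k == 0 || d < remaining {
			remaining = d
		}
	}
	if remaining < 0 {
		remaining = 0
	}
	return &remaining
}

// findShortestRequeueDuration mirrors patch 11: requeue after the shortest of the
// next-pod deadline, the 300-second cap, and the idler's own timeout.
func findShortestRequeueDuration(i idler) time.Duration {
	shortest := RequeueTimeThreshold
	if idlerTimeout := time.Duration(i.TimeoutSeconds) * time.Second; idlerTimeout < shortest {
		shortest = idlerTimeout
	}
	if next := nextPodToBeKilledAfter(i); next != nil && *next < shortest {
		shortest = *next
	}
	return shortest
}

func main() {
	// Restart counts taken from the test payloads (52 and 24): 52 > RestartThreshold,
	// so this pod would be idled immediately regardless of its start time.
	crashlooping := podStatus{ContainerStatuses: []containerStatus{{RestartCount: 52}, {RestartCount: 24}}}
	fmt.Println("highest restart count:", getHighestRestartCount(crashlooping))

	// With a 540-second idler timeout (as in the tests) and a freshly started pod,
	// the requeue interval is capped at the 5-minute threshold.
	i := idler{TimeoutSeconds: 540, Pods: []trackedPod{{StartTime: time.Now(), TimeoutSeconds: 540}}}
	fmt.Println("requeue after:", findShortestRequeueDuration(i))
}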