Applier: Improve reconciler reschedule context to avoid deadlocking on full channel #932
Changes from 4 commits
@@ -16,14 +16,16 @@ use derivative::Derivative;
 use futures::{
     channel,
     future::{self, BoxFuture},
-    stream, Future, FutureExt, SinkExt, Stream, StreamExt, TryFuture, TryFutureExt, TryStream, TryStreamExt,
+    ready, stream, Future, FutureExt, Stream, StreamExt, TryFuture, TryFutureExt, TryStream, TryStreamExt,
 };
 use kube_client::api::{Api, DynamicObject, ListParams, Resource};
+use pin_project::pin_project;
 use serde::de::DeserializeOwned;
 use std::{
     fmt::{Debug, Display},
     hash::Hash,
     sync::Arc,
+    task::Poll,
     time::Duration,
 };
 use stream::BoxStream;
@@ -202,6 +204,8 @@ impl Display for ReconcileReason {
     }
 }
 
+const APPLIER_REQUEUE_BUF_SIZE: usize = 100;
+
 /// Apply a reconciler to an input stream, with a given retry policy
 ///
 /// Takes a `store` parameter for the core objects, which should usually be updated by a [`reflector`].
@@ -215,7 +219,7 @@ impl Display for ReconcileReason {
 /// (such as triggering from arbitrary [`Stream`]s), at the cost of being a bit more verbose.
 pub fn applier<K, QueueStream, ReconcilerFut, Ctx>(
     mut reconciler: impl FnMut(Arc<K>, Arc<Ctx>) -> ReconcilerFut,
-    mut error_policy: impl FnMut(&ReconcilerFut::Error, Arc<Ctx>) -> Action,
+    error_policy: impl Fn(&ReconcilerFut::Error, Arc<Ctx>) -> Action,
     context: Arc<Ctx>,
     store: Store<K>,
     queue: QueueStream,
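For orientation (not part of the diff): wiring `applier` up by hand looks roughly like the regression test added at the bottom of this PR. A minimal sketch with made-up names, a no-op reconciler, and a manually fed queue, assuming the usual `kube_runtime` exports:

```rust
use std::{convert::Infallible, sync::Arc, time::Duration};

use futures::StreamExt;
use k8s_openapi::api::core::v1::ConfigMap;
use kube_runtime::{applier, controller::Action, reflector, reflector::ObjectRef};

async fn run_configmap_applier() {
    // Manual input queue; a real controller maps watcher events into ObjectRefs instead.
    let (queue_tx, queue_rx) = futures::channel::mpsc::unbounded::<ObjectRef<ConfigMap>>();
    // The store reader goes to the applier; the writer would normally be fed by a reflector.
    let (store_reader, _store_writer) = reflector::store::<ConfigMap>();

    queue_tx
        .unbounded_send(ObjectRef::new("some-config-map").within("default"))
        .unwrap();
    drop(queue_tx); // closing the queue triggers graceful shutdown once it is drained

    applier(
        |_obj: Arc<ConfigMap>, _ctx: Arc<()>| {
            Box::pin(async move {
                // reconcile logic goes here
                Ok::<_, Infallible>(Action::requeue(Duration::from_secs(300)))
            })
        },
        |_err: &Infallible, _ctx: Arc<()>| Action::await_change(),
        Arc::new(()),
        store_reader,
        queue_rx.map(Result::<_, Infallible>::Ok),
    )
    .for_each(|res| async move {
        // each item is a Result of (ObjectRef<ConfigMap>, Action) or an applier error
        let _ = res;
    })
    .await;
}
```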
@@ -230,22 +234,25 @@ where
     QueueStream::Error: std::error::Error + 'static,
 {
     let (scheduler_shutdown_tx, scheduler_shutdown_rx) = channel::oneshot::channel();
-    let err_context = context.clone();
-    let (scheduler_tx, scheduler_rx) = channel::mpsc::unbounded::<ScheduleRequest<ReconcileRequest<K>>>();
+    let (scheduler_tx, scheduler_rx) =
+        channel::mpsc::channel::<ScheduleRequest<ReconcileRequest<K>>>(APPLIER_REQUEUE_BUF_SIZE);
Some points I wanted to raise regarding the unbounded channels here, because I'm not sure about the full reasoning. It feels partly defensible to have unbounded queues: … But on the other hand, if I understand this queue correctly, it also serves as a limiter on the amount of parallelism in a controller? In that case, limiting it actually makes a lot of sense, because 10k objects being reconciled at once might DoS a third-party service. Is that a correct understanding?

Not quite; there are actually four different "queues" going on in the controller. This PR is only concerned with queue 4 (the reschedule channel), where we have no practical way to implement deduping.

So if I understand it right: … and the executor is going to work at its regular pace. So this means it is actually possible to run thousands of reconciles at the same time on re-lists currently? Is that something that is viable to bound at some point, somehow? In the …

Yes, and yes. Well, we could add additional constraints for when we actually start running a pending reconciliation. That's not implemented at the moment, on the assumption that you could "just" use a semaphore in your reconciler function. Doing the semaphoring in the executor (which has other benefits, like not having to allocate the task before it actually has a semaphore permit) shouldn't be too difficult either; the main problem there would be that …

Maybe it makes sense to refactor a lot of the applier's configuration into some kind of …

Probably, yeah.
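To illustrate the semaphore workaround mentioned above (not part of this PR): a rough sketch of capping reconcile parallelism from inside the reconciler itself using `tokio::sync::Semaphore`. The constant, function name, and reconcile body are placeholders:

```rust
use std::{convert::Infallible, sync::Arc, time::Duration};

use futures::future::BoxFuture;
use k8s_openapi::api::core::v1::ConfigMap;
use kube_runtime::controller::Action;
use tokio::sync::Semaphore;

const MAX_CONCURRENT_RECONCILES: usize = 16; // placeholder limit

fn limited_reconciler(
    semaphore: Arc<Semaphore>,
) -> impl Fn(Arc<ConfigMap>, Arc<()>) -> BoxFuture<'static, Result<Action, Infallible>> {
    move |_obj, _ctx| {
        let semaphore = semaphore.clone();
        Box::pin(async move {
            // Holding the permit for the duration of the reconcile caps how many
            // reconciliations actually run at once, even if the runner starts more tasks.
            let _permit = semaphore.acquire_owned().await.expect("semaphore closed");
            // ...actual reconcile logic goes here...
            Ok(Action::requeue(Duration::from_secs(300)))
        })
    }
}
```

This only bounds the user's reconcile body; the applier still allocates and polls the surrounding task, which is why executor-level limiting is discussed above.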
+    let error_policy = Arc::new(error_policy);
     // Create a stream of ObjectRefs that need to be reconciled
     trystream_try_via(
         // input: stream combining scheduled tasks and user specified inputs event
         Box::pin(stream::select(
             // 1. inputs from users queue stream
-            queue.map_err(Error::QueueError).map_ok(|request| ScheduleRequest {
-                message: request.into(),
-                run_at: Instant::now() + Duration::from_millis(1),
-            })
-            .on_complete(async move {
-                // On error: scheduler has already been shut down and there is nothing for us to do
-                let _ = scheduler_shutdown_tx.send(());
-                tracing::debug!("applier queue terminated, starting graceful shutdown")
-            }),
+            queue
+                .map_err(Error::QueueError)
+                .map_ok(|request| ScheduleRequest {
+                    message: request.into(),
+                    run_at: Instant::now() + Duration::from_millis(1),
+                })
+                .on_complete(async move {
+                    // On error: scheduler has already been shut down and there is nothing for us to do
+                    let _ = scheduler_shutdown_tx.send(());
+                    tracing::debug!("applier queue terminated, starting graceful shutdown")
+                }),
             // 2. requests sent to scheduler_tx
             scheduler_rx
                 .map(Ok)
@@ -258,56 +265,121 @@ where
             let request = request.clone();
             match store.get(&request.obj_ref) {
                 Some(obj) => {
-                    let reconciler_span = info_span!("reconciling object", "object.ref" = %request.obj_ref, object.reason = %request.reason);
-                    reconciler_span.in_scope(|| reconciler(obj, context.clone()))
-                        .into_future()
-                        .instrument(reconciler_span.clone())
-                        // Reconciler errors are OK from the applier's PoV, we need to apply the error policy
-                        // to them separately
-                        .map(|res| Ok((request.obj_ref, res, reconciler_span)))
-                        .left_future()
-                },
-                None => future::err(
-                    Error::ObjectNotFound(request.obj_ref.erase())
-                )
-                .right_future(),
+                    let scheduler_tx = scheduler_tx.clone();
+                    let error_policy_ctx = context.clone();
+                    let error_policy = error_policy.clone();
+                    let reconciler_span = info_span!(
+                        "reconciling object",
+                        "object.ref" = %request.obj_ref,
+                        object.reason = %request.reason
+                    );
+                    reconciler_span
+                        .in_scope(|| reconciler(obj, context.clone()))
+                        .into_future()
+                        .then(move |res| {
+                            let error_policy = error_policy;
+                            PostReconciler::new(
+                                res,
+                                |err| error_policy(err, error_policy_ctx),
+                                request.obj_ref.clone(),
+                                scheduler_tx,
+                            )
+                            // Reconciler errors are OK from the applier's PoV, we need to apply the error policy
+                            // to them separately
+                            .map(|res| Ok((request.obj_ref, res)))
+                        })
+                        .instrument(reconciler_span)
+                        .left_future()
+                }
+                None => future::err(Error::ObjectNotFound(request.obj_ref.erase())).right_future(),
             }
         })
         .on_complete(async { tracing::debug!("applier runner terminated") })
     },
 )
 .on_complete(async { tracing::debug!("applier runner-merge terminated") })
 // finally, for each completed reconcile call:
-.and_then(move |(obj_ref, reconciler_result, reconciler_span)| {
-    let (Action { requeue_after }, requeue_reason) = match &reconciler_result {
-        Ok(action) =>
-            // do what user told us
-            (action.clone(), ReconcileReason::ReconcilerRequestedRetry),
-        Err(err) =>
-            // reconciler fn call failed
-            (reconciler_span.in_scope(|| error_policy(err, err_context.clone())), ReconcileReason::ErrorPolicyRequestedRetry),
-    };
-    let mut scheduler_tx = scheduler_tx.clone();
-    async move {
-        // Transmit the requeue request to the scheduler (picked up again at top)
-        if let Some(delay) = requeue_after {
-            // Failure to schedule item = in graceful shutdown mode, ignore
-            let _ = scheduler_tx
-                .send(ScheduleRequest {
-                    message: ReconcileRequest {obj_ref: obj_ref.clone(), reason: requeue_reason},
-                    run_at: Instant::now() + delay,
-                })
-                .await;
-        }
-        match reconciler_result {
-            Ok(action) => Ok((obj_ref, action)),
-            Err(err) => Err(Error::ReconcilerFailed(err, obj_ref.erase()))
-        }
+.and_then(move |(obj_ref, reconciler_result)| async move {
+    match reconciler_result {
+        Ok(action) => Ok((obj_ref, action)),
+        Err(err) => Err(Error::ReconcilerFailed(err, obj_ref.erase())),
+    }
 })
 .on_complete(async { tracing::debug!("applier terminated") })
 }
 
+/// Internal helper that runs post-reconciliation (such as requesting rescheduling) tasks in the scheduled context of the reconciler
+///
+/// This could be an `async fn`, but isn't because we want it to be [`Unpin`]
+#[pin_project]
+#[must_use]
+struct PostReconciler<K: Resource, ReconcilerErr> {
+    reschedule_tx: channel::mpsc::Sender<ScheduleRequest<ReconcileRequest<K>>>,
+
+    reschedule_request: Option<ScheduleRequest<ReconcileRequest<K>>>,
+    result: Option<Result<Action, ReconcilerErr>>,
+}
+
+impl<K, ReconcilerErr> PostReconciler<K, ReconcilerErr>
+where
+    K: Resource,
+{
+    fn new(
+        result: Result<Action, ReconcilerErr>,
+        error_policy: impl FnOnce(&ReconcilerErr) -> Action,
+        obj_ref: ObjectRef<K>,
+        reschedule_tx: channel::mpsc::Sender<ScheduleRequest<ReconcileRequest<K>>>,
+    ) -> Self {
+        let reconciler_finished_at = Instant::now();
+
+        let (action, reschedule_reason) = result.as_ref().map_or_else(
+            |err| (error_policy(err), ReconcileReason::ErrorPolicyRequestedRetry),
+            |action| (action.clone(), ReconcileReason::ReconcilerRequestedRetry),
+        );
+
+        Self {
+            reschedule_tx,
+            reschedule_request: action.requeue_after.map(|requeue_after| ScheduleRequest {
+                message: ReconcileRequest {
+                    obj_ref,
+                    reason: reschedule_reason,
+                },
+                run_at: reconciler_finished_at + requeue_after,
+            }),
+            result: Some(result),
+        }
+    }
+}
+
+impl<K, ReconcilerErr> Future for PostReconciler<K, ReconcilerErr>
+where
+    K: Resource,
+{
+    type Output = Result<Action, ReconcilerErr>;
+
+    fn poll(self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
+        let this = self.get_mut();
+
+        if this.reschedule_request.is_some() {
+            let rescheduler_ready = ready!(this.reschedule_tx.poll_ready(cx));
+            let reschedule_request = this
+                .reschedule_request
+                .take()
+                .expect("PostReconciler::reschedule_request was taken during processing");
+            // Failure to schedule item = in graceful shutdown mode, ignore
+            if let Ok(()) = rescheduler_ready {
+                let _ = this.reschedule_tx.start_send(reschedule_request);
+            }
+        }
+
+        Poll::Ready(
+            this.result
+                .take()
+                .expect("PostReconciler::result was already taken"),
+        )
+    }
+}
+
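To make the `Unpin` note above concrete (not part of the diff): an `async fn` version of the same helper could look roughly like the sketch below, but its anonymous future would not be `Unpin`, and it would also need the `SinkExt` import that this PR removes. Names here are illustrative:

```rust
// Hypothetical async-fn equivalent of PostReconciler (would not be Unpin);
// the body mirrors the requeue logic removed from the old applier above.
// Requires `use futures::SinkExt;` for `send`.
async fn post_reconcile<K: Resource, ReconcilerErr>(
    result: Result<Action, ReconcilerErr>,
    error_policy: impl FnOnce(&ReconcilerErr) -> Action,
    obj_ref: ObjectRef<K>,
    mut reschedule_tx: channel::mpsc::Sender<ScheduleRequest<ReconcileRequest<K>>>,
) -> Result<Action, ReconcilerErr> {
    let reconciler_finished_at = Instant::now();
    let (action, reschedule_reason) = result.as_ref().map_or_else(
        |err| (error_policy(err), ReconcileReason::ErrorPolicyRequestedRetry),
        |action| (action.clone(), ReconcileReason::ReconcilerRequestedRetry),
    );
    if let Some(requeue_after) = action.requeue_after {
        // SinkExt::send waits for channel capacity; failure to send = graceful shutdown, ignore
        let _ = reschedule_tx
            .send(ScheduleRequest {
                message: ReconcileRequest {
                    obj_ref,
                    reason: reschedule_reason,
                },
                run_at: reconciler_finished_at + requeue_after,
            })
            .await;
    }
    result
}
```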
 /// Controller
 ///
 /// A controller is made up of:
@@ -736,7 +808,7 @@ where
     pub fn run<ReconcilerFut, Ctx>(
         self,
         mut reconciler: impl FnMut(Arc<K>, Arc<Ctx>) -> ReconcilerFut,
-        error_policy: impl FnMut(&ReconcilerFut::Error, Arc<Ctx>) -> Action,
+        error_policy: impl Fn(&ReconcilerFut::Error, Arc<Ctx>) -> Action,
         context: Arc<Ctx>,
     ) -> impl Stream<Item = Result<(ObjectRef<K>, Action), Error<ReconcilerFut::Error, watcher::Error>>>
     where
@@ -763,12 +835,18 @@ where
 
 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
+    use std::{convert::Infallible, sync::Arc, time::Duration};
 
-    use super::Action;
-    use crate::Controller;
+    use super::{Action, APPLIER_REQUEUE_BUF_SIZE};
+    use crate::{
+        applier,
+        reflector::{self, ObjectRef},
+        watcher, Controller,
+    };
     use futures::{StreamExt, TryStreamExt};
     use k8s_openapi::api::core::v1::ConfigMap;
-    use kube_client::Api;
+    use kube_client::{core::ObjectMeta, Api};
+    use tokio::time::timeout;
 
     fn assert_send<T: Send>(x: T) -> T {
         x
@@ -791,4 +869,54 @@ mod tests {
             ),
         );
     }
+
+    #[tokio::test]
+    async fn applier_must_not_deadlock_if_reschedule_buffer_fills() {
+        // This tests that `applier` handles reschedule queue backpressure correctly, by trying to flood it with no-op reconciles
+        // This is intended to avoid regressing on https://github.com/kube-rs/kube-rs/issues/926
+
+        // Assume that we can keep APPLIER_REQUEUE_BUF_SIZE flooded if we have 50x the number of objects "in rotation"
+        // On my (@teozkr) 3900X I can reliably trigger this with 10x, but let's have some safety margin to avoid false negatives
+        let items = APPLIER_REQUEUE_BUF_SIZE * 50;
+        // Assume that everything's OK if we can reconcile every object 3 times on average
+        let reconciles = items * 3;
+
+        let (queue_tx, queue_rx) = futures::channel::mpsc::unbounded::<ObjectRef<ConfigMap>>();
+        let (store_rx, mut store_tx) = reflector::store();
+        let applier = tokio::spawn(
+            applier(
+                |obj, _| {
+                    Box::pin(async move {
+                        // Try to flood the rescheduling buffer by just putting the object back in the queue immediately
+                        println!("reconciling {:?}", obj.metadata.name);
+                        Ok(Action::requeue(Duration::ZERO))
+                    })
+                },
+                |_: &Infallible, _| todo!(),
+                Arc::new(()),
+                store_rx,
+                queue_rx.map(Result::<_, Infallible>::Ok),
+            )
+            .take(reconciles)
+            .try_for_each(|_| async { Ok(()) }),
+        );
+        for i in 0..items {
+            let obj = ConfigMap {
+                metadata: ObjectMeta {
+                    name: Some(format!("cm-{i}")),
+                    namespace: Some("default".to_string()),
+                    ..Default::default()
+                },
+                ..Default::default()
+            };
+            store_tx.apply_watcher_event(&watcher::Event::Applied(obj.clone()));
+            queue_tx.unbounded_send(ObjectRef::from_obj(&obj)).unwrap();
+        }
+        // Keep the submission queue open to avoid going into graceful shutdown mode
+        timeout(Duration::from_secs(10), applier)
+            .await
+            .expect("test timeout expired, applier likely deadlocked")
+            .unwrap()
+            .unwrap();
+    }
 }
I think this is fixed in master.

Doesn't look like it; the only PR that wasn't included in this branch was #931, which didn't touch this.

Oh, my bad. It turns out I had a different fix for it (by testing more) in https://github.com/kube-rs/kube-rs/pull/924/files, but it hasn't been reviewed and thus hasn't made it into master.

Ahh.