From 85facb9200dea75e661447ed11e05806f42de65f Mon Sep 17 00:00:00 2001 From: Emanuele Altieri Date: Mon, 11 Nov 2024 12:16:25 -0800 Subject: [PATCH] Maintain thread affinity during non-TKO failovers Summary: When failing over, make an educated guess as to whether thread affinity still applies to the failover tier. This is done by inspecting the failover result code. It thread affinity is likely to be broken, `SRRoute` will attempt to jump threads to restore affinity. TKO errors should not affect thread affinity, as `traverse()` is aware of TKO hosts and should be able to predict a failover in that case: ``` FailoverRoute |-> traverse(): failover1 `-> route() |-> primary: TKO `-> failover1: SUCCESS // <- thread affinity ``` ``` FailoverRoute |-> traverse(): failover2 `-> route() |-> primary: TKO |-> failover1: TKO `-> failover2: SUCCESS // <- thread affinity ``` A non-TKO error, on the other hand, will break thread affinity. For instance, during S459689: ``` FailoverRoute |-> traverse(): failover2 `-> route() |-> primary: TKO |-> failover1: BUSY // <- original thread affinity `-> failover2: SUCCESS // jump threads ``` Reviewed By: disylh, stuclar Differential Revision: D65231378 fbshipit-source-id: be9f40aa08eb20d07374064ad99c09a63158b653 --- mcrouter/CarbonRouterClient-inl.h | 2 ++ mcrouter/CarbonRouterClient.cpp | 6 +++--- mcrouter/CarbonRouterInstance.h | 4 ++++ mcrouter/McrouterFiberContext.h | 17 +++++++++++++++++ mcrouter/ProxyRequestContext.h | 9 +++++++++ mcrouter/routes/FailoverRoute.h | 10 ++++++++++ 6 files changed, 45 insertions(+), 3 deletions(-) diff --git a/mcrouter/CarbonRouterClient-inl.h b/mcrouter/CarbonRouterClient-inl.h index 3c2f6aab7..5c3981aa7 100644 --- a/mcrouter/CarbonRouterClient-inl.h +++ b/mcrouter/CarbonRouterClient-inl.h @@ -392,6 +392,8 @@ CarbonRouterClient::makeProxyRequestContext( }); proxyRequestContext->setRequester(self_); + proxyRequestContext->setThreadAffinity( + mode_ == ThreadMode::AffinitizedRemoteThread); return proxyRequestContext; } diff --git a/mcrouter/CarbonRouterClient.cpp b/mcrouter/CarbonRouterClient.cpp index 9683ca014..34d988ca3 100644 --- a/mcrouter/CarbonRouterClient.cpp +++ b/mcrouter/CarbonRouterClient.cpp @@ -14,9 +14,9 @@ bool srHostInfoPtrFuncCarbonRouterClient( const HostInfoPtr& host, const RequestClass& requestClass, uint64_t& hash) { - if (!requestClass.is(RequestClass::kShadow) && host && - host->location().getTWTaskID()) { - hash = *host->location().getTWTaskID(); + if (!requestClass.is(RequestClass::kShadow) && host) { + // Host unique key is derived from IP and port + hash = host->location().hostUniqueKey(); return true; } return false; diff --git a/mcrouter/CarbonRouterInstance.h b/mcrouter/CarbonRouterInstance.h index d7a1e892f..094e86138 100644 --- a/mcrouter/CarbonRouterInstance.h +++ b/mcrouter/CarbonRouterInstance.h @@ -241,6 +241,10 @@ class CarbonRouterInstance cpuStatsWorker_.reset(); } + Proxy& getProxyFromHash(size_t hash) { + return *proxies_[hash % proxies_.size()]; + } + CarbonRouterInstance(const CarbonRouterInstance&) = delete; CarbonRouterInstance& operator=(const CarbonRouterInstance&) = delete; CarbonRouterInstance(CarbonRouterInstance&&) noexcept = delete; diff --git a/mcrouter/McrouterFiberContext.h b/mcrouter/McrouterFiberContext.h index 1b0b3c1b9..a232af751 100644 --- a/mcrouter/McrouterFiberContext.h +++ b/mcrouter/McrouterFiberContext.h @@ -82,6 +82,7 @@ class fiber_local { std::bitset featureFlags; int32_t selectedIndex{-1}; uint32_t failoverCount{0}; + bool jumpThreads{false}; std::optional bucketId; std::optional distributionTargetRegion; RequestClass requestClass; @@ -216,6 +217,22 @@ class fiber_local { return folly::fibers::local().failoverCount; } + /** + * Set when failing over may require the request to be moved to a different + * event base. + */ + static void enableJumpThreads() { + folly::fibers::local().jumpThreads = true; + } + + /** + * Check whether the request should be moved to a different eventbase as a + * result of failing over. + */ + static bool shouldJumpThreads() { + return folly::fibers::local().jumpThreads; + } + /** * Accumulate latency injected before request for current fiber and return the * new value diff --git a/mcrouter/ProxyRequestContext.h b/mcrouter/ProxyRequestContext.h index b4d38909c..6198dcec5 100644 --- a/mcrouter/ProxyRequestContext.h +++ b/mcrouter/ProxyRequestContext.h @@ -173,12 +173,21 @@ class ProxyRequestContext { reqContextScopeGuard_.reset(); } + void setThreadAffinity(bool enabled) { + threadAffinity_ = enabled; + } + + bool isThreadAffinityEnabled() const { + return threadAffinity_; + } + protected: // Keep on first cacheline. Used by ProxyRequestContextTyped const void* ptr_{nullptr}; carbon::Result finalResult_{carbon::Result::UNKNOWN}; int32_t poolStatIndex_{-1}; bool replied_{false}; + bool threadAffinity_{false}; ProxyRequestContext( ProxyBase& pr, diff --git a/mcrouter/routes/FailoverRoute.h b/mcrouter/routes/FailoverRoute.h index a2c8e7c63..f07e36340 100644 --- a/mcrouter/routes/FailoverRoute.h +++ b/mcrouter/routes/FailoverRoute.h @@ -231,6 +231,16 @@ class FailoverRoute { } } + // Check whether we may be on the wrong thread for the failover host. + // + // Thread affinity is based on the destination host discovered by + // traverse(). TKO states are taken into account by traverse(), so TKO + // errors should not affect thread affinity. + // + if (*reply.result_ref() != carbon::Result::TKO) { + fiber_local::enableJumpThreads(); + } + return false; }