From 47e51339be2a6390eb60d3777eb70370e19759c4 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Mon, 17 Jun 2024 16:31:09 -0400 Subject: [PATCH 01/15] mqbs:FileStore: Improve rc logging Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbs/mqbs_filestore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groups/mqb/mqbs/mqbs_filestore.cpp b/src/groups/mqb/mqbs/mqbs_filestore.cpp index 0134baf628..00aee21b19 100644 --- a/src/groups/mqb/mqbs/mqbs_filestore.cpp +++ b/src/groups/mqb/mqbs/mqbs_filestore.cpp @@ -5252,7 +5252,7 @@ int FileStore::open(const QueueKeyInfoMap& queueKeyInfoMap) BALL_LOG_ERROR << partitionDesc() << "Failed to open in recovery mode," << " rc:" << rc << ", reason: [" << errorDescription.str() << "]."; - return rc_RECOVERY_MODE_FAILURE; // RETURN + return rc * 10 + rc_RECOVERY_MODE_FAILURE; // RETURN } BSLS_ASSERT_SAFE(d_isOpen); From db43defff162917b750a65e15c62b4297abb406b Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Mon, 24 Jun 2024 15:22:12 -0400 Subject: [PATCH 02/15] mqbc::StorageManager.t: Remove the concept of replica healing stages Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp | 84 +++++++++---------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp index bc04c1b1fa..203f901960 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp @@ -2025,7 +2025,7 @@ static void test11_primaryHealingStage2DetectSelfReplica() helper.d_cluster_mp->stop(); } -static void test12_replicaHealingStage1DetectSelfPrimary() +static void test12_replicaHealingDetectSelfPrimary() // ------------------------------------------------------------------------ // BREATHING TEST // @@ -2035,7 +2035,7 @@ static void test12_replicaHealingStage1DetectSelfPrimary() // Plan: // 1) Create a StorageManager on the stack // 2) Invoke start. -// 3) Transition to Replica healing stage 1 and then detect self primary. +// 3) Transition to healing Replica and then detect self primary. // 4) Verify the actions as per FSM. // 5) Invoke stop. // @@ -2043,9 +2043,9 @@ static void test12_replicaHealingStage1DetectSelfPrimary() // Basic functionality. // ------------------------------------------------------------------------ { - mwctst::TestHelper::printTestName("BREATHING TEST - " - "REPLICA HEALING STAGE 1 DETECTS SELF AS" - " PRIMARY"); + mwctst::TestHelper::printTestName( + "BREATHING TEST - " + "HEALING REPLICA DETECTS SELF AS PRIMARY"); TestHelper helper; @@ -2102,7 +2102,7 @@ static void test12_replicaHealingStage1DetectSelfPrimary() BSLS_ASSERT_OPT(nodeToSeqNumCtxMap.size() == 1); - // Apply Detect Self Primary event to Self Node in replicaHealingStage1. + // Apply Detect Self Primary event to Self Node. storageManager.setPrimaryForPartition(k_PARTITION_ID, selfNode, @@ -2120,7 +2120,7 @@ static void test12_replicaHealingStage1DetectSelfPrimary() helper.d_cluster_mp->stop(); } -static void test13_replicaHealingStage1ReceivesReplicaStateRqst() +static void test13_replicaHealingReceivesReplicaStateRqst() // ------------------------------------------------------------------------ // BREATHING TEST // @@ -2130,7 +2130,7 @@ static void test13_replicaHealingStage1ReceivesReplicaStateRqst() // Plan: // 1) Create a StorageManager on the stack // 2) Invoke start. -// 3) Transition to Replica healing stage 1. +// 3) Transition to healing Replica. 
// 4) Send ReplicaStateRqst to this Replica. // 5) Check that Replica sends ReplicaStateRspn, stores primarySeqNum. // 6) Verify the actions as per FSM. @@ -2140,9 +2140,9 @@ static void test13_replicaHealingStage1ReceivesReplicaStateRqst() // Basic functionality. // ------------------------------------------------------------------------ { - mwctst::TestHelper::printTestName("BREATHING TEST - " - "REPLICA HEALING STAGE 1 RECEIVES " - "REPLICA STATE REQUEST"); + mwctst::TestHelper::printTestName( + "BREATHING TEST - " + "HEALING REPLICA RECEIVES REPLICA STATE REQUEST"); TestHelper helper; @@ -2227,7 +2227,7 @@ static void test13_replicaHealingStage1ReceivesReplicaStateRqst() helper.d_cluster_mp->stop(); } -static void test14_replicaHealingStage1ReceivesPrimaryStateRspn() +static void test14_replicaHealingReceivesPrimaryStateRspn() // ------------------------------------------------------------------------ // BREATHING TEST // @@ -2237,7 +2237,7 @@ static void test14_replicaHealingStage1ReceivesPrimaryStateRspn() // Plan: // 1) Create a StorageManager on the stack // 2) Invoke start. -// 3) Transition to Replica healing stage 1. +// 3) Transition to healing Replica. // 4) Send PrimaryStateRspn to this Replica. // 5) Check that Replica stores primarySeqNum. // 6) Verify the actions as per FSM. @@ -2247,9 +2247,9 @@ static void test14_replicaHealingStage1ReceivesPrimaryStateRspn() // Basic functionality. // ------------------------------------------------------------------------ { - mwctst::TestHelper::printTestName("BREATHING TEST - " - "REPLICA HEALING STAGE 1 RECEIVES " - "PRIMARY STATE RESPONSE"); + mwctst::TestHelper::printTestName( + "BREATHING TEST - " + "HEALING REPLICA RECEIVES PRIMARY STATE RESPONSE"); TestHelper helper; @@ -2332,7 +2332,7 @@ static void test14_replicaHealingStage1ReceivesPrimaryStateRspn() helper.d_cluster_mp->stop(); } -static void test15_replicaHealingStage1ReceivesFailedPrimaryStateRspn() +static void test15_replicaHealingReceivesFailedPrimaryStateRspn() // ------------------------------------------------------------------------ // BREATHING TEST // @@ -2342,7 +2342,7 @@ static void test15_replicaHealingStage1ReceivesFailedPrimaryStateRspn() // Plan: // 1) Create a StorageManager on the stack // 2) Invoke start. -// 3) Transition to Replica healing stage 1. +// 3) Transition to healing Replica. // 4) Send failed PrimaryStateRspn to this Replica. // 5) Check that Replica does not store primarySeqNum. // 6) Verify the actions as per FSM. @@ -2352,9 +2352,9 @@ static void test15_replicaHealingStage1ReceivesFailedPrimaryStateRspn() // Basic functionality. // ------------------------------------------------------------------------ { - mwctst::TestHelper::printTestName("BREATHING TEST - " - "REPLICA HEALING STAGE 1 RECEIVES " - "FAILED PRIMARY STATE RESPONSE"); + mwctst::TestHelper::printTestName( + "BREATHING TEST - " + "HEALING REPLICA RECEIVES FAILED PRIMARY STATE RESPONSE"); TestHelper helper; @@ -2426,7 +2426,7 @@ static void test15_replicaHealingStage1ReceivesFailedPrimaryStateRspn() helper.d_cluster_mp->stop(); } -static void test16_replicaHealingStage1ReceivesPrimaryStateRqst() +static void test16_replicaHealingReceivesPrimaryStateRqst() // ------------------------------------------------------------------------ // BREATHING TEST // @@ -2436,7 +2436,7 @@ static void test16_replicaHealingStage1ReceivesPrimaryStateRqst() // Plan: // 1) Create a StorageManager on the stack // 2) Invoke start. -// 3) Transition to Replica healing stage 1. 
+// 3) Transition to healing Replica. // 4) Send PrimaryStateRqst to this Replica. // 5) Check that Replica sends failed PrimaryStateRspn. // 6) Verify the actions as per FSM. @@ -2446,9 +2446,9 @@ static void test16_replicaHealingStage1ReceivesPrimaryStateRqst() // Basic functionality. // ------------------------------------------------------------------------ { - mwctst::TestHelper::printTestName("BREATHING TEST - " - "REPLICA HEALING STAGE 1 RECEIVES " - "PRIMARY STATE REQUEST"); + mwctst::TestHelper::printTestName( + "BREATHING TEST - " + "HEALING REPLICA RECEIVES PRIMARY STATE REQUEST"); TestHelper helper; @@ -2539,7 +2539,7 @@ static void test16_replicaHealingStage1ReceivesPrimaryStateRqst() helper.d_cluster_mp->stop(); } -static void test17_replicaHealingStage2ReceivesReplicaDataRqstPull() +static void test17_replicaHealingReceivesReplicaDataRqstPull() // ------------------------------------------------------------------------ // BREATHING TEST // @@ -2550,10 +2550,10 @@ static void test17_replicaHealingStage2ReceivesReplicaDataRqstPull() // Plan: // 1) Create a StorageManager on the stack // 2) Invoke start. -// 3) Transition to Replica healing stage 1. +// 3) Transition to healing Replica. // 4) Send ReplicaStateRqst to this Replica. // 5) Check that Replica sends ReplicaStateRspn, stores primarySeqNum. -// 6) Transition Replica to healing stage 2 and send ReplicaDataRqstPull +// 6) Send ReplicaDataRqstPull // 7) Check that Replica sends data chunks. // 8) Check that Replica sends ReplicaDataRspnPull. // 9) Invoke stop. @@ -2562,9 +2562,9 @@ static void test17_replicaHealingStage2ReceivesReplicaDataRqstPull() // Basic functionality. // ------------------------------------------------------------------------ { - mwctst::TestHelper::printTestName("BREATHING TEST - " - "REPLICA HEALING STAGE 2 RECEIVES " - "REPLICA DATA REQUEST PULL"); + mwctst::TestHelper::printTestName( + "BREATHING TEST - " + "HEALING REPLICA RECEIVES REPLICA DATA REQUEST PULL"); // TODO: debug on why the global allocator check fails for fileStore // allocating some memory through default allocator. 
@@ -2866,21 +2866,19 @@ int main(int argc, char* argv[]) case 0: // case 23: // test23_primaryHealingStage2SendsReplicaDataRqstPushDrop(); - // break; case 22: test22_replicaHealingStage2DetectSelfPrimary(); + // break; case 22: test22_replicaHealingDetectSelfPrimary(); // break; case 21: - // test21_replicaHealingStage2ReceivesReplicaDataRqstDrop(); + // test21_replicaHealingReceivesReplicaDataRqstDrop(); // break; case 20: - // test20_replicaHealingStage2ReceivesReplicaDataRqstPush(); + // test20_replicaHealingReceivesReplicaDataRqstPush(); // break; case 19: test19_primaryHealedSendsDataChunks(); break; case 18: test18_primaryHealingStage1SelfHighestSendsDataChunks(); break; - case 17: test17_replicaHealingStage2ReceivesReplicaDataRqstPull(); break; - case 16: test16_replicaHealingStage1ReceivesPrimaryStateRqst(); break; - case 15: - test15_replicaHealingStage1ReceivesFailedPrimaryStateRspn(); - break; - case 14: test14_replicaHealingStage1ReceivesPrimaryStateRspn(); break; - case 13: test13_replicaHealingStage1ReceivesReplicaStateRqst(); break; - case 12: test12_replicaHealingStage1DetectSelfPrimary(); break; + case 17: test17_replicaHealingReceivesReplicaDataRqstPull(); break; + case 16: test16_replicaHealingReceivesPrimaryStateRqst(); break; + case 15: test15_replicaHealingReceivesFailedPrimaryStateRspn(); break; + case 14: test14_replicaHealingReceivesPrimaryStateRspn(); break; + case 13: test13_replicaHealingReceivesReplicaStateRqst(); break; + case 12: test12_replicaHealingDetectSelfPrimary(); break; case 11: test11_primaryHealingStage2DetectSelfReplica(); break; case 10: test10_primaryHealingStage1QuorumSendsReplicaDataRequestPull(); From 23e8b2563210e04a7d2cd5d8bb0923be84089c0b Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 21 Jun 2024 21:03:21 -0400 Subject: [PATCH 03/15] mqbmock::StorageManager [new] Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbmock/doc/mqbmock.txt | 5 +- src/groups/mqb/mqbmock/mqbmock_domain.h | 1 - .../mqb/mqbmock/mqbmock_storagemanager.cpp | 309 ++++++++++++++++++ .../mqb/mqbmock/mqbmock_storagemanager.h | 305 +++++++++++++++++ src/groups/mqb/mqbmock/package/mqbmock.mem | 1 + 5 files changed, 619 insertions(+), 2 deletions(-) create mode 100644 src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp create mode 100644 src/groups/mqb/mqbmock/mqbmock_storagemanager.h diff --git a/src/groups/mqb/mqbmock/doc/mqbmock.txt b/src/groups/mqb/mqbmock/doc/mqbmock.txt index eb1c3180bb..ce2aacd9d4 100644 --- a/src/groups/mqb/mqbmock/doc/mqbmock.txt +++ b/src/groups/mqb/mqbmock/doc/mqbmock.txt @@ -9,7 +9,7 @@ /Hierarchical Synopsis /--------------------- -The 'mqbmock' package currently has 10 components having 2 levels of physical +The 'mqbmock' package currently has 11 components having 2 levels of physical dependency. The list below shows the hierarchical ordering of the components. .. 2. mqbmock_queue @@ -23,6 +23,7 @@ dependency. The list below shows the hierarchical ordering of the components. mqbmock_logidgenerator mqbmock_queueengine mqbmock_queuehandle + mqbmock_storagemanager .. /Component Synopsis @@ -57,3 +58,5 @@ dependency. The list below shows the hierarchical ordering of the components. : mqbmock_queuehandle: : Provide a mock Queue Handle implementation. : +: mqbmock_storagemanager: +: Provide a mock implementation of the 'mqbi::StorageManager' interface. 
diff --git a/src/groups/mqb/mqbmock/mqbmock_domain.h b/src/groups/mqb/mqbmock/mqbmock_domain.h index d768395041..2a6ebba06a 100644 --- a/src/groups/mqb/mqbmock/mqbmock_domain.h +++ b/src/groups/mqb/mqbmock/mqbmock_domain.h @@ -39,7 +39,6 @@ // with a leading underscore ('_'). // MQB - #include #include #include diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp new file mode 100644 index 0000000000..ce24956fd7 --- /dev/null +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp @@ -0,0 +1,309 @@ +// Copyright 2021-2024 Bloomberg Finance L.P. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// mqbmock_storagemanager.cpp -*-C++-*- +#include + +#include + +// BDE +#include + +namespace BloombergLP { +namespace mqbmock { + +// -------------------- +// class StorageManager +// -------------------- + +// CREATORS +StorageManager::StorageManager() +{ + // NOTHING +} + +StorageManager::~StorageManager() +{ + // NOTHING +} + +// MANIPULATORS +int StorageManager::start( + BSLS_ANNOTATION_UNUSED bsl::ostream& errorDescription) +{ + return 0; +} + +void StorageManager::stop() +{ + // NOTHING +} + +void StorageManager::initializeQueueKeyInfoMap( + BSLS_ANNOTATION_UNUSED const mqbc::ClusterState* clusterState) +{ +} + +void StorageManager::registerQueue( + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& queueKey, + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const AppIdKeyPairs& appIdKeyPairs, + BSLS_ANNOTATION_UNUSED mqbi::Domain* domain) +{ + // NOTHING +} + +void StorageManager::unregisterQueue( + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED int partitionId) +{ + // NOTHING +} + +int StorageManager::updateQueue( + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& queueKey, + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const AppIdKeyPairs& addedIdKeyPairs, + BSLS_ANNOTATION_UNUSED const AppIdKeyPairs& removedIdKeyPairs) +{ + return 0; +} + +void StorageManager::registerQueueReplica( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& queueKey, + BSLS_ANNOTATION_UNUSED mqbi::Domain* domain, + BSLS_ANNOTATION_UNUSED bool allowDuplicate) +{ + // NOTHING +} + +void StorageManager::unregisterQueueReplica( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& queueKey, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& appKey) +{ + // NOTHING +} + +void StorageManager::updateQueueReplica( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& queueKey, + BSLS_ANNOTATION_UNUSED const AppIdKeyPairs& appIdKeyPairs, + BSLS_ANNOTATION_UNUSED mqbi::Domain* domain, + BSLS_ANNOTATION_UNUSED bool 
allowDuplicate) +{ + // NOTHING +} + +mqbu::StorageKey +StorageManager::generateAppKey(BSLS_ANNOTATION_UNUSED const bsl::string& appId, + BSLS_ANNOTATION_UNUSED int partitionId) +{ + return mqbu::StorageKey(); +} + +void StorageManager::setQueue(BSLS_ANNOTATION_UNUSED mqbi::Queue* queue, + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED int partitionId) +{ + // NOTHING +} + +void StorageManager::setQueueRaw(BSLS_ANNOTATION_UNUSED mqbi::Queue* queue, + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED int partitionId) +{ + // NOTHING +} + +void StorageManager::setPrimaryForPartition( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* primaryNode, + BSLS_ANNOTATION_UNUSED unsigned int primaryLeaseId) +{ + // NOTHING +} + +void StorageManager::clearPrimaryForPartition( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* primary) +{ + // NOTHING +} + +void StorageManager::setPrimaryStatusForPartition( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED bmqp_ctrlmsg::PrimaryStatus::Value value) +{ + // NOTHING +} + +void StorageManager::processPrimaryStateRequest( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processReplicaStateRequest( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processReplicaDataRequest( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +int StorageManager::makeStorage( + BSLS_ANNOTATION_UNUSED bsl::ostream& errorDescription, + BSLS_ANNOTATION_UNUSED bslma::ManagedPtr* out, + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED const mqbu::StorageKey& queueKey, + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const bsls::Types::Int64 messageTtl, + BSLS_ANNOTATION_UNUSED const int maxDeliveryAttempts, + BSLS_ANNOTATION_UNUSED const mqbconfm::StorageDefinition& storageDef) +{ + return 0; +} + +void StorageManager::processStorageEvent( + BSLS_ANNOTATION_UNUSED const mqbi::DispatcherStorageEvent& event) +{ + // NOTHING +} + +void StorageManager::processStorageSyncRequest( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processPartitionSyncStateRequest( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processPartitionSyncDataRequest( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processPartitionSyncDataRequestStatus( + BSLS_ANNOTATION_UNUSED const bmqp_ctrlmsg::ControlMessage& message, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processRecoveryEvent( + BSLS_ANNOTATION_UNUSED const mqbi::DispatcherRecoveryEvent& event) +{ + // NOTHING +} + +void StorageManager::processReceiptEvent( + BSLS_ANNOTATION_UNUSED const mqbi::DispatcherReceiptEvent& event) +{ + // NOTHING +} + +void StorageManager::processPrimaryStatusAdvisory( + BSLS_ANNOTATION_UNUSED const 
bmqp_ctrlmsg::PrimaryStatusAdvisory& advisory, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) +{ + // NOTHING +} + +void StorageManager::processReplicaStatusAdvisory( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source, + BSLS_ANNOTATION_UNUSED bmqp_ctrlmsg::NodeStatus::Value status) +{ + // NOTHING +} + +void StorageManager::processShutdownEvent() +{ + // NOTHING +} + +void StorageManager::applyForEachQueue( + BSLS_ANNOTATION_UNUSED int partitionId, + BSLS_ANNOTATION_UNUSED const QueueFunctor& functor) const +{ + // NOTHING +} + +int StorageManager::processCommand( + BSLS_ANNOTATION_UNUSED mqbcmd::StorageResult* result, + BSLS_ANNOTATION_UNUSED const mqbcmd::StorageCommand& command) +{ + return 0; +} + +void StorageManager::gcUnrecognizedDomainQueues() +{ + // NOTHING +} + +// ACCESSORS +mqbi::Dispatcher::ProcessorHandle StorageManager::processorForPartition( + BSLS_ANNOTATION_UNUSED int partitionId) const +{ + return mqbi::Dispatcher::k_INVALID_PROCESSOR_HANDLE; +} + +bool StorageManager::isStorageEmpty( + BSLS_ANNOTATION_UNUSED const bmqt::Uri& uri, + BSLS_ANNOTATION_UNUSED int partitionId) const +{ + return true; +} + +bdlbb::BlobBufferFactory* StorageManager::blobBufferFactory() const +{ + return 0; +} + +const mqbs::FileStore& +StorageManager::fileStore(BSLS_ANNOTATION_UNUSED int partitionId) const +{ +} + +bslma::ManagedPtr +StorageManager::getIterator(BSLS_ANNOTATION_UNUSED int partitionId) const +{ + return bslma::ManagedPtr(); +} + +} // close package namespace +} // close enterprise namespace diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h new file mode 100644 index 0000000000..57ffd957be --- /dev/null +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h @@ -0,0 +1,305 @@ +// Copyright 2021-2024 Bloomberg Finance L.P. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// mqbmock_storagemanager.h -*-C++-*- +#ifndef INCLUDED_MQBMOCK_STORAGEMANAGER +#define INCLUDED_MQBMOCK_STORAGEMANAGER + +//@PURPOSE: Provide a mock implementation of 'mqbi::StorageManager' interface. +// +//@CLASSES: +// mqbmock::StorageManager: Mock impl of 'mqbi::StorageManager' +// +//@DESCRIPTION: This component provides a mock implementation, +// 'mqbmock::StorageManager', of the 'mqbi::StorageManager' protocol. +// +/// Thread Safety +///------------- +// The 'mqbmock::StorageManager' object is not thread safe. + +// MQB +#include + +// BDE +#include + +namespace BloombergLP { +namespace mqbmock { + +// ==================== +// class StorageManager +// ==================== + +/// Mock implementation of `mqbi::StorageManager` interface. +class StorageManager : public mqbi::StorageManager { + public: + // CREATORS + + /// Create a new `StorageManager` instance + StorageManager(); + + /// Destructor + ~StorageManager() BSLS_KEYWORD_OVERRIDE; + + // MANIPULATORS + + /// Start this storage manager. 
Return 0 on success, or a non-zero rc + /// otherwise, populating the specified `errorDescription` with a + /// description of the error. + /// + /// THREAD: Executed by the cluster's dispatcher thread. + virtual int start(bsl::ostream& errorDescription) BSLS_KEYWORD_OVERRIDE; + + /// Stop this storage manager. + virtual void stop() BSLS_KEYWORD_OVERRIDE; + + /// Initialize the queue key info map based on information in the specified + /// `clusterState`. + virtual void initializeQueueKeyInfoMap( + const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + + /// Register a queue with the specified `uri`, `queueKey` and + /// `partitionId`, having the specified `appIdKeyPairs`, and belonging + /// to the specified `domain`. Load into the specified `storage` the + /// associated queue storage created. + /// + /// THREAD: Executed by the Client's dispatcher thread. + virtual void registerQueue(const bmqt::Uri& uri, + const mqbu::StorageKey& queueKey, + int partitionId, + const AppIdKeyPairs& appIdKeyPairs, + mqbi::Domain* domain) BSLS_KEYWORD_OVERRIDE; + + /// Synchronously unregister the queue with the specified `uri` from the + /// specified `partitionId`. Behavior is undefined unless this routine + /// is invoked from the cluster dispatcher thread. + /// + /// THREAD: Executed by the Client's dispatcher thread. + virtual void unregisterQueue(const bmqt::Uri& uri, + int partitionId) BSLS_KEYWORD_OVERRIDE; + + /// Configure the fanout queue having specified `uri` and `queueKey`, + /// assigned to the specified `partitionId` to have the specified + /// `addedIdKeyPairs` appId/appKey pairs added and `removedIdKeyPairs` + /// appId/appKey pairs removed. Return zero on success, and non-zero + /// value otherwise. Behavior is undefined unless this function is + /// invoked at the primary node. Behavior is also undefined unless the + /// queue is configured in fanout mode. + /// + /// THREAD: Executed by the Queue's dispatcher thread. + virtual int + updateQueue(const bmqt::Uri& uri, + const mqbu::StorageKey& queueKey, + int partitionId, + const AppIdKeyPairs& addedIdKeyPairs, + const AppIdKeyPairs& removedIdKeyPairs) BSLS_KEYWORD_OVERRIDE; + + virtual void + registerQueueReplica(int partitionId, + const bmqt::Uri& uri, + const mqbu::StorageKey& queueKey, + mqbi::Domain* domain = 0, + bool allowDuplicate = false) BSLS_KEYWORD_OVERRIDE; + + virtual void unregisterQueueReplica(int partitionId, + const bmqt::Uri& uri, + const mqbu::StorageKey& queueKey, + const mqbu::StorageKey& appKey) + BSLS_KEYWORD_OVERRIDE; + + virtual void + updateQueueReplica(int partitionId, + const bmqt::Uri& uri, + const mqbu::StorageKey& queueKey, + const AppIdKeyPairs& appIdKeyPairs, + mqbi::Domain* domain = 0, + bool allowDuplicate = false) BSLS_KEYWORD_OVERRIDE; + + /// Return a unique appKey for the specified `appId` for a queue + /// assigned to the specified `partitionId`. This routine can be + /// invoked by any thread. + mqbu::StorageKey generateAppKey(const bsl::string& appId, + int partitionId) BSLS_KEYWORD_OVERRIDE; + + /// Set the queue instance associated with the file-backed storage for + /// the specified `uri` mapped to the specified `partitionId` to the + /// specified `queue` value. Note that this method *does* *not* + /// synchronize on the queue-dispatcher thread. 
+ virtual void setQueue(mqbi::Queue* queue, + const bmqt::Uri& uri, + int partitionId) BSLS_KEYWORD_OVERRIDE; + + /// Set the queue instance associated with the file-backed storage for + /// the specified `uri` mapped to the specified `partitionId` to the + /// specified `queue` value. Behavior is undefined unless `queue` is + /// non-null or unless this routine is invoked from the dispatcher + /// thread associated with the `partitionId`. + virtual void setQueueRaw(mqbi::Queue* queue, + const bmqt::Uri& uri, + int partitionId) BSLS_KEYWORD_OVERRIDE; + + /// Behavior is undefined unless the specified 'partitionId' is in range + /// and the specified 'primaryNode' is not null. + /// + /// THREAD: Executed in cluster dispatcher thread. + virtual void + setPrimaryForPartition(int partitionId, + mqbnet::ClusterNode* primaryNode, + unsigned int primaryLeaseId) BSLS_KEYWORD_OVERRIDE; + + /// Behavior is undefined unless the specified 'partitionId' is in range + /// and the specified 'primaryNode' is not null. + /// + /// THREAD: Executed in cluster dispatcher thread. + virtual void clearPrimaryForPartition(int partitionId, + mqbnet::ClusterNode* primary) + BSLS_KEYWORD_OVERRIDE; + + /// Set the primary status of the specified 'partitionId' to the specified + /// 'value'. + /// + /// THREAD: Executed in cluster dispatcher thread. + virtual void setPrimaryStatusForPartition( + int partitionId, + bmqp_ctrlmsg::PrimaryStatus::Value value) BSLS_KEYWORD_OVERRIDE; + + /// Process primary state request received from the specified `source` + /// with the specified `message`. + virtual void processPrimaryStateRequest( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Process replica state request received from the specified `source` + /// with the specified `message`. + virtual void processReplicaStateRequest( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Process replica data request received from the specified `source` + /// with the specified `message`. + virtual void processReplicaDataRequest( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + virtual int makeStorage(bsl::ostream& errorDescription, + bslma::ManagedPtr* out, + const bmqt::Uri& uri, + const mqbu::StorageKey& queueKey, + int partitionId, + const bsls::Types::Int64 messageTtl, + const int maxDeliveryAttempts, + const mqbconfm::StorageDefinition& storageDef) + BSLS_KEYWORD_OVERRIDE; + + /// Executed in cluster dispatcher thread. + virtual void processStorageEvent(const mqbi::DispatcherStorageEvent& event) + BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processStorageSyncRequest( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processPartitionSyncStateRequest( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processPartitionSyncDataRequest( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processPartitionSyncDataRequestStatus( + const bmqp_ctrlmsg::ControlMessage& message, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Executed in cluster dispatcher thread. 
+ virtual void processRecoveryEvent( + const mqbi::DispatcherRecoveryEvent& event) BSLS_KEYWORD_OVERRIDE; + + /// Executed in cluster dispatcher thread. + virtual void processReceiptEvent(const mqbi::DispatcherReceiptEvent& event) + BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processPrimaryStatusAdvisory( + const bmqp_ctrlmsg::PrimaryStatusAdvisory& advisory, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processReplicaStatusAdvisory( + int partitionId, + mqbnet::ClusterNode* source, + bmqp_ctrlmsg::NodeStatus::Value status) BSLS_KEYWORD_OVERRIDE; + + /// Executed by any thread. + virtual void processShutdownEvent() BSLS_KEYWORD_OVERRIDE; + + /// Invoke the specified `functor` with each queue associated to the + /// partition identified by the specified `partitionId` if that + /// partition has been successfully opened. The behavior is undefined + /// unless invoked from the queue thread corresponding to `partitionId`. + virtual void + applyForEachQueue(int partitionId, + const QueueFunctor& functor) const BSLS_KEYWORD_OVERRIDE; + + /// Process the specified `command`, and load the result to the + /// specified `result`. Return 0 if the command was successfully + /// processed, or a non-zero value otherwise. This function can be + /// invoked from any thread, and will block until the potentially + /// asynchronous operation is complete. + virtual int processCommand(mqbcmd::StorageResult* result, + const mqbcmd::StorageCommand& command) + BSLS_KEYWORD_OVERRIDE; + + /// GC the queues from unrecognized domains, if any. + virtual void gcUnrecognizedDomainQueues() BSLS_KEYWORD_OVERRIDE; + + // ACCESSORS + + /// Return the processor handle in charge of the specified + /// `partitionId`. The behavior is undefined if `partitionId` does not + /// represent a valid partition id. + virtual mqbi::Dispatcher::ProcessorHandle + processorForPartition(int partitionId) const BSLS_KEYWORD_OVERRIDE; + + /// Return true if the queue having specified `uri` and assigned to the + /// specified `partitionId` has no messages, false in any other case. + /// Behavior is undefined unless this routine is invoked from cluster + /// dispatcher thread. + virtual bool isStorageEmpty(const bmqt::Uri& uri, + int partitionId) const BSLS_KEYWORD_OVERRIDE; + + /// Return the blob buffer factory to use. + virtual bdlbb::BlobBufferFactory* + blobBufferFactory() const BSLS_KEYWORD_OVERRIDE; + + /// Return partition corresponding to the specified `partitionId`. The + /// behavior is undefined if `partitionId` does not represent a valid + /// partition id. + virtual const mqbs::FileStore& + fileStore(int partitionId) const BSLS_KEYWORD_OVERRIDE; + + /// Return a StorageManagerIterator for the specified `partitionId`. 
+ virtual bslma::ManagedPtr + getIterator(int partitionId) const BSLS_KEYWORD_OVERRIDE; +}; + +} // close package namespace +} // close enterprise namespace + +#endif diff --git a/src/groups/mqb/mqbmock/package/mqbmock.mem b/src/groups/mqb/mqbmock/package/mqbmock.mem index 234229fdec..98846d1b6f 100644 --- a/src/groups/mqb/mqbmock/package/mqbmock.mem +++ b/src/groups/mqb/mqbmock/package/mqbmock.mem @@ -8,3 +8,4 @@ mqbmock_logidgenerator mqbmock_queue mqbmock_queueengine mqbmock_queuehandle +mqbmock_storagemanager From 0300683aaf8698ef7a13b5d2308eef6e326af262 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Wed, 3 Jul 2024 14:57:54 -0400 Subject: [PATCH 04/15] mqbc::ClusterData: Clean up manipulators and accessors Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbc/mqbc_clusterdata.h | 48 +++++++++++++++----------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/groups/mqb/mqbc/mqbc_clusterdata.h b/src/groups/mqb/mqbc/mqbc_clusterdata.h index 712c2bb788..0bc0cf2adf 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterdata.h +++ b/src/groups/mqb/mqbc/mqbc_clusterdata.h @@ -238,6 +238,12 @@ class ClusterData { // MANIPULATORS + /// Get a modifiable reference to this object's event scheduler. + bdlmt::EventScheduler* scheduler(); + + /// Get a modifiable reference to this object's buffer factory. + bdlbb::BlobBufferFactory* bufferFactory(); + /// Get a modifiable reference to this object's blobSpPool. BlobSpPool* blobSpPool(); @@ -274,7 +280,8 @@ class ClusterData { /// Get a modifiable reference to this object's domainFactory. mqbi::DomainFactory* domainFactory(); - mqbnet::TransportManager* transportManager() const; + /// Get a modifiable reference to this object's transportManager. + mqbnet::TransportManager* transportManager(); /// Get a modifiable reference to this object's cluster stats. mqbstat::ClusterStats& stats(); @@ -282,13 +289,15 @@ class ClusterData { /// Get a modifiable reference to this object's clusterNodesStatContext. StatContextMp& clusterNodesStatContext(); + /// Get a modifiable reference to this object's stateSpPool. StateSpPool* stateSpPool(); + /// Get a modifiable reference to this object's miscWorkThreadPool. + bdlmt::FixedThreadPool* miscWorkThreadPool(); + // ACCESSORS /// Return the value of the corresponding member of this object. 
- bdlmt::EventScheduler* scheduler() const; - bdlbb::BlobBufferFactory* bufferFactory() const; const mqbi::DispatcherClientData& dispatcherClientData() const; const mqbcfg::ClusterDefinition& clusterConfig() const; const mqbcfg::ClusterProxyDefinition& clusterProxyConfig() const; @@ -297,7 +306,6 @@ class ClusterData { const ClusterDataIdentity& identity() const; const mqbi::Cluster* cluster() const; const StatContextMp& clusterNodesStatContext() const; - bdlmt::FixedThreadPool* miscWorkThreadPool(); }; // ============================================================================ @@ -342,6 +350,16 @@ ClusterDataIdentity::identity() const // ----------------- // MANIPULATORS +inline bdlmt::EventScheduler* ClusterData::scheduler() +{ + return d_scheduler_p; +} + +inline bdlbb::BlobBufferFactory* ClusterData::bufferFactory() +{ + return d_bufferFactory_p; +} + inline ClusterData::BlobSpPool* ClusterData::blobSpPool() { return d_blobSpPool_p; @@ -402,7 +420,7 @@ inline mqbi::DomainFactory* ClusterData::domainFactory() return d_domainFactory_p; } -inline mqbnet::TransportManager* ClusterData::transportManager() const +inline mqbnet::TransportManager* ClusterData::transportManager() { return d_transportManager_p; } @@ -417,17 +435,17 @@ inline ClusterData::StatContextMp& ClusterData::clusterNodesStatContext() return d_clusterNodesStatContext_mp; } -// ACCESSORS -inline bdlmt::EventScheduler* ClusterData::scheduler() const +inline ClusterData::StateSpPool* ClusterData::stateSpPool() { - return d_scheduler_p; + return &d_stateSpPool; } -inline bdlbb::BlobBufferFactory* ClusterData::bufferFactory() const +inline bdlmt::FixedThreadPool* ClusterData::miscWorkThreadPool() { - return d_bufferFactory_p; + return &d_miscWorkThreadPool; } +// ACCESSORS inline const mqbi::DispatcherClientData& ClusterData::dispatcherClientData() const { @@ -471,16 +489,6 @@ ClusterData::clusterNodesStatContext() const return d_clusterNodesStatContext_mp; } -inline ClusterData::StateSpPool* ClusterData::stateSpPool() -{ - return &d_stateSpPool; -} - -inline bdlmt::FixedThreadPool* ClusterData::miscWorkThreadPool() -{ - return &d_miscWorkThreadPool; -} - } // close package namespace } // close enterprise namespace From 9a8b5b40bc3c76f7b0da90728b1ca0ad12c09c76 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 5 Jul 2024 15:39:04 -0400 Subject: [PATCH 05/15] mqbc::RecoveryMgr: Thread safety improvements Signed-off-by: Yuan Jing Vincent Yan --- .../mqb/mqbblp/mqbblp_recoverymanager.cpp | 17 +- .../mqb/mqbc/mqbc_partitionstatetable.h | 30 ++-- src/groups/mqb/mqbc/mqbc_recoverymanager.cpp | 150 ++++++++++-------- src/groups/mqb/mqbc/mqbc_recoverymanager.h | 69 ++++++-- src/groups/mqb/mqbc/mqbc_recoveryutil.cpp | 5 +- src/groups/mqb/mqbc/mqbc_recoveryutil.h | 7 +- src/groups/mqb/mqbc/mqbc_storagemanager.cpp | 9 -- 7 files changed, 171 insertions(+), 116 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp b/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp index cbbb617fff..cb694af382 100644 --- a/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp @@ -1722,14 +1722,15 @@ int RecoveryManager::replayPartition( bmqp::StorageMessageType::Enum storageMsgType = bmqp::StorageMessageType::e_UNDEFINED; - rc = mqbc::RecoveryUtil::incrementCurrentSeqNum(¤tSeqNum, - &journalRecordBase, - fti->journalFd(), - toSequenceNum, - pid, - *destination, - *d_clusterData_p, - journalIt); + rc = mqbc::RecoveryUtil::incrementCurrentSeqNum( + ¤tSeqNum, + 
&journalRecordBase, + fti->journalFd(), + toSequenceNum, + pid, + *destination, + d_clusterData_p->identity().description(), + journalIt); if (rc != 0) { break; // BREAK } diff --git a/src/groups/mqb/mqbc/mqbc_partitionstatetable.h b/src/groups/mqb/mqbc/mqbc_partitionstatetable.h index e341b031fe..30c4f01b8c 100644 --- a/src/groups/mqb/mqbc/mqbc_partitionstatetable.h +++ b/src/groups/mqb/mqbc/mqbc_partitionstatetable.h @@ -332,6 +332,8 @@ class PartitionStateTableActions { void do_resetReceiveDataCtx_flagFailedReplicaSeq_checkQuorumSeq( const ARGS& args); + void do_resetReceiveDataCtx_closeRecoveryFileSet(const ARGS& args); + void do_closeRecoveryFileSet_openStorage_startSendDataChunks(const ARGS& args); @@ -342,7 +344,7 @@ class PartitionStateTableActions { void do_setExpectedDataChunkRange_replicaDataRequestPull(const ARGS& args); void - do_storeSelfSeq_resetReceiveDataCtx_closeRecoveryFileSet_openStorage_replicaDataRequestPush_replicaDataRequestDrop_startSendDataChunks_incrementNumRplcaDataRspn_checkQuorumRplcaDataRspn( + do_storeSelfSeq_openStorage_replicaDataRequestPush_replicaDataRequestDrop_startSendDataChunks_incrementNumRplcaDataRspn_checkQuorumRplcaDataRspn( const ARGS& args); void @@ -374,7 +376,7 @@ class PartitionStateTableActions { do_failureReplicaDataResponsePull_cleanupSeqnums_reapplyDetectSelfReplica( const ARGS& args); - void do_resetReceiveDataCtx_cleanupSeqnums_reapplyDetectSelfPrimary( + void do_cleanupSeqnums_resetReceiveDataCtx_reapplyDetectSelfPrimary( const ARGS& args); void @@ -519,14 +521,18 @@ class PartitionStateTable RECOVERY_DATA, updateStorage, PRIMARY_HEALING_STG2); + PST_CFG(PRIMARY_HEALING_STG2, + DONE_RECEIVING_DATA_CHUNKS, + resetReceiveDataCtx_closeRecoveryFileSet, + PRIMARY_HEALING_STG2); PST_CFG(PRIMARY_HEALING_STG2, ERROR_RECEIVING_DATA_CHUNKS, - resetReceiveDataCtx_cleanupSeqnums_reapplyDetectSelfPrimary, + cleanupSeqnums_resetReceiveDataCtx_reapplyDetectSelfPrimary, UNKNOWN); PST_CFG( PRIMARY_HEALING_STG2, REPLICA_DATA_RSPN_PULL, - storeSelfSeq_resetReceiveDataCtx_closeRecoveryFileSet_openStorage_replicaDataRequestPush_replicaDataRequestDrop_startSendDataChunks_incrementNumRplcaDataRspn_checkQuorumRplcaDataRspn, + storeSelfSeq_openStorage_replicaDataRequestPush_replicaDataRequestDrop_startSendDataChunks_incrementNumRplcaDataRspn_checkQuorumRplcaDataRspn, PRIMARY_HEALING_STG2); PST_CFG(PRIMARY_HEALING_STG2, REPLICA_DATA_RSPN_PUSH, @@ -813,6 +819,14 @@ void PartitionStateTableActions:: do_checkQuorumSeq(args); } +template +void PartitionStateTableActions< + ARGS>::do_resetReceiveDataCtx_closeRecoveryFileSet(const ARGS& args) +{ + do_resetReceiveDataCtx(args); + do_closeRecoveryFileSet(args); +} + template void PartitionStateTableActions:: do_closeRecoveryFileSet_openStorage_startSendDataChunks(const ARGS& args) @@ -845,12 +859,10 @@ void PartitionStateTableActions:: template void PartitionStateTableActions:: - do_storeSelfSeq_resetReceiveDataCtx_closeRecoveryFileSet_openStorage_replicaDataRequestPush_replicaDataRequestDrop_startSendDataChunks_incrementNumRplcaDataRspn_checkQuorumRplcaDataRspn( + do_storeSelfSeq_openStorage_replicaDataRequestPush_replicaDataRequestDrop_startSendDataChunks_incrementNumRplcaDataRspn_checkQuorumRplcaDataRspn( const ARGS& args) { do_storeSelfSeq(args); - do_resetReceiveDataCtx(args); - do_closeRecoveryFileSet(args); do_openStorage(args); do_replicaDataRequestPush(args); do_replicaDataRequestDrop(args); @@ -952,11 +964,11 @@ void PartitionStateTableActions:: template void PartitionStateTableActions:: - 
do_resetReceiveDataCtx_cleanupSeqnums_reapplyDetectSelfPrimary( + do_cleanupSeqnums_resetReceiveDataCtx_reapplyDetectSelfPrimary( const ARGS& args) { - do_resetReceiveDataCtx(args); do_cleanupSeqnums(args); + do_resetReceiveDataCtx(args); do_reapplyDetectSelfPrimary(args); } diff --git a/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp b/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp index 2cb9096c91..27ebe81def 100644 --- a/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp @@ -118,18 +118,19 @@ void RecoveryManager::ReceiveDataContext::reset() // CREATORS RecoveryManager::RecoveryManager( + bdlbb::BlobBufferFactory* bufferFactory, const mqbcfg::ClusterDefinition& clusterConfig, - mqbc::ClusterData* clusterData, + const mqbc::ClusterData& clusterData, const mqbs::DataStoreConfig& dataStoreConfig, bslma::Allocator* allocator) : d_allocator_p(allocator) +, d_bufferFactory_p(bufferFactory) , d_clusterConfig(clusterConfig) , d_dataStoreConfig(dataStoreConfig) -, d_clusterData_p(clusterData) +, d_clusterData(clusterData) , d_recoveryContextVec(allocator) { // PRECONDITIONS - BSLS_ASSERT_SAFE(clusterData); BSLS_ASSERT_SAFE(allocator); d_recoveryContextVec.resize( @@ -156,7 +157,7 @@ void RecoveryManager::stop() void RecoveryManager::deprecateFileSet(int partitionId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS BSLS_ASSERT_SAFE(partitionId >= 0 && @@ -172,7 +173,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to truncate journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() @@ -187,7 +188,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to flush journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() @@ -199,7 +200,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) rc = mqbs::FileSystemUtil::close(&recoveryCtx.d_mappedJournalFd); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to close journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() @@ -211,7 +212,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) d_dataStoreConfig.archiveLocation()); if (0 != rc) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to move file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "] " @@ -226,7 +227,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to truncate data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc @@ -239,7 +240,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) errorDesc); if (rc 
!= 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to flush data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc @@ -250,7 +251,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) rc = mqbs::FileSystemUtil::close(&recoveryCtx.d_mappedDataFd); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to close data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc @@ -261,7 +262,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) d_dataStoreConfig.archiveLocation()); if (0 != rc) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to move file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "] " @@ -279,9 +280,10 @@ void RecoveryManager::setExpectedDataChunkRange( const bmqp_ctrlmsg::PartitionSequenceNumber& endSeqNum, int requestId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS + BSLS_ASSERT_SAFE(fs.inDispatcherThread()); BSLS_ASSERT_SAFE(partitionId >= 0 && partitionId < d_clusterConfig.partitionConfig().numPartitions()); @@ -291,7 +293,7 @@ void RecoveryManager::setExpectedDataChunkRange( RecoveryContext& recoveryCtx = d_recoveryContextVec[partitionId]; ReceiveDataContext& receiveDataCtx = recoveryCtx.d_receiveDataContext; if (receiveDataCtx.d_expectChunks) { - BALL_LOG_ERROR << d_clusterData_p->identity().description() + BALL_LOG_ERROR << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Got notification to expect chunks when self is " << "already expecting chunks. 
Self's view: " @@ -322,7 +324,7 @@ void RecoveryManager::setExpectedDataChunkRange( BALL_LOG_INFO_BLOCK { - BALL_LOG_OUTPUT_STREAM << d_clusterData_p->identity().description() + BALL_LOG_OUTPUT_STREAM << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Got notification to expect data chunks " << "of range " << beginSeqNum << " to " @@ -337,7 +339,7 @@ void RecoveryManager::setExpectedDataChunkRange( void RecoveryManager::resetReceiveDataCtx(int partitionId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS BSLS_ASSERT_SAFE(partitionId >= 0 && @@ -355,7 +357,10 @@ int RecoveryManager::processSendDataChunks( const mqbs::FileStore& fs, PartitionDoneSendDataChunksCb doneDataChunksCb) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + + // PRECONDITIONS + BSLS_ASSERT_SAFE(fs.inDispatcherThread()); enum RcEnum { // Value for the various RC error categories @@ -426,7 +431,7 @@ int RecoveryManager::processSendDataChunks( bmqp::StorageEventBuilder builder(mqbs::FileStoreProtocol::k_VERSION, bmqp::EventType::e_PARTITION_SYNC, - d_clusterData_p->bufferFactory(), + d_bufferFactory_p, d_allocator_p); // Note that partition has to be replayed from the record *after* @@ -439,14 +444,15 @@ int RecoveryManager::processSendDataChunks( bmqp::StorageMessageType::Enum storageMsgType = bmqp::StorageMessageType::e_UNDEFINED; - rc = RecoveryUtil::incrementCurrentSeqNum(¤tSeqNum, - &journalRecordBase, - *mappedJournalFd, - endSeqNum, - partitionId, - *destination, - *d_clusterData_p, - journalIt); + rc = RecoveryUtil::incrementCurrentSeqNum( + ¤tSeqNum, + &journalRecordBase, + *mappedJournalFd, + endSeqNum, + partitionId, + *destination, + d_clusterData.identity().description(), + journalIt); if (rc == 1) { break; } @@ -525,7 +531,7 @@ int RecoveryManager::processSendDataChunks( } if (currentSeqNum != endSeqNum) { - BALL_LOG_WARN << d_clusterData_p->identity().description() + BALL_LOG_WARN << d_clusterData.identity().description() << " Partition [" << partitionId << "]: incomplete replay of partition. 
Sequence number " << "of last record sent: " << currentSeqNum @@ -545,8 +551,8 @@ int RecoveryManager::processSendDataChunks( } } - BALL_LOG_INFO << d_clusterData_p->identity().description() - << " Partition [" << partitionId << "]: " + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" + << partitionId << "]: " << "Sent data chunks from " << beginSeqNum << " to " << endSeqNum << " to node: " << destination->nodeDescription() << "."; @@ -560,11 +566,11 @@ int RecoveryManager::processReceiveDataChunks( mqbs::FileStore* fs, int partitionId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); BSLS_ASSERT_SAFE(source); - BSLS_ASSERT_SAFE(fs); BSLS_ASSERT_SAFE(0 <= partitionId); enum RcEnum { @@ -587,7 +593,7 @@ int RecoveryManager::processReceiveDataChunks( ReceiveDataContext& receiveDataCtx = recoveryCtx.d_receiveDataContext; if (!receiveDataCtx.d_expectChunks) { MWCTSK_ALARMLOG_ALARM("RECOVERY") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Received partition-sync event from node " << source->nodeDescription() @@ -599,7 +605,7 @@ int RecoveryManager::processReceiveDataChunks( BSLS_ASSERT_SAFE(receiveDataCtx.d_recoveryDataSource_p); if (receiveDataCtx.d_recoveryDataSource_p->nodeId() != source->nodeId()) { MWCTSK_ALARMLOG_ALARM("RECOVERY") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Received partition-sync event from node " << source->nodeDescription() @@ -625,7 +631,7 @@ int RecoveryManager::processReceiveDataChunks( } else if (receiveDataCtx.d_currSeqNum > receiveDataCtx.d_endSeqNum) { MWCTSK_ALARMLOG_ALARM("REPLICATION") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "The last partition sync msg inside a storage event " << "processed by FileStore has sequenceNumber " @@ -652,7 +658,7 @@ int RecoveryManager::processReceiveDataChunks( mwcu::BlobObjectProxy recHeader; mwcu::MemOutStream partitionDesc; - partitionDesc << d_clusterData_p->identity().description() + partitionDesc << d_clusterData.identity().description() << " Partition [" << partitionId << "]: "; int rc = mqbs::StorageUtil::loadRecordHeaderAndPos( @@ -674,7 +680,7 @@ int RecoveryManager::processReceiveDataChunks( if (recordSeqNum <= receiveDataCtx.d_currSeqNum) { MWCTSK_ALARMLOG_ALARM("REPLICATION") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Received partition sync msg of type " << header.messageType() << " with sequenceNumber " @@ -689,7 +695,7 @@ int RecoveryManager::processReceiveDataChunks( if (recordSeqNum > receiveDataCtx.d_endSeqNum) { MWCTSK_ALARMLOG_ALARM("REPLICATION") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Received partition sync msg of type " << header.messageType() << " with sequenceNumber " @@ -718,7 +724,7 @@ int RecoveryManager::processReceiveDataChunks( // Source's and self views of the journal have diverged. 
MWCTSK_ALARMLOG_ALARM("REPLICATION") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Received journal record of type [" << header.messageType() << "] with journal offset mismatch. " @@ -856,7 +862,7 @@ int RecoveryManager::processReceiveDataChunks( if (mqbs::QueueOpType::e_CREATION != queueRec->type() && mqbs::QueueOpType::e_ADDITION != queueRec->type()) { BALL_LOG_ERROR - << d_clusterData_p->identity().description() + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << " Unexpected QueueOpType: " << queueRec->type(); return rc_INVALID_QUEUE_RECORD; // RETURN @@ -877,14 +883,16 @@ int RecoveryManager::createRecoveryFileSet(bsl::ostream& errorDescription, mqbs::FileStore* fs, int partitionId) { + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS - BSLS_ASSERT_SAFE(fs); + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); bsl::shared_ptr fileSetSp; mwcu::MemOutStream partitionDesc; partitionDesc << "Partition [" << partitionId - << "] (cluster: " << d_clusterData_p->cluster()->name() + << "] (cluster: " << d_clusterData.cluster()->name() << "): "; int rc = mqbs::FileStoreUtil::create(errorDescription, @@ -916,8 +924,8 @@ int RecoveryManager::createRecoveryFileSet(bsl::ostream& errorDescription, BSLS_ASSERT_SAFE(recoveryCtx.d_mappedJournalFd.isValid()); BSLS_ASSERT_SAFE(recoveryCtx.d_mappedDataFd.isValid()); - BALL_LOG_INFO << d_clusterData_p->identity().description() - << " Partition [" << partitionId << "]: " + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" + << partitionId << "]: " << "Created recovery data file store set: " << recoveryCtx.d_recoveryFileSet << ", journal file position: " @@ -931,7 +939,7 @@ int RecoveryManager::createRecoveryFileSet(bsl::ostream& errorDescription, int RecoveryManager::openRecoveryFileSet(bsl::ostream& errorDescription, int partitionId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS BSLS_ASSERT_SAFE(partitionId >= 0 && @@ -954,7 +962,7 @@ int RecoveryManager::openRecoveryFileSet(bsl::ostream& errorDescription, if (recoveryCtx.d_mappedJournalFd.isValid()) { BSLS_ASSERT_SAFE(recoveryCtx.d_mappedDataFd.isValid()); - BALL_LOG_INFO << d_clusterData_p->identity().description() + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Not opening recovery file set because it's already " << "opened. 
Current recovery file set: " @@ -991,7 +999,7 @@ int RecoveryManager::openRecoveryFileSet(bsl::ostream& errorDescription, mqbs::MappedFileDescriptor()); if (rc != 0) { - errorDescription << d_clusterData_p->identity().description() + errorDescription << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "File set: " << recoveryCtx.d_recoveryFileSet << " validation failed, rc: " << rc; @@ -1059,8 +1067,8 @@ int RecoveryManager::openRecoveryFileSet(bsl::ostream& errorDescription, } } - BALL_LOG_INFO << d_clusterData_p->identity().description() - << " Partition [" << partitionId << "]: " + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" + << partitionId << "]: " << "Opened recovery file set: " << recoveryCtx.d_recoveryFileSet << ", journal file position: " @@ -1073,7 +1081,7 @@ int RecoveryManager::openRecoveryFileSet(bsl::ostream& errorDescription, int RecoveryManager::closeRecoveryFileSet(int partitionId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS BSLS_ASSERT_SAFE(partitionId >= 0 && @@ -1097,7 +1105,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to truncate journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() @@ -1112,7 +1120,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to flush journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() @@ -1124,7 +1132,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) rc = mqbs::FileSystemUtil::close(&recoveryCtx.d_mappedJournalFd); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to close journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() @@ -1132,7 +1140,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) return rc * 10 + rc_JOURNAL_FD_CLOSE_FAILURE; // RETURN } - BALL_LOG_INFO << d_clusterData_p->identity().description() + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Closed journal file in recovery file set; " << "journal file position was " @@ -1146,7 +1154,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to truncate data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc @@ -1159,7 +1167,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) errorDesc); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to flush data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc @@ -1170,7 +1178,7 @@ int 
RecoveryManager::closeRecoveryFileSet(int partitionId) rc = mqbs::FileSystemUtil::close(&recoveryCtx.d_mappedDataFd); if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") - << d_clusterData_p->identity().description() << " Partition [" + << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Failed to close data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc @@ -1178,7 +1186,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) return rc * 10 + rc_DATA_FD_CLOSE_FAILURE; // RETURN } - BALL_LOG_INFO << d_clusterData_p->identity().description() + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Closed data file in recovery file set; " << "data file position was " @@ -1193,7 +1201,7 @@ int RecoveryManager::recoverSeqNum( bmqp_ctrlmsg::PartitionSequenceNumber* seqNum, int partitionId) { - // executed by the *STORAGE (QUEUE) DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS BSLS_ASSERT_SAFE(seqNum); @@ -1226,7 +1234,7 @@ int RecoveryManager::recoverSeqNum( false, // needQList false); // needData if (rc != 0) { - BALL_LOG_ERROR << d_clusterData_p->identity().description() + BALL_LOG_ERROR << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Error while iterating recovered files, rc: " << rc << ", description: " << errorDesc.str(); @@ -1236,7 +1244,7 @@ int RecoveryManager::recoverSeqNum( if (jit.hasRecordSizeRemaining()) { const mqbs::RecordHeader& lastRecordHeader = jit.lastRecordHeader(); - BALL_LOG_INFO << d_clusterData_p->identity().description() + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Recovered Sequence Number " << lastRecordHeader.partitionSequenceNumber() @@ -1246,7 +1254,7 @@ int RecoveryManager::recoverSeqNum( *seqNum = lastRecordHeader.partitionSequenceNumber(); } else { - BALL_LOG_INFO << d_clusterData_p->identity().description() + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Journal file has no record. Storing (0, 0) as self " << "sequence number."; @@ -1260,6 +1268,8 @@ int RecoveryManager::recoverSeqNum( void RecoveryManager::setLiveDataSource(mqbnet::ClusterNode* source, int partitionId) { + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(0 <= partitionId); @@ -1267,8 +1277,8 @@ void RecoveryManager::setLiveDataSource(mqbnet::ClusterNode* source, RecoveryContext& recoveryCtx = d_recoveryContextVec[partitionId]; - BALL_LOG_INFO << d_clusterData_p->identity().description() - << " Partition [" << partitionId << "]: " + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" + << partitionId << "]: " << "Setting live data source from " << (recoveryCtx.d_liveDataSource_p ? 
recoveryCtx.d_liveDataSource_p->nodeDescription() @@ -1286,6 +1296,8 @@ void RecoveryManager::bufferStorageEvent( const bsl::shared_ptr& blob, mqbnet::ClusterNode* source) { + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(0 <= partitionId); BSLS_ASSERT_SAFE(source); @@ -1293,7 +1305,7 @@ void RecoveryManager::bufferStorageEvent( RecoveryContext& recoveryCtx = d_recoveryContextVec[partitionId]; BSLS_ASSERT_SAFE(recoveryCtx.d_liveDataSource_p); if (recoveryCtx.d_liveDataSource_p->nodeId() != source->nodeId()) { - BALL_LOG_ERROR << d_clusterData_p->identity().description() + BALL_LOG_ERROR << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Storage event from node " << source->nodeDescription() << "cannot be buffered, " @@ -1307,8 +1319,8 @@ void RecoveryManager::bufferStorageEvent( recoveryCtx.d_bufferedEvents.push_back(blob); - BALL_LOG_INFO << d_clusterData_p->identity().description() - << " Partition [" << partitionId << "]: " + BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" + << partitionId << "]: " << "Buffered a storage event from primary node " << source->nodeDescription() << " as self is still healing the partition."; @@ -1319,6 +1331,8 @@ int RecoveryManager::loadBufferedStorageEvents( const mqbnet::ClusterNode* source, int partitionId) { + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(out); BSLS_ASSERT_SAFE(source); @@ -1334,7 +1348,7 @@ int RecoveryManager::loadBufferedStorageEvents( BSLS_ASSERT_SAFE(recoveryCtx.d_liveDataSource_p); if (recoveryCtx.d_liveDataSource_p->nodeId() != source->nodeId()) { - BALL_LOG_ERROR << d_clusterData_p->identity().description() + BALL_LOG_ERROR << d_clusterData.identity().description() << " Partition [" << partitionId << "]: " << "Cannot load buffered storage events from node " << source->nodeDescription() @@ -1359,6 +1373,8 @@ void RecoveryManager::loadReplicaDataResponsePush( bmqp_ctrlmsg::ControlMessage* out, int partitionId) const { + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(out); BSLS_ASSERT_SAFE(partitionId >= 0 && diff --git a/src/groups/mqb/mqbc/mqbc_recoverymanager.h b/src/groups/mqb/mqbc/mqbc_recoverymanager.h index b0c89e7646..df9f147169 100644 --- a/src/groups/mqb/mqbc/mqbc_recoverymanager.h +++ b/src/groups/mqb/mqbc/mqbc_recoverymanager.h @@ -259,21 +259,28 @@ class RecoveryManager { private: // DATA bslma::Allocator* d_allocator_p; - // Allocator to use + /// Allocator to use + /// Blob buffer factory to use + bdlbb::BlobBufferFactory* d_bufferFactory_p; + + /// Cluster configuration to use const mqbcfg::ClusterDefinition& d_clusterConfig; - // Cluster configuration to use - mqbs::DataStoreConfig d_dataStoreConfig; - // Configuration for file store to use + /// Configuration for file store to use + const mqbs::DataStoreConfig d_dataStoreConfig; - mqbc::ClusterData* d_clusterData_p; - // Associated non-persistent cluster - // data for this node + /// Associated non-persistent cluster + /// data for this node + const mqbc::ClusterData& d_clusterData; + /// Vector per partition which maintains + /// information about RecoveryContext. + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. 
RecoveryContextVec d_recoveryContextVec; - // Vector per partition which maintains - // information about RecoveryContext. private: // NOT IMPLEMENTED @@ -286,11 +293,12 @@ class RecoveryManager { // CREATORS - /// Create a `RecoveryManager` object with the specified - /// `clusterConfig`, `dataStoreConfig`, `clusterData`. Use the specified - /// `allocator` for any memory allocation. - RecoveryManager(const mqbcfg::ClusterDefinition& clusterConfig, - mqbc::ClusterData* clusterData, + /// Create a `RecoveryManager` object with the specified `bufferFactory`, + /// `clusterConfig`, `dataStoreConfig`, and `clusterData`. Use the + /// specified `allocator` for any memory allocation. + RecoveryManager(bdlbb::BlobBufferFactory* bufferFactory, + const mqbcfg::ClusterDefinition& clusterConfig, + const mqbc::ClusterData& clusterData, const mqbs::DataStoreConfig& dataStoreConfig, bslma::Allocator* allocator); @@ -311,7 +319,7 @@ class RecoveryManager { /// when self's storage is out of sync with primary and cannot be healed /// trivially. /// - /// THREAD: Executed in the dispatcher thread associated with the + /// THREAD: Executed by the queue dispatcher thread associated with the /// specified `partitionId`. void deprecateFileSet(int partitionId); @@ -322,7 +330,7 @@ class RecoveryManager { /// not open, ensure that the journal and data files in the recovery /// file set is open. /// - /// THREAD: Executed in the dispatcher thread associated with the + /// THREAD: Executed by the queue dispatcher thread associated with the /// specified 'partitionId'. void setExpectedDataChunkRange( int partitionId, @@ -368,6 +376,9 @@ class RecoveryManager { /// `partitionId`, using the specified `fs`. Return 0 on success, non /// zero value otherwise along with populating the specified /// `errorDescription` with a brief reason for logging purposes. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. int createRecoveryFileSet(bsl::ostream& errorDescription, mqbs::FileStore* fs, int partitionId); @@ -378,24 +389,39 @@ class RecoveryManager { /// the specified `errorDescription` with a brief reason for logging /// purposes. Note that a return value of `1` is special and indicates /// that no recovery file set is found. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. int openRecoveryFileSet(bsl::ostream& errorDescription, int partitionId); /// Close the recovery file set for the specified 'partitionId'. Return /// 0 on success, non zero value otherwise. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. int closeRecoveryFileSet(int partitionId); /// Recover latest sequence number from storage for the specified /// `partitionId` and populate the output in the specified `seqNum`. /// Return 0 on success and non-zero otherwise. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. int recoverSeqNum(bmqp_ctrlmsg::PartitionSequenceNumber* seqNum, int partitionId); /// Set the live data source of the specified 'partitionId' to the /// specified 'source', and clear any existing buffered storage events. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. void setLiveDataSource(mqbnet::ClusterNode* source, int partitionId); /// Buffer the storage event for the specified `partitionId` contained /// in the specified `blob` sent from the specified `source`. 
+ /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. void bufferStorageEvent(int partitionId, const bsl::shared_ptr& blob, mqbnet::ClusterNode* source); @@ -404,6 +430,9 @@ class RecoveryManager { /// specified `partitionId`, verifying that they are sent from the /// specified `source`, then clear the buffer. Return 0 on success and /// non-zero otherwise. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. int loadBufferedStorageEvents(bsl::vector >* out, const mqbnet::ClusterNode* source, @@ -413,11 +442,17 @@ class RecoveryManager { /// Return true if the specified `partitionId` is expecting data chunks, /// false otherwise. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. bool expectedDataChunks(int partitionId) const; /// Load into the specified `out` a ReplicaDataResponsePush using /// information in self's ReceiveDataContext for the specified /// `partitionId`. + /// + /// THREAD: Executed in the dispatcher thread associated with the + /// specified `partitionId`. void loadReplicaDataResponsePush(bmqp_ctrlmsg::ControlMessage* out, int partitionId) const; }; @@ -512,6 +547,8 @@ inline RecoveryManager::RecoveryContext::RecoveryContext( // ACCESSORS inline bool RecoveryManager::expectedDataChunks(int partitionId) const { + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(partitionId >= 0 && partitionId < diff --git a/src/groups/mqb/mqbc/mqbc_recoveryutil.cpp b/src/groups/mqb/mqbc/mqbc_recoveryutil.cpp index 5adef7a40f..3b0b1b935e 100644 --- a/src/groups/mqb/mqbc/mqbc_recoveryutil.cpp +++ b/src/groups/mqb/mqbc/mqbc_recoveryutil.cpp @@ -247,7 +247,7 @@ int RecoveryUtil::incrementCurrentSeqNum( const bmqp_ctrlmsg::PartitionSequenceNumber& endSeqNum, int partitionId, const mqbnet::ClusterNode& destination, - const mqbc::ClusterData& clusterData, + const bsl::string& clusterDescription, mqbs::JournalFileIterator& journalIt) { // PRECONDITIONS @@ -283,8 +283,7 @@ int RecoveryUtil::incrementCurrentSeqNum( // smaller or equal. BALL_LOG_ERROR - << clusterData.identity().description() << " PartitionId [" - << partitionId + << clusterDescription << " PartitionId [" << partitionId << "]: incorrect sequence number encountered while attempting " << "to replay partition to peer: " << *currentSeqNum << ". Sequence number cannot be greater than: " << endSeqNum diff --git a/src/groups/mqb/mqbc/mqbc_recoveryutil.h b/src/groups/mqb/mqbc/mqbc_recoveryutil.h index 785b3dc140..d580f9d257 100644 --- a/src/groups/mqb/mqbc/mqbc_recoveryutil.h +++ b/src/groups/mqb/mqbc/mqbc_recoveryutil.h @@ -42,11 +42,10 @@ // BDE #include #include +#include #include #include #include -#include -#include namespace BloombergLP { @@ -115,7 +114,7 @@ struct RecoveryUtil { /// than or equal to the specified `endSeqNum` or else it returns /// appropriate non-zero return code. /// This operation is performed for the specified `partitionId`, the - /// specified `destination` node and the specified `clusterData` is + /// specified `destination` node and the specified `clusterDescription` is /// used for logging purposes. The specified `journalIt` is used. /// The function return 0 if successful, 1 if end of journal file is /// reached, and non-zero for failure scenarios. 
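For reference, the recovery code above composes error codes as 'rc * 10 + rc_<LOCAL_ENUM>' (for example 'return rc * 10 + rc_JOURNAL_FD_CLOSE_FAILURE'), so the last decimal digit identifies the failing step while the remaining digits preserve the underlying return code, presumably to keep the inner failure reason visible in logs without widening the int return type. A minimal decoding sketch follows; it assumes the lower-level 'rc' and the local enumerator carry the same sign and that the enumerator magnitude is below 10, and the helper is illustrative only, not part of this patch:

    // Decode a composite return code of the form 'rc * 10 + localEnum'.
    // Assumption: 'localEnum' magnitude is below 10 and has the same sign
    // as 'rc' (the convention used by the call sites above).
    void decodeCompositeRc(int compositeRc, int* localEnum, int* underlyingRc)
    {
        *localEnum    = compositeRc % 10;  // e.g. rc_JOURNAL_FD_CLOSE_FAILURE
        *underlyingRc = compositeRc / 10;  // rc from the lower-level call
    }

For instance, a logged rc of -53 would indicate local step -3 with an underlying rc of -5 from the lower-level call.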
@@ -126,7 +125,7 @@ struct RecoveryUtil { const bmqp_ctrlmsg::PartitionSequenceNumber& endSeqNum, int partitionId, const mqbnet::ClusterNode& destination, - const mqbc::ClusterData& clusterData, + const bsl::string& clusterDescription, mqbs::JournalFileIterator& journalIt); /// This function operates on the record currently being pointed by the diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp index 3e46ec96a3..227867e846 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp @@ -1022,15 +1022,6 @@ void StorageManager::processReplicaDataResponseDispatched( << "ignoring."; } break; case bmqp_ctrlmsg::ReplicaDataType::E_PULL: { - if (d_recoveryManager_mp->expectedDataChunks(partitionId)) { - BALL_LOG_ERROR << d_clusterData_p->identity().description() - << " Partition [" << partitionId - << "]: Ignoring premature ReplicaDataResponse_PULL " - << "because self is still expecting data chunks."; - - return; // RETURN - } - dispatchEventToPartition(fs, PartitionFSM::Event::e_REPLICA_DATA_RSPN_PULL, eventDataVec); From b94c7e35201dafc7d4e0358d9a215e5c4a2bb26e Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Tue, 25 Jun 2024 16:44:50 -0400 Subject: [PATCH 06/15] Fix mqbc::StorageMgr: Initialize queue key info map in cluster thread Signed-off-by: Yuan Jing Vincent Yan --- .../mqb/mqbblp/mqbblp_storagemanager.cpp | 11 +++ src/groups/mqb/mqbblp/mqbblp_storagemanager.h | 4 + .../mqb/mqbc/mqbc_clusterstatemanager.cpp | 12 +++ .../mqb/mqbc/mqbc_clusterstatemanager.h | 3 + .../mqb/mqbc/mqbc_clusterstatemanager.t.cpp | 4 + src/groups/mqb/mqbc/mqbc_clusterstatetable.h | 16 +++- .../mqb/mqbc/mqbc_partitionstatetable.h | 16 ++-- src/groups/mqb/mqbc/mqbc_storagemanager.cpp | 89 +++++++++++-------- src/groups/mqb/mqbc/mqbc_storagemanager.h | 15 +++- src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp | 2 + src/groups/mqb/mqbi/mqbi_storagemanager.h | 8 ++ 11 files changed, 127 insertions(+), 53 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp index e11353daea..35eecc46f9 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp @@ -1510,6 +1510,17 @@ void StorageManager::stop() bdlf::PlaceHolders::_2)); // latch } +void StorageManager::initializeQueueKeyInfoMap( + BSLS_ANNOTATION_UNUSED const mqbc::ClusterState* clusterState) +{ + // executed by cluster *DISPATCHER* thread + + // PRECONDITION + BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); + + BSLS_ASSERT_OPT(false && "This method should only be invoked in FSM mode"); +} + void StorageManager::setPrimaryForPartition(int partitionId, mqbnet::ClusterNode* primaryNode, unsigned int primaryLeaseId) diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h index f4fabc6f72..d7dadf5ea5 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h @@ -506,6 +506,10 @@ class StorageManager : public mqbi::StorageManager { /// Stop this storage manager. virtual void stop() BSLS_KEYWORD_OVERRIDE; + /// Initialize the queue key info map based on information in the specified + /// `clusterState`. 
+ virtual void initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging /// to the specified `domain`. Load into the specified `storage` the diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp index ad05f33753..13bb40540c 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -258,6 +259,17 @@ void ClusterStateManager::do_applyCSLSelf(const ClusterFSMArgsSp& args) d_clusterStateLedger_mp->apply(clusterStateSnapshot); } +void ClusterStateManager::do_initializeQueueKeyInfoMap(const ClusterFSMArgsSp& args) +{ + // executed by the cluster *DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); + BSLS_ASSERT_SAFE(d_clusterFSM.isSelfHealed()); + + d_storageManager_p->initializeQueueKeyInfoMap(d_state_p); +} + void ClusterStateManager::do_sendFollowerLSNRequests( BSLS_ANNOTATION_UNUSED const ClusterFSMArgsSp& args) { diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h index 5edd18d041..a1fa0c34b7 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h @@ -194,6 +194,9 @@ class ClusterStateManager virtual void do_applyCSLSelf(const ClusterFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; + virtual void + do_initializeQueueKeyInfoMap(const ClusterFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; + virtual void do_sendFollowerLSNRequests(const ClusterFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.t.cpp index 52132092e3..adecd84df5 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.t.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,7 @@ struct Tester { bslma::ManagedPtr d_cluster_mp; mqbmock::ClusterStateLedger* d_clusterStateLedger_p; bslma::ManagedPtr d_clusterStateManager_mp; + mqbmock::StorageManager d_storageManager; public: // CREATORS @@ -100,6 +102,7 @@ struct Tester { , d_cluster_mp(0) , d_clusterStateLedger_p(0) , d_clusterStateManager_mp(0) + , d_storageManager() { // Create the cluster mqbmock::Cluster::ClusterNodeDefs clusterNodeDefs(s_allocator_p); @@ -173,6 +176,7 @@ struct Tester { k_WATCHDOG_TIMEOUT_DURATION, s_allocator_p), s_allocator_p); + d_clusterStateManager_mp->setStorageManager(&d_storageManager); // Start the cluster and the cluster state manager mwcu::MemOutStream errorDescription; diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatetable.h b/src/groups/mqb/mqbc/mqbc_clusterstatetable.h index fee5ae2798..ead606c0ac 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatetable.h +++ b/src/groups/mqb/mqbc/mqbc_clusterstatetable.h @@ -216,6 +216,8 @@ class ClusterStateTableActions { virtual void do_applyCSLSelf(const ARGS& args) = 0; + virtual void do_initializeQueueKeyInfoMap(const ARGS& args) = 0; + virtual void do_sendFollowerLSNRequests(const ARGS& args) = 0; virtual void do_sendFollowerLSNResponse(const ARGS& args) = 0; @@ -276,6 +278,8 @@ class ClusterStateTableActions { void do_stopWatchDog_cancelRequests_reapplyEvent(const ARGS& args); + void 
do_stopWatchDog_initializeQueueKeyInfoMap(const ARGS& args); + void do_stopWatchDog_cleanupLSNs_cancelRequests(const ARGS& args); void @@ -395,7 +399,7 @@ class ClusterStateTable FOL_CSL_RQST, sendFollowerClusterStateResponse, FOL_HEALING); - CST_CFG(FOL_HEALING, CSL_CMT_SUCCESS, stopWatchDog, FOL_HEALED); + CST_CFG(FOL_HEALING, CSL_CMT_SUCCESS, stopWatchDog_initializeQueueKeyInfoMap, FOL_HEALED); CST_CFG(FOL_HEALING, CSL_CMT_FAIL, triggerWatchDog, UNKNOWN); CST_CFG(FOL_HEALING, RST_UNKNOWN, @@ -509,7 +513,7 @@ class ClusterStateTable REGISTRATION_RQST, storeFollowerLSNs_sendRegistrationResponse, LDR_HEALING_STG2); - CST_CFG(LDR_HEALING_STG2, CSL_CMT_SUCCESS, stopWatchDog, LDR_HEALED); + CST_CFG(LDR_HEALING_STG2, CSL_CMT_SUCCESS, stopWatchDog_initializeQueueKeyInfoMap, LDR_HEALED); CST_CFG(LDR_HEALING_STG2, CSL_CMT_FAIL, triggerWatchDog, UNKNOWN); CST_CFG(LDR_HEALING_STG2, RST_UNKNOWN, @@ -624,6 +628,14 @@ void ClusterStateTableActions< do_reapplyEvent(args); } +template +void ClusterStateTableActions::do_stopWatchDog_initializeQueueKeyInfoMap( + const ARGS& args) +{ + do_stopWatchDog(args); + do_initializeQueueKeyInfoMap(args); +} + template void ClusterStateTableActions< ARGS>::do_stopWatchDog_cleanupLSNs_cancelRequests(const ARGS& args) diff --git a/src/groups/mqb/mqbc/mqbc_partitionstatetable.h b/src/groups/mqb/mqbc/mqbc_partitionstatetable.h index 30c4f01b8c..037c89b172 100644 --- a/src/groups/mqb/mqbc/mqbc_partitionstatetable.h +++ b/src/groups/mqb/mqbc/mqbc_partitionstatetable.h @@ -209,8 +209,6 @@ class PartitionStateTableActions { virtual void do_stopWatchDog(const ARGS& args) = 0; - virtual void do_populateQueueKeyInfoMap(const ARGS& args) = 0; - virtual void do_openRecoveryFileSet(const ARGS& args) = 0; virtual void do_closeRecoveryFileSet(const ARGS& args) = 0; @@ -300,11 +298,11 @@ class PartitionStateTableActions { virtual void do_reapplyDetectSelfReplica(const ARGS& args) = 0; void - do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_replicaStateRequest_checkQuorumSeq_populateQueueKeyInfoMap( + do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_replicaStateRequest_checkQuorumSeq( const ARGS& args); void - do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_primaryStateRequest_populateQueueKeyInfoMap( + do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_primaryStateRequest( const ARGS& args); void @@ -444,12 +442,12 @@ class PartitionStateTable PST_CFG( UNKNOWN, DETECT_SELF_PRIMARY, - startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_replicaStateRequest_checkQuorumSeq_populateQueueKeyInfoMap, + startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_replicaStateRequest_checkQuorumSeq, PRIMARY_HEALING_STG1); PST_CFG( UNKNOWN, DETECT_SELF_REPLICA, - startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_primaryStateRequest_populateQueueKeyInfoMap, + startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_primaryStateRequest, REPLICA_HEALING); PST_CFG(UNKNOWN, PUT, nackPut, UNKNOWN); PST_CFG(PRIMARY_HEALING_STG1, @@ -704,7 +702,7 @@ void PartitionStateTableActions::do_none(const ARGS& args) template void PartitionStateTableActions:: - do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_replicaStateRequest_checkQuorumSeq_populateQueueKeyInfoMap( + do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_replicaStateRequest_checkQuorumSeq( const ARGS& args) { do_startWatchDog(args); @@ -713,12 +711,11 @@ void 
PartitionStateTableActions:: do_storeSelfSeq(args); do_replicaStateRequest(args); do_checkQuorumSeq(args); - do_populateQueueKeyInfoMap(args); } template void PartitionStateTableActions:: - do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_primaryStateRequest_populateQueueKeyInfoMap( + do_startWatchDog_storePartitionInfo_openRecoveryFileSet_storeSelfSeq_primaryStateRequest( const ARGS& args) { do_startWatchDog(args); @@ -726,7 +723,6 @@ void PartitionStateTableActions:: do_openRecoveryFileSet(args); do_storeSelfSeq(args); do_primaryStateRequest(args); - do_populateQueueKeyInfoMap(args); } template diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp index 227867e846..083566cf56 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp @@ -1151,43 +1151,6 @@ void StorageManager::do_stopWatchDog(const PartitionFSMArgsSp& args) d_watchDogEventHandles[partitionId].release(); } -void StorageManager::do_populateQueueKeyInfoMap( - BSLS_ANNOTATION_UNUSED const PartitionFSMArgsSp& args) -{ - if (!bsl::all_of(d_queueKeyInfoMapVec.cbegin(), - d_queueKeyInfoMapVec.cend(), - bdlf::MemFnUtil::memFn(&QueueKeyInfoMap::empty))) { - // If the queue key info map vec has already been populated, no need to - // populate again. - - return; // RETURN - } - - // Populate 'd_queueKeyInfoMapVec' from cluster state - for (DomainStatesCIter dscit = d_clusterState.domainStates().cbegin(); - dscit != d_clusterState.domainStates().cend(); - ++dscit) { - for (UriToQueueInfoMapCIter cit = dscit->second->queuesInfo().cbegin(); - cit != dscit->second->queuesInfo().cend(); - ++cit) { - BSLS_ASSERT_SAFE(cit->second); - const ClusterStateQueueInfo& csQinfo = *(cit->second); - - mqbs::DataStoreConfigQueueInfo qinfo; - qinfo.setCanonicalQueueUri(csQinfo.uri().asString()); - qinfo.setPartitionId(csQinfo.partitionId()); - for (AppIdInfosCIter appIdCit = csQinfo.appIdInfos().cbegin(); - appIdCit != csQinfo.appIdInfos().cend(); - ++appIdCit) { - qinfo.addAppIdKeyPair(*appIdCit); - } - - d_queueKeyInfoMapVec.at(csQinfo.partitionId()) - .insert(bsl::make_pair(csQinfo.key(), qinfo)); - } - } -} - void StorageManager::do_openRecoveryFileSet(const PartitionFSMArgsSp& args) { // executed by the *DISPATCHER* thread @@ -2675,8 +2638,10 @@ void StorageManager::do_resetReceiveDataCtx(const PartitionFSMArgsSp& args) void StorageManager::do_openStorage(const PartitionFSMArgsSp& args) { // executed by the *DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); + BSLS_ASSERT_SAFE(d_isQueueKeyInfoMapVecInitialized); const PartitionFSM::EventWithData& eventWithData = args->eventsQueue()->front(); @@ -3216,6 +3181,7 @@ StorageManager::StorageManager( , d_nodeToSeqNumCtxMapVec(allocator) , d_seqNumQuorum((d_clusterConfig.nodes().size() / 2) + 1) // TODO: Config?? 
, d_numReplicaDataResponsesReceivedVec(allocator) +, d_isQueueKeyInfoMapVecInitialized(false) , d_queueKeyInfoMapVec(allocator) , d_minimumRequiredDiskSpace(0) , d_storageMonitorEventHandle() @@ -3448,6 +3414,50 @@ void StorageManager::stop() bdlf::PlaceHolders::_2)); // latch } +void StorageManager::initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) +{ + // executed by the *CLUSTER DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); + BSLS_ASSERT_SAFE(clusterState); + + if (d_isQueueKeyInfoMapVecInitialized) { + // The queue key info map vec should only be initialized once. + return; // RETURN + } + + BSLS_ASSERT_SAFE(bsl::all_of(d_queueKeyInfoMapVec.cbegin(), + d_queueKeyInfoMapVec.cend(), + bdlf::MemFnUtil::memFn(&QueueKeyInfoMap::empty))); + + // Populate 'd_queueKeyInfoMapVec' from cluster state + for (DomainStatesCIter dscit = clusterState->domainStates().cbegin(); + dscit != clusterState->domainStates().cend(); + ++dscit) { + for (UriToQueueInfoMapCIter cit = dscit->second->queuesInfo().cbegin(); + cit != dscit->second->queuesInfo().cend(); + ++cit) { + BSLS_ASSERT_SAFE(cit->second); + const ClusterStateQueueInfo& csQinfo = *(cit->second); + + mqbs::DataStoreConfigQueueInfo qinfo; + qinfo.setCanonicalQueueUri(csQinfo.uri().asString()); + qinfo.setPartitionId(csQinfo.partitionId()); + for (AppIdInfosCIter appIdCit = csQinfo.appIdInfos().cbegin(); + appIdCit != csQinfo.appIdInfos().cend(); + ++appIdCit) { + qinfo.addAppIdKeyPair(*appIdCit); + } + + d_queueKeyInfoMapVec.at(csQinfo.partitionId()) + .insert(bsl::make_pair(csQinfo.key(), qinfo)); + } + } + + d_isQueueKeyInfoMapVecInitialized = true; +} + void StorageManager::registerQueue(const bmqt::Uri& uri, const mqbu::StorageKey& queueKey, int partitionId, @@ -3742,18 +3752,21 @@ void StorageManager::clearPrimaryForPartition(int partitionId, BSLS_ASSERT_SAFE(0 <= partitionId && partitionId < static_cast(d_fileStores.size())); BSLS_ASSERT_SAFE(primary); + BSLS_ASSERT_SAFE(primary->nodeId() == + d_partitionInfoVec[partitionId].primary()->nodeId()); BALL_LOG_INFO << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: " << "Self Transition back to Unknown in the Partition FSM."; + EventData eventDataVec; eventDataVec.emplace_back(d_clusterData_p->membership().selfNode(), -1, // placeholder requestId partitionId, 1, primary, - 1); + d_partitionInfoVec[partitionId].primaryLeaseId()); mqbs::FileStore* fs = d_fileStores[partitionId].get(); BSLS_ASSERT_SAFE(fs); diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.h b/src/groups/mqb/mqbc/mqbc_storagemanager.h index cb9ab7198d..aff49ca68a 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.h +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.h @@ -356,6 +356,9 @@ class StorageManager // responses received, indexed by // partitionId. + bsls::AtomicBool d_isQueueKeyInfoMapVecInitialized; + // Whether 'd_queueKeyInfoMapVec' has been initialized. + QueueKeyInfoMapVec d_queueKeyInfoMapVec; // Mapping from queue key to queue // info indexed by partitionId, @@ -365,6 +368,11 @@ class StorageManager // when recovering messages, and to // create domains and file-backed // storages during 'recoveredQueuesCb'. + // + // THREAD: This data member **must** be initialized in the cluster + // dispatcher thread, where 'd_isQueueKeyInfoMapVecInitialized' + // will be set to 'true'. Afterwards, it **must not** be modified + // again, and hence is safe to read from any thread. 
bsls::Types::Uint64 d_minimumRequiredDiskSpace; // The bare minimum space required for @@ -572,9 +580,6 @@ class StorageManager virtual void do_stopWatchDog(const PartitionFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; - virtual void do_populateQueueKeyInfoMap(const PartitionFSMArgsSp& args) - BSLS_KEYWORD_OVERRIDE; - virtual void do_openRecoveryFileSet(const PartitionFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; @@ -768,6 +773,10 @@ class StorageManager /// THREAD: Executed by the cluster's dispatcher thread. virtual void stop() BSLS_KEYWORD_OVERRIDE; + /// Initialize the queue key info map based on information in the specified + /// `clusterState`. + virtual void initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the spcified `appIdKeyPairs`, and belonging to /// the specified `domain`. Load into the specified `storage` the diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp index 203f901960..cd9f5b4b5a 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp @@ -2603,6 +2603,7 @@ static void test17_replicaHealingReceivesReplicaDataRqstPull() int rc = storageManager.start(errorDescription); BSLS_ASSERT_OPT(rc == 0); + storageManager.initializeQueueKeyInfoMap(&helper.d_cluster_mp->_state()); mqbs::FileStore& fs = storageManager.fileStore(k_PARTITION_ID); fs.setIgnoreCrc32c(true); @@ -2745,6 +2746,7 @@ static void test18_primaryHealingStage1SelfHighestSendsDataChunks() const int rc = storageManager.start(errorDescription); BSLS_ASSERT_OPT(rc == 0); + storageManager.initializeQueueKeyInfoMap(&helper.d_cluster_mp->_state()); mqbs::FileStore& fs = storageManager.fileStore(k_PARTITION_ID); fs.setIgnoreCrc32c(true); diff --git a/src/groups/mqb/mqbi/mqbi_storagemanager.h b/src/groups/mqb/mqbi/mqbi_storagemanager.h index 1e6060935c..135dc34d76 100644 --- a/src/groups/mqb/mqbi/mqbi_storagemanager.h +++ b/src/groups/mqb/mqbi/mqbi_storagemanager.h @@ -58,6 +58,10 @@ namespace BloombergLP { // FORWARD DECLARATION +namespace mqbc { +class ClusterState; +} + namespace mqbcmd { class StorageCommand; } @@ -218,6 +222,10 @@ class StorageManager : public mqbi::AppKeyGenerator { /// Stop this storage manager. virtual void stop() = 0; + /// Initialize the queue key info map based on information in the specified + /// `clusterState`. + virtual void initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) = 0; + /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging /// to the specified `domain`. 
Load into the specified `storage` the From 9ef2c9fcfad99088f884a8bbe5c0920b3cb97552 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Tue, 16 Jul 2024 17:31:00 -0400 Subject: [PATCH 07/15] mqbc::StorageMgr.t: Set partition primary also in cluster state Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp | 146 +++++++++++------- 1 file changed, 86 insertions(+), 60 deletions(-) diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp index cd9f5b4b5a..f96c9fd64c 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp @@ -201,6 +201,12 @@ struct TestHelper { BSLS_ASSERT_OPT(rc == 0); } + void setPartitionPrimary(mqbc::StorageManager* storageManager, int partitionId, unsigned int leaseId, mqbnet::ClusterNode* node) + { + d_cluster_mp->_state().setPartitionPrimary(partitionId, leaseId, node); + storageManager->setPrimaryForPartition(partitionId, node, leaseId); + } + void clearChannels() { for (TestChannelMapCIter cit = d_cluster_mp->_channels().cbegin(); @@ -1015,9 +1021,10 @@ static void test2_unknownDetectSelfPrimary() BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_UNKNOWN); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); ASSERT_EQ(storageManager.nodeToSeqNumCtxMap(k_PARTITION_ID).size(), 1U); ASSERT_EQ(storageManager.partitionHealthState(k_PARTITION_ID), @@ -1085,9 +1092,10 @@ static void test3_unknownDetectSelfReplica() BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_UNKNOWN); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); ASSERT_EQ(storageManager.nodeToSeqNumCtxMap(k_PARTITION_ID).size(), 1U); ASSERT_EQ(storageManager.partitionHealthState(k_PARTITION_ID), @@ -1157,9 +1165,10 @@ static void test4_primaryHealingStage1DetectSelfReplica() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1180,9 +1189,10 @@ static void test4_primaryHealingStage1DetectSelfReplica() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); ASSERT_EQ(storageManager.nodeToSeqNumCtxMap(k_PARTITION_ID).size(), 1U); ASSERT_EQ(storageManager.partitionHealthState(k_PARTITION_ID), @@ -1255,9 +1265,10 @@ static void test5_primaryHealingStage1ReceivesReplicaStateRqst() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1367,9 +1378,10 @@ static void 
test6_primaryHealingStage1ReceivesReplicaStateRspnQuorum() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1474,9 +1486,10 @@ static void test7_primaryHealingStage1ReceivesPrimaryStateRequestQuorum() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1587,9 +1600,10 @@ static void test8_primaryHealingStage1ReceivesPrimaryStateRqst() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1697,9 +1711,10 @@ static void test9_primaryHealingStage1ReceivesReplicaStateRspnNoQuorum() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1812,9 +1827,10 @@ static void test10_primaryHealingStage1QuorumSendsReplicaDataRequestPull() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -1935,9 +1951,10 @@ static void test11_primaryHealingStage2DetectSelfReplica() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); @@ -2009,9 +2026,10 @@ static void test11_primaryHealingStage2DetectSelfReplica() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); ASSERT_EQ(storageManager.nodeToSeqNumCtxMap(k_PARTITION_ID).size(), 1U); ASSERT_EQ(storageManager.partitionHealthState(k_PARTITION_ID), @@ -2087,9 +2105,10 @@ static void test12_replicaHealingDetectSelfPrimary() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_REPLICA_HEALING); @@ -2104,9 
+2123,10 @@ static void test12_replicaHealingDetectSelfPrimary() // Apply Detect Self Primary event to Self Node. - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + selfNode); ASSERT_EQ(storageManager.nodeToSeqNumCtxMap(k_PARTITION_ID).size(), 1U); ASSERT_EQ(storageManager.partitionHealthState(k_PARTITION_ID), @@ -2179,9 +2199,10 @@ static void test13_replicaHealingReceivesReplicaStateRqst() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_REPLICA_HEALING); @@ -2286,9 +2307,10 @@ static void test14_replicaHealingReceivesPrimaryStateRspn() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_REPLICA_HEALING); @@ -2391,9 +2413,10 @@ static void test15_replicaHealingReceivesFailedPrimaryStateRspn() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_REPLICA_HEALING); @@ -2485,9 +2508,10 @@ static void test16_replicaHealingReceivesPrimaryStateRqst() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_REPLICA_HEALING); @@ -2612,9 +2636,10 @@ static void test17_replicaHealingReceivesReplicaDataRqstPull() ->membership() .netCluster() ->lookupNode(primaryNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - primaryNode, - 1); // primaryLeaseId + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + 1, // primaryLeaseId + primaryNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_REPLICA_HEALING); @@ -2764,9 +2789,10 @@ static void test18_primaryHealingStage1SelfHighestSendsDataChunks() .netCluster() ->lookupNode(selfNodeId); - storageManager.setPrimaryForPartition(k_PARTITION_ID, - selfNode, - k_PRIMARY_LEASE_ID); + helper.setPartitionPrimary(&storageManager, + k_PARTITION_ID, + k_PRIMARY_LEASE_ID, // primaryLeaseId + selfNode); BSLS_ASSERT_OPT(storageManager.partitionHealthState(k_PARTITION_ID) == mqbc::PartitionFSM::State::e_PRIMARY_HEALING_STG1); From 6dbad8403f35eae8e1bce539e1f18d03f432f95a Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Mon, 22 Jul 2024 15:59:53 -0400 Subject: [PATCH 08/15] mqbc::StorageMgr: Thread safety improvements Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbblp/mqbblp_cluster.cpp | 2 +- src/groups/mqb/mqbblp/mqbblp_cluster.h | 3 +- .../mqb/mqbblp/mqbblp_storagemanager.cpp | 52 +-- 
src/groups/mqb/mqbblp/mqbblp_storagemanager.h | 22 +- src/groups/mqb/mqbc/mqbc_clusterdata.h | 16 +- src/groups/mqb/mqbc/mqbc_clusterutil.cpp | 8 +- src/groups/mqb/mqbc/mqbc_clusterutil.h | 10 +- src/groups/mqb/mqbc/mqbc_recoveryutil.h | 2 - src/groups/mqb/mqbc/mqbc_storagemanager.cpp | 296 +++++++++++------- src/groups/mqb/mqbc/mqbc_storagemanager.h | 115 ++++--- src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp | 5 +- src/groups/mqb/mqbc/mqbc_storageutil.cpp | 253 +++++++++------ src/groups/mqb/mqbc/mqbc_storageutil.h | 107 ++++--- src/groups/mqb/mqbi/mqbi_storagemanager.h | 4 - .../mqb/mqbmock/mqbmock_storagemanager.cpp | 5 - .../mqb/mqbmock/mqbmock_storagemanager.h | 4 - src/groups/mqb/mqbs/mqbs_filestore.cpp | 25 ++ src/groups/mqb/mqbs/mqbs_filestore.h | 6 +- src/groups/mqb/mqbs/mqbs_filestore.t.cpp | 21 +- src/groups/mqb/mqbs/mqbs_storageprintutil.cpp | 12 +- src/groups/mqb/mqbs/mqbs_storageprintutil.h | 13 +- 21 files changed, 600 insertions(+), 381 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_cluster.cpp b/src/groups/mqb/mqbblp/mqbblp_cluster.cpp index 8c03c7fe48..af05409afa 100644 --- a/src/groups/mqb/mqbblp/mqbblp_cluster.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_cluster.cpp @@ -178,7 +178,7 @@ void Cluster::startDispatched(bsl::ostream* errorDescription, int* rc) // Start the StorageManager d_storageManager_mp.load( isFSMWorkflow() - ? static_cast( + ? static_cast( new (*storageManagerAllocator) mqbc::StorageManager( d_clusterData.clusterConfig(), this, diff --git a/src/groups/mqb/mqbblp/mqbblp_cluster.h b/src/groups/mqb/mqbblp/mqbblp_cluster.h index 06062109d9..ab834f6131 100644 --- a/src/groups/mqb/mqbblp/mqbblp_cluster.h +++ b/src/groups/mqb/mqbblp/mqbblp_cluster.h @@ -76,6 +76,7 @@ #include #include #include +#include #include namespace BloombergLP { @@ -265,7 +266,7 @@ class Cluster : public mqbi::Cluster, // This flag is used only inside this // component. - bool d_isStopping; + bsls::AtomicBool d_isStopping; // Flag to indicate if this cluster is // stopping. This flag is exposed via // an accessor. 
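The change of 'd_isStopping' from 'bool' to 'bsls::AtomicBool' here, like the new 'd_isQueueKeyInfoMapVecInitialized' flag earlier in this series, follows a write-from-one-thread, read-from-any-thread pattern: the flag is set on a dispatcher thread and is exposed via an accessor, so a plain 'bool' would be a data race. A minimal sketch of that pattern, using only the 'bsls::AtomicBool' operations already exercised by this patch (construction from 'bool', assignment, and contextual conversion); the 'Example' type is illustrative, not part of the patch:

    #include <bsls_atomic.h>

    class Example {
        bsls::AtomicBool d_isStopping;  // written by one thread,
                                        // read by any thread

      public:
        Example() : d_isStopping(false) {}

        // Called on the (single) dispatcher thread during shutdown.
        void stopDispatched() { d_isStopping = true; }

        // Safe to call from any thread.
        bool isStopping() const { return d_isStopping; }
    };
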
diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp index 35eecc46f9..c52c64e5df 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp @@ -360,7 +360,7 @@ void StorageManager::shutdownCb(int partitionId, bslmt::Latch* latch) mqbc::StorageUtil::shutdown(partitionId, latch, &d_fileStores, - d_clusterData_p, + d_clusterData_p->identity().description(), d_clusterConfig); } @@ -483,6 +483,7 @@ void StorageManager::recoveredQueuesCb(int partitionId, &d_appKeysVec[partitionId], &d_appKeysLock, d_domainFactory_p, + &d_unrecognizedDomainsLock, &d_unrecognizedDomains[partitionId], d_clusterData_p->identity().description(), partitionId, @@ -495,6 +496,7 @@ void StorageManager::recoveredQueuesCb(int partitionId, mqbc::StorageUtil::dumpUnknownRecoveredDomains( d_clusterData_p->identity().description(), + &d_unrecognizedDomainsLock, d_unrecognizedDomains); } @@ -650,11 +652,12 @@ void StorageManager::clearPrimaryForPartitionDispatched( mqbs::FileStore* fs = d_fileStores[partitionId].get(); PartitionInfo& pinfo = d_partitionInfoVec[partitionId]; - mqbc::StorageUtil::clearPrimaryForPartition(fs, - &pinfo, - *d_clusterData_p, - partitionId, - primary); + mqbc::StorageUtil::clearPrimaryForPartition( + fs, + &pinfo, + d_clusterData_p->identity().description(), + partitionId, + primary); } void StorageManager::processStorageEventDispatched( @@ -742,10 +745,15 @@ void StorageManager::processPartitionSyncEvent( return; // RETURN } + PartitionInfo pinfo; + const ClusterStatePartitionInfo& cspinfo = d_clusterState.partition(pid); + pinfo.setPrimary(cspinfo.primaryNode()); + pinfo.setPrimaryLeaseId(cspinfo.primaryLeaseId()); + pinfo.setPrimaryStatus(cspinfo.primaryStatus()); if (!mqbc::StorageUtil::validatePartitionSyncEvent(rawEvent, pid, source, - d_clusterState, + pinfo, *d_clusterData_p, false) // isFSMWorkflow ) { @@ -977,6 +985,7 @@ StorageManager::StorageManager( , d_allocators(d_allocator_p) , d_isStarted(false) , d_lowDiskspaceWarning(false) +, d_unrecognizedDomainsLock() , d_unrecognizedDomains(allocator) , d_blobSpPool_p(clusterData->blobSpPool()) , d_domainFactory_p(domainFactory) @@ -1095,7 +1104,7 @@ void StorageManager::unregisterQueue(const bmqt::Uri& uri, int partitionId) &d_storagesLock, d_clusterData_p, partitionId, - d_partitionInfoVec[partitionId], + bsl::cref(d_partitionInfoVec[partitionId]), uri)); d_fileStores[partitionId]->dispatchEvent(queueEvent); @@ -1489,6 +1498,11 @@ int StorageManager::start(bsl::ostream& errorDescription) void StorageManager::stop() { + // executed by cluster *DISPATCHER* thread + + // PRECONDITION + BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); + if (!d_isStarted) { return; // RETURN } @@ -1502,8 +1516,8 @@ void StorageManager::stop() d_recoveryManager_mp->stop(); mqbc::StorageUtil::stop( - d_clusterData_p, &d_fileStores, + d_clusterData_p->identity().description(), bdlf::BindUtil::bind(&StorageManager::shutdownCb, this, bdlf::PlaceHolders::_1, // partitionId @@ -1703,14 +1717,15 @@ void StorageManager::processStorageEvent( d_clusterData_p->membership().selfNodeStatus() || isZero(d_clusterData_p->electorInfo().leaderMessageSequence()); const ClusterStatePartitionInfo& pinfo = d_clusterState.partition(pid); - if (!mqbc::StorageUtil::validateStorageEvent(rawEvent, - pid, - source, - pinfo.primaryNode(), - pinfo.primaryStatus(), - *d_clusterData_p, - skipAlarm, - false)) { // isFSMWorkflow + if 
(!mqbc::StorageUtil::validateStorageEvent( + rawEvent, + pid, + source, + pinfo.primaryNode(), + pinfo.primaryStatus(), + d_clusterData_p->identity().description(), + skipAlarm, + false)) { // isFSMWorkflow return; // RETURN } @@ -2056,7 +2071,7 @@ void StorageManager::processReplicaStatusAdvisory( d_clusterData_p, fs, partitionId, - d_partitionInfoVec[partitionId], + bsl::cref(d_partitionInfoVec[partitionId]), source, status)); } @@ -2116,6 +2131,7 @@ void StorageManager::gcUnrecognizedDomainQueues() d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); mqbc::StorageUtil::gcUnrecognizedDomainQueues(&d_fileStores, + &d_unrecognizedDomainsLock, d_unrecognizedDomains); } diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h index d7dadf5ea5..5e3e4a051e 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h @@ -203,6 +203,9 @@ class StorageManager : public mqbi::StorageManager { // used *only* for logging purposes // (see 'storageMonitorCb' impl) + bslmt::Mutex d_unrecognizedDomainsLock; + // Mutex to protect access to 'd_unrecognizedDomains' and its elements. + DomainQueueMessagesCountMaps d_unrecognizedDomains; // List of DomainQueueMessagesMap, // indexed by 'partitionId'. @@ -468,10 +471,6 @@ class StorageManager : public mqbi::StorageManager { /// Explicitly call `flush` on all FileStores to enforce their GC. void forceFlushFileStores(); - private: - // PRIVATE ACCESSORS - bool isLocalCluster() const; - public: // TRAITS BSLMF_NESTED_TRAIT_DECLARATION(StorageManager, bslma::UsesBslmaAllocator) @@ -724,10 +723,6 @@ class StorageManager : public mqbi::StorageManager { virtual bool isStorageEmpty(const bmqt::Uri& uri, int partitionId) const BSLS_KEYWORD_OVERRIDE; - /// Return the blob buffer factory to use. - virtual bdlbb::BlobBufferFactory* - blobBufferFactory() const BSLS_KEYWORD_OVERRIDE; - /// Return partition corresponding to the specified `partitionId`. The /// behavior is undefined if `partitionId` does not represent a valid /// partition id. 
@@ -815,17 +810,6 @@ class StorageManagerIterator : public mqbi::StorageManagerIterator { // class StorageManager // -------------------- -// PRIVATE ACCESSORS -inline bool StorageManager::isLocalCluster() const -{ - return d_clusterData_p->cluster()->isLocal(); -} - -inline bdlbb::BlobBufferFactory* StorageManager::blobBufferFactory() const -{ - return d_clusterData_p->bufferFactory(); -} - // PUBLIC ACCESSORS inline mqbi::Dispatcher::ProcessorHandle StorageManager::processorForPartition(int partitionId) const diff --git a/src/groups/mqb/mqbc/mqbc_clusterdata.h b/src/groups/mqb/mqbc/mqbc_clusterdata.h index 0bc0cf2adf..bc27d0ad7c 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterdata.h +++ b/src/groups/mqb/mqbc/mqbc_clusterdata.h @@ -79,13 +79,13 @@ class ClusterDataIdentity { private: // DATA - bsl::string d_name; + const bsl::string d_name; // Name of the cluster - bsl::string d_description; + const bsl::string d_description; // Description of the cluster - bmqp_ctrlmsg::ClientIdentity d_identity; + const bmqp_ctrlmsg::ClientIdentity d_identity; // Information sent to the primary node of // a queue while sending a clusterOpenQueue // request to that node @@ -177,7 +177,7 @@ class ClusterData { ClusterMembership d_membership; // The membership information of the cluster - ClusterDataIdentity d_identity; + const ClusterDataIdentity d_identity; // The identity of the cluster mqbi::Cluster* d_cluster_p; @@ -262,9 +262,6 @@ class ClusterData { /// Get a modifiable reference to this object's cluster membership. ClusterMembership& membership(); - /// Get a modifiable reference to this object's cluster identity. - ClusterDataIdentity& identity(); - /// Get a modifiable reference to this object's cluster. mqbi::Cluster* cluster(); @@ -390,11 +387,6 @@ inline ClusterMembership& ClusterData::membership() return d_membership; } -inline ClusterDataIdentity& ClusterData::identity() -{ - return d_identity; -} - inline mqbi::Cluster* ClusterData::cluster() { return d_cluster_p; diff --git a/src/groups/mqb/mqbc/mqbc_clusterutil.cpp b/src/groups/mqb/mqbc/mqbc_clusterutil.cpp index 872c739805..5a25a34ffb 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterutil.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterutil.cpp @@ -277,7 +277,7 @@ void getNextPrimarys(NumNewPartitionsMap* numNewPartitions, // THREAD: This method is invoked in the associated cluster's // dispatcher thread. 
{ - // executed by the *DISPATCHER* thread + // executed by the cluster *DISPATCHER* thread // PRECONDITIONS BSLS_ASSERT_SAFE(clusterData.cluster()->dispatcher()->inDispatcherThread( @@ -868,6 +868,7 @@ ClusterUtil::assignQueue(ClusterState* clusterState, bmqp_ctrlmsg::Status* status) { // executed by the cluster *DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(cluster->dispatcher()->inDispatcherThread(cluster)); BSLS_ASSERT_SAFE(!cluster->isRemote()); @@ -1062,7 +1063,7 @@ void ClusterUtil::registerQueueInfo(ClusterState* clusterState, const QueueAssigningCb& queueAssigningCb, bool forceUpdate) { - // executed by the *DISPATCHER* thread + // executed by the cluster *DISPATCHER* thread // PRECONDITIONS BSLS_ASSERT_SAFE(cluster->dispatcher()->inDispatcherThread(cluster)); @@ -2239,6 +2240,9 @@ void ClusterUtil::loadQueuesInfo(bsl::vector* out, void ClusterUtil::loadPeerNodes(bsl::vector* out, const ClusterData& clusterData) { + // executed by the cluster *DISPATCHER* thread or the *QUEUE_DISPATCHER* + // thread + // PRECONDITIONS BSLS_ASSERT_SAFE(out); diff --git a/src/groups/mqb/mqbc/mqbc_clusterutil.h b/src/groups/mqb/mqbc/mqbc_clusterutil.h index 431cd523bf..92f354fd8b 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterutil.h +++ b/src/groups/mqb/mqbc/mqbc_clusterutil.h @@ -23,6 +23,11 @@ // mqbc::ClusterUtil: Generic utilities for a cluster. // //@DESCRIPTION: 'mqbc::ClusterUtil' provides generic utilities for a cluster. +// +/// Thread Safety +///------------- +/// This component is designed to be executed only by the cluster *DISPATCHER* +/// thread. // MQB @@ -315,7 +320,7 @@ struct ClusterUtil { /// cluster state to that `node` only. Otherwise, broadcast to all /// followers. Behavior is undefined unless this node is the leader, /// and at least one of `sendPartitionPrimaryInfo` or `sendQueuesInfo` - /// is true. TODO + /// is true. /// /// THREAD: This method is invoked in the associated cluster's /// dispatcher thread. @@ -394,6 +399,9 @@ struct ClusterUtil { /// Load into the specified `out` the list of peer nodes using the /// specified `clusterData`. + /// + /// THREAD: Executed by the cluster *DISPATCHER* thread or the + // *QUEUE_DISPATCHER* thread. static void loadPeerNodes(bsl::vector* out, const ClusterData& clusterData); diff --git a/src/groups/mqb/mqbc/mqbc_recoveryutil.h b/src/groups/mqb/mqbc/mqbc_recoveryutil.h index d580f9d257..d7f8b6ec39 100644 --- a/src/groups/mqb/mqbc/mqbc_recoveryutil.h +++ b/src/groups/mqb/mqbc/mqbc_recoveryutil.h @@ -25,8 +25,6 @@ //@DESCRIPTION: 'mqbc::RecoveryUtil' provides generic utilities. 
// MQB - -#include #include #include #include diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp index 083566cf56..e53e869eb9 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp @@ -38,6 +38,7 @@ #include // BDE +#include #include #include #include @@ -111,7 +112,7 @@ void StorageManager::shutdownCb(int partitionId, bslmt::Latch* latch) StorageUtil::shutdown(partitionId, latch, &d_fileStores, - d_clusterData_p, + d_clusterData_p->identity().description(), d_clusterConfig); } @@ -140,6 +141,7 @@ void StorageManager::recoveredQueuesCb(int partitionId, &d_appKeysVec[partitionId], &d_appKeysLock, d_domainFactory_p, + &d_unrecognizedDomainsLock, &d_unrecognizedDomains[partitionId], d_clusterData_p->identity().description(), partitionId, @@ -152,6 +154,7 @@ void StorageManager::recoveredQueuesCb(int partitionId, StorageUtil::dumpUnknownRecoveredDomains( d_clusterData_p->identity().description(), + &d_unrecognizedDomainsLock, d_unrecognizedDomains); } @@ -362,6 +365,7 @@ void StorageManager::processPrimaryDetect(int partitionId, unsigned int primaryLeaseId) { // executed by the cluster *DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); BSLS_ASSERT_SAFE(primaryNode->nodeId() == @@ -402,6 +406,7 @@ void StorageManager::processReplicaDetect(int partitionId, unsigned int primaryLeaseId) { // executed by the cluster *DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); BSLS_ASSERT_SAFE(primaryNode->nodeId() != @@ -532,7 +537,9 @@ void StorageManager::processReplicaDataRequestPush( const int partitionId = replicaDataRequest.partitionId(); BSLS_ASSERT_SAFE(0 <= partitionId && partitionId < static_cast(d_fileStores.size())); - BSLS_ASSERT_SAFE(source == d_partitionInfoVec[partitionId].primary()); + BSLS_ASSERT_SAFE( + source->nodeId() == + d_clusterState.partitionsInfo().at(partitionId).primaryNodeId()); BALL_LOG_INFO << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: " @@ -554,8 +561,8 @@ void StorageManager::processReplicaDataRequestPush( partitionId, 1, source, - d_partitionInfoVec[partitionId].primaryLeaseId(), - d_nodeToSeqNumCtxMapVec[partitionId][source].first, + d_clusterState.partitionsInfo().at(partitionId).primaryLeaseId(), + bmqp_ctrlmsg::PartitionSequenceNumber(), source, PartitionSeqNumDataRange(replicaDataRequest.beginSequenceNumber(), replicaDataRequest.endSequenceNumber())); @@ -600,7 +607,9 @@ void StorageManager::processReplicaDataRequestDrop( const int partitionId = replicaDataRequest.partitionId(); BSLS_ASSERT_SAFE(0 <= partitionId && partitionId < static_cast(d_fileStores.size())); - BSLS_ASSERT_SAFE(source == d_partitionInfoVec[partitionId].primary()); + BSLS_ASSERT_SAFE( + source->nodeId() == + d_clusterState.partitionsInfo().at(partitionId).primaryNodeId()); BALL_LOG_INFO << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: " @@ -779,9 +788,8 @@ void StorageManager::processReplicaStateResponseDispatched( return; // RETURN } - EventData eventDataVec(d_allocator_p); - EventData failedEventDataVec(d_allocator_p); - mqbs::FileStore* fs = d_fileStores[requestPartitionId].get(); + EventData eventDataVec(d_allocator_p); + EventData failedEventDataVec(d_allocator_p); for (NodeResponsePairsCIter cit = pairs.cbegin(); cit != pairs.cend(); ++cit) { @@ -831,13 +839,14 @@ void 
StorageManager::processReplicaStateResponseDispatched( << "Received ReplicaStateResponse " << cit->second << " from " << cit->first->nodeDescription(); - BSLS_ASSERT_SAFE( - d_partitionInfoVec[response.partitionId()].primary()->nodeId() == - d_clusterData_p->membership().selfNode()->nodeId()); - - unsigned int primaryLeaseId = - d_partitionInfoVec[response.partitionId()].primaryLeaseId(); + BSLS_ASSERT_SAFE(d_clusterState.partitionsInfo() + .at(response.partitionId()) + .primaryNodeId() == + d_clusterData_p->membership().selfNode()->nodeId()); + const unsigned int primaryLeaseId = d_clusterState.partitionsInfo() + .at(response.partitionId()) + .primaryLeaseId(); eventDataVec.emplace_back(cit->first, responseId, response.partitionId(), @@ -849,6 +858,7 @@ void StorageManager::processReplicaStateResponseDispatched( BSLS_ASSERT_SAFE(requestPartitionId == response.partitionId()); } + mqbs::FileStore* fs = d_fileStores[requestPartitionId].get(); if (eventDataVec.size() > 0) { dispatchEventToPartition(fs, PartitionFSM::Event::e_REPLICA_STATE_RSPN, eventDataVec); @@ -1078,9 +1088,12 @@ void StorageManager::processShutdownEventDispatched(int partitionId) partitionId, 1); - dispatchEventToPartition(fs, - PartitionFSM::Event::e_RST_UNKNOWN, - eventDataVec); + bsl::shared_ptr > queueSp = + bsl::allocate_shared >( + d_allocator_p); + queueSp->emplace(PartitionFSM::Event::e_RST_UNKNOWN, eventDataVec); + + d_partitionFSMVec[partitionId]->applyEvent(queueSp); } StorageUtil::processShutdownEventDispatched( @@ -1092,14 +1105,15 @@ void StorageManager::processShutdownEventDispatched(int partitionId) void StorageManager::forceFlushFileStores() { - // executed by scheduler's dispatcher thread + // executed by event scheduler's dispatcher thread StorageUtil::forceFlushFileStores(&d_fileStores); } void StorageManager::do_startWatchDog(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1128,7 +1142,8 @@ void StorageManager::do_startWatchDog(const PartitionFSMArgsSp& args) void StorageManager::do_stopWatchDog(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1153,7 +1168,8 @@ void StorageManager::do_stopWatchDog(const PartitionFSMArgsSp& args) void StorageManager::do_openRecoveryFileSet(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1210,7 +1226,8 @@ void StorageManager::do_openRecoveryFileSet(const PartitionFSMArgsSp& args) void StorageManager::do_closeRecoveryFileSet(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1235,7 +1252,8 @@ void StorageManager::do_closeRecoveryFileSet(const PartitionFSMArgsSp& args) void StorageManager::do_storeSelfSeq(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS
BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1282,7 +1300,8 @@ void StorageManager::do_storeSelfSeq(const PartitionFSMArgsSp& args) void StorageManager::do_storePrimarySeq(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1332,7 +1351,8 @@ void StorageManager::do_storePrimarySeq(const PartitionFSMArgsSp& args) void StorageManager::do_storeReplicaSeq(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1385,7 +1405,8 @@ void StorageManager::do_storeReplicaSeq(const PartitionFSMArgsSp& args) void StorageManager::do_storePartitionInfo(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1422,7 +1443,8 @@ void StorageManager::do_storePartitionInfo(const PartitionFSMArgsSp& args) void StorageManager::do_clearPartitionInfo(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1442,16 +1464,18 @@ void StorageManager::do_clearPartitionInfo(const PartitionFSMArgsSp& args) PartitionInfo& pinfo = d_partitionInfoVec[partitionId]; BSLS_ASSERT_SAFE(fs->inDispatcherThread()); - StorageUtil::clearPrimaryForPartition(fs, - &pinfo, - *d_clusterData_p, - partitionId, - primaryNode); + StorageUtil::clearPrimaryForPartition( + fs, + &pinfo, + d_clusterData_p->identity().description(), + partitionId, + primaryNode); } void StorageManager::do_replicaStateRequest(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1503,7 +1527,8 @@ void StorageManager::do_replicaStateRequest(const PartitionFSMArgsSp& args) void StorageManager::do_replicaStateResponse(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1553,7 +1578,8 @@ void StorageManager::do_replicaStateResponse(const PartitionFSMArgsSp& args) void StorageManager::do_failureReplicaStateResponse( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1598,6 +1624,9 @@ void StorageManager::do_failureReplicaStateResponse( void StorageManager::do_logFailureReplicaStateResponse( const PartitionFSMArgsSp& args) { + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' + // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1625,6 +1654,9 @@ void StorageManager::do_logFailureReplicaStateResponse( void StorageManager::do_logFailurePrimaryStateResponse( const
PartitionFSMArgsSp& args) { + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' + // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1649,7 +1681,8 @@ void StorageManager::do_logFailurePrimaryStateResponse( void StorageManager::do_primaryStateRequest(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1714,7 +1747,8 @@ void StorageManager::do_primaryStateRequest(const PartitionFSMArgsSp& args) void StorageManager::do_primaryStateResponse(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1762,7 +1796,8 @@ void StorageManager::do_primaryStateResponse(const PartitionFSMArgsSp& args) void StorageManager::do_failurePrimaryStateResponse( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1806,7 +1841,8 @@ void StorageManager::do_failurePrimaryStateResponse( void StorageManager::do_replicaDataRequestPush(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1919,7 +1955,8 @@ void StorageManager::do_replicaDataRequestPush(const PartitionFSMArgsSp& args) void StorageManager::do_replicaDataResponsePush(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -1984,7 +2021,8 @@ void StorageManager::do_replicaDataResponsePush(const PartitionFSMArgsSp& args) void StorageManager::do_replicaDataRequestDrop(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2012,7 +2050,8 @@ void StorageManager::do_replicaDataRequestDrop(const PartitionFSMArgsSp& args) return; // RETURN } - mqbnet::ClusterNode* selfNode = d_clusterData_p->membership().selfNode(); + mqbnet::ClusterNode* const selfNode = + d_clusterData_p->membership().selfNode(); NodeToSeqNumCtxMap& nodeToSeqNumCtxMap = d_nodeToSeqNumCtxMapVec[partitionId]; @@ -2090,7 +2129,8 @@ void StorageManager::do_replicaDataRequestDrop(const PartitionFSMArgsSp& args) void StorageManager::do_replicaDataRequestPull(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2161,7 +2201,8 @@ void StorageManager::do_replicaDataRequestPull(const PartitionFSMArgsSp& args) void StorageManager::do_replicaDataResponsePull(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId
+ // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2227,7 +2268,8 @@ void StorageManager::do_failureReplicaDataResponsePush( void StorageManager::do_bufferLiveData(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2250,14 +2292,15 @@ void StorageManager::do_bufferLiveData(const PartitionFSMArgsSp& args) bool skipAlarm = partitionHealthState(partitionId) == PartitionFSM::State::e_UNKNOWN; - if (!StorageUtil::validateStorageEvent(rawEvent, - partitionId, - source, - pinfo.primary(), - pinfo.primaryStatus(), - *d_clusterData_p, - skipAlarm, - true)) { // isFSMWorkflow + if (!StorageUtil::validateStorageEvent( + rawEvent, + partitionId, + source, + pinfo.primary(), + pinfo.primaryStatus(), + d_clusterData_p->identity().description(), + skipAlarm, + true)) { // isFSMWorkflow return; // RETURN } @@ -2268,7 +2311,8 @@ void StorageManager::do_bufferLiveData(const PartitionFSMArgsSp& args) void StorageManager::do_processBufferedLiveData(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2337,7 +2381,8 @@ void StorageManager::do_processBufferedLiveData(const PartitionFSMArgsSp& args) void StorageManager::do_processLiveData(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2368,14 +2413,15 @@ void StorageManager::do_processLiveData(const PartitionFSMArgsSp& args) bool skipAlarm = partitionHealthState(partitionId) == PartitionFSM::State::e_UNKNOWN; - if (!StorageUtil::validateStorageEvent(rawEvent, - partitionId, - source, - pinfo.primary(), - pinfo.primaryStatus(), - *d_clusterData_p, - skipAlarm, - true)) { // isFSMWorkflow + if (!StorageUtil::validateStorageEvent( + rawEvent, + partitionId, + source, + pinfo.primary(), + pinfo.primaryStatus(), + d_clusterData_p->identity().description(), + skipAlarm, + true)) { // isFSMWorkflow return; // RETURN } @@ -2401,7 +2447,8 @@ void StorageManager::do_nackPut( void StorageManager::do_cleanupSeqnums(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2425,7 +2472,8 @@ void StorageManager::do_cleanupSeqnums(const PartitionFSMArgsSp& args) void StorageManager::do_startSendDataChunks(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2547,7 +2595,8 @@ void StorageManager::do_startSendDataChunks(const PartitionFSMArgsSp& args) void StorageManager::do_setExpectedDataChunkRange( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2621,7 +2670,8 @@ void
StorageManager::do_setExpectedDataChunkRange( void StorageManager::do_resetReceiveDataCtx(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2637,7 +2687,8 @@ void StorageManager::do_resetReceiveDataCtx(const PartitionFSMArgsSp& args) void StorageManager::do_openStorage(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2676,7 +2727,8 @@ void StorageManager::do_openStorage(const PartitionFSMArgsSp& args) void StorageManager::do_updateStorage(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2700,16 +2752,16 @@ void StorageManager::do_updateStorage(const PartitionFSMArgsSp& args) // 2) A newly chosen primary ('source') sends missing storage events to // replica (self). + const PartitionInfo& pinfo = d_partitionInfoVec[partitionId]; if (!StorageUtil::validatePartitionSyncEvent(rawEvent, partitionId, source, - d_clusterState, + pinfo, *d_clusterData_p, true)) { // isFSMWorkflow return; // RETURN } - const PartitionInfo& pinfo = d_partitionInfoVec[partitionId]; if (pinfo.primary()->nodeId() == d_clusterData_p->membership().selfNode()->nodeId()) { // If self is primary for this partition, self must be passive. @@ -2775,7 +2827,8 @@ void StorageManager::do_updateStorage(const PartitionFSMArgsSp& args) void StorageManager::do_removeStorage(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2813,7 +2866,8 @@ void StorageManager::do_removeStorage(const PartitionFSMArgsSp& args) void StorageManager::do_incrementNumRplcaDataRspn( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2839,7 +2893,8 @@ void StorageManager::do_incrementNumRplcaDataRspn( void StorageManager::do_checkQuorumRplcaDataRspn( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2874,7 +2929,8 @@ void StorageManager::do_checkQuorumRplcaDataRspn( void StorageManager::do_clearRplcaDataRspnCnt(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2898,7 +2954,8 @@ void StorageManager::do_clearRplcaDataRspnCnt(const PartitionFSMArgsSp& args) void StorageManager::do_reapplyEvent(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS
BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2920,7 +2977,8 @@ void StorageManager::do_reapplyEvent(const PartitionFSMArgsSp& args) void StorageManager::do_checkQuorumSeq(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -2954,7 +3012,8 @@ void StorageManager::do_checkQuorumSeq(const PartitionFSMArgsSp& args) void StorageManager::do_findHighestSeq(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -3021,7 +3080,8 @@ void StorageManager::do_findHighestSeq(const PartitionFSMArgsSp& args) void StorageManager::do_flagFailedReplicaSeq(const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -3047,7 +3107,9 @@ void StorageManager::do_flagFailedReplicaSeq(const PartitionFSMArgsSp& args) void StorageManager::do_transitionToActivePrimary( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' + // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -3073,7 +3135,9 @@ void StorageManager::do_transitionToActivePrimary( void StorageManager::do_reapplyDetectSelfPrimary( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' + // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -3107,7 +3171,8 @@ void StorageManager::do_reapplyDetectSelfPrimary( void StorageManager::do_reapplyDetectSelfReplica( const PartitionFSMArgsSp& args) { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with the partitionId + // contained in 'args' // PRECONDITIONS BSLS_ASSERT_SAFE(!args->eventsQueue()->empty()); @@ -3157,6 +3222,7 @@ StorageManager::StorageManager( , d_watchDogEventHandles(allocator) , d_watchDogTimeoutInterval(watchDogTimeoutDuration) , d_lowDiskspaceWarning(false) +, d_unrecognizedDomainsLock() , d_unrecognizedDomains(allocator) , d_blobSpPool_p(clusterData->blobSpPool()) , d_domainFactory_p(domainFactory) @@ -3183,7 +3249,8 @@ StorageManager::StorageManager( , d_numReplicaDataResponsesReceivedVec(allocator) , d_isQueueKeyInfoMapVecInitialized(false) , d_queueKeyInfoMapVec(allocator) -, d_minimumRequiredDiskSpace(0) +, d_minimumRequiredDiskSpace( + StorageUtil::findMinReqDiskSpace(d_clusterConfig.partitionConfig())) , d_storageMonitorEventHandle() , d_gcMessagesEventHandle() , d_recoveryManager_mp() @@ -3217,9 +3284,6 @@ StorageManager::StorageManager( d_partitionFSMVec.back()->registerObserver(this); } - d_minimumRequiredDiskSpace = StorageUtil::findMinReqDiskSpace( - partitionCfg); - // Set the default replication-factor to one more than half of the cluster. // Do this here (rather than in the initializer-list) to avoid accessing // 'd_cluster_p' before the above non-nullness check.
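The comment above sets the default replication factor to "one more than half of the cluster", i.e. a majority quorum. A minimal sketch of that arithmetic; the helper name and the exact expression are illustrative, since the hunk only shows the comment, not the assignment itself:

#include <cassert>

// Majority quorum: one more than half of the cluster.  For example, a
// 5-node cluster yields 5 / 2 + 1 = 3, and a 4-node cluster also yields 3.
// 'computeDefaultReplicationFactor' is a hypothetical helper, not a function
// from this patch.
inline int computeDefaultReplicationFactor(int clusterSize)
{
    return clusterSize / 2 + 1;
}

int main()
{
    assert(computeDefaultReplicationFactor(5) == 3);
    assert(computeDefaultReplicationFactor(4) == 3);
    assert(computeDefaultReplicationFactor(1) == 1);
    return 0;
}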
@@ -3344,11 +3408,12 @@ int StorageManager::start(bsl::ostream& errorDescription) bslma::Allocator* recoveryManagerAllocator = d_allocators.get( "RecoveryManager"); - d_recoveryManager_mp.load(new (*recoveryManagerAllocator) - RecoveryManager(d_clusterConfig, - d_clusterData_p, - dsCfg, - recoveryManagerAllocator), + d_recoveryManager_mp.load(new (*recoveryManagerAllocator) RecoveryManager( + d_clusterData_p->bufferFactory(), + d_clusterConfig, + *d_clusterData_p, + dsCfg, + recoveryManagerAllocator), recoveryManagerAllocator); rc = d_recoveryManager_mp->start(errorDescription); @@ -3397,7 +3462,10 @@ void StorageManager::stop() for (int p = 0; p < d_clusterConfig.partitionConfig().numPartitions(); p++) { - d_partitionFSMVec[p]->unregisterObserver(this); + d_fileStores[p]->execute( + bdlf::BindUtil::bind(&PartitionFSM::unregisterObserver, + d_partitionFSMVec[p].get(), + this)); } d_clusterData_p->scheduler()->cancelEventAndWait(&d_gcMessagesEventHandle); @@ -3406,8 +3474,8 @@ void StorageManager::stop() d_recoveryManager_mp->stop(); StorageUtil::stop( - d_clusterData_p, &d_fileStores, + d_clusterData_p->identity().description(), bdlf::BindUtil::bind(&StorageManager::shutdownCb, this, bdlf::PlaceHolders::_1, // partitionId @@ -3512,7 +3580,7 @@ void StorageManager::unregisterQueue(const bmqt::Uri& uri, int partitionId) &d_storagesLock, d_clusterData_p, partitionId, - d_partitionInfoVec[partitionId], + bsl::cref(d_partitionInfoVec[partitionId]), uri)); d_fileStores[partitionId]->dispatchEvent(queueEvent); @@ -3699,7 +3767,6 @@ void StorageManager::setQueue(mqbi::Queue* queue, queue)); d_fileStores[partitionId]->dispatchEvent(queueEvent); - ; } void StorageManager::setQueueRaw(mqbi::Queue* queue, @@ -3752,21 +3819,22 @@ void StorageManager::clearPrimaryForPartition(int partitionId, BSLS_ASSERT_SAFE(0 <= partitionId && partitionId < static_cast(d_fileStores.size())); BSLS_ASSERT_SAFE(primary); - BSLS_ASSERT_SAFE(primary->nodeId() == - d_partitionInfoVec[partitionId].primary()->nodeId()); + // We always clear the primary info from ClusterState first + BSLS_ASSERT_SAFE( + !d_clusterState.partitionsInfo().at(partitionId).primaryNode()); BALL_LOG_INFO << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: " << "Self Transition back to Unknown in the Partition FSM."; - EventData eventDataVec; - eventDataVec.emplace_back(d_clusterData_p->membership().selfNode(), - -1, // placeholder requestId - partitionId, - 1, - primary, - d_partitionInfoVec[partitionId].primaryLeaseId()); + eventDataVec.emplace_back( + d_clusterData_p->membership().selfNode(), + -1, // placeholder requestId + partitionId, + 1, + primary, + d_clusterState.partitionsInfo().at(partitionId).primaryLeaseId()); mqbs::FileStore* fs = d_fileStores[partitionId].get(); BSLS_ASSERT_SAFE(fs); @@ -4022,7 +4090,7 @@ void StorageManager::processStorageEvent( } // Ensure that 'pid' is valid. 
- if (static_cast(pid) >= d_clusterState.partitions().size()) { + if (pid >= d_clusterState.partitions().size()) { MWCTSK_ALARMLOG_ALARM("STORAGE") << d_cluster_p->description() << " Partition [" << pid << "]: " << "Received " @@ -4130,9 +4198,10 @@ void StorageManager::processPrimaryStatusAdvisory( const bmqp_ctrlmsg::PrimaryStatusAdvisory& advisory, mqbnet::ClusterNode* source) { - // executed by *ANY* thread + // executed by *CLUSTER DISPATCHER* thread // PRECONDITIONS + BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(d_fileStores.size() > static_cast(advisory.partitionId())); @@ -4163,9 +4232,10 @@ void StorageManager::processReplicaStatusAdvisory( mqbnet::ClusterNode* source, bmqp_ctrlmsg::NodeStatus::Value status) { - // executed by *ANY* thread + // executed by *CLUSTER DISPATCHER* thread // PRECONDITIONS + BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(d_fileStores.size() > static_cast(partitionId)); @@ -4185,7 +4255,7 @@ void StorageManager::processReplicaStatusAdvisory( d_clusterData_p, fs, partitionId, - d_partitionInfoVec[partitionId], + bsl::cref(d_partitionInfoVec[partitionId]), source, status)); } @@ -4211,9 +4281,11 @@ void StorageManager::processShutdownEvent() void StorageManager::applyForEachQueue(int partitionId, const QueueFunctor& functor) const { - // executed by the *DISPATCHER* thread + // executed by the *QUEUE DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS const mqbs::FileStore& fs = fileStore(partitionId); + BSLS_ASSERT_SAFE(fs.inDispatcherThread()); if (!fs.isOpen()) { return; // RETURN } @@ -4259,6 +4331,7 @@ void StorageManager::gcUnrecognizedDomainQueues() d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); StorageUtil::gcUnrecognizedDomainQueues(&d_fileStores, + &d_unrecognizedDomainsLock, d_unrecognizedDomains); } @@ -4288,11 +4361,6 @@ bool StorageManager::isStorageEmpty(const bmqt::Uri& uri, partitionId); } -bdlbb::BlobBufferFactory* StorageManager::blobBufferFactory() const -{ - return d_clusterData_p->bufferFactory(); -} - const mqbs::FileStore& StorageManager::fileStore(int partitionId) const { // executed by *ANY* thread diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.h b/src/groups/mqb/mqbc/mqbc_storagemanager.h index aff49ca68a..5e4889dca6 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.h +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.h @@ -91,9 +91,6 @@ class ClusterNode; namespace mqbs { class ReplicatedStorage; } -namespace bdlbb { -class BlobBufferFactory; -} namespace mqbc { @@ -195,8 +192,12 @@ class StorageManager EventHandles d_watchDogEventHandles; // List of event handles for the watch // dog, indexed by partitionId. + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. - bsls::TimeInterval d_watchDogTimeoutInterval; + const bsls::TimeInterval d_watchDogTimeoutInterval; // Timeout interval for the watch dog bool d_lowDiskspaceWarning; // Flag to indicate a low disk space // warning was issued. This flag is // used *only* for logging purposes // (see 'storageMonitorCb' impl) + // + // THREAD: Except during the ctor, this data member **must** be accessed in + // the event scheduler's dispatcher thread. + + bslmt::Mutex d_unrecognizedDomainsLock; + // Mutex to protect access to 'd_unrecognizedDomains' and its elements.
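The 'd_unrecognizedDomainsLock' member introduced above guards a container that is written from the partitions' queue dispatcher threads and read from the cluster dispatcher thread. A minimal sketch of the intended access pattern, with std::mutex and std::map standing in for bslmt::Mutex and a DomainQueueMessagesCountMap (all names below are illustrative, not from the codebase):

#include <map>
#include <mutex>
#include <string>
#include <vector>

// Illustrative stand-ins for the real types: 'std::mutex' plays the role of
// 'bslmt::Mutex', and the map plays the role of one partition's
// DomainQueueMessagesCountMap.
struct UnrecognizedDomains {
    std::mutex                      d_lock;     // analogous to d_unrecognizedDomainsLock
    std::map<std::string, unsigned> d_domains;  // analogous to one partition's map

    // Writer side: record an unrecognized domain under the lock.
    void add(const std::string& domain)
    {
        std::lock_guard<std::mutex> guard(d_lock);  // LOCK
        ++d_domains[domain];
    }

    // Reader side: take a snapshot under the lock, then act on the copy
    // without holding the lock (the same shape as the bitset snapshot taken
    // in 'gcUnrecognizedDomainQueues' in this patch).
    std::vector<std::string> snapshotDomains()
    {
        std::lock_guard<std::mutex> guard(d_lock);  // LOCK
        std::vector<std::string> result;
        for (const auto& entry : d_domains) {
            result.push_back(entry.first);
        }
        return result;
    }
};

int main()
{
    UnrecognizedDomains domains;
    domains.add("bmq.test.domain");
    const std::vector<std::string> snapshot = domains.snapshotDomains();
    return snapshot.size() == 1 ? 0 : 1;
}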
DomainQueueMessagesCountMaps d_unrecognizedDomains; // List of DomainQueueMessagesMap, @@ -215,6 +222,8 @@ class StorageManager // storage recovery either due to // genuine domain migration or // misconfiguration. + // + // THREAD: Protected by 'd_unrecognizedDomainsLock'. BlobSpPool* d_blobSpPool_p; // SharedObjectPool of blobs to use @@ -229,12 +238,16 @@ class StorageManager // Associated cluster object ClusterData* d_clusterData_p; - // Associated non-persistent cluster - // data for this node + // Associated non-persistent cluster data for this node + // + // THREAD: This data member is safe to be accessed in the cluster + // dispatcher thread. However, it is also accessed from other + // threads, so please be vigilant when doing so. const mqbc::ClusterState& d_clusterState; - // Associated persistent cluster data - // for this node + // Associated persistent cluster data for this node + // + // THREAD: **Must** be accessed in the cluster dispatcher thread. const mqbcfg::ClusterDefinition& d_clusterConfig; // Cluster config to use @@ -242,6 +255,11 @@ class StorageManager FileStores d_fileStores; // List of all partitions, indexed by // 'partitionId' + // + // THREAD: The i-th index of this data member is safe to be accessed in the + // associated Queue dispatcher thread for the i-th partitionId. + // However, it is also accessed from other threads, so please be + // vigilant when doing so. bdlmt::FixedThreadPool d_miscWorkThreadPool; // Thread pool used for any standalone @@ -250,9 +268,9 @@ class StorageManager // It is used by the partitions owned // by this object. - RecoveryStatusCb d_recoveryStatusCb; + const RecoveryStatusCb d_recoveryStatusCb; - PartitionPrimaryStatusCb d_partitionPrimaryStatusCb; + const PartitionPrimaryStatusCb d_partitionPrimaryStatusCb; mutable bslmt::Mutex d_storagesLock; // Mutex to protect access to // 'd_storages'. @@ -272,6 +290,8 @@ class StorageManager // because they are accessed from // partitions' dispatcher threads, as // well as cluster dispatcher thread. + // + // THREAD: Protected by 'd_storagesLock'. bslmt::Mutex d_appKeysLock; // Mutex to protect access to @@ -305,14 +325,24 @@ class StorageManager // accessed from partitions' dispatcher // threads as well as cluster // dispatcher threads. + // + // THREAD: Protected by 'd_appKeysLock'. PartitionInfoVec d_partitionInfoVec; // Vector of 'PartitionInfo' indexed by // partitionId + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. PartitionFSMVec d_partitionFSMVec; // Vector of 'PartitionFSM' indexed by // partitionId + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. bsls::AtomicInt d_numPartitionsRecoveredFully; // Number of partitions whose recovery @@ -333,19 +363,20 @@ class StorageManager bsl::vector d_recoveryStartTimes; // Vector of partition recovery start // times indexed by partitionId. + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. NodeToSeqNumCtxMapPartitionVec d_nodeToSeqNumCtxMapVec; // Vector of 'NodeToSeqNumCtxMap' - // indexed by partitionId. Note, that - // each element of the vector should - // only be accessed in corresponding - // thread attached to the partitionId.
- // Currently, false sharing is not much - // of a performance bottleneck since - // update to elements of this vector is - // not a highly frequent operation. - - unsigned int d_seqNumQuorum; + // indexed by partitionId. + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. + + const unsigned int d_seqNumQuorum; // Quorum config to use for Sequence // numbers being collected by self if // primary while getting the latest @@ -355,6 +386,10 @@ class StorageManager // Vector of number of replica data // responses received, indexed by // partitionId. + // + // THREAD: Except during the ctor, the i-th index of this data member + // **must** be accessed in the associated Queue dispatcher thread + // for the i-th partitionId. bsls::AtomicBool d_isQueueKeyInfoMapVecInitialized; // Whether 'd_queueKeyInfoMapVec' has been initialized. @@ -374,7 +409,7 @@ class StorageManager // will be set to 'true'. Afterwards, it **must not** be modified // again, and hence is safe to read from any thread. - bsls::Types::Uint64 d_minimumRequiredDiskSpace; + const bsls::Types::Uint64 d_minimumRequiredDiskSpace; // The bare minimum space required for // storage manager to be able to // successfully load all partitions. @@ -393,6 +428,8 @@ class StorageManager int d_replicationFactor; // Replication factor used to configure // FileStores. + // + // THREAD: **Must** be accessed in the cluster dispatcher thread private: // NOT IMPLEMENTED @@ -405,18 +442,18 @@ class StorageManager /// Return the dispatcher of the associated cluster. mqbi::Dispatcher* dispatcher(); - // Encode and send the specified schema 'message' to the specified peer - // 'destination'. - // - // THREAD: This method is invoked in the associated cluster's dispatcher - // thread. + /// Encode and send the specified schema 'message' to the specified peer + /// 'destination'. + /// + /// THREAD: This method is invoked in the associated cluster's dispatcher + /// thread. void sendMessage(const bmqp_ctrlmsg::ControlMessage& message, mqbnet::ClusterNode* destination); /// Callback to start the recovery for the specified `partitionId`. /// /// THREAD: This method is invoked in the associated Queue dispatcher - /// thread for the specified 'partitionId. + /// thread for the specified 'partitionId'. void startRecoveryCb(int partitionId); /// Gracefully shut down the partition associated with the specified @@ -712,10 +749,6 @@ class StorageManager virtual void do_reapplyDetectSelfReplica(const PartitionFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; - private: - // PRIVATE ACCESSORS - bool isLocalCluster() const; - public: // TRAITS BSLMF_NESTED_TRAIT_DECLARATION(StorageManager, bslma::UsesBslmaAllocator) @@ -790,8 +823,7 @@ class StorageManager mqbi::Domain* domain) BSLS_KEYWORD_OVERRIDE; /// Synchronously unregister the queue with the specified `uri` from the - /// specified `partitionId`. Behavior is undefined unless this routine - /// is invoked from the cluster dispatcher thread. + /// specified `partitionId`. /// /// THREAD: Executed by the Client's dispatcher thread. virtual void unregisterQueue(const bmqt::Uri& uri, @@ -944,12 +976,12 @@ class StorageManager processReceiptEvent(const bmqp::Event& event, mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; - /// Executed by any thread. + /// Executed in cluster dispatcher thread. 
virtual void processPrimaryStatusAdvisory( const bmqp_ctrlmsg::PrimaryStatusAdvisory& advisory, mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; - /// Executed by any thread. + /// Executed in cluster dispatcher thread. virtual void processReplicaStatusAdvisory( int partitionId, mqbnet::ClusterNode* source, @@ -989,6 +1021,9 @@ class StorageManager /// Return the processor handle in charge of the specified /// `partitionId`. The behavior is undefined if `partitionId` does not /// represent a valid partition id. + // + // THREAD: executed by any thread. It is safe because process handle is set + // at ctor and never modified afterwards. virtual mqbi::Dispatcher::ProcessorHandle processorForPartition(int partitionId) const BSLS_KEYWORD_OVERRIDE; @@ -999,10 +1034,6 @@ class StorageManager virtual bool isStorageEmpty(const bmqt::Uri& uri, int partitionId) const BSLS_KEYWORD_OVERRIDE; - /// Return the blob buffer factory to use. - virtual bdlbb::BlobBufferFactory* - blobBufferFactory() const BSLS_KEYWORD_OVERRIDE; - /// Return partition corresponding to the specified `partitionId`. The /// behavior is undefined if `partitionId` does not represent a valid /// partition id. @@ -1158,12 +1189,6 @@ inline mqbi::Dispatcher* StorageManager::dispatcher() return d_clusterData_p->dispatcherClientData().dispatcher(); } -// PRIVATE ACCESSORS -inline bool StorageManager::isLocalCluster() const -{ - return d_clusterData_p->cluster()->isLocal(); -} - // ACCESSORS inline mqbi::Dispatcher::ProcessorHandle StorageManager::processorForPartition(int partitionId) const diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp index f96c9fd64c..a51101ed0d 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp @@ -201,7 +201,10 @@ struct TestHelper { BSLS_ASSERT_OPT(rc == 0); } - void setPartitionPrimary(mqbc::StorageManager* storageManager, int partitionId, unsigned int leaseId, mqbnet::ClusterNode* node) + void setPartitionPrimary(mqbc::StorageManager* storageManager, + int partitionId, + unsigned int leaseId, + mqbnet::ClusterNode* node) { d_cluster_mp->_state().setPartitionPrimary(partitionId, leaseId, node); storageManager->setPrimaryForPartition(partitionId, node, leaseId); diff --git a/src/groups/mqb/mqbc/mqbc_storageutil.cpp b/src/groups/mqb/mqbc/mqbc_storageutil.cpp index 72816a742b..7fa81591a6 100644 --- a/src/groups/mqb/mqbc/mqbc_storageutil.cpp +++ b/src/groups/mqb/mqbc/mqbc_storageutil.cpp @@ -973,6 +973,8 @@ int StorageUtil::validateDiskSpace(const mqbcfg::PartitionConfig& config, const mqbc::ClusterData& clusterData, const bsls::Types::Uint64& minDiskSpace) { + // executed by the *CLUSTER DISPATCHER* thread + enum RcEnum { // Value for the various RC error categories rc_SUCCESS = 0, @@ -1062,10 +1064,13 @@ bool StorageUtil::validateStorageEvent( const mqbnet::ClusterNode* source, const mqbnet::ClusterNode* primary, bmqp_ctrlmsg::PrimaryStatus::Value status, - const mqbc::ClusterData& clusterData, + const bsl::string& clusterDescription, bool skipAlarm, bool isFSMWorkflow) { + // executed by *QUEUE_DISPATCHER* thread associated with 'partitionId' or + // by the *CLUSTER DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(event.isStorageEvent()); BSLS_ASSERT_SAFE(source); @@ -1078,7 +1083,7 @@ bool StorageUtil::validateStorageEvent( } MWCTSK_ALARMLOG_ALARM("STORAGE") - << clusterData.identity().description() << ": Received storage " + << clusterDescription << ": Received storage " << "event 
from node " << source->nodeDescription() << " for " << "PartitionId [" << partitionId << "] which has no primary as " << "perceived by this node. Ignoring entire storage event." @@ -1092,7 +1097,7 @@ bool StorageUtil::validateStorageEvent( } MWCTSK_ALARMLOG_ALARM("STORAGE") - << clusterData.identity().description() << ": Received storage " + << clusterDescription << ": Received storage " << "event from node " << source->nodeDescription() << " for " << "PartitionId [" << partitionId << "] which has different " << "primary as perceived by this node: " @@ -1108,7 +1113,7 @@ bool StorageUtil::validateStorageEvent( } MWCTSK_ALARMLOG_ALARM("STORAGE") - << clusterData.identity().description() << ": Received storage " + << clusterDescription << ": Received storage " << "event for PartitionId [" << partitionId << "] from: " << source->nodeDescription() << ", which is perceived as " @@ -1124,35 +1129,35 @@ bool StorageUtil::validatePartitionSyncEvent( const bmqp::Event& event, int partitionId, const mqbnet::ClusterNode* source, - const mqbc::ClusterState& clusterState, + const PartitionInfo& partitionInfo, const mqbc::ClusterData& clusterData, bool isFSMWorkflow) { + // executed by *QUEUE_DISPATCHER* thread associated with 'partitionId' or + // by the *CLUSTER DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(event.isPartitionSyncEvent()); BSLS_ASSERT_SAFE(source); // Check that either self is primary or 'source' is perceived as primary // for the partition. - const ClusterStatePartitionInfo& pinfo = clusterState.partition( - partitionId); - - if (pinfo.primaryNode() != clusterData.membership().selfNode() && - pinfo.primaryNode() != source) { + if (partitionInfo.primary() != clusterData.membership().selfNode() && + partitionInfo.primary() != source) { BALL_LOG_ERROR << clusterData.identity().description() << " PartitionId [" << partitionId << "]: Received partition-sync event from peer: " << source->nodeDescription() << " but neither self nor peer is primary. Perceived" << " primary: " - << (pinfo.primaryNode() - ? pinfo.primaryNode()->nodeDescription() + << (partitionInfo.primary() + ? partitionInfo.primary()->nodeDescription() : "** none **"); return false; // RETURN } - if (!isFSMWorkflow && - bmqp_ctrlmsg::PrimaryStatus::E_PASSIVE != pinfo.primaryStatus()) { + if (!isFSMWorkflow && bmqp_ctrlmsg::PrimaryStatus::E_PASSIVE != + partitionInfo.primaryStatus()) { // Either self or source is primary. Whichever is the primary, it must // be perceived as a passive one. @@ -1160,10 +1165,11 @@ bool StorageUtil::validatePartitionSyncEvent( << " PartitionId [" << partitionId << "]: Received partition-sync event from: " << source->nodeDescription() - << " but primary status is: " << pinfo.primaryStatus() + << " but primary status is: " + << partitionInfo.primaryStatus() << ", perceived primary: " - << (pinfo.primaryNode() - ? pinfo.primaryNode()->nodeDescription() + << (partitionInfo.primary() + ? 
partitionInfo.primary()->nodeDescription() : "** none **"); return false; // RETURN } @@ -1213,6 +1219,11 @@ int StorageUtil::assignPartitionDispatcherThreads( const bdlb::NullableValue& queueCreationCb, const bdlb::NullableValue& queueDeletionCb) { + // executed by the cluster *DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(dispatcher->inDispatcherThread(&cluster)); + enum RcEnum { // Value for the various RC error categories rc_SUCCESS = 0, @@ -1285,16 +1296,18 @@ int StorageUtil::assignPartitionDispatcherThreads( return rc_SUCCESS; } -void StorageUtil::clearPrimaryForPartition(mqbs::FileStore* fs, - PartitionInfo* partitionInfo, - const ClusterData& clusterData, - int partitionId, - mqbnet::ClusterNode* primary) +void StorageUtil::clearPrimaryForPartition( + mqbs::FileStore* fs, + PartitionInfo* partitionInfo, + const bsl::string& clusterDescription, + + int partitionId, + mqbnet::ClusterNode* primary) { - // executed by *DISPATCHER* thread + // executed by *QUEUE_DISPATCHER* thread associated with 'partitionId' // PRECONDITION - BSLS_ASSERT_SAFE(fs); + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); BSLS_ASSERT_SAFE(partitionInfo); BSLS_ASSERT_SAFE(0 <= partitionId); @@ -1308,8 +1321,7 @@ void StorageUtil::clearPrimaryForPartition(mqbs::FileStore* fs, return; // RETURN } - BALL_LOG_INFO << clusterData.identity().description() << " PartitionId [" - << partitionId + BALL_LOG_INFO << clusterDescription << " PartitionId [" << partitionId << "]: processing 'clear-primary' event. Current primary: " << partitionInfo->primary()->nodeDescription() << ", current leaseId: " << partitionInfo->primaryLeaseId() @@ -1343,8 +1355,11 @@ void StorageUtil::transitionToActivePrimary(PartitionInfo* partitionInfo, mqbc::ClusterData* clusterData, int partitionId) { + // executed by *QUEUE_DISPATCHER* thread associated with 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(partitionInfo); + BSLS_ASSERT_SAFE(clusterData); partitionInfo->setPrimaryStatus(bmqp_ctrlmsg::PrimaryStatus::E_ACTIVE); @@ -1372,7 +1387,7 @@ void StorageUtil::onPartitionPrimarySync( // executed by *QUEUE_DISPATCHER* thread associated with 'partitionId' // PRECONDITIONS - BSLS_ASSERT_SAFE(fs); + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); BSLS_ASSERT_SAFE(fs->isOpen()); BSLS_ASSERT_SAFE(pinfo); BSLS_ASSERT_SAFE(clusterData); @@ -1446,6 +1461,7 @@ void StorageUtil::recoveredQueuesCb( AppKeys* appKeys, bslmt::Mutex* appKeysLock, mqbi::DomainFactory* domainFactory, + bslmt::Mutex* unrecognizedDomainsLock, DomainQueueMessagesCountMap* unrecognizedDomains, const bsl::string& clusterDescription, int partitionId, @@ -1458,6 +1474,7 @@ void StorageUtil::recoveredQueuesCb( BSLS_ASSERT_SAFE(storageMap); BSLS_ASSERT_SAFE(fs); BSLS_ASSERT_SAFE(appKeys); + BSLS_ASSERT_SAFE(unrecognizedDomainsLock); BSLS_ASSERT_SAFE(unrecognizedDomains && unrecognizedDomains->empty()); BSLS_ASSERT_SAFE(0 <= partitionId); BSLS_ASSERT_SAFE(fs->inDispatcherThread()); @@ -1619,18 +1636,24 @@ void StorageUtil::recoveredQueuesCb( << "]: domain creation step complete. Checking if all " << "domains were created successfully."; - for (DomainMapIter dit = domainMap.begin(); dit != domainMap.end(); - ++dit) { - if (dit->second == 0 || !dit->second->cluster()->isClusterMember()) { - // Two scenarios: - // 1. Failed to create domain for this domain name. - // 2. Domain is associated with a proxy cluster. - // - // Will add it to the map of unrecognized domain names for further - // investigation. 
- unrecognizedDomains->insert( - bsl::make_pair(dit->first, - mqbs::StorageUtil::QueueMessagesCountMap())); + { + bslmt::LockGuard unrecognizedDomainsLockGuard( + unrecognizedDomainsLock); // LOCK + + for (DomainMapIter dit = domainMap.begin(); dit != domainMap.end(); + ++dit) { + if (dit->second == 0 || + !dit->second->cluster()->isClusterMember()) { + // Two scenarios: + // 1. Failed to create domain for this domain name. + // 2. Domain is associated with a proxy cluster. + // + // Will add it to the map of unrecognized domain names for + // further investigation. + unrecognizedDomains->insert(bsl::make_pair( + dit->first, + mqbs::StorageUtil::QueueMessagesCountMap())); + } } } @@ -1692,9 +1715,15 @@ void StorageUtil::recoveredQueuesCb( BSLS_ASSERT_SAFE(appKey == existingAppKey); } - BSLS_ASSERT_SAFE( - unrecognizedDomains->find(queueUri.qualifiedDomain()) == - unrecognizedDomains->end()); + { + bslmt::LockGuard + unrecognizedDomainsLockGuard( + unrecognizedDomainsLock); // LOCK + + BSLS_ASSERT_SAFE(unrecognizedDomains->find( + queueUri.qualifiedDomain()) == + unrecognizedDomains->end()); + } BALL_LOG_INFO << clusterDescription << ": PartitionId [" << partitionId << "]: encountered queueUri [" @@ -1724,11 +1753,16 @@ void StorageUtil::recoveredQueuesCb( // If domain name is unrecognized, do not create storage. const bslstl::StringRef& domainName = queueUri.qualifiedDomain(); - DomainQueueMessagesCountMap::iterator iter = unrecognizedDomains->find( - domainName); - if (iter != unrecognizedDomains->end()) { - iter->second.insert(bsl::make_pair(queueUri, 0)); - continue; // CONTINUE + { + bslmt::LockGuard unrecognizedDomainsLockGuard( + unrecognizedDomainsLock); // LOCK + + DomainQueueMessagesCountMap::iterator iter = + unrecognizedDomains->find(domainName); + if (iter != unrecognizedDomains->end()) { + iter->second.insert(bsl::make_pair(queueUri, 0)); + continue; // CONTINUE + } } DomainMapIter dit = domainMap.find(domainName); @@ -2060,33 +2094,46 @@ void StorageUtil::recoveredQueuesCb( void StorageUtil::dumpUnknownRecoveredDomains( const bsl::string& clusterDescription, + bslmt::Mutex* unrecognizedDomainsLock, const DomainQueueMessagesCountMaps& unrecognizedDomains) { - // Since 'unrecognizedDomains' has been resized to the number of partitions - // upon construction, we need to check whether each map in this vector is - // empty to verify emptiness. - if (static_cast(bsl::count_if( - unrecognizedDomains.cbegin(), - unrecognizedDomains.cend(), - bdlf::MemFnUtil::memFn(&DomainQueueMessagesCountMap::empty))) == - unrecognizedDomains.size()) { - return; // RETURN - } + // executed by *QUEUE_DISPATCHER* thread - // All partitions have gone through 'recoveredQueuesCb', but we have - // encountered some unrecognized domains. We will print a warning in the - // log with statistics about them, allowing BlazingMQ developers to - // investigate. + // PRECONDITIONS + BSLS_ASSERT_SAFE(unrecognizedDomainsLock); - // 1. Collapse 'unrecognizedDomains' from a list of maps to a single map. DomainQueueMessagesCountMap unrecognizedDomainsFlat; - for (DomainQueueMessagesCountMaps::const_iterator cit = - unrecognizedDomains.cbegin(); - cit != unrecognizedDomains.cend(); - ++cit) { - mqbs::StorageUtil::mergeDomainQueueMessagesCountMap( - &unrecognizedDomainsFlat, - *cit); + + // 1. Collapse 'unrecognizedDomains' from a list of maps to a single map. 
+ { + bslmt::LockGuard unrecognizedDomainsLockGuard( + unrecognizedDomainsLock); // LOCK + + // Since 'unrecognizedDomains' has been resized to the number of + // partitions upon construction, we need to check whether each map in + // this vector is empty to verify emptiness. + if (static_cast( + bsl::count_if(unrecognizedDomains.cbegin(), + unrecognizedDomains.cend(), + bdlf::MemFnUtil::memFn( + &DomainQueueMessagesCountMap::empty))) == + unrecognizedDomains.size()) { + return; // RETURN + } + + // All partitions have gone through 'recoveredQueuesCb', but we have + // encountered some unrecognized domains. We will print a warning in + // the log with statistics about them, allowing BlazingMQ developers to + // investigate. + + for (DomainQueueMessagesCountMaps::const_iterator cit = + unrecognizedDomains.cbegin(); + cit != unrecognizedDomains.cend(); + ++cit) { + mqbs::StorageUtil::mergeDomainQueueMessagesCountMap( + &unrecognizedDomainsFlat, + *cit); + } } // 2. Print statistics using the collapsed map, in sorted order of domain @@ -2146,16 +2193,33 @@ void StorageUtil::dumpUnknownRecoveredDomains( void StorageUtil::gcUnrecognizedDomainQueues( FileStores* fileStores, + bslmt::Mutex* unrecognizedDomainsLock, const DomainQueueMessagesCountMaps& unrecognizedDomains) { // executed by cluster *DISPATCHER* thread // PRECONDITIONS BSLS_ASSERT_SAFE(fileStores); + BSLS_ASSERT_SAFE(unrecognizedDomainsLock); BSLS_ASSERT_SAFE(unrecognizedDomains.size() == fileStores->size()); - for (size_t i = 0; i < unrecognizedDomains.size(); ++i) { - if (!unrecognizedDomains[i].empty()) { + bsl::vector bitset(unrecognizedDomains.size(), false); + // Did not use bsl::bitset because we do not know the required size at + // compile time. + + { + bslmt::LockGuard unrecognizedDomainsLockGuard( + unrecognizedDomainsLock); // LOCK + + for (size_t i = 0; i < unrecognizedDomains.size(); ++i) { + if (!unrecognizedDomains[i].empty()) { + bitset[i] = true; + } + } + } + + for (size_t i = 0; i < bitset.size(); ++i) { + if (bitset[i]) { // Unrecognized domains are found in Partition 'i'. We initiate a // forced rollover to ensure that queues from those domains are // GC'd. @@ -2170,16 +2234,18 @@ void StorageUtil::gcUnrecognizedDomainQueues( } } -void StorageUtil::stop(ClusterData* clusterData, - FileStores* fileStores, - const ShutdownCb& shutdownCb) +void StorageUtil::stop(FileStores* fileStores, + const bsl::string& clusterDescription, + const ShutdownCb& shutdownCb) { + // executed by cluster *DISPATCHER* thread + // Note that we won't delete any objects until dispatcher has stopped. The // storages have already been closed in BBQueue.close. // Enqueue event to close all FileStores. - BALL_LOG_INFO << clusterData->identity().description() + BALL_LOG_INFO << clusterDescription << ": Enqueuing event to close FileStores."; bslmt::Latch latch(fileStores->size()); @@ -2199,13 +2265,13 @@ void StorageUtil::stop(ClusterData* clusterData, } } - BALL_LOG_INFO << clusterData->identity().description() + BALL_LOG_INFO << clusterDescription << ": About to wait for partition shutdown to complete."; latch.wait(); bsls::Types::Int64 shutdownEndTime = mwcsys::Time::highResolutionTimer(); BALL_LOG_INFO - << clusterData->identity().description() + << clusterDescription << ": Shutdown complete for all partitions. 
Total time spent in " << "shutdown: " << mwcu::PrintUtil::prettyTimeInterval(shutdownEndTime - @@ -2216,9 +2282,11 @@ void StorageUtil::stop(ClusterData* clusterData, void StorageUtil::shutdown(int partitionId, bslmt::Latch* latch, FileStores* fileStores, - ClusterData* clusterData, + const bsl::string& clusterDescription, const mqbcfg::ClusterDefinition& clusterConfig) { + // executed by *QUEUE_DISPATCHER* thread with the specified 'partitionId' + // PRECONDITIONS BSLS_ASSERT_SAFE(0 <= partitionId && partitionId < static_cast(fileStores->size())); @@ -2232,13 +2300,13 @@ void StorageUtil::shutdown(int partitionId, if (fs) { BSLS_ASSERT_SAFE(fs->inDispatcherThread()); - BALL_LOG_INFO << clusterData->identity().description() - << ": Closing PartitionId [" << partitionId << "]."; + BALL_LOG_INFO << clusterDescription << ": Closing PartitionId [" + << partitionId << "]."; fs->close(clusterConfig.partitionConfig().flushAtShutdown()); - BALL_LOG_INFO << clusterData->identity().description() - << ": PartitionId [" << partitionId << "] closed."; + BALL_LOG_INFO << clusterDescription << ": PartitionId [" << partitionId + << "] closed."; } latch->arrive(); @@ -3270,8 +3338,10 @@ void StorageUtil::processPrimaryStatusAdvisoryDispatched( mqbnet::ClusterNode* source, bool isFSMWorkflow) { + // executed by *QUEUE_DISPATCHER* thread with the specified 'partitionId' + // PRECONDITIONS - BSLS_ASSERT_SAFE(fs); + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); BSLS_ASSERT_SAFE(pinfo); BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(bmqp_ctrlmsg::PrimaryStatus::E_UNDEFINED != @@ -3360,8 +3430,10 @@ void StorageUtil::processReplicaStatusAdvisoryDispatched( mqbnet::ClusterNode* source, bmqp_ctrlmsg::NodeStatus::Value status) { + // executed by *QUEUE_DISPATCHER* thread with the specified 'partitionId' + // PRECONDITIONS - BSLS_ASSERT_SAFE(fs); + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); BSLS_ASSERT_SAFE(0 <= partitionId); // If self is *active* primary, force-issue a syncPt. 
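'StorageUtil::stop' above enqueues one close-partition job per FileStore and blocks on a latch sized to the number of partitions ('latch.wait()' on the caller side), while each 'StorageUtil::shutdown' invocation closes its partition in the queue dispatcher thread and then calls 'latch->arrive()'. A minimal stand-alone sketch of that fan-out/fan-in shape, using C++20 std::latch and std::thread in place of bslmt::Latch and the dispatcher threads (names are illustrative only):

#include <cstdio>
#include <latch>
#include <thread>
#include <vector>

int main()
{
    const int  numPartitions = 4;
    std::latch latch(numPartitions);  // analogous to bslmt::Latch(fileStores->size())

    std::vector<std::thread> partitionThreads;
    for (int partitionId = 0; partitionId < numPartitions; ++partitionId) {
        partitionThreads.emplace_back([partitionId, &latch]() {
            // ... close this partition's file store here ...
            std::printf("Partition [%d] closed.\n", partitionId);
            latch.count_down();  // analogous to latch->arrive()
        });
    }

    latch.wait();  // block until every partition has shut down
    std::printf("Shutdown complete for all partitions.\n");

    for (auto& t : partitionThreads) {
        t.join();
    }
    return 0;
}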
@@ -3522,9 +3594,9 @@ void StorageUtil::purgeDomainDispatched( bsl::vector domainStorages; { - const StorageSpMap& partitionStorages = (*storageMapVec)[partitionId]; - bslmt::LockGuard guard(storagesLock); // LOCK + + const StorageSpMap& partitionStorages = (*storageMapVec)[partitionId]; for (StorageSpMap::const_iterator it = partitionStorages.cbegin(); it != partitionStorages.cend(); it++) { @@ -3561,9 +3633,8 @@ void StorageUtil::purgeQueueDispatched( // executed by *QUEUE_DISPATCHER* thread with the specified 'fileStore' // PRECONDITIONS - BSLS_ASSERT_SAFE(fileStore->inDispatcherThread()); + BSLS_ASSERT_SAFE(fileStore && fileStore->inDispatcherThread()); BSLS_ASSERT_SAFE(purgedQueueResult); - BSLS_ASSERT_SAFE(fileStore); BSLS_ASSERT_SAFE(storage); BSLS_ASSERT_SAFE(fileStore->config().partitionId() == storage->partitionId()); @@ -3653,6 +3724,8 @@ int StorageUtil::processCommand(mqbcmd::StorageResult* result, const bslstl::StringRef& partitionLocation, bslma::Allocator* allocator) { + // executed by cluster *DISPATCHER* thread + // PRECONDITIONS BSLS_ASSERT_SAFE(result); BSLS_ASSERT_SAFE(fileStores); @@ -3859,11 +3932,11 @@ void StorageUtil::forceIssueAdvisoryAndSyncPt(mqbc::ClusterData* clusterData, mqbnet::ClusterNode* destination, const PartitionInfo& pinfo) { - // executed by *DISPATCHER* thread + // executed by *QUEUE_DISPATCHER* thread with the specified 'partitionId' // PRECONDITIONS + BSLS_ASSERT_SAFE(fs && fs->inDispatcherThread()); BSLS_ASSERT_SAFE(clusterData); - BSLS_ASSERT_SAFE(fs); BSLS_ASSERT_SAFE(pinfo.primary() == clusterData->membership().selfNode()); BSLS_ASSERT_SAFE(pinfo.primary() == fs->primaryNode()); BSLS_ASSERT_SAFE(fs->primaryLeaseId() == pinfo.primaryLeaseId()); diff --git a/src/groups/mqb/mqbc/mqbc_storageutil.h b/src/groups/mqb/mqbc/mqbc_storageutil.h index 8ab1c0a328..f98f91ff3c 100644 --- a/src/groups/mqb/mqbc/mqbc_storageutil.h +++ b/src/groups/mqb/mqbc/mqbc_storageutil.h @@ -191,7 +191,7 @@ struct StorageUtil { /// `appKeysLock` is provided, lock it. Return true if there are any /// added or removed appId/key pairs, false otherwise. /// - /// THREAD: Executed by the cluster-dispatcher thread. + /// THREAD: Executed by the cluster dispatcher thread. static bool loadUpdatedAppIdKeyPairs(AppIdKeyPairs* addedAppIdKeyPairs, AppIdKeyPairs* removedAppIdKeyPairs, @@ -434,6 +434,8 @@ struct StorageUtil { /// Validate the disk space required for storing partitions as per the /// specified `config` for the specified `clusterData` by comparing it /// with the specified `minDiskSpace`. + /// + /// THREAD: Executed by the cluster dispatcher thread. static int validateDiskSpace(const mqbcfg::PartitionConfig& config, const mqbc::ClusterData& clusterData, const bsls::Types::Uint64& minDiskSpace); @@ -454,26 +456,32 @@ struct StorageUtil { /// to skip alarming if 'source' is not active primary. Use the /// specified 'isFSMWorkflow' flag to help with validation. Return true /// if valid, false otherwise. + /// + /// THREAD: Executed by the Queue's dispatcher thread for the specified + /// `partitionId` or by the cluster dispatcher thread. 
static bool validateStorageEvent(const bmqp::Event& event, int partitionId, const mqbnet::ClusterNode* source, const mqbnet::ClusterNode* primary, bmqp_ctrlmsg::PrimaryStatus::Value status, - const mqbc::ClusterData& clusterData, - bool skipAlarm, - bool isFSMWorkflow); + const bsl::string& clusterDescription, + bool skipAlarm, + bool isFSMWorkflow); /// Validate that every partition sync message in the specified `event` /// have the same specified `partitionId`, and that ether self or the /// specified event `source` is the primary node in the specified - /// `clusterState`. Use the specified `clusterData` and `isFSMWorkflow` + /// `partitionInfo`. Use the specified `clusterData` and `isFSMWorkflow` /// flag to help with validation. Return true if valid, false /// otherwise. + /// + /// THREAD: Executed by the Queue's dispatcher thread for the specified + /// `partitionId` or by the cluster dispatcher thread. static bool validatePartitionSyncEvent(const bmqp::Event& event, int partitionId, const mqbnet::ClusterNode* source, - const mqbc::ClusterState& clusterState, + const PartitionInfo& partitionInfo, const mqbc::ClusterData& clusterData, bool isFSMWorkflow); @@ -485,6 +493,8 @@ struct StorageUtil { /// 'allocators', any errors are captured using the specified /// 'errorDescription' and there are specified 'recoveredQueuesCb', /// optionally specified 'queueCreationCb' and 'queueDeletionCb'. + /// + /// THREAD: Executed by the cluster *DISPATCHER* thread. static int assignPartitionDispatcherThreads( bdlmt::FixedThreadPool* threadPool, mqbc::ClusterData* clusterData, @@ -504,15 +514,15 @@ struct StorageUtil { /// Clear the specified `primary` of the specified `partitionId` from /// the specified `fs` and `partitionInfo`, using the specified - /// `clusterData`. Behavior is undefined unless the specified + /// `clusterDescription`. Behavior is undefined unless the specified /// `partitionId` is in range and the specified `primary` is not null. /// - /// THREAD: Executed by the dispatcher thread for the specified - /// `partitionId`. - static void clearPrimaryForPartition(mqbs::FileStore* fs, - PartitionInfo* partitionInfo, - const ClusterData& clusterData, - int partitionId, + /// THREAD: Executed by the queue dispatcher thread associated with + /// 'partitionId'. + static void clearPrimaryForPartition(mqbs::FileStore* fs, + PartitionInfo* partitionInfo, + const bsl::string& clusterDescription, + int partitionId, mqbnet::ClusterNode* primary); /// Find the minimum required disk space using the specified `config`. @@ -523,13 +533,19 @@ struct StorageUtil { /// load this info into the specified `partitionInfo`. Then, broadcast /// a primary status advisory to peers using the specified /// `clusterData`. + /// + /// THREAD: Executed by the queue dispatcher thread associated with + /// 'partitionId'. static void transitionToActivePrimary(PartitionInfo* partitionInfo, mqbc::ClusterData* clusterData, int partitionId); - /// Stop all the underlying partitions using the specified `clusterData` - /// by scheduling execution of the specified `shutdownCb` for each - /// FileStore in the specified `fileStores` + /// Callback executed after primary sync for the specified 'partitionId' + /// is complete with the specified 'status'. Use the specified 'fs', + /// 'pinfo', 'clusterData' and 'partitionPrimaryStatusCb'. + /// + /// THREAD: Executed by the queue dispatcher thread associated with + /// 'partitionId'. 
static void onPartitionPrimarySync( mqbs::FileStore* fs, PartitionInfo* pinfo, @@ -537,13 +553,13 @@ struct StorageUtil { const PartitionPrimaryStatusCb& partitionPrimaryStatusCb, int partitionId, int status); - // Callback executed after primary sync for the specified 'partitionId' - // is complete with the specified 'status'. Use the specified 'fs', - // 'pinfo', 'clusterData' and 'partitionPrimaryStatusCb'. - // - // THREAD: Executed by the dispatcher thread associated with - // 'partitionId'. + /// Callback executed when the partition having the specified + /// 'partitionId' has performed recovery and recovered file-backed + /// queues and their virtual storages in the specified + /// 'queueKeyInfoMap'. + /// + /// THREAD: Executed by the dispatcher thread of the partition. static void recoveredQueuesCb(StorageSpMap* storageMap, bslmt::Mutex* storagesLock, @@ -551,43 +567,49 @@ struct StorageUtil { AppKeys* appKeys, bslmt::Mutex* appKeysLock, mqbi::DomainFactory* domainFactory, + bslmt::Mutex* unrecognizedDomainsLock, DomainQueueMessagesCountMap* unrecognizedDomains, const bsl::string& clusterDescription, int partitionId, const QueueKeyInfoMap& queueKeyInfoMap, bool isCSLMode); - // Callback executed when the partition having the specified - // 'partitionId' has performed recovery and recovered file-backed - // queues and their virtual storages in the specified - // 'queueKeyInfoMap'. - // - // THREAD: Executed by the dispatcher thread of the partition. + /// Print statistics regarding the specified 'unrecognizedDomains', + /// protected by the specified 'unrecognizedDomainsLock', encountered + /// during recovery of the specified 'clusterDescription', if any. static void dumpUnknownRecoveredDomains( const bsl::string& clusterDescription, + bslmt::Mutex* unrecognizedDomainsLock, const DomainQueueMessagesCountMaps& unrecognizedDomains); - // Print statistics regarding the specified 'unrecognizedDomains' - // encountered during recovery of the specified 'clusterDescription', - // if any. + /// GC the queues of the specified 'unrecognizedDomains', protected by the + /// specified 'unrecognizedDomainsLock', from the specified 'fileStores', + /// if any. static void gcUnrecognizedDomainQueues( FileStores* fileStores, + bslmt::Mutex* unrecognizedDomainsLock, const DomainQueueMessagesCountMaps& unrecognizedDomains); - // GC the queues of the specified 'unrecognizedDomains' from the - // specified 'fileStores', if any. - static void stop(ClusterData* clusterData, - FileStores* fileStores, - const ShutdownCb& shutdownCb); + /// Stop all the underlying partitions by scheduling execution of the + /// specified `shutdownCb` for each FileStore in the specified + /// `fileStores`. Use the specified `clusterDescription` for logging. + /// + /// THREAD: Executed by cluster *DISPATCHER* thread. + static void stop(FileStores* fileStores, + const bsl::string& clusterDescription, + const ShutdownCb& shutdownCb); /// Shutdown the underlying partition associated with the specified /// `partitionId` from the specified `fileStores` by using the specified /// `latch`, and the specified `clusterConfig`. The cluster information - /// is printed using the specified `clusterData`. + /// is printed using the specified `clusterDescription`. + // + // THREAD: executed by *QUEUE_DISPATCHER* thread with the specified + // 'partitionId'. 
static void shutdown(int partitionId, bslmt::Latch* latch, FileStores* fileStores, - ClusterData* clusterData, + const bsl::string& clusterDescription, const mqbcfg::ClusterDefinition& clusterConfig); /// Return a unique appKey for the specified `appId` for a queue, and @@ -720,6 +742,8 @@ struct StorageUtil { const int maxDeliveryAttempts, const mqbconfm::StorageDefinition& storageDef); + /// THREAD: Executed by the queue dispatcher thread associated with + /// 'partitionId'. static void processPrimaryStatusAdvisoryDispatched( mqbs::FileStore* fs, PartitionInfo* pinfo, @@ -728,6 +752,8 @@ struct StorageUtil { mqbnet::ClusterNode* source, bool isFSMWorkflow); + /// THREAD: Executed by the queue dispatcher thread associated with + /// 'partitionId'. static void processReplicaStatusAdvisoryDispatched( mqbc::ClusterData* clusterData, mqbs::FileStore* fs, @@ -756,6 +782,8 @@ struct StorageUtil { /// was successfully processed, or a non-zero value otherwise. This /// function can be invoked from any thread, and will block until the /// potentially asynchronous operation is complete. + // + /// THREAD: Executed by the cluster-dispatcher thread. static int processCommand(mqbcmd::StorageResult* result, FileStores* fileStores, StorageSpMapVec* storageMapVec, @@ -774,6 +802,9 @@ struct StorageUtil { const bsl::string& domainName, int partitionId); + /// THREAD: Executed by the Queue's dispatcher thread for the partitionId + /// of + /// the specified `fs`. static void forceIssueAdvisoryAndSyncPt(mqbc::ClusterData* clusterData, mqbs::FileStore* fs, mqbnet::ClusterNode* destination, diff --git a/src/groups/mqb/mqbi/mqbi_storagemanager.h b/src/groups/mqb/mqbi/mqbi_storagemanager.h index 135dc34d76..915a061784 100644 --- a/src/groups/mqb/mqbi/mqbi_storagemanager.h +++ b/src/groups/mqb/mqbi/mqbi_storagemanager.h @@ -43,7 +43,6 @@ // BDE #include -#include #include #include #include @@ -428,9 +427,6 @@ class StorageManager : public mqbi::AppKeyGenerator { virtual bool isStorageEmpty(const bmqt::Uri& uri, int partitionId) const = 0; - /// Return the blob buffer factory to use. - virtual bdlbb::BlobBufferFactory* blobBufferFactory() const = 0; - /// Return partition corresponding to the specified `partitionId`. The /// behavior is undefined if `partitionId` does not represent a valid /// partition id. diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp index ce24956fd7..7679c6b84e 100644 --- a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp @@ -289,11 +289,6 @@ bool StorageManager::isStorageEmpty( return true; } -bdlbb::BlobBufferFactory* StorageManager::blobBufferFactory() const -{ - return 0; -} - const mqbs::FileStore& StorageManager::fileStore(BSLS_ANNOTATION_UNUSED int partitionId) const { diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h index 57ffd957be..99c5d93ac7 100644 --- a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h @@ -284,10 +284,6 @@ class StorageManager : public mqbi::StorageManager { virtual bool isStorageEmpty(const bmqt::Uri& uri, int partitionId) const BSLS_KEYWORD_OVERRIDE; - /// Return the blob buffer factory to use. - virtual bdlbb::BlobBufferFactory* - blobBufferFactory() const BSLS_KEYWORD_OVERRIDE; - /// Return partition corresponding to the specified `partitionId`. 
The /// behavior is undefined if `partitionId` does not represent a valid /// partition id. diff --git a/src/groups/mqb/mqbs/mqbs_filestore.cpp b/src/groups/mqb/mqbs/mqbs_filestore.cpp index 00aee21b19..de99589abf 100644 --- a/src/groups/mqb/mqbs/mqbs_filestore.cpp +++ b/src/groups/mqb/mqbs/mqbs_filestore.cpp @@ -275,6 +275,11 @@ void FileStore::cancelUnreceipted(const DataStoreRecordKey& recordKey) int FileStore::openInNonRecoveryMode() { + // executed by the *DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(inDispatcherThread()); + FileSetSp fileSetSp; int rc = create(&fileSetSp); if (0 == rc) { @@ -288,6 +293,11 @@ int FileStore::openInNonRecoveryMode() int FileStore::openInRecoveryMode(bsl::ostream& errorDescription, const QueueKeyInfoMap& queueKeyInfoMap) { + // executed by the *DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(inDispatcherThread()); + enum { rc_NO_FILES_TO_RECOVER = 1 // Reserved rc , @@ -5208,6 +5218,11 @@ FileStore::~FileStore() // MANIPULATORS int FileStore::open(const QueueKeyInfoMap& queueKeyInfoMap) { + // executed by the *DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(inDispatcherThread()); + enum { rc_SUCCESS = 0, rc_NON_RECOVERY_MODE_FAILURE = -1, @@ -6276,6 +6291,10 @@ void FileStore::processStorageEvent(const bsl::shared_ptr& blob, int FileStore::processRecoveryEvent(const bsl::shared_ptr& blob) { + // executed by the *DISPATCHER* thread + + // PRECONDITIONS + BSLS_ASSERT_SAFE(inDispatcherThread()); BSLS_ASSERT_SAFE(blob); enum { @@ -6589,7 +6608,10 @@ int FileStore::issueSyncPoint() void FileStore::setPrimary(mqbnet::ClusterNode* primaryNode, unsigned int primaryLeaseId) { + // executed by the *DISPATCHER* thread + // PRECONDITIONS + BSLS_ASSERT_SAFE(inDispatcherThread()); BSLS_ASSERT_SAFE(0 < primaryLeaseId); BSLS_ASSERT_SAFE(0 != primaryNode); @@ -7296,6 +7318,9 @@ void FileStore::getStorages(StorageList* storages, void FileStore::loadSummary(mqbcmd::FileStore* fileStore) const { + // executed by *QUEUE_DISPATCHER* thread with the specified `fileStore`'s + // partitionId + // PRECONDITIONS BSLS_ASSERT_SAFE(fileStore); diff --git a/src/groups/mqb/mqbs/mqbs_filestore.h b/src/groups/mqb/mqbs/mqbs_filestore.h index b9931b6299..3aeeec1bfb 100644 --- a/src/groups/mqb/mqbs/mqbs_filestore.h +++ b/src/groups/mqb/mqbs/mqbs_filestore.h @@ -77,6 +77,7 @@ #include #include #include +#include #include #include @@ -304,7 +305,7 @@ class FileStore : public DataStore { mutable AliasedBufferDeleterSpPool d_aliasedBufferDeleterSpPool; - volatile bool d_isOpen; + bsls::AtomicBool d_isOpen; // Flag to indicate open/close status // of this instance. @@ -924,6 +925,9 @@ class FileStore : public DataStore { /// Load the summary of this partition to the specified `fileStore` /// object. + /// + /// THREAD: Executed by the queue dispatcher thread associated with the + /// specified `fileStore`'s partitionId. 
void loadSummary(mqbcmd::FileStore* fileStore) const; // ACCESSORS diff --git a/src/groups/mqb/mqbs/mqbs_filestore.t.cpp b/src/groups/mqb/mqbs/mqbs_filestore.t.cpp index 7727254a71..6e4ca278ef 100644 --- a/src/groups/mqb/mqbs/mqbs_filestore.t.cpp +++ b/src/groups/mqb/mqbs/mqbs_filestore.t.cpp @@ -18,6 +18,7 @@ // MQB #include +#include #include #include #include @@ -159,6 +160,7 @@ struct Tester { mqbnet::ClusterNode* d_node_p; mqbs::DataStoreConfig d_dsCfg; bdlmt::FixedThreadPool d_miscWorkThreadPool; + mqbi::DispatcherClientData d_dispatcherClientData; mqbmock::Dispatcher d_dispatcher; // must outlive FileStore bslma::ManagedPtr d_fs_mp; @@ -188,6 +190,7 @@ struct Tester { s_allocator_p)) , d_clusterStats(s_allocator_p) , d_miscWorkThreadPool(1, 1, s_allocator_p) + , d_dispatcherClientData() , d_dispatcher(s_allocator_p) , d_statePool(1024, s_allocator_p) { @@ -253,17 +256,13 @@ struct Tester { bdlf::PlaceHolders::_2, // partitionId bdlf::PlaceHolders::_3, // QueueUri bdlf::PlaceHolders::_4)) // QueueKey - .setRecoveredQueuesCb( - bdlf::BindUtil::bind(&recoveredQueuesCb, - bdlf::PlaceHolders::_1, // partitionId - bdlf::PlaceHolders::_2)); - // queueKeyInfoMap - - // ******* IMPORTANT ******* - // We have not written a mock dispatcher yet. We pass a null - // dispatcher ptr, and rely on the internal implementation of FileStore - // to know that we will be ok. - // ************************* + .setRecoveredQueuesCb(bdlf::BindUtil::bind( + &recoveredQueuesCb, + bdlf::PlaceHolders::_1, // partitionId + bdlf::PlaceHolders::_2)); // queueKeyInfoMap + + d_dispatcherClientData.setDispatcher(&d_dispatcher); + d_dispatcher._setInDispatcherThread(true); d_clusterStats.initialize("testCluster", 1, // numPartitions diff --git a/src/groups/mqb/mqbs/mqbs_storageprintutil.cpp b/src/groups/mqb/mqbs/mqbs_storageprintutil.cpp index 6a8cdda55c..205919e135 100644 --- a/src/groups/mqb/mqbs/mqbs_storageprintutil.cpp +++ b/src/groups/mqb/mqbs/mqbs_storageprintutil.cpp @@ -139,12 +139,12 @@ int StoragePrintUtil::listMessages(mqbcmd::QueueContents* queueContents, } void StoragePrintUtil::printRecoveredStorages( - bsl::ostream& out, - bslmt::Mutex* storagesLock, - const StorageSpMap& storageMap, - int partitionId, - const bsl::string& clusterDescription, - bsls::Types::Int64 recoveryStartTime) + bsl::ostream& out, + bslmt::Mutex* storagesLock, + const StorageSpMap& storageMap, + int partitionId, + const bsl::string& clusterDescription, + const bsls::Types::Int64 recoveryStartTime) { // PRECONDITIONS BSLS_ASSERT_SAFE(storagesLock); diff --git a/src/groups/mqb/mqbs/mqbs_storageprintutil.h b/src/groups/mqb/mqbs/mqbs_storageprintutil.h index 0dd2db4420..0a20a02b5c 100644 --- a/src/groups/mqb/mqbs/mqbs_storageprintutil.h +++ b/src/groups/mqb/mqbs/mqbs_storageprintutil.h @@ -107,12 +107,13 @@ struct StoragePrintUtil { /// the specified `storageMap` belonging to the specified `partitionId`, /// locking the specified `storagesLock` and using the specified /// `clusterDescription` and `recoveryStartTime`. 
- static void printRecoveredStorages(bsl::ostream& out, - bslmt::Mutex* storagesLock, - const StorageSpMap& storageMap, - int partitionId, - const bsl::string& clusterDescription, - bsls::Types::Int64 recoveryStartTime); + static void + printRecoveredStorages(bsl::ostream& out, + bslmt::Mutex* storagesLock, + const StorageSpMap& storageMap, + int partitionId, + const bsl::string& clusterDescription, + const bsls::Types::Int64 recoveryStartTime); /// Print to the specified `out` a summary message upon storage recovery /// completion using the specified `fileStores` and From 075a334a364ea7ab6ce54e461d9dea5de5c1adeb Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Mon, 22 Jul 2024 16:22:20 -0400 Subject: [PATCH 09/15] Apply clang-format Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbblp/mqbblp_storagemanager.h | 3 ++- .../mqb/mqbc/mqbc_clusterstatemanager.cpp | 3 ++- src/groups/mqb/mqbc/mqbc_clusterstatemanager.h | 4 ++-- src/groups/mqb/mqbc/mqbc_clusterstatetable.h | 10 ++++++++-- src/groups/mqb/mqbc/mqbc_storagemanager.cpp | 18 ++++++++++-------- src/groups/mqb/mqbc/mqbc_storagemanager.h | 3 ++- src/groups/mqb/mqbi/mqbi_storagemanager.h | 3 ++- 7 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h index 5e3e4a051e..fdb7f2832a 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h @@ -507,7 +507,8 @@ class StorageManager : public mqbi::StorageManager { /// Initialize the queue key info map based on information in the specified /// `clusterState`. - virtual void initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + virtual void initializeQueueKeyInfoMap( + const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp index 13bb40540c..90b6603693 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp @@ -259,7 +259,8 @@ void ClusterStateManager::do_applyCSLSelf(const ClusterFSMArgsSp& args) d_clusterStateLedger_mp->apply(clusterStateSnapshot); } -void ClusterStateManager::do_initializeQueueKeyInfoMap(const ClusterFSMArgsSp& args) +void ClusterStateManager::do_initializeQueueKeyInfoMap( + const ClusterFSMArgsSp& args) { // executed by the cluster *DISPATCHER* thread diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h index a1fa0c34b7..4ab48ddd74 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.h @@ -194,8 +194,8 @@ class ClusterStateManager virtual void do_applyCSLSelf(const ClusterFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; - virtual void - do_initializeQueueKeyInfoMap(const ClusterFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; + virtual void do_initializeQueueKeyInfoMap(const ClusterFSMArgsSp& args) + BSLS_KEYWORD_OVERRIDE; virtual void do_sendFollowerLSNRequests(const ClusterFSMArgsSp& args) BSLS_KEYWORD_OVERRIDE; diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatetable.h b/src/groups/mqb/mqbc/mqbc_clusterstatetable.h index ead606c0ac..7a26d51d81 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatetable.h +++ b/src/groups/mqb/mqbc/mqbc_clusterstatetable.h @@ -399,7 
+399,10 @@ class ClusterStateTable FOL_CSL_RQST, sendFollowerClusterStateResponse, FOL_HEALING); - CST_CFG(FOL_HEALING, CSL_CMT_SUCCESS, stopWatchDog_initializeQueueKeyInfoMap, FOL_HEALED); + CST_CFG(FOL_HEALING, + CSL_CMT_SUCCESS, + stopWatchDog_initializeQueueKeyInfoMap, + FOL_HEALED); CST_CFG(FOL_HEALING, CSL_CMT_FAIL, triggerWatchDog, UNKNOWN); CST_CFG(FOL_HEALING, RST_UNKNOWN, @@ -513,7 +516,10 @@ class ClusterStateTable REGISTRATION_RQST, storeFollowerLSNs_sendRegistrationResponse, LDR_HEALING_STG2); - CST_CFG(LDR_HEALING_STG2, CSL_CMT_SUCCESS, stopWatchDog_initializeQueueKeyInfoMap, LDR_HEALED); + CST_CFG(LDR_HEALING_STG2, + CSL_CMT_SUCCESS, + stopWatchDog_initializeQueueKeyInfoMap, + LDR_HEALED); CST_CFG(LDR_HEALING_STG2, CSL_CMT_FAIL, triggerWatchDog, UNKNOWN); CST_CFG(LDR_HEALING_STG2, RST_UNKNOWN, diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp index e53e869eb9..5bac241159 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp @@ -2300,8 +2300,8 @@ void StorageManager::do_bufferLiveData(const PartitionFSMArgsSp& args) pinfo.primaryStatus(), d_clusterData_p->identity().description(), skipAlarm, - true)) { // isFSMWorkflow - return; // RETURN + true)) { // isFSMWorkflow + return; // RETURN } d_recoveryManager_mp->bufferStorageEvent(partitionId, @@ -2421,8 +2421,8 @@ void StorageManager::do_processLiveData(const PartitionFSMArgsSp& args) pinfo.primaryStatus(), d_clusterData_p->identity().description(), skipAlarm, - true)) { // isFSMWorkflow - return; // RETURN + true)) { // isFSMWorkflow + return; // RETURN } mqbs::FileStore* fs = d_fileStores[static_cast(partitionId)].get(); @@ -3482,7 +3482,8 @@ void StorageManager::stop() bdlf::PlaceHolders::_2)); // latch } -void StorageManager::initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) +void StorageManager::initializeQueueKeyInfoMap( + const mqbc::ClusterState* clusterState) { // executed by the *CLUSTER DISPATCHER* thread @@ -3495,9 +3496,10 @@ void StorageManager::initializeQueueKeyInfoMap(const mqbc::ClusterState* cluster return; // RETURN } - BSLS_ASSERT_SAFE(bsl::all_of(d_queueKeyInfoMapVec.cbegin(), - d_queueKeyInfoMapVec.cend(), - bdlf::MemFnUtil::memFn(&QueueKeyInfoMap::empty))); + BSLS_ASSERT_SAFE( + bsl::all_of(d_queueKeyInfoMapVec.cbegin(), + d_queueKeyInfoMapVec.cend(), + bdlf::MemFnUtil::memFn(&QueueKeyInfoMap::empty))); // Populate 'd_queueKeyInfoMapVec' from cluster state for (DomainStatesCIter dscit = clusterState->domainStates().cbegin(); diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.h b/src/groups/mqb/mqbc/mqbc_storagemanager.h index 5e4889dca6..e61df3b778 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.h +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.h @@ -808,7 +808,8 @@ class StorageManager /// Initialize the queue key info map based on information in the specified /// `clusterState`. 
- virtual void initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + virtual void initializeQueueKeyInfoMap( + const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the spcified `appIdKeyPairs`, and belonging to diff --git a/src/groups/mqb/mqbi/mqbi_storagemanager.h b/src/groups/mqb/mqbi/mqbi_storagemanager.h index 915a061784..3518fd1d10 100644 --- a/src/groups/mqb/mqbi/mqbi_storagemanager.h +++ b/src/groups/mqb/mqbi/mqbi_storagemanager.h @@ -223,7 +223,8 @@ class StorageManager : public mqbi::AppKeyGenerator { /// Initialize the queue key info map based on information in the specified /// `clusterState`. - virtual void initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) = 0; + virtual void + initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) = 0; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging From d0aa95a2cc589fa97665de5f0ee13a226b6b7604 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Tue, 20 Aug 2024 18:25:44 -0400 Subject: [PATCH 10/15] PR#367: Address feedback Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp | 2 +- src/groups/mqb/mqbblp/mqbblp_storagemanager.h | 5 +++-- src/groups/mqb/mqbc/mqbc_clusterdata.h | 6 +++--- src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp | 4 ++-- src/groups/mqb/mqbc/mqbc_storagemanager.cpp | 15 ++++++++++----- src/groups/mqb/mqbc/mqbc_storagemanager.h | 5 +++-- src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp | 4 ++-- src/groups/mqb/mqbi/mqbi_storagemanager.h | 5 +++-- src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp | 2 +- src/groups/mqb/mqbmock/mqbmock_storagemanager.h | 5 +++-- 10 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp index c52c64e5df..0fc06a1c3c 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp @@ -1525,7 +1525,7 @@ void StorageManager::stop() } void StorageManager::initializeQueueKeyInfoMap( - BSLS_ANNOTATION_UNUSED const mqbc::ClusterState* clusterState) + BSLS_ANNOTATION_UNUSED const mqbc::ClusterState& clusterState) { // executed by cluster *DISPATCHER* thread diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h index fdb7f2832a..2d0144fe59 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.h +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.h @@ -506,9 +506,10 @@ class StorageManager : public mqbi::StorageManager { virtual void stop() BSLS_KEYWORD_OVERRIDE; /// Initialize the queue key info map based on information in the specified - /// `clusterState`. + /// `clusterState`. Note that this method should only be called once; + /// subsequent calls will be ignored. 
virtual void initializeQueueKeyInfoMap( - const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + const mqbc::ClusterState& clusterState) BSLS_KEYWORD_OVERRIDE; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging diff --git a/src/groups/mqb/mqbc/mqbc_clusterdata.h b/src/groups/mqb/mqbc/mqbc_clusterdata.h index bc27d0ad7c..7e59813b3e 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterdata.h +++ b/src/groups/mqb/mqbc/mqbc_clusterdata.h @@ -79,13 +79,13 @@ class ClusterDataIdentity { private: // DATA - const bsl::string d_name; + bsl::string d_name; // Name of the cluster - const bsl::string d_description; + bsl::string d_description; // Description of the cluster - const bmqp_ctrlmsg::ClientIdentity d_identity; + bmqp_ctrlmsg::ClientIdentity d_identity; // Information sent to the primary node of // a queue while sending a clusterOpenQueue // request to that node diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp index 90b6603693..7da446bc27 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp @@ -260,7 +260,7 @@ void ClusterStateManager::do_applyCSLSelf(const ClusterFSMArgsSp& args) } void ClusterStateManager::do_initializeQueueKeyInfoMap( - const ClusterFSMArgsSp& args) + BSLS_ANNOTATION_UNUSED const ClusterFSMArgsSp& args) { // executed by the cluster *DISPATCHER* thread @@ -268,7 +268,7 @@ void ClusterStateManager::do_initializeQueueKeyInfoMap( BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); BSLS_ASSERT_SAFE(d_clusterFSM.isSelfHealed()); - d_storageManager_p->initializeQueueKeyInfoMap(d_state_p); + d_storageManager_p->initializeQueueKeyInfoMap(*d_state_p); } void ClusterStateManager::do_sendFollowerLSNRequests( diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp index 5bac241159..221ba472e3 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp @@ -3483,16 +3483,21 @@ void StorageManager::stop() } void StorageManager::initializeQueueKeyInfoMap( - const mqbc::ClusterState* clusterState) + const mqbc::ClusterState& clusterState) { // executed by the *CLUSTER DISPATCHER* thread // PRECONDITIONS BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(clusterState); if (d_isQueueKeyInfoMapVecInitialized) { - // The queue key info map vec should only be initialized once. + BALL_LOG_WARN << d_clusterData_p->identity().description() + << ": Queue key info map should only be initialized " + << "once, but the initalization method is called more " + << "than once. This can happen if the node goes " + << "back-and-forth between healing and healed FSM " + << "states. 
Please check."; + return; // RETURN } @@ -3502,8 +3507,8 @@ void StorageManager::initializeQueueKeyInfoMap( bdlf::MemFnUtil::memFn(&QueueKeyInfoMap::empty))); // Populate 'd_queueKeyInfoMapVec' from cluster state - for (DomainStatesCIter dscit = clusterState->domainStates().cbegin(); - dscit != clusterState->domainStates().cend(); + for (DomainStatesCIter dscit = clusterState.domainStates().cbegin(); + dscit != clusterState.domainStates().cend(); ++dscit) { for (UriToQueueInfoMapCIter cit = dscit->second->queuesInfo().cbegin(); cit != dscit->second->queuesInfo().cend(); diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.h b/src/groups/mqb/mqbc/mqbc_storagemanager.h index e61df3b778..701dccd8c9 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.h +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.h @@ -807,9 +807,10 @@ class StorageManager virtual void stop() BSLS_KEYWORD_OVERRIDE; /// Initialize the queue key info map based on information in the specified - /// `clusterState`. + /// `clusterState`. Note that this method should only be called once; + /// subsequent calls will be ignored. virtual void initializeQueueKeyInfoMap( - const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + const mqbc::ClusterState& clusterState) BSLS_KEYWORD_OVERRIDE; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the spcified `appIdKeyPairs`, and belonging to diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp index a51101ed0d..c3d1b04901 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp @@ -2630,7 +2630,7 @@ static void test17_replicaHealingReceivesReplicaDataRqstPull() int rc = storageManager.start(errorDescription); BSLS_ASSERT_OPT(rc == 0); - storageManager.initializeQueueKeyInfoMap(&helper.d_cluster_mp->_state()); + storageManager.initializeQueueKeyInfoMap(helper.d_cluster_mp->_state()); mqbs::FileStore& fs = storageManager.fileStore(k_PARTITION_ID); fs.setIgnoreCrc32c(true); @@ -2774,7 +2774,7 @@ static void test18_primaryHealingStage1SelfHighestSendsDataChunks() const int rc = storageManager.start(errorDescription); BSLS_ASSERT_OPT(rc == 0); - storageManager.initializeQueueKeyInfoMap(&helper.d_cluster_mp->_state()); + storageManager.initializeQueueKeyInfoMap(helper.d_cluster_mp->_state()); mqbs::FileStore& fs = storageManager.fileStore(k_PARTITION_ID); fs.setIgnoreCrc32c(true); diff --git a/src/groups/mqb/mqbi/mqbi_storagemanager.h b/src/groups/mqb/mqbi/mqbi_storagemanager.h index 3518fd1d10..a896835115 100644 --- a/src/groups/mqb/mqbi/mqbi_storagemanager.h +++ b/src/groups/mqb/mqbi/mqbi_storagemanager.h @@ -222,9 +222,10 @@ class StorageManager : public mqbi::AppKeyGenerator { virtual void stop() = 0; /// Initialize the queue key info map based on information in the specified - /// `clusterState`. + /// `clusterState`. Note that this method should only be called once; + /// subsequent calls will be ignored. 
virtual void - initializeQueueKeyInfoMap(const mqbc::ClusterState* clusterState) = 0; + initializeQueueKeyInfoMap(const mqbc::ClusterState& clusterState) = 0; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp index 7679c6b84e..9237ee8e2d 100644 --- a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp @@ -52,7 +52,7 @@ void StorageManager::stop() } void StorageManager::initializeQueueKeyInfoMap( - BSLS_ANNOTATION_UNUSED const mqbc::ClusterState* clusterState) + BSLS_ANNOTATION_UNUSED const mqbc::ClusterState& clusterState) { } diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h index 99c5d93ac7..bb525c34bb 100644 --- a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h @@ -66,9 +66,10 @@ class StorageManager : public mqbi::StorageManager { virtual void stop() BSLS_KEYWORD_OVERRIDE; /// Initialize the queue key info map based on information in the specified - /// `clusterState`. + /// `clusterState`. Note that this method should only be called once; + /// subsequent calls will be ignored. virtual void initializeQueueKeyInfoMap( - const mqbc::ClusterState* clusterState) BSLS_KEYWORD_OVERRIDE; + const mqbc::ClusterState& clusterState) BSLS_KEYWORD_OVERRIDE; /// Register a queue with the specified `uri`, `queueKey` and /// `partitionId`, having the specified `appIdKeyPairs`, and belonging From 309c0b852189c4c49dda61f51e48593f5b4f22cf Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 23 Aug 2024 16:22:58 -0400 Subject: [PATCH 11/15] mqbc::ClusterData: Return reference not pointer for non-nullables Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbblp/mqbblp_cluster.cpp | 31 ++++++----- .../mqb/mqbblp/mqbblp_clusterorchestrator.cpp | 20 ++++---- .../mqb/mqbblp/mqbblp_clusterorchestrator.h | 2 +- src/groups/mqb/mqbblp/mqbblp_clusterproxy.cpp | 20 ++++---- .../mqb/mqbblp/mqbblp_clusterqueuehelper.cpp | 19 +++---- .../mqb/mqbblp/mqbblp_clusterstatemanager.cpp | 2 +- .../mqb/mqbblp/mqbblp_clusterstatemanager.h | 2 +- .../mqb/mqbblp/mqbblp_clusterstatemonitor.cpp | 10 ++-- .../mqb/mqbblp/mqbblp_recoverymanager.cpp | 36 +++++++------ .../mqb/mqbblp/mqbblp_storagemanager.cpp | 24 ++++----- src/groups/mqb/mqbc/mqbc_clusterdata.cpp | 4 ++ src/groups/mqb/mqbc/mqbc_clusterdata.h | 48 ++++++++--------- .../mqb/mqbc/mqbc_clusterstatemanager.cpp | 51 +++++++++---------- src/groups/mqb/mqbc/mqbc_clusterutil.cpp | 29 +++++------ .../mqbc/mqbc_incoreclusterstateledger.cpp | 48 ++++++++--------- .../mqb/mqbc/mqbc_incoreclusterstateledger.h | 12 ++--- src/groups/mqb/mqbc/mqbc_recoverymanager.cpp | 3 +- src/groups/mqb/mqbc/mqbc_storagemanager.cpp | 48 ++++++++--------- src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp | 8 +-- src/groups/mqb/mqbc/mqbc_storageutil.cpp | 8 +-- src/groups/mqb/mqbmock/mqbmock_cluster.cpp | 8 ++- src/groups/mqb/mqbmock/mqbmock_cluster.h | 12 +++++ .../mqbmock/mqbmock_clusterstateledger.cpp | 48 ++++++++--------- .../mqb/mqbmock/mqbmock_clusterstateledger.h | 20 ++++---- .../mqb/mqbu/mqbu_messageguidutil.t.cpp | 1 - 25 files changed, 265 insertions(+), 249 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_cluster.cpp b/src/groups/mqb/mqbblp/mqbblp_cluster.cpp index af05409afa..d177fbbada 100644 --- 
a/src/groups/mqb/mqbblp/mqbblp_cluster.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_cluster.cpp @@ -219,12 +219,12 @@ void Cluster::startDispatched(bsl::ostream* errorDescription, int* rc) bdlf::PlaceHolders::_3), // primary leaseId d_clusterData.domainFactory(), dispatcher(), - d_clusterData.miscWorkThreadPool(), + &d_clusterData.miscWorkThreadPool(), storageManagerAllocator)), storageManagerAllocator); // Start the misc work thread pool - *rc = d_clusterData.miscWorkThreadPool()->start(); + *rc = d_clusterData.miscWorkThreadPool().start(); if (*rc != 0) { d_clusterOrchestrator.stop(); *rc = *rc * 10 + rc_MISC_FAILURE; @@ -278,14 +278,14 @@ void Cluster::startDispatched(bsl::ostream* errorDescription, int* rc) d_clusterMonitor.registerObserver(this); // Start a recurring clock for summary print - d_clusterData.scheduler()->scheduleRecurringEvent( + d_clusterData.scheduler().scheduleRecurringEvent( &d_logSummarySchedulerHandle, bsls::TimeInterval(k_LOG_SUMMARY_INTERVAL), bdlf::BindUtil::bind(&Cluster::logSummaryState, this)); // Start a recurring clock for gc'ing expired queues. - d_clusterData.scheduler()->scheduleRecurringEvent( + d_clusterData.scheduler().scheduleRecurringEvent( &d_queueGcSchedulerHandle, bsls::TimeInterval(k_QUEUE_GC_INTERVAL), bdlf::BindUtil::bind(&Cluster::gcExpiredQueues, this)); @@ -314,9 +314,8 @@ void Cluster::stopDispatched() // Cancel recurring events. - d_clusterData.scheduler()->cancelEventAndWait(&d_queueGcSchedulerHandle); - d_clusterData.scheduler()->cancelEventAndWait( - &d_logSummarySchedulerHandle); + d_clusterData.scheduler().cancelEventAndWait(&d_queueGcSchedulerHandle); + d_clusterData.scheduler().cancelEventAndWait(&d_logSummarySchedulerHandle); // NOTE: The scheduler event does a dispatching to execute 'logSummary' // from the scheduler thread to the dispatcher thread, but there is // no race issue here because stop does a double synchronize, so it's @@ -335,7 +334,7 @@ void Cluster::stopDispatched() d_state.unregisterObserver(this); d_clusterData.electorInfo().unregisterObserver(this); - d_clusterData.scheduler()->cancelEventAndWait( + d_clusterData.scheduler().cancelEventAndWait( d_clusterData.electorInfo().leaderSyncEventHandle()); // Ignore rc @@ -347,7 +346,7 @@ void Cluster::stopDispatched() d_clusterOrchestrator.stop(); - d_clusterData.miscWorkThreadPool()->stop(); + d_clusterData.miscWorkThreadPool().stop(); // Notify peers before going down. This should be the last message sent // out. 
@@ -646,7 +645,7 @@ void Cluster::initiateShutdownDispatched(const VoidFunctor& callback) SessionSpVec sessions; for (mqbnet::TransportManagerIterator sessIt( - d_clusterData.transportManager()); + &d_clusterData.transportManager()); sessIt; ++sessIt) { bsl::shared_ptr sessionSp = sessIt.session().lock(); @@ -1025,7 +1024,7 @@ void Cluster::onPutEvent(const mqbi::DispatcherEvent& event) BSLS_ASSERT_SAFE(ns); bmqp::Event rawEvent(realEvent->blob().get(), d_allocator_p); - bmqp::PutMessageIterator putIt(d_clusterData.bufferFactory(), + bmqp::PutMessageIterator putIt(&d_clusterData.bufferFactory(), d_allocator_p); BSLS_ASSERT_SAFE(rawEvent.isPutEvent()); @@ -1149,7 +1148,7 @@ void Cluster::onPutEvent(const mqbi::DispatcherEvent& event) // Retrieve the payload of that message bsl::shared_ptr appDataSp = - d_clusterData.blobSpPool()->getObject(); + d_clusterData.blobSpPool().getObject(); rc = putIt.loadApplicationData(appDataSp.get()); if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(rc != 0)) { BSLS_PERFORMANCEHINT_UNLIKELY_HINT; @@ -1984,7 +1983,7 @@ void Cluster::onRelayPushEvent(const mqbi::DispatcherEvent& event) bmqp::Event rawEvent(realEvent->blob().get(), d_allocator_p); BSLS_ASSERT_SAFE(rawEvent.isPushEvent()); bdlma::LocalSequentialAllocator<1024> lsa(d_allocator_p); - bmqp::PushMessageIterator pushIt(d_clusterData.bufferFactory(), &lsa); + bmqp::PushMessageIterator pushIt(&d_clusterData.bufferFactory(), &lsa); rawEvent.loadPushMessageIterator(&pushIt, false); BSLS_ASSERT_SAFE(pushIt.isValid()); @@ -2041,12 +2040,12 @@ void Cluster::onRelayPushEvent(const mqbi::DispatcherEvent& event) bsl::shared_ptr optionsSp; if (atMostOnce) { // If it's at-most-once delivery, forward the blob too. - appDataSp = d_clusterData.blobSpPool()->getObject(); + appDataSp = d_clusterData.blobSpPool().getObject(); rc = pushIt.loadApplicationData(appDataSp.get()); BSLS_ASSERT_SAFE(rc == 0); } else if (pushIt.hasOptions()) { - optionsSp = d_clusterData.blobSpPool()->getObject(); + optionsSp = d_clusterData.blobSpPool().getObject(); rc = pushIt.loadOptions(optionsSp.get()); BSLS_ASSERT_SAFE(0 == rc); } @@ -3207,7 +3206,7 @@ void Cluster::processEvent(const bmqp::Event& event, { \ mqbi::DispatcherEvent* _evt = dispatcher()->getEvent(this); \ bsl::shared_ptr _blobSp = \ - d_clusterData.blobSpPool()->getObject(); \ + d_clusterData.blobSpPool().getObject(); \ *_blobSp = *(event.blob()); \ (*_evt) \ .setType(T) \ diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp b/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp index de6463212a..e624c17e7d 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp @@ -502,7 +502,7 @@ void ClusterOrchestrator::timerCb() d_clusterData_p->dispatcherClientData().dispatcher()->execute( bdlf::BindUtil::bind(&ClusterOrchestrator::timerCbDispatched, this), - d_clusterData_p->cluster()); + &d_clusterData_p->cluster()); } void ClusterOrchestrator::timerCbDispatched() @@ -511,7 +511,7 @@ void ClusterOrchestrator::timerCbDispatched() // PRECONDITIONS BSLS_ASSERT_SAFE( - dispatcher()->inDispatcherThread(d_clusterData_p->cluster())); + dispatcher()->inDispatcherThread(&d_clusterData_p->cluster())); const bsls::Types::Int64 timer = mwcsys::Time::highResolutionTimer(); @@ -578,7 +578,7 @@ ClusterOrchestrator::ClusterOrchestrator( , d_stateManager_mp( clusterConfig.clusterAttributes().isFSMWorkflow() ? 
static_cast( - new (*d_allocator_p) mqbc::ClusterStateManager( + new(*d_allocator_p) mqbc::ClusterStateManager( clusterConfig, d_cluster_p, d_clusterData_p, @@ -593,11 +593,11 @@ ClusterOrchestrator::ClusterOrchestrator( // Strong d_clusterData_p, clusterState, - d_clusterData_p->bufferFactory())), + &d_clusterData_p->bufferFactory())), k_WATCHDOG_TIMEOUT_DURATION, d_allocators.get("ClusterStateManager"))) : static_cast( - new (*d_allocator_p) ClusterStateManager( + new(*d_allocator_p) ClusterStateManager( clusterConfig, d_cluster_p, d_clusterData_p, @@ -612,7 +612,7 @@ ClusterOrchestrator::ClusterOrchestrator( // Strong d_clusterData_p, clusterState, - d_clusterData_p->bufferFactory())), + &d_clusterData_p->bufferFactory())), d_allocators.get("ClusterStateManager"))), d_allocator_p) , d_queueHelper(d_clusterData_p, @@ -699,7 +699,7 @@ int ClusterOrchestrator::start(bsl::ostream& errorDescription) d_elector_mp.load( new (*d_allocator_p) mqbnet::Elector( d_clusterConfig.elector(), - d_clusterData_p->cluster(), + &d_clusterData_p->cluster(), bdlf::BindUtil::bind(&ClusterOrchestrator::onElectorStateChange, this, _1, // ElectorState @@ -707,7 +707,7 @@ int ClusterOrchestrator::start(bsl::ostream& errorDescription) _3, // LeaderNodeId _4), // Term electorTerm, - d_clusterData_p->bufferFactory(), + &d_clusterData_p->bufferFactory(), d_allocator_p), d_allocator_p); @@ -720,7 +720,7 @@ int ClusterOrchestrator::start(bsl::ostream& errorDescription) bsls::TimeInterval interval; interval.setTotalMilliseconds( d_clusterConfig.queueOperations().consumptionMonitorPeriodMs()); - d_clusterData_p->scheduler()->scheduleRecurringEvent( + d_clusterData_p->scheduler().scheduleRecurringEvent( &d_consumptionMonitorEventHandle, interval, bdlf::BindUtil::bind(&ClusterOrchestrator::timerCb, this)); @@ -740,7 +740,7 @@ void ClusterOrchestrator::stop() d_isStarted = false; BSLS_ASSERT_SAFE(d_clusterData_p); - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &d_consumptionMonitorEventHandle); d_stateManager_mp->stop(); diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.h b/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.h index 05c8203e76..c649b62a2c 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.h +++ b/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.h @@ -559,7 +559,7 @@ inline mqbi::Dispatcher* ClusterOrchestrator::dispatcher() // PRIVATE ACCESSORS inline bool ClusterOrchestrator::isLocal() const { - return d_clusterData_p->cluster()->isLocal(); + return d_clusterData_p->cluster().isLocal(); } // MANIPULATORS diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterproxy.cpp b/src/groups/mqb/mqbblp/mqbblp_clusterproxy.cpp index 4bf68fe995..c44616d0ac 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterproxy.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_clusterproxy.cpp @@ -135,7 +135,7 @@ void ClusterProxy::startDispatched() // and all session to build initial state and then schedule a refresh to // find active node if there is none after processing all pending events. - d_activeNodeManager.initialize(d_clusterData.transportManager()); + d_activeNodeManager.initialize(&d_clusterData.transportManager()); // Ready to read. 
d_clusterData.membership().netCluster()->enableRead(); @@ -150,7 +150,7 @@ void ClusterProxy::startDispatched() bsls::TimeInterval interval = mwcsys::Time::nowMonotonicClock() + bsls::TimeInterval( k_ACTIVE_NODE_INITIAL_WAIT); - d_clusterData.scheduler()->scheduleEvent( + d_clusterData.scheduler().scheduleEvent( &d_activeNodeLookupEventHandle, interval, bdlf::BindUtil::bind(&ClusterProxy::onActiveNodeLookupTimerExpired, @@ -178,7 +178,7 @@ void ClusterProxy::initiateShutdownDispatched(const VoidFunctor& callback) clusterProxyConfig()->queueOperations().shutdownTimeoutMs()); for (mqbnet::TransportManagerIterator sessIt( - d_clusterData.transportManager()); + &d_clusterData.transportManager()); sessIt; ++sessIt) { bsl::shared_ptr sessionSp = sessIt.session().lock(); @@ -242,7 +242,7 @@ void ClusterProxy::stopDispatched() // Cancel scheduler event if (d_activeNodeLookupEventHandle) { - d_clusterData.scheduler()->cancelEventAndWait( + d_clusterData.scheduler().cancelEventAndWait( &d_activeNodeLookupEventHandle); } @@ -336,7 +336,7 @@ void ClusterProxy::processActiveNodeManagerResult( if (result & mqbnet::ClusterActiveNodeManager::e_NEW_ACTIVE) { // Cancel the scheduler event, if any. if (d_activeNodeLookupEventHandle) { - d_clusterData.scheduler()->cancelEvent( + d_clusterData.scheduler().cancelEvent( &d_activeNodeLookupEventHandle); d_activeNodeManager.enableExtendedSelection(); } @@ -435,7 +435,7 @@ void ClusterProxy::onPushEvent(const mqbi::DispatcherPushEvent& event) bmqp::Event rawEvent(event.blob().get(), d_allocator_p); bdlma::LocalSequentialAllocator<1024> lsa(d_allocator_p); - bmqp::PushMessageIterator iter(d_clusterData.bufferFactory(), &lsa); + bmqp::PushMessageIterator iter(&d_clusterData.bufferFactory(), &lsa); rawEvent.loadPushMessageIterator(&iter, false); BSLS_ASSERT_SAFE(iter.isValid()); @@ -450,13 +450,13 @@ void ClusterProxy::onPushEvent(const mqbi::DispatcherPushEvent& event) } bsl::shared_ptr appDataSp = - d_clusterData.blobSpPool()->getObject(); + d_clusterData.blobSpPool().getObject(); rc = iter.loadApplicationData(appDataSp.get()); BSLS_ASSERT_SAFE(rc == 0); bsl::shared_ptr optionsSp; if (iter.hasOptions()) { - optionsSp = d_clusterData.blobSpPool()->getObject(); + optionsSp = d_clusterData.blobSpPool().getObject(); rc = iter.loadOptions(optionsSp.get()); BSLS_ASSERT_SAFE(0 == rc); } @@ -717,7 +717,7 @@ void ClusterProxy::processEvent(const bmqp::Event& event, case bmqp::EventType::e_PUSH: { mqbi::DispatcherEvent* dispEvent = dispatcher()->getEvent(this); bsl::shared_ptr blobSp = - d_clusterData.blobSpPool()->getObject(); + d_clusterData.blobSpPool().getObject(); *blobSp = *(event.blob()); (*dispEvent) .setType(mqbi::DispatcherEventType::e_PUSH) @@ -728,7 +728,7 @@ void ClusterProxy::processEvent(const bmqp::Event& event, case bmqp::EventType::e_ACK: { mqbi::DispatcherEvent* dispEvent = dispatcher()->getEvent(this); bsl::shared_ptr blobSp = - d_clusterData.blobSpPool()->getObject(); + d_clusterData.blobSpPool().getObject(); *blobSp = *(event.blob()); (*dispEvent) .setType(mqbi::DispatcherEventType::e_ACK) diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterqueuehelper.cpp b/src/groups/mqb/mqbblp/mqbblp_clusterqueuehelper.cpp index 9d3999b7e0..94870c7c47 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterqueuehelper.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_clusterqueuehelper.cpp @@ -1510,7 +1510,7 @@ void ClusterQueueHelper::onReopenQueueResponse( after.addMilliseconds(d_clusterData_p->clusterConfig() .queueOperations() .reopenRetryIntervalMs()); - 
d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( after, bdlf::BindUtil::bind(&ClusterQueueHelper::onReopenQueueRetry, this, @@ -2102,9 +2102,9 @@ bsl::shared_ptr ClusterQueueHelper::createQueueFactory( context.d_queueContext_p->partitionId(), context.d_domain_p, d_storageManager_p, - d_clusterData_p->bufferFactory(), - d_clusterData_p->scheduler(), - d_clusterData_p->miscWorkThreadPool(), + &d_clusterData_p->bufferFactory(), + &d_clusterData_p->scheduler(), + &d_clusterData_p->miscWorkThreadPool(), openQueueResponse.routingConfiguration(), d_allocator_p), d_allocator_p); @@ -2113,7 +2113,7 @@ bsl::shared_ptr ClusterQueueHelper::createQueueFactory( queueSp->createRemote( openQueueResponse.deduplicationTimeMs(), d_clusterData_p->clusterConfig().queueOperations().ackWindowSize(), - d_clusterData_p->stateSpPool()); + &d_clusterData_p->stateSpPool()); if (context.d_domain_p->registerQueue(errorDescription, queueSp) != 0) { @@ -3360,7 +3360,7 @@ bool ClusterQueueHelper::subtractCounters( << "] on close-queue request for queue [" << handleParameters.uri() << "]."; if (itSubStream->value().d_timer) { - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &itSubStream->value().d_timer); } qinfo->d_subQueueIds.erase(itSubStream); @@ -3878,7 +3878,7 @@ void ClusterQueueHelper::cancelAllTimers(QueueContext* queueContext) << ", subStream: " << iter->appId() << "(" << iter->subId() << ")"; - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &iter->value().d_timer); } } @@ -4477,7 +4477,7 @@ ClusterQueueHelper::ClusterQueueHelper( , d_nextQueueId(0) , d_clusterData_p(clusterData) , d_clusterState_p(clusterState) -, d_cluster_p(clusterData->cluster()) +, d_cluster_p(&clusterData->cluster()) , d_clusterStateManager_p(clusterStateManager) , d_storageManager_p(0) , d_queues(allocator) @@ -4929,6 +4929,7 @@ void ClusterQueueHelper::processPeerOpenQueueRequest( return; // RETURN } + BSLS_ASSERT_SAFE(d_clusterData_p->domainFactory()); d_clusterData_p->domainFactory()->createDomain( bmqt::Uri(handleParams.uri()).qualifiedDomain(), bdlf::BindUtil::bind(&ClusterQueueHelper::onGetDomain, @@ -5760,7 +5761,7 @@ void ClusterQueueHelper::waitForUnconfirmedDispatched( if (subStreamIt != subQueueIds.end()) { subStreamIt->value().d_timer.release(); - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( &subStreamIt->value().d_timer, t, bdlf::BindUtil::bind(&ClusterQueueHelper::checkUnconfirmed, diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.cpp index cba1a4078c..7f24cf09be 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.cpp @@ -1353,7 +1353,7 @@ void ClusterStateManager::initiateLeaderSync(bool wait) bsls::TimeInterval after(mwcsys::Time::nowMonotonicClock()); after.addMilliseconds(leaderSyncDelayMs); - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( d_clusterData_p->electorInfo().leaderSyncEventHandle(), after, bdlf::BindUtil::bind(&ClusterStateManager::leaderSyncCb, diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.h b/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.h index b862e03e5f..771a704ded 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.h +++ b/src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.h @@ -633,7 +633,7 @@ inline 
mqbi::Dispatcher* ClusterStateManager::dispatcher() // PRIVATE ACCESSORS inline bool ClusterStateManager::isLocal() const { - return d_clusterData_p->cluster()->isLocal(); + return d_clusterData_p->cluster().isLocal(); } inline const mqbi::Dispatcher* ClusterStateManager::dispatcher() const diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterstatemonitor.cpp b/src/groups/mqb/mqbblp/mqbblp_clusterstatemonitor.cpp index 11e91a2124..5514548c25 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterstatemonitor.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_clusterstatemonitor.cpp @@ -318,7 +318,7 @@ void ClusterStateMonitor::verifyAllStatesDispatched() // failover state { - status = !d_clusterData_p->cluster()->isFailoverInProgress(); + status = !d_clusterData_p->cluster().isFailoverInProgress(); isCurrentlyHealthy = isCurrentlyHealthy && status; stateTransition = checkAndUpdateState(&d_failoverState, status, now); shouldAlarm = shouldAlarm || (stateTransition == e_BAD); @@ -354,7 +354,7 @@ mqbi::Dispatcher* ClusterStateMonitor::dispatcher() mqbi::DispatcherClient* ClusterStateMonitor::dispatcherClient() { - return d_clusterData_p->cluster(); + return &d_clusterData_p->cluster(); } void ClusterStateMonitor::onMonitorStateChange(const StateType& state) @@ -370,7 +370,7 @@ void ClusterStateMonitor::onMonitorStateChange(const StateType& state) os << "'" << d_clusterData_p->identity().name() << "' has been in " << "invalid state above the threshold amount of time.\n"; // Log only a summary in the alarm - d_clusterData_p->cluster()->printClusterStateSummary(os, 0, 4); + d_clusterData_p->cluster().printClusterStateSummary(os, 0, 4); BALL_LOG_INFO << os.str(); } break; // BREAK case ClusterStateMonitor::e_ALARMING: { @@ -380,7 +380,7 @@ void ClusterStateMonitor::onMonitorStateChange(const StateType& state) // Log the entire cluster state in the alarm mqbcmd::Result result; mqbcmd::ClusterResult clusterResult; - d_clusterData_p->cluster()->loadClusterStatus(&clusterResult); + d_clusterData_p->cluster().loadClusterStatus(&clusterResult); if (clusterResult.isClusterStatusValue()) { result.makeClusterStatus(clusterResult.clusterStatus()); } @@ -419,7 +419,7 @@ ClusterStateMonitor::ClusterStateMonitor( , d_nodeStates(allocator) , d_partitionStates(allocator) , d_failoverState() -, d_scheduler_p(clusterData->scheduler()) +, d_scheduler_p(&clusterData->scheduler()) , d_eventHandle() , d_clusterData_p(clusterData) , d_clusterState_p(clusterState) diff --git a/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp b/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp index cb694af382..56e664a84f 100644 --- a/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp @@ -761,7 +761,7 @@ void RecoveryManager::sendStorageSyncRequesterHelper(RecoveryContext* context, timeoutMs.setTotalMilliseconds(d_clusterConfig.partitionConfig() .syncConfig() .storageSyncReqTimeoutMs()); - bmqt::GenericResult::Enum status = d_clusterData_p->cluster()->sendRequest( + bmqt::GenericResult::Enum status = d_clusterData_p->cluster().sendRequest( request, context->recoveryPeer(), timeoutMs); @@ -796,7 +796,7 @@ void RecoveryManager::sendStorageSyncRequesterHelper(RecoveryContext* context, .startupRecoveryMaxDurationMs()); BSLS_ASSERT_SAFE(!context->recoveryStatusCheckHandle()); - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( &(context->recoveryStatusCheckHandle()), after, bdlf::BindUtil::bind(&RecoveryManager::recoveryStatusCb, @@ -1338,9 +1338,9 @@ void 
RecoveryManager::onPartitionRecoveryStatus(int partitionId, int status) RecoveryContext& recoveryCtx = d_recoveryContexts[partitionId]; BSLS_ASSERT_SAFE(recoveryCtx.inRecovery()); - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &recoveryCtx.recoveryStartupWaitHandle()); - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &recoveryCtx.recoveryStatusCheckHandle()); // Close all files if they are open and inform storage manager. @@ -1419,7 +1419,7 @@ void RecoveryManager::onPartitionPrimarySyncStatus(int partitionId, int status) PrimarySyncContext& primarySyncCtx = d_primarySyncContexts[partitionId]; BSLS_ASSERT_SAFE(primarySyncCtx.primarySyncInProgress()); - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &primarySyncCtx.primarySyncStatusEventHandle()); primarySyncCtx.partitionPrimarySyncCb()(partitionId, status); @@ -1524,7 +1524,7 @@ int RecoveryManager::sendFile(RequestContext* context, unsigned int sequenceNumber = 0; bsls::Types::Uint64 currOffset = beginOffset; - bmqp::RecoveryEventBuilder builder(d_clusterData_p->bufferFactory(), + bmqp::RecoveryEventBuilder builder(&d_clusterData_p->bufferFactory(), d_allocator_p); while ((currOffset + chunkSize) < endOffset) { @@ -1709,7 +1709,7 @@ int RecoveryManager::replayPartition( bmqp::StorageEventBuilder builder(mqbs::FileStoreProtocol::k_VERSION, bmqp::EventType::e_PARTITION_SYNC, - d_clusterData_p->bufferFactory(), + &d_clusterData_p->bufferFactory(), d_allocator_p); // Note that partition has to be replayed from the record *after* @@ -2615,10 +2615,8 @@ void RecoveryManager::onPartitionSyncStateQueryResponseDispatched( timeoutMs.setTotalMilliseconds(d_clusterConfig.partitionConfig() .syncConfig() .partitionSyncDataReqTimeoutMs()); - bmqt::GenericResult::Enum status = d_clusterData_p->cluster()->sendRequest( - request, - maxSeqNode, - timeoutMs); + bmqt::GenericResult::Enum status = + d_clusterData_p->cluster().sendRequest(request, maxSeqNode, timeoutMs); if (bmqt::GenericResult::e_SUCCESS != status) { // Request failed to encode/be sent; process error handling (note that @@ -2925,7 +2923,7 @@ void RecoveryManager::startRecovery( recoveryCtx.setRecoveryStatus(true); - if (d_clusterData_p->cluster()->isLocal()) { + if (d_clusterData_p->cluster().isLocal()) { onPartitionRecoveryStatus(partitionId, 0 /* status */); return; // RETURN } @@ -2958,7 +2956,7 @@ void RecoveryManager::startRecovery( bsls::TimeInterval after(mwcsys::Time::nowMonotonicClock()); after.addMilliseconds(startupWaitMs); - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( &recoveryCtx.recoveryStartupWaitHandle(), after, bdlf::BindUtil::bind(&RecoveryManager::recoveryStartupWaitCb, @@ -3275,7 +3273,7 @@ void RecoveryManager::processStorageEvent( bmqp::StorageEventBuilder seb(mqbs::FileStoreProtocol::k_VERSION, bmqp::EventType::e_STORAGE, - d_clusterData_p->bufferFactory(), + &d_clusterData_p->bufferFactory(), d_allocator_p); while (1 == iter.next()) { @@ -3303,7 +3301,7 @@ void RecoveryManager::processStorageEvent( if (0 < seb.messageCount()) { bsl::shared_ptr blobSp; blobSp.createInplace(d_allocator_p, - d_clusterData_p->bufferFactory(), + &d_clusterData_p->bufferFactory(), d_allocator_p); *blobSp = seb.blob(); recoveryCtx.addStorageEvent(blobSp); @@ -3615,7 +3613,7 @@ void RecoveryManager::processShutdownEvent(int partitionId) << "]: received shutdown event."; RecoveryContext& 
recoveryCtx = d_recoveryContexts[partitionId]; - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &recoveryCtx.recoveryStartupWaitHandle()); if (isRecoveryInProgress(partitionId)) { // Recovery is in progress. Cancel it. @@ -3623,7 +3621,7 @@ void RecoveryManager::processShutdownEvent(int partitionId) } PrimarySyncContext& primarySyncCtx = d_primarySyncContexts[partitionId]; - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &primarySyncCtx.primarySyncStatusEventHandle()); } @@ -4297,7 +4295,7 @@ void RecoveryManager::startPartitionPrimarySync( primarySyncCtx.setSelfLastSyncPtOffsetPair(fs->syncPoints().back()); } - if (d_clusterData_p->cluster()->isLocal()) { + if (d_clusterData_p->cluster().isLocal()) { BSLS_ASSERT_SAFE(peers.empty()); onPartitionPrimarySyncStatus(pid, 0 /* status */); return; // RETURN @@ -4321,7 +4319,7 @@ void RecoveryManager::startPartitionPrimarySync( after.addMilliseconds(d_clusterConfig.partitionConfig() .syncConfig() .masterSyncMaxDurationMs()); - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( &primarySyncCtx.primarySyncStatusEventHandle(), after, bdlf::BindUtil::bind(&RecoveryManager::primarySyncStatusCb, diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp index 0fc06a1c3c..aede9e7376 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp @@ -323,7 +323,7 @@ void StorageManager::onPartitionRecovery( // partition's dispatcher thread for GC'ing expired messages as well as // cleaning history. - d_clusterData_p->scheduler()->scheduleRecurringEvent( + d_clusterData_p->scheduler().scheduleRecurringEvent( &d_gcMessagesEventHandle, bsls::TimeInterval(k_GC_MESSAGES_INTERVAL_SECONDS), bdlf::BindUtil::bind(&StorageManager::forceFlushFileStores, this)); @@ -987,7 +987,7 @@ StorageManager::StorageManager( , d_lowDiskspaceWarning(false) , d_unrecognizedDomainsLock() , d_unrecognizedDomains(allocator) -, d_blobSpPool_p(clusterData->blobSpPool()) +, d_blobSpPool_p(&clusterData->blobSpPool()) , d_domainFactory_p(domainFactory) , d_dispatcher_p(dispatcher) , d_clusterConfig(clusterConfig) @@ -1380,7 +1380,7 @@ int StorageManager::start(bsl::ostream& errorDescription) // Schedule a periodic event (every minute) which monitors storage (disk // space, archive clean up, etc). 
- d_clusterData_p->scheduler()->scheduleRecurringEvent( + d_clusterData_p->scheduler().scheduleRecurringEvent( &d_storageMonitorEventHandle, bsls::TimeInterval(bdlt::TimeUnitRatio::k_SECONDS_PER_MINUTE), bdlf::BindUtil::bind(&mqbc::StorageUtil::storageMonitorCb, @@ -1509,9 +1509,9 @@ void StorageManager::stop() d_isStarted = false; - d_clusterData_p->scheduler()->cancelEventAndWait(&d_gcMessagesEventHandle); + d_clusterData_p->scheduler().cancelEventAndWait(&d_gcMessagesEventHandle); - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait( &d_storageMonitorEventHandle); d_recoveryManager_mp->stop(); @@ -1542,7 +1542,7 @@ void StorageManager::setPrimaryForPartition(int partitionId, // executed by cluster *DISPATCHER* thread BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(0 <= partitionId); BSLS_ASSERT_SAFE(primaryNode); @@ -1592,7 +1592,7 @@ void StorageManager::clearPrimaryForPartition(int partitionId, // PRECONDITION BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(0 <= partitionId); unsigned int pid = static_cast(partitionId); @@ -1616,7 +1616,7 @@ void StorageManager::setPrimaryStatusForPartition( // PRECONDITION BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); BSLS_ASSERT_OPT(false && "This method should only be invoked in CSL mode"); } @@ -1790,7 +1790,7 @@ void StorageManager::processPartitionSyncStateRequest( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(bmqp_ctrlmsg::NodeStatus::E_AVAILABLE == d_clusterData_p->membership().selfNodeStatus()); @@ -1834,7 +1834,7 @@ void StorageManager::processPartitionSyncDataRequest( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(bmqp_ctrlmsg::NodeStatus::E_AVAILABLE == d_clusterData_p->membership().selfNodeStatus()); @@ -2101,7 +2101,7 @@ int StorageManager::processCommand(mqbcmd::StorageResult* result, // PRECONDITIONS BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); if (!d_isStarted) { result->makeError(); @@ -2128,7 +2128,7 @@ void StorageManager::gcUnrecognizedDomainQueues() // PRECONDITIONS BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); mqbc::StorageUtil::gcUnrecognizedDomainQueues(&d_fileStores, &d_unrecognizedDomainsLock, diff --git a/src/groups/mqb/mqbc/mqbc_clusterdata.cpp b/src/groups/mqb/mqbc/mqbc_clusterdata.cpp index 9f536a9702..590e929aed 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterdata.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterdata.cpp @@ -146,7 +146,11 @@ ClusterData::ClusterData( { // PRECONDITIONS BSLS_ASSERT_SAFE(d_allocator_p); + BSLS_ASSERT_SAFE(d_scheduler_p); + BSLS_ASSERT_SAFE(d_bufferFactory_p); + BSLS_ASSERT_SAFE(d_blobSpPool_p); BSLS_ASSERT_SAFE(d_cluster_p); + BSLS_ASSERT_SAFE(d_transportManager_p); 
BSLS_ASSERT(scheduler->clockType() == bsls::SystemClockType::e_MONOTONIC); // Initialize the clusterStats object - under the hood this creates a new diff --git a/src/groups/mqb/mqbc/mqbc_clusterdata.h b/src/groups/mqb/mqbc/mqbc_clusterdata.h index 7e59813b3e..c6fc82af67 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterdata.h +++ b/src/groups/mqb/mqbc/mqbc_clusterdata.h @@ -239,13 +239,13 @@ class ClusterData { // MANIPULATORS /// Get a modifiable reference to this object's event scheduler. - bdlmt::EventScheduler* scheduler(); + bdlmt::EventScheduler& scheduler(); /// Get a modifiable reference to this object's buffer factory. - bdlbb::BlobBufferFactory* bufferFactory(); + bdlbb::BlobBufferFactory& bufferFactory(); /// Get a modifiable reference to this object's blobSpPool. - BlobSpPool* blobSpPool(); + BlobSpPool& blobSpPool(); /// Get a modifiable reference to this object's dispatcherClientData. mqbi::DispatcherClientData& dispatcherClientData(); @@ -263,7 +263,7 @@ class ClusterData { ClusterMembership& membership(); /// Get a modifiable reference to this object's cluster. - mqbi::Cluster* cluster(); + mqbi::Cluster& cluster(); /// Get a modifiable reference to this object's messageTransmitter. ControlMessageTransmitter& messageTransmitter(); @@ -278,7 +278,7 @@ class ClusterData { mqbi::DomainFactory* domainFactory(); /// Get a modifiable reference to this object's transportManager. - mqbnet::TransportManager* transportManager(); + mqbnet::TransportManager& transportManager(); /// Get a modifiable reference to this object's cluster stats. mqbstat::ClusterStats& stats(); @@ -287,10 +287,10 @@ class ClusterData { StatContextMp& clusterNodesStatContext(); /// Get a modifiable reference to this object's stateSpPool. - StateSpPool* stateSpPool(); + StateSpPool& stateSpPool(); /// Get a modifiable reference to this object's miscWorkThreadPool. 
- bdlmt::FixedThreadPool* miscWorkThreadPool(); + bdlmt::FixedThreadPool& miscWorkThreadPool(); // ACCESSORS @@ -301,7 +301,7 @@ class ClusterData { const ElectorInfo& electorInfo() const; const ClusterMembership& membership() const; const ClusterDataIdentity& identity() const; - const mqbi::Cluster* cluster() const; + const mqbi::Cluster& cluster() const; const StatContextMp& clusterNodesStatContext() const; }; @@ -347,19 +347,19 @@ ClusterDataIdentity::identity() const // ----------------- // MANIPULATORS -inline bdlmt::EventScheduler* ClusterData::scheduler() +inline bdlmt::EventScheduler& ClusterData::scheduler() { - return d_scheduler_p; + return *d_scheduler_p; } -inline bdlbb::BlobBufferFactory* ClusterData::bufferFactory() +inline bdlbb::BlobBufferFactory& ClusterData::bufferFactory() { - return d_bufferFactory_p; + return *d_bufferFactory_p; } -inline ClusterData::BlobSpPool* ClusterData::blobSpPool() +inline ClusterData::BlobSpPool& ClusterData::blobSpPool() { - return d_blobSpPool_p; + return *d_blobSpPool_p; } inline mqbi::DispatcherClientData& ClusterData::dispatcherClientData() @@ -387,9 +387,9 @@ inline ClusterMembership& ClusterData::membership() return d_membership; } -inline mqbi::Cluster* ClusterData::cluster() +inline mqbi::Cluster& ClusterData::cluster() { - return d_cluster_p; + return *d_cluster_p; } inline ControlMessageTransmitter& ClusterData::messageTransmitter() @@ -412,9 +412,9 @@ inline mqbi::DomainFactory* ClusterData::domainFactory() return d_domainFactory_p; } -inline mqbnet::TransportManager* ClusterData::transportManager() +inline mqbnet::TransportManager& ClusterData::transportManager() { - return d_transportManager_p; + return *d_transportManager_p; } inline mqbstat::ClusterStats& ClusterData::stats() @@ -427,14 +427,14 @@ inline ClusterData::StatContextMp& ClusterData::clusterNodesStatContext() return d_clusterNodesStatContext_mp; } -inline ClusterData::StateSpPool* ClusterData::stateSpPool() +inline ClusterData::StateSpPool& ClusterData::stateSpPool() { - return &d_stateSpPool; + return d_stateSpPool; } -inline bdlmt::FixedThreadPool* ClusterData::miscWorkThreadPool() +inline bdlmt::FixedThreadPool& ClusterData::miscWorkThreadPool() { - return &d_miscWorkThreadPool; + return d_miscWorkThreadPool; } // ACCESSORS @@ -470,9 +470,9 @@ inline const ClusterDataIdentity& ClusterData::identity() const return d_identity; } -inline const mqbi::Cluster* ClusterData::cluster() const +inline const mqbi::Cluster& ClusterData::cluster() const { - return d_cluster_p; + return *d_cluster_p; } inline const ClusterData::StatContextMp& diff --git a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp index 7da446bc27..801b0ca1bb 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp @@ -117,9 +117,9 @@ void ClusterStateManager::do_startWatchDog( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( &d_watchDogEventHandle, - d_clusterData_p->scheduler()->now() + d_watchDogTimeoutInterval, + d_clusterData_p->scheduler().now() + d_watchDogTimeoutInterval, bdlf::BindUtil::bind(&ClusterStateManager::onWatchDog, this)); } @@ -131,8 +131,7 @@ void ClusterStateManager::do_stopWatchDog( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - if (d_clusterData_p->scheduler()->cancelEvent(d_watchDogEventHandle) != - 0) { + if 
(d_clusterData_p->scheduler().cancelEvent(d_watchDogEventHandle) != 0) { BALL_LOG_ERROR << d_clusterData_p->identity().description() << ": Failed to cancel WatchDog."; } @@ -146,9 +145,9 @@ void ClusterStateManager::do_triggerWatchDog( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - if (d_clusterData_p->scheduler()->rescheduleEvent( + if (d_clusterData_p->scheduler().rescheduleEvent( d_watchDogEventHandle, - d_clusterData_p->scheduler()->now()) != 0) { + d_clusterData_p->scheduler().now()) != 0) { BALL_LOG_ERROR << d_clusterData_p->identity().description() << ": Failed to trigger WatchDog."; } @@ -281,7 +280,7 @@ void ClusterStateManager::do_sendFollowerLSNRequests( BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); - if (d_clusterData_p->cluster()->isLocal()) { + if (d_clusterData_p->cluster().isLocal()) { return; // RETURN } @@ -317,7 +316,7 @@ void ClusterStateManager::do_sendFollowerLSNResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfFollower()); @@ -352,7 +351,7 @@ void ClusterStateManager::do_sendFailureFollowerLSNResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); const ClusterFSMEventMetadata& metadata = args->front().second; BSLS_ASSERT_SAFE(metadata.inputMessages().size() == 1); @@ -415,7 +414,7 @@ void ClusterStateManager::do_sendFollowerClusterStateRequest( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); @@ -463,7 +462,7 @@ void ClusterStateManager::do_sendFollowerClusterStateResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfFollower()); @@ -509,7 +508,7 @@ void ClusterStateManager::do_sendFailureFollowerClusterStateResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); const ClusterFSMEventMetadata& metadata = args->front().second; BSLS_ASSERT_SAFE(metadata.inputMessages().size() == 1); @@ -561,7 +560,7 @@ void ClusterStateManager::do_storeFollowerLSNs(const ClusterFSMArgsSp& args) // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); @@ -585,7 +584,7 @@ void ClusterStateManager::do_removeFollowerLSN(const ClusterFSMArgsSp& args) // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + 
BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); @@ -646,7 +645,7 @@ void ClusterStateManager::do_sendRegistrationRequest( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfFollower()); @@ -694,7 +693,7 @@ void ClusterStateManager::do_sendRegistrationResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); @@ -727,7 +726,7 @@ void ClusterStateManager::do_sendFailureRegistrationResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); const ClusterFSMEventMetadata& metadata = args->front().second; BSLS_ASSERT_SAFE(metadata.inputMessages().size() == 1); @@ -757,7 +756,7 @@ void ClusterStateManager::do_logStaleFollowerLSNResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader() && !d_clusterFSM.isSelfLeader()); // Response is not stale if self is leader @@ -783,7 +782,7 @@ void ClusterStateManager::do_logStaleFollowerClusterStateResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterFSM.state() != ClusterFSM::State::e_LDR_HEALING_STG2); @@ -806,7 +805,7 @@ void ClusterStateManager::do_logErrorLeaderNotHealed( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader()); BSLS_ASSERT_SAFE(d_clusterFSM.state() == ClusterFSM::State::e_FOL_HEALING); @@ -830,7 +829,7 @@ void ClusterStateManager::do_logFailFollowerLSNResponses( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); @@ -853,7 +852,7 @@ void ClusterStateManager::do_logFailFollowerClusterStateResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfLeader()); @@ -887,7 +886,7 @@ void ClusterStateManager::do_logFailRegistrationResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); 
BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader() && d_clusterFSM.isSelfFollower()); @@ -1163,7 +1162,7 @@ void ClusterStateManager::onFollowerLSNResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterFSM.isSelfLeader() || (d_clusterFSM.state() == ClusterFSM::State::e_STOPPING) || (d_clusterFSM.state() == ClusterFSM::State::e_UNKNOWN)); @@ -1247,7 +1246,7 @@ void ClusterStateManager::onRegistrationResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(!d_clusterData_p->electorInfo().isSelfLeader()); BSLS_ASSERT_SAFE(d_clusterFSM.isSelfFollower() || (d_clusterFSM.state() == ClusterFSM::State::e_STOPPING) || @@ -1314,7 +1313,7 @@ void ClusterStateManager::onFollowerClusterStateResponse( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(d_clusterFSM.isSelfLeader() || (d_clusterFSM.state() == ClusterFSM::State::e_STOPPING) || (d_clusterFSM.state() == ClusterFSM::State::e_UNKNOWN)); diff --git a/src/groups/mqb/mqbc/mqbc_clusterutil.cpp b/src/groups/mqb/mqbc/mqbc_clusterutil.cpp index 5a25a34ffb..4407b434cb 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterutil.cpp +++ b/src/groups/mqb/mqbc/mqbc_clusterutil.cpp @@ -280,8 +280,8 @@ void getNextPrimarys(NumNewPartitionsMap* numNewPartitions, // executed by the cluster *DISPATCHER* thread // PRECONDITIONS - BSLS_ASSERT_SAFE(clusterData.cluster()->dispatcher()->inDispatcherThread( - clusterData.cluster())); + BSLS_ASSERT_SAFE(clusterData.cluster().dispatcher()->inDispatcherThread( + &clusterData.cluster())); BSLS_ASSERT_SAFE(mqbnet::ElectorState::e_LEADER == clusterData.electorInfo().electorState()); BSLS_ASSERT_SAFE(numNewPartitions && numNewPartitions->empty()); @@ -420,8 +420,8 @@ void ClusterUtil::assignPartitions( // executed by the cluster *DISPATCHER* thread // PRECONDITIONS - BSLS_ASSERT_SAFE(clusterData.cluster()->dispatcher()->inDispatcherThread( - clusterData.cluster())); + BSLS_ASSERT_SAFE(clusterData.cluster().dispatcher()->inDispatcherThread( + &clusterData.cluster())); BSLS_ASSERT_SAFE(partitions && partitions->empty()); BSLS_ASSERT_SAFE(mqbnet::ElectorState::e_LEADER == clusterData.electorInfo().electorState()); @@ -608,9 +608,8 @@ void ClusterUtil::onPartitionPrimaryAssignment( // PRECONDITIONS BSLS_ASSERT_SAFE(clusterData); - BSLS_ASSERT_SAFE(clusterData->cluster()); - BSLS_ASSERT_SAFE(clusterData->cluster()->dispatcher()->inDispatcherThread( - clusterData->cluster())); + BSLS_ASSERT_SAFE(clusterData->cluster().dispatcher()->inDispatcherThread( + &clusterData->cluster())); BSLS_ASSERT_SAFE(storageManager); BSLS_ASSERT_SAFE(0 <= partitionId); if (primary) { @@ -912,6 +911,7 @@ ClusterUtil::assignQueue(ClusterState* clusterState, } if (domIt->second->domain() == 0) { + BSLS_ASSERT_SAFE(clusterData->domainFactory()); clusterData->domainFactory()->createDomain( uri.qualifiedDomain(), bdlf::BindUtil::bind(&createDomainCb, @@ -1527,9 +1527,8 @@ void ClusterUtil::sendClusterState( // PRECONDITIONS BSLS_ASSERT_SAFE(clusterData); - BSLS_ASSERT_SAFE(clusterData->cluster()); - 
BSLS_ASSERT_SAFE(clusterData->cluster()->dispatcher()->inDispatcherThread( - clusterData->cluster())); + BSLS_ASSERT_SAFE(clusterData->cluster().dispatcher()->inDispatcherThread( + &clusterData->cluster())); BSLS_ASSERT_SAFE(mqbnet::ElectorState::e_LEADER == clusterData->electorInfo().electorState()); BSLS_ASSERT_SAFE(ledger && ledger->isOpen()); @@ -1568,7 +1567,7 @@ void ClusterUtil::sendClusterState( advisory.partitions() = partitions; loadQueuesInfo(&advisory.queues(), clusterState, - clusterData->cluster()->isCSLModeEnabled()); + clusterData->cluster().isCSLModeEnabled()); } else if (sendPartitionPrimaryInfo) { bmqp_ctrlmsg::PartitionPrimaryAdvisory& advisory = @@ -1590,10 +1589,10 @@ void ClusterUtil::sendClusterState( loadQueuesInfo(&advisory.queues(), clusterState, - clusterData->cluster()->isCSLModeEnabled()); + clusterData->cluster().isCSLModeEnabled()); } - if (!clusterData->cluster()->isCSLModeEnabled()) { + if (!clusterData->cluster().isCSLModeEnabled()) { if (node) { clusterData->messageTransmitter().sendMessage(controlMessage, node); @@ -1997,8 +1996,8 @@ int ClusterUtil::load(ClusterState* state, // PRECONDITIONS BSLS_ASSERT_SAFE(state); BSLS_ASSERT_SAFE(iterator); - BSLS_ASSERT_SAFE(clusterData.cluster()->dispatcher()->inDispatcherThread( - clusterData.cluster())); + BSLS_ASSERT_SAFE(clusterData.cluster().dispatcher()->inDispatcherThread( + &clusterData.cluster())); enum RcEnum { // Value for the various RC error categories diff --git a/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.cpp b/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.cpp index fdf1eec2f4..8b597cb1de 100644 --- a/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.cpp +++ b/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.cpp @@ -797,8 +797,8 @@ int IncoreClusterStateLedger::applyImpl(const bdlbb::Blob& event, // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(source); BSLS_ASSERT_SAFE(source->nodeId() != d_clusterData_p->membership().selfNode()->nodeId()); @@ -1253,8 +1253,8 @@ void IncoreClusterStateLedger::onClusterLeader( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); if (status == ElectorInfoLeaderStatus::e_PASSIVE) { return; // RETURN @@ -1271,8 +1271,8 @@ int IncoreClusterStateLedger::open() // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BALL_LOG_INFO << description() << ": Opening IncoreCSL with config: " << d_ledgerConfig; @@ -1328,8 +1328,8 @@ int IncoreClusterStateLedger::close() // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); enum RcEnum { // Value for the various RC error categories @@ -1363,8 +1363,8 @@ int IncoreClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); 
BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -1382,8 +1382,8 @@ int IncoreClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -1401,8 +1401,8 @@ int IncoreClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -1420,8 +1420,8 @@ int IncoreClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -1439,8 +1439,8 @@ int IncoreClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -1458,8 +1458,8 @@ int IncoreClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); const bmqp_ctrlmsg::ClusterMessageChoice& choice = clusterMessage.choice(); @@ -1501,8 +1501,8 @@ int IncoreClusterStateLedger::apply(const bdlbb::Blob& event, // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return applyImpl(event, source, false); // delayed @@ -1523,8 +1523,8 @@ IncoreClusterStateLedger::getIterator() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); bslma::ManagedPtr mp( new (*d_allocator_p) diff --git a/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.h b/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.h index e04656ad7b..cc1227b298 100644 --- a/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.h +++ b/src/groups/mqb/mqbc/mqbc_incoreclusterstateledger.h @@ -464,8 +464,8 @@ inline bool IncoreClusterStateLedger::isSelfLeader() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return d_clusterData_p->electorInfo().isSelfLeader(); } @@ -485,8 +485,8 @@ inline bool IncoreClusterStateLedger::isOpen() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + 
&d_clusterData_p->cluster())); return d_isOpen; } @@ -503,8 +503,8 @@ inline const mqbsi::Ledger* IncoreClusterStateLedger::ledger() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return d_ledger_mp.get(); } diff --git a/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp b/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp index 27ebe81def..af1040138a 100644 --- a/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp @@ -892,8 +892,7 @@ int RecoveryManager::createRecoveryFileSet(bsl::ostream& errorDescription, mwcu::MemOutStream partitionDesc; partitionDesc << "Partition [" << partitionId - << "] (cluster: " << d_clusterData.cluster()->name() - << "): "; + << "] (cluster: " << d_clusterData.cluster().name() << "): "; int rc = mqbs::FileStoreUtil::create(errorDescription, &fileSetSp, diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp index 221ba472e3..d1c0289e67 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.cpp @@ -280,7 +280,7 @@ void StorageManager::onPartitionRecovery(int partitionId) // partition's dispatcher thread for GC'ing expired messages as // well as cleaning history. - d_clusterData_p->scheduler()->scheduleRecurringEvent( + d_clusterData_p->scheduler().scheduleRecurringEvent( &d_gcMessagesEventHandle, bsls::TimeInterval(k_GC_MESSAGES_INTERVAL_SECONDS), bdlf::BindUtil::bind(&StorageManager::forceFlushFileStores, @@ -647,7 +647,7 @@ void StorageManager::processPrimaryStateResponseDispatched( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(context->response().rId() != NULL); if (context->request().rId().isNull()) { @@ -764,7 +764,7 @@ void StorageManager::processReplicaStateResponseDispatched( BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); const NodeResponsePairs& pairs = requestContext->response(); - if (d_clusterData_p->cluster()->isLocal()) { + if (d_clusterData_p->cluster().isLocal()) { BSLS_ASSERT_SAFE(pairs.empty()); return; // RETURN } @@ -894,7 +894,7 @@ void StorageManager::processReplicaDataResponseDispatched( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(context->response().rId() != NULL); if (context->request().rId().isNull()) { @@ -1134,9 +1134,9 @@ void StorageManager::do_startWatchDog(const PartitionFSMArgsSp& args) return; // RETURN } - d_clusterData_p->scheduler()->scheduleEvent( + d_clusterData_p->scheduler().scheduleEvent( &d_watchDogEventHandles[partitionId], - d_clusterData_p->scheduler()->now() + d_watchDogTimeoutInterval, + d_clusterData_p->scheduler().now() + d_watchDogTimeoutInterval, bdlf::BindUtil::bind(&StorageManager::onWatchDog, this, partitionId)); } @@ -1155,7 +1155,7 @@ void StorageManager::do_stopWatchDog(const PartitionFSMArgsSp& args) const int partitionId = eventDataVec[0].partitionId(); - const int rc = d_clusterData_p->scheduler()->cancelEvent( + const int rc = d_clusterData_p->scheduler().cancelEvent( d_watchDogEventHandles[partitionId]); if (rc != 0) { BALL_LOG_ERROR << 
d_clusterData_p->identity().description() @@ -1727,7 +1727,7 @@ void StorageManager::do_primaryStateRequest(const PartitionFSMArgsSp& args) bdlf::PlaceHolders::_1, destNode)); - bmqt::GenericResult::Enum status = d_clusterData_p->cluster()->sendRequest( + bmqt::GenericResult::Enum status = d_clusterData_p->cluster().sendRequest( request, destNode, bsls::TimeInterval(10)); @@ -1929,9 +1929,9 @@ void StorageManager::do_replicaDataRequestPush(const PartitionFSMArgsSp& args) destNode)); const bmqt::GenericResult::Enum status = - d_clusterData_p->cluster()->sendRequest(request, - destNode, - bsls::TimeInterval(10)); + d_clusterData_p->cluster().sendRequest(request, + destNode, + bsls::TimeInterval(10)); BALL_LOG_INFO << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: " @@ -2100,9 +2100,9 @@ void StorageManager::do_replicaDataRequestDrop(const PartitionFSMArgsSp& args) destNode)); const bmqt::GenericResult::Enum status = - d_clusterData_p->cluster()->sendRequest(request, - destNode, - bsls::TimeInterval(10)); + d_clusterData_p->cluster().sendRequest(request, + destNode, + bsls::TimeInterval(10)); BALL_LOG_INFO << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: " @@ -2181,7 +2181,7 @@ void StorageManager::do_replicaDataRequestPull(const PartitionFSMArgsSp& args) bdlf::PlaceHolders::_1, destNode)); - bmqt::GenericResult::Enum status = d_clusterData_p->cluster()->sendRequest( + bmqt::GenericResult::Enum status = d_clusterData_p->cluster().sendRequest( request, destNode, bsls::TimeInterval(10)); @@ -3224,7 +3224,7 @@ StorageManager::StorageManager( , d_lowDiskspaceWarning(false) , d_unrecognizedDomainsLock() , d_unrecognizedDomains(allocator) -, d_blobSpPool_p(clusterData->blobSpPool()) +, d_blobSpPool_p(&clusterData->blobSpPool()) , d_domainFactory_p(domainFactory) , d_dispatcher_p(dispatcher) , d_cluster_p(cluster) @@ -3364,7 +3364,7 @@ int StorageManager::start(bsl::ostream& errorDescription) // Schedule a periodic event (every minute) which monitors storage (disk // space, archive clean up, etc). 
- d_clusterData_p->scheduler()->scheduleRecurringEvent( + d_clusterData_p->scheduler().scheduleRecurringEvent( &d_storageMonitorEventHandle, bsls::TimeInterval(bdlt::TimeUnitRatio::k_SECONDS_PER_MINUTE), bdlf::BindUtil::bind(&StorageUtil::storageMonitorCb, @@ -3409,7 +3409,7 @@ int StorageManager::start(bsl::ostream& errorDescription) "RecoveryManager"); d_recoveryManager_mp.load(new (*recoveryManagerAllocator) RecoveryManager( - d_clusterData_p->bufferFactory(), + &d_clusterData_p->bufferFactory(), d_clusterConfig, *d_clusterData_p, dsCfg, @@ -3468,8 +3468,8 @@ void StorageManager::stop() this)); } - d_clusterData_p->scheduler()->cancelEventAndWait(&d_gcMessagesEventHandle); - d_clusterData_p->scheduler()->cancelEventAndWait( + d_clusterData_p->scheduler().cancelEventAndWait(&d_gcMessagesEventHandle); + d_clusterData_p->scheduler().cancelEventAndWait( &d_storageMonitorEventHandle); d_recoveryManager_mp->stop(); @@ -3859,7 +3859,7 @@ void StorageManager::setPrimaryStatusForPartition( // PRECONDITION BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(0 <= partitionId && partitionId < static_cast(d_fileStores.size())); @@ -4069,7 +4069,7 @@ void StorageManager::processStorageEvent( // PRECONDITIONS BSLS_ASSERT_SAFE(dispatcher()->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_SAFE(!d_clusterData_p->cluster()->isLocal()); + BSLS_ASSERT_SAFE(!d_clusterData_p->cluster().isLocal()); BSLS_ASSERT_SAFE(event.isRelay() == false); if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(!d_isStarted)) { @@ -4308,7 +4308,7 @@ int StorageManager::processCommand(mqbcmd::StorageResult* result, // PRECONDITIONS BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); if (!d_isStarted) { result->makeError(); @@ -4335,7 +4335,7 @@ void StorageManager::gcUnrecognizedDomainQueues() // PRECONDITIONS BSLS_ASSERT_SAFE( - d_dispatcher_p->inDispatcherThread(d_clusterData_p->cluster())); + d_dispatcher_p->inDispatcherThread(&d_clusterData_p->cluster())); StorageUtil::gcUnrecognizedDomainQueues(&d_fileStores, &d_unrecognizedDomainsLock, diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp index c3d1b04901..940d932a82 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.t.cpp @@ -850,8 +850,8 @@ struct TestHelper { d_cluster_mp->_clusterDefinition().partitionConfig(); mqbs::DataStoreConfig dsCfg; - dsCfg.setScheduler(d_cluster_mp->_clusterData()->scheduler()) - .setBufferFactory(d_cluster_mp->_clusterData()->bufferFactory()) + dsCfg.setScheduler(&d_cluster_mp->_scheduler()) + .setBufferFactory(&d_cluster_mp->_clusterData()->bufferFactory()) .setPreallocate(partitionCfg.preallocate()) .setPrefaultPages(partitionCfg.prefaultPages()) .setLocation(partitionCfg.location()) @@ -874,8 +874,8 @@ struct TestHelper { d_cluster_mp->dispatcher(), &d_cluster_mp->netCluster(), &d_cluster_mp->_clusterData()->stats(), - d_cluster_mp->_clusterData()->blobSpPool(), - d_cluster_mp->_clusterData()->stateSpPool(), + &d_cluster_mp->_clusterData()->blobSpPool(), + &d_cluster_mp->_clusterData()->stateSpPool(), &threadPool, d_cluster_mp->isCSLModeEnabled(), d_cluster_mp->isFSMWorkflow(), diff --git a/src/groups/mqb/mqbc/mqbc_storageutil.cpp b/src/groups/mqb/mqbc/mqbc_storageutil.cpp index 7fa81591a6..d7e9171f13 100644 --- 
a/src/groups/mqb/mqbc/mqbc_storageutil.cpp +++ b/src/groups/mqb/mqbc/mqbc_storageutil.cpp @@ -1248,8 +1248,8 @@ int StorageUtil::assignPartitionDispatcherThreads( for (int i = 0; i < config.numPartitions(); ++i) { int processorId = i % numProcessors; mqbs::DataStoreConfig dsCfg; - dsCfg.setScheduler(clusterData->scheduler()) - .setBufferFactory(clusterData->bufferFactory()) + dsCfg.setScheduler(&clusterData->scheduler()) + .setBufferFactory(&clusterData->bufferFactory()) .setPreallocate(config.preallocate()) .setPrefaultPages(config.prefaultPages()) .setLocation(config.location()) @@ -1282,7 +1282,7 @@ int StorageUtil::assignPartitionDispatcherThreads( clusterData->membership().netCluster(), &clusterData->stats(), blobSpPool, - clusterData->stateSpPool(), + &clusterData->stateSpPool(), threadPool, cluster.isCSLModeEnabled(), cluster.isFSMWorkflow(), @@ -1393,7 +1393,7 @@ void StorageUtil::onPartitionPrimarySync( BSLS_ASSERT_SAFE(clusterData); BSLS_ASSERT_SAFE(0 <= partitionId); - if (clusterData->cluster()->isStopping()) { + if (clusterData->cluster().isStopping()) { BALL_LOG_WARN << clusterData->identity().description() << ": Cluster is stopping; skipping partition primary " << "sync notification."; diff --git a/src/groups/mqb/mqbmock/mqbmock_cluster.cpp b/src/groups/mqb/mqbmock/mqbmock_cluster.cpp index c24a5f9d9c..c0ab9f225c 100644 --- a/src/groups/mqb/mqbmock/mqbmock_cluster.cpp +++ b/src/groups/mqb/mqbmock/mqbmock_cluster.cpp @@ -221,6 +221,12 @@ Cluster::Cluster(bdlbb::BlobBufferFactory* bufferFactory, , d_clusterDefinition(allocator) , d_itemPool(mqbnet::Channel::k_ITEM_SIZE, allocator) , d_channels(allocator) +, d_negotiator_mp() +, d_transportManager(&d_scheduler, + bufferFactory, + d_negotiator_mp, + 0, // mqbstat::StatController* + allocator) , d_netCluster_mp(0) , d_clusterData_mp(0) , d_isClusterMember(isClusterMember) @@ -267,7 +273,7 @@ Cluster::Cluster(bdlbb::BlobBufferFactory* bufferFactory, d_netCluster_mp, this, 0, // domainFactory - 0, // transportManager + &d_transportManager, d_statContext_sp.get(), d_statContexts, d_allocator_p), diff --git a/src/groups/mqb/mqbmock/mqbmock_cluster.h b/src/groups/mqb/mqbmock/mqbmock_cluster.h index 05cdd9e928..07a319a7ef 100644 --- a/src/groups/mqb/mqbmock/mqbmock_cluster.h +++ b/src/groups/mqb/mqbmock/mqbmock_cluster.h @@ -49,6 +49,7 @@ #include #include #include +#include // MWC #include @@ -97,6 +98,9 @@ class ClusterResult; namespace mqbi { class Domain; } +namespace mqbnet { +class Negotiator; +} namespace mqbmock { @@ -113,6 +117,8 @@ class Cluster : public mqbi::Cluster { typedef bsl::function EventProcessor; + typedef bslma::ManagedPtr NegotiatorMp; + typedef bslma::ManagedPtr NetClusterMp; typedef bslma::ManagedPtr ClusterDataMp; @@ -178,6 +184,12 @@ class Cluster : public mqbi::Cluster { TestChannelMap d_channels; // Test channels + NegotiatorMp d_negotiator_mp; + // Session negotiator + + mqbnet::TransportManager d_transportManager; + // Transport manager + NetClusterMp d_netCluster_mp; // Net cluster used by this cluster diff --git a/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.cpp b/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.cpp index 5df6feb794..b111b470b2 100644 --- a/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.cpp +++ b/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.cpp @@ -39,8 +39,8 @@ int ClusterStateLedger::applyAdvisoryInternal( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + 
d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); d_uncommittedAdvisories.emplace_back(clusterMessage); @@ -75,8 +75,8 @@ int ClusterStateLedger::open() // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); d_isOpen = true; @@ -89,8 +89,8 @@ int ClusterStateLedger::close() // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); d_isOpen = false; @@ -104,8 +104,8 @@ int ClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -121,8 +121,8 @@ int ClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -138,8 +138,8 @@ int ClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -155,8 +155,8 @@ int ClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); bmqp_ctrlmsg::ClusterMessage clusterMessage; @@ -171,8 +171,8 @@ int ClusterStateLedger::apply(const bmqp_ctrlmsg::LeaderAdvisory& advisory) // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); // NOTE: We remove the assert below to allow artificially setting the // ledger snapshot before the leader is elected. 
// @@ -191,8 +191,8 @@ int ClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); BSLS_ASSERT_SAFE(isSelfLeader()); const bmqp_ctrlmsg::ClusterMessageChoice& choice = clusterMessage.choice(); @@ -233,8 +233,8 @@ int ClusterStateLedger::apply( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); // NOT IMPLEMENTED return -1; @@ -248,8 +248,8 @@ void ClusterStateLedger::_commitAdvisories( // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); for (AdvisoriesCIter cit = d_uncommittedAdvisories.cbegin(); cit != d_uncommittedAdvisories.cend(); @@ -319,8 +319,8 @@ ClusterStateLedger::getIterator() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return bslma::ManagedPtr( new (*d_allocator_p) mqbmock::ClusterStateLedgerIterator(d_records), diff --git a/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.h b/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.h index 1f480c9aed..4a29c17e81 100644 --- a/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.h +++ b/src/groups/mqb/mqbmock/mqbmock_clusterstateledger.h @@ -246,8 +246,8 @@ inline bool ClusterStateLedger::isSelfLeader() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return d_clusterData_p->electorInfo().isSelfLeader(); } @@ -261,8 +261,8 @@ ClusterStateLedger::setIsFirstLeaderAdvisory(bool isFirstLeaderAdvisory) // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); d_isFirstLeaderAdvisory = isFirstLeaderAdvisory; } @@ -279,8 +279,8 @@ inline void ClusterStateLedger::_setPauseCommitCb(bool value) // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); d_pauseCommitCb = value; } @@ -293,8 +293,8 @@ inline bool ClusterStateLedger::isOpen() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return d_isOpen; } @@ -307,8 +307,8 @@ ClusterStateLedger::_uncommittedAdvisories() const // PRECONDITIONS BSLS_ASSERT_SAFE( - d_clusterData_p->cluster()->dispatcher()->inDispatcherThread( - d_clusterData_p->cluster())); + d_clusterData_p->cluster().dispatcher()->inDispatcherThread( + &d_clusterData_p->cluster())); return d_uncommittedAdvisories; } diff --git a/src/groups/mqb/mqbu/mqbu_messageguidutil.t.cpp 
b/src/groups/mqb/mqbu/mqbu_messageguidutil.t.cpp index 266ed6d89d..6456f2ac20 100644 --- a/src/groups/mqb/mqbu/mqbu_messageguidutil.t.cpp +++ b/src/groups/mqb/mqbu/mqbu_messageguidutil.t.cpp @@ -1383,4 +1383,3 @@ int main(int argc, char* argv[]) } // ---------------------------------------------------------------------------- -// NOTICE: From d720c91207592b0fb190fb05e97a98c3ca84e905 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 23 Aug 2024 16:23:27 -0400 Subject: [PATCH 12/15] mqbc: Make copy constructor and copy assignment private Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbblp/mqbblp_cluster.h | 6 +++--- src/groups/mqb/mqbc/mqbc_clusterdata.h | 6 ++++++ src/groups/mqb/mqbc/mqbc_recoverymanager.h | 1 + src/groups/mqb/mqbc/mqbc_storagemanager.h | 4 ++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_cluster.h b/src/groups/mqb/mqbblp/mqbblp_cluster.h index ab834f6131..ee16b1d1b0 100644 --- a/src/groups/mqb/mqbblp/mqbblp_cluster.h +++ b/src/groups/mqb/mqbblp/mqbblp_cluster.h @@ -77,7 +77,7 @@ #include #include #include -#include +#include namespace BloombergLP { @@ -346,10 +346,10 @@ class Cluster : public mqbi::Cluster, private: // NOT IMPLEMENTED - Cluster(const Cluster&) BSLS_CPP11_DELETED; + Cluster(const Cluster&) BSLS_KEYWORD_DELETED; /// Copy constructor and assignment operator are not implemented. - Cluster& operator=(const Cluster&) BSLS_CPP11_DELETED; + Cluster& operator=(const Cluster&) BSLS_KEYWORD_DELETED; private: // PRIVATE MANIPULATORS diff --git a/src/groups/mqb/mqbc/mqbc_clusterdata.h b/src/groups/mqb/mqbc/mqbc_clusterdata.h index c6fc82af67..8fad5a13cf 100644 --- a/src/groups/mqb/mqbc/mqbc_clusterdata.h +++ b/src/groups/mqb/mqbc/mqbc_clusterdata.h @@ -65,6 +65,7 @@ #include #include #include +#include #include namespace BloombergLP { @@ -212,6 +213,11 @@ class ClusterData { // work that can be offloaded to any // non-dispatcher threads. 
+ private: + // NOT IMPLEMENTED + ClusterData(const ClusterData&) BSLS_KEYWORD_DELETED; + ClusterData& operator=(const ClusterData&) BSLS_KEYWORD_DELETED; + public: // TRAITS BSLMF_NESTED_TRAIT_DECLARATION(ClusterData, bslma::UsesBslmaAllocator) diff --git a/src/groups/mqb/mqbc/mqbc_recoverymanager.h b/src/groups/mqb/mqbc/mqbc_recoverymanager.h index df9f147169..0c9fa62963 100644 --- a/src/groups/mqb/mqbc/mqbc_recoverymanager.h +++ b/src/groups/mqb/mqbc/mqbc_recoverymanager.h @@ -51,6 +51,7 @@ #include #include #include +#include #include namespace BloombergLP { diff --git a/src/groups/mqb/mqbc/mqbc_storagemanager.h b/src/groups/mqb/mqbc/mqbc_storagemanager.h index 701dccd8c9..3b1bbd597b 100644 --- a/src/groups/mqb/mqbc/mqbc_storagemanager.h +++ b/src/groups/mqb/mqbc/mqbc_storagemanager.h @@ -433,8 +433,8 @@ class StorageManager private: // NOT IMPLEMENTED - StorageManager(const StorageManager&); // = delete; - StorageManager& operator=(const StorageManager&); // = delete; + StorageManager(const StorageManager&) BSLS_KEYWORD_DELETED; + StorageManager& operator=(const StorageManager&) BSLS_KEYWORD_DELETED; private: // PRIVATE MANIPULATORS From b1de8eb7aeb922875a955dd4356803d1a1323bb8 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 23 Aug 2024 16:25:15 -0400 Subject: [PATCH 13/15] blp::StorageManager: Improve assert logging Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp index aede9e7376..8562aa7dc1 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp @@ -1532,7 +1532,8 @@ void StorageManager::initializeQueueKeyInfoMap( // PRECONDITION BSLS_ASSERT_SAFE(d_dispatcher_p->inDispatcherThread(d_cluster_p)); - BSLS_ASSERT_OPT(false && "This method should only be invoked in FSM mode"); + BSLS_ASSERT_OPT(false && "Only the FSM version of this method from " + "mqbc::StorageManager should be invoked."); } void StorageManager::setPrimaryForPartition(int partitionId, From 91c4b022c66f716a965ff02fdd6e4d0d859f3286 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 23 Aug 2024 16:39:38 -0400 Subject: [PATCH 14/15] mqbmock::StorageMgr: Fix compilation errors Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp | 3 ++- src/groups/mqb/mqbmock/mqbmock_storagemanager.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp index 9237ee8e2d..f18b842a17 100644 --- a/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.cpp @@ -231,7 +231,8 @@ void StorageManager::processRecoveryEvent( } void StorageManager::processReceiptEvent( - BSLS_ANNOTATION_UNUSED const mqbi::DispatcherReceiptEvent& event) + BSLS_ANNOTATION_UNUSED const bmqp::Event& event, + BSLS_ANNOTATION_UNUSED mqbnet::ClusterNode* source) { // NOTHING } diff --git a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h index bb525c34bb..96d248ea3c 100644 --- a/src/groups/mqb/mqbmock/mqbmock_storagemanager.h +++ b/src/groups/mqb/mqbmock/mqbmock_storagemanager.h @@ -232,9 +232,10 @@ class StorageManager : public mqbi::StorageManager { virtual void processRecoveryEvent( const mqbi::DispatcherRecoveryEvent& event) 
BSLS_KEYWORD_OVERRIDE; - /// Executed in cluster dispatcher thread. - virtual void processReceiptEvent(const mqbi::DispatcherReceiptEvent& event) - BSLS_KEYWORD_OVERRIDE; + /// Executed in IO thread. + virtual void + processReceiptEvent(const bmqp::Event& event, + mqbnet::ClusterNode* source) BSLS_KEYWORD_OVERRIDE; /// Executed by any thread. virtual void processPrimaryStatusAdvisory( From d0301c8bce94f8cd4f2a7ba53e43a24f99d246d8 Mon Sep 17 00:00:00 2001 From: Yuan Jing Vincent Yan Date: Fri, 23 Aug 2024 16:53:11 -0400 Subject: [PATCH 15/15] mqbc::RecoveryManager: Apply clang format Signed-off-by: Yuan Jing Vincent Yan --- src/groups/mqb/mqbc/mqbc_recoverymanager.cpp | 118 ++++++++----------- 1 file changed, 50 insertions(+), 68 deletions(-) diff --git a/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp b/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp index af1040138a..9a29305817 100644 --- a/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp +++ b/src/groups/mqb/mqbc/mqbc_recoverymanager.cpp @@ -174,8 +174,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to truncate journal file [" + << partitionId << "]: " << "Failed to truncate journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; @@ -189,8 +188,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to flush journal file [" + << partitionId << "]: " << "Failed to flush journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; @@ -201,8 +199,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to close journal file [" + << partitionId << "]: " << "Failed to close journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "], rc: " << rc << MWCTSK_ALARMLOG_END; } @@ -213,8 +210,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (0 != rc) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to move file [" + << partitionId << "]: " << "Failed to move file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "] " << "to location [" << d_dataStoreConfig.archiveLocation() << "] rc: " << rc << MWCTSK_ALARMLOG_END; @@ -228,8 +224,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to truncate data file [" + << partitionId << "]: " << "Failed to truncate data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; errorDesc.reset(); @@ -241,8 +236,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to flush data file [" + << partitionId << "]: " << "Failed to flush data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc << ", error: " << 
errorDesc.str() << MWCTSK_ALARMLOG_END; errorDesc.reset(); @@ -252,8 +246,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to close data file [" + << partitionId << "]: " << "Failed to close data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc << MWCTSK_ALARMLOG_END; } @@ -263,8 +256,7 @@ void RecoveryManager::deprecateFileSet(int partitionId) if (0 != rc) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to move file [" + << partitionId << "]: " << "Failed to move file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "] " << "to location [" << d_dataStoreConfig.archiveLocation() << "] rc: " << rc << MWCTSK_ALARMLOG_END; @@ -324,12 +316,12 @@ void RecoveryManager::setExpectedDataChunkRange( BALL_LOG_INFO_BLOCK { - BALL_LOG_OUTPUT_STREAM << d_clusterData.identity().description() - << " Partition [" << partitionId << "]: " - << "Got notification to expect data chunks " - << "of range " << beginSeqNum << " to " - << endSeqNum << " from " - << source->nodeDescription() << "."; + BALL_LOG_OUTPUT_STREAM + << d_clusterData.identity().description() << " Partition [" + << partitionId + << "]: " << "Got notification to expect data chunks " + << "of range " << beginSeqNum << " to " << endSeqNum << " from " + << source->nodeDescription() << "."; if (requestId != -1) { BALL_LOG_OUTPUT_STREAM << " Recovery requestId is " << requestId << "."; @@ -552,9 +544,8 @@ int RecoveryManager::processSendDataChunks( } BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Sent data chunks from " << beginSeqNum << " to " - << endSeqNum + << partitionId << "]: " << "Sent data chunks from " + << beginSeqNum << " to " << endSeqNum << " to node: " << destination->nodeDescription() << "."; return rc_SUCCESS; @@ -594,8 +585,8 @@ int RecoveryManager::processReceiveDataChunks( if (!receiveDataCtx.d_expectChunks) { MWCTSK_ALARMLOG_ALARM("RECOVERY") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Received partition-sync event from node " + << partitionId + << "]: " << "Received partition-sync event from node " << source->nodeDescription() << ", but self is not expecting data chunks. " << "Ignoring this event." 
<< MWCTSK_ALARMLOG_END; @@ -606,8 +597,8 @@ int RecoveryManager::processReceiveDataChunks( if (receiveDataCtx.d_recoveryDataSource_p->nodeId() != source->nodeId()) { MWCTSK_ALARMLOG_ALARM("RECOVERY") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Received partition-sync event from node " + << partitionId + << "]: " << "Received partition-sync event from node " << source->nodeDescription() << ", which is not identified as recovery peer node " << receiveDataCtx.d_recoveryDataSource_p->nodeDescription() @@ -681,8 +672,8 @@ int RecoveryManager::processReceiveDataChunks( if (recordSeqNum <= receiveDataCtx.d_currSeqNum) { MWCTSK_ALARMLOG_ALARM("REPLICATION") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Received partition sync msg of type " + << partitionId + << "]: " << "Received partition sync msg of type " << header.messageType() << " with sequenceNumber " << recordSeqNum << ", smaller than or equal to self current sequence number: " @@ -696,8 +687,8 @@ int RecoveryManager::processReceiveDataChunks( if (recordSeqNum > receiveDataCtx.d_endSeqNum) { MWCTSK_ALARMLOG_ALARM("REPLICATION") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Received partition sync msg of type " + << partitionId + << "]: " << "Received partition sync msg of type " << header.messageType() << " with sequenceNumber " << recordSeqNum << ", larger than self's expected ending sequence number of " @@ -725,9 +716,8 @@ int RecoveryManager::processReceiveDataChunks( MWCTSK_ALARMLOG_ALARM("REPLICATION") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Received journal record of type [" << header.messageType() - << "] with journal offset mismatch. " + << partitionId << "]: " << "Received journal record of type [" + << header.messageType() << "] with journal offset mismatch. 
" << "Source's journal offset: " << sourceJournalOffset << ", self journal offset: " << journalPos << ", msg sequence number (" << recHeader->primaryLeaseId() @@ -861,10 +851,10 @@ int RecoveryManager::processReceiveDataChunks( recordOffset); if (mqbs::QueueOpType::e_CREATION != queueRec->type() && mqbs::QueueOpType::e_ADDITION != queueRec->type()) { - BALL_LOG_ERROR - << d_clusterData.identity().description() - << " Partition [" << partitionId << "]: " - << " Unexpected QueueOpType: " << queueRec->type(); + BALL_LOG_ERROR << d_clusterData.identity().description() + << " Partition [" << partitionId + << "]: " << " Unexpected QueueOpType: " + << queueRec->type(); return rc_INVALID_QUEUE_RECORD; // RETURN } } @@ -924,8 +914,8 @@ int RecoveryManager::createRecoveryFileSet(bsl::ostream& errorDescription, BSLS_ASSERT_SAFE(recoveryCtx.d_mappedDataFd.isValid()); BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Created recovery data file store set: " + << partitionId + << "]: " << "Created recovery data file store set: " << recoveryCtx.d_recoveryFileSet << ", journal file position: " << recoveryCtx.d_journalFilePosition @@ -1067,8 +1057,7 @@ int RecoveryManager::openRecoveryFileSet(bsl::ostream& errorDescription, } BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Opened recovery file set: " + << partitionId << "]: " << "Opened recovery file set: " << recoveryCtx.d_recoveryFileSet << ", journal file position: " << recoveryCtx.d_journalFilePosition @@ -1105,8 +1094,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to truncate journal file [" + << partitionId << "]: " << "Failed to truncate journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; @@ -1120,8 +1108,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to flush journal file [" + << partitionId << "]: " << "Failed to flush journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; @@ -1132,16 +1119,15 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to close journal file [" + << partitionId << "]: " << "Failed to close journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "], rc: " << rc << MWCTSK_ALARMLOG_END; return rc * 10 + rc_JOURNAL_FD_CLOSE_FAILURE; // RETURN } BALL_LOG_INFO << d_clusterData.identity().description() - << " Partition [" << partitionId << "]: " - << "Closed journal file in recovery file set; " + << " Partition [" << partitionId + << "]: " << "Closed journal file in recovery file set; " << "journal file position was " << recoveryCtx.d_journalFilePosition; } @@ -1154,8 +1140,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to truncate data file [" + << partitionId << "]: " << "Failed to truncate data file 
[" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; errorDesc.reset(); @@ -1167,8 +1152,7 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to flush data file [" + << partitionId << "]: " << "Failed to flush data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc << ", error: " << errorDesc.str() << MWCTSK_ALARMLOG_END; errorDesc.reset(); @@ -1178,16 +1162,15 @@ int RecoveryManager::closeRecoveryFileSet(int partitionId) if (rc != 0) { MWCTSK_ALARMLOG_ALARM("FILE_IO") << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Failed to close data file [" + << partitionId << "]: " << "Failed to close data file [" << recoveryCtx.d_recoveryFileSet.dataFile() << "], rc: " << rc << MWCTSK_ALARMLOG_END; return rc * 10 + rc_DATA_FD_CLOSE_FAILURE; // RETURN } BALL_LOG_INFO << d_clusterData.identity().description() - << " Partition [" << partitionId << "]: " - << "Closed data file in recovery file set; " + << " Partition [" << partitionId + << "]: " << "Closed data file in recovery file set; " << "data file position was " << recoveryCtx.d_dataFilePosition; } @@ -1244,8 +1227,8 @@ int RecoveryManager::recoverSeqNum( const mqbs::RecordHeader& lastRecordHeader = jit.lastRecordHeader(); BALL_LOG_INFO << d_clusterData.identity().description() - << " Partition [" << partitionId << "]: " - << "Recovered Sequence Number " + << " Partition [" << partitionId + << "]: " << "Recovered Sequence Number " << lastRecordHeader.partitionSequenceNumber() << " from journal file [" << recoveryCtx.d_recoveryFileSet.journalFile() << "]."; @@ -1277,8 +1260,7 @@ void RecoveryManager::setLiveDataSource(mqbnet::ClusterNode* source, RecoveryContext& recoveryCtx = d_recoveryContextVec[partitionId]; BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Setting live data source from " + << partitionId << "]: " << "Setting live data source from " << (recoveryCtx.d_liveDataSource_p ? recoveryCtx.d_liveDataSource_p->nodeDescription() : "** NULL **") @@ -1305,8 +1287,8 @@ void RecoveryManager::bufferStorageEvent( BSLS_ASSERT_SAFE(recoveryCtx.d_liveDataSource_p); if (recoveryCtx.d_liveDataSource_p->nodeId() != source->nodeId()) { BALL_LOG_ERROR << d_clusterData.identity().description() - << " Partition [" << partitionId << "]: " - << "Storage event from node " + << " Partition [" << partitionId + << "]: " << "Storage event from node " << source->nodeDescription() << "cannot be buffered, " << "because it is different from the expected live " << "data source node " @@ -1319,8 +1301,8 @@ void RecoveryManager::bufferStorageEvent( recoveryCtx.d_bufferedEvents.push_back(blob); BALL_LOG_INFO << d_clusterData.identity().description() << " Partition [" - << partitionId << "]: " - << "Buffered a storage event from primary node " + << partitionId + << "]: " << "Buffered a storage event from primary node " << source->nodeDescription() << " as self is still healing the partition."; }