diff --git a/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp b/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp index 163c93bb90..d155c9a81e 100644 --- a/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp @@ -1770,7 +1770,7 @@ void ClusterOrchestrator::processPrimaryStatusAdvisory( // TBD: may need to review the order of invoking these routines. BALL_LOG_INFO << d_clusterData_p->identity().description() - << " PartitionId [" << primaryAdv.partitionId() + << " Partition [" << primaryAdv.partitionId() << "]: received primary status advisory: " << primaryAdv << ", from: " << source->nodeDescription(); diff --git a/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp b/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp index e90ef1b400..0347a77328 100644 --- a/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_recoverymanager.cpp @@ -1417,6 +1417,15 @@ void RecoveryManager::onPartitionPrimarySyncStatus(int partitionId, int status) PrimarySyncContext& primarySyncCtx = d_primarySyncContexts[partitionId]; BSLS_ASSERT_SAFE(primarySyncCtx.primarySyncInProgress()); + BALL_LOG_INFO << d_clusterData_p->identity().description() + << "For Partition [" << partitionId + << "], primary sync returned with status: " << status + << ". Resetting primary sync peer from " + << (primarySyncCtx.syncPeer() + ? primarySyncCtx.syncPeer()->nodeDescription() + : "** null **") + << " to ** null **."; + d_clusterData_p->scheduler().cancelEventAndWait( &primarySyncCtx.primarySyncStatusEventHandle()); @@ -1427,6 +1436,12 @@ void RecoveryManager::onPartitionPrimarySyncStatus(int partitionId, int status) // still mapped. It will be cleaned up when the chunk deleter // eventually invokes 'partitionSyncCleanupDispatched' routine. + // However, we have already received all sync data chunks from the sync + // peer, so we can reset our sync peer to *null*. This prevents false + // alarm of primary sync failure if that peer happens to go down before + // 'partitionSyncCleanupDispatched' is invoked. + primarySyncCtx.setPrimarySyncPeer(0); + return; // RETURN } diff --git a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp index 2f8572cfef..1c880a49e5 100644 --- a/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp +++ b/src/groups/mqb/mqbblp/mqbblp_storagemanager.cpp @@ -796,13 +796,15 @@ void StorageManager::processPartitionSyncEventDispatched( return; // RETURN } - if (source != d_recoveryManager_mp->primarySyncPeer(partitionId)) { + mqbnet::ClusterNode* syncPeer = d_recoveryManager_mp->primarySyncPeer( + partitionId); + if (source != syncPeer) { BALL_LOG_ERROR << d_clusterData_p->identity().description() << " Partition [" << partitionId << "]: received a partition sync event from: " << source->nodeDescription() << ", while partition-sync peer is: " - << source->nodeDescription(); + << syncPeer->nodeDescription(); return; // RETURN } diff --git a/src/groups/mqb/mqbs/mqbs_filebackedstorage.cpp b/src/groups/mqb/mqbs/mqbs_filebackedstorage.cpp index 5ea7fd38ce..7196ef3b89 100644 --- a/src/groups/mqb/mqbs/mqbs_filebackedstorage.cpp +++ b/src/groups/mqb/mqbs/mqbs_filebackedstorage.cpp @@ -470,7 +470,7 @@ FileBackedStorage::releaseRef(const bmqt::MessageGUID& guid) if (0 != rc) { BMQTSK_ALARMLOG_ALARM("FILE_IO") - << "PartitionId [" << partitionId() << "] failed to write " + << "Partition [" << partitionId() << "] failed to write " << "DELETION record for GUID: " << guid << ", for queue '" << d_queueUri << "', queueKey '" << d_queueKey << "' while attempting to purge the message, rc: " << rc diff --git a/src/groups/mqb/mqbs/mqbs_virtualstoragecatalog.cpp b/src/groups/mqb/mqbs/mqbs_virtualstoragecatalog.cpp index 07c77d43c5..d9085dd4bb 100644 --- a/src/groups/mqb/mqbs/mqbs_virtualstoragecatalog.cpp +++ b/src/groups/mqb/mqbs/mqbs_virtualstoragecatalog.cpp @@ -342,7 +342,7 @@ VirtualStorageCatalog::removeAll(const mqbu::StorageKey& appKey) else { if (result == mqbi::StorageResult::e_GUID_NOT_FOUND) { BALL_LOG_WARN - << "#STORAGE_PURGE_ERROR " << "PartitionId [" + << "#STORAGE_PURGE_ERROR " << "Partition [" << d_storage_p->partitionId() << "]" << ": Attempting to purge GUID '" << itData->first << "' from virtual storage with appId '" @@ -359,7 +359,7 @@ VirtualStorageCatalog::removeAll(const mqbu::StorageKey& appKey) } else { BMQTSK_ALARMLOG_ALARM("STORAGE_PURGE_ERROR") - << "PartitionId [" << d_storage_p->partitionId() << "]" + << "Partition [" << d_storage_p->partitionId() << "]" << ": Attempting to purge GUID '" << itData->first << "' from virtual storage with appId '" << itVs->value()->appId() << "' & appKey '" << appKey