-
Notifications
You must be signed in to change notification settings - Fork 144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix mqbc::IncoreCSL: Rollover fixes and improvements #595
base: main
Are you sure you want to change the base?
Changes from all commits
5c9fa2b
b67493c
8581fb0
b270821
baed418
3720fe2
32f9f53
0ada70c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1678,11 +1678,12 @@ int ClusterUtil::validateState(bsl::ostream& errorDescription, | |
bsl::vector<ClusterStatePartitionInfo> incorrectPartitions; | ||
for (size_t pid = 0; pid < state.partitions().size(); ++pid) { | ||
const ClusterStatePartitionInfo& stateInfo = state.partition(pid); | ||
BSLS_ASSERT_SAFE(stateInfo.partitionId() == pid); | ||
BSLS_ASSERT_SAFE(static_cast<size_t>(stateInfo.partitionId()) == pid); | ||
|
||
const ClusterStatePartitionInfo& referenceInfo = reference.partition( | ||
pid); | ||
BSLS_ASSERT_SAFE(referenceInfo.partitionId() == pid); | ||
BSLS_ASSERT_SAFE(static_cast<size_t>(referenceInfo.partitionId()) == | ||
pid); | ||
if (stateInfo.primaryLeaseId() != referenceInfo.primaryLeaseId()) { | ||
// Partition information mismatch. Note that we don't compare | ||
// primaryNodeIds here because 'state' is initialized with cluster | ||
|
@@ -1723,7 +1724,8 @@ int ClusterUtil::validateState(bsl::ostream& errorDescription, | |
for (size_t pid = 0; pid < state.partitions().size(); ++pid) { | ||
const ClusterStatePartitionInfo& referenceInfo = | ||
reference.partitions()[pid]; | ||
BSLS_ASSERT_SAFE(referenceInfo.partitionId() == pid); | ||
BSLS_ASSERT_SAFE( | ||
static_cast<size_t>(referenceInfo.partitionId()) == pid); | ||
bdlb::Print::newlineAndIndent(out, level + 1); | ||
out << "Partition [" << pid | ||
<< "]: primaryLeaseId: " << referenceInfo.primaryLeaseId() | ||
|
@@ -1998,10 +2000,6 @@ int ClusterUtil::load(ClusterState* state, | |
return rc * 10 + rc_ITERATION_ERROR; // RETURN | ||
} | ||
|
||
typedef bsl::unordered_map<bmqp_ctrlmsg::LeaderMessageSequence, | ||
bmqp_ctrlmsg::ClusterMessage> | ||
AdvisoriesMap; | ||
AdvisoriesMap advisories; | ||
do { | ||
BSLS_ASSERT_SAFE(latestIter->isValid()); | ||
|
||
|
@@ -2012,110 +2010,22 @@ int ClusterUtil::load(ClusterState* state, | |
return rc * 10 + rc_MESSAGE_LOAD_ERROR; // RETURN | ||
} | ||
|
||
// Track if advisory, apply if commit | ||
// Apply advisories, whether committed or not. Can ignore commit | ||
// records | ||
typedef bmqp_ctrlmsg::ClusterMessageChoice MsgChoice; // shortcut | ||
switch (clusterMessage.choice().selectionId()) { | ||
case MsgChoice::SELECTION_ID_PARTITION_PRIMARY_ADVISORY: { | ||
const bmqp_ctrlmsg::LeaderMessageSequence& lms = | ||
clusterMessage.choice() | ||
.partitionPrimaryAdvisory() | ||
.sequenceNumber(); | ||
bsl::pair<AdvisoriesMap::iterator, bool> insertRc = | ||
advisories.insert(bsl::make_pair(lms, clusterMessage)); | ||
if (!insertRc.second) { | ||
BALL_LOG_WARN << clusterData.identity().description() | ||
<< ": When loading from cluster state ledger, " | ||
<< "discovered records with duplicate LSN [" | ||
<< lms << "]. Older record type: " | ||
<< advisories.at(lms).choice().selectionId() | ||
<< "; newer record: " << clusterMessage; | ||
}; | ||
} break; // BREAK | ||
case MsgChoice::SELECTION_ID_LEADER_ADVISORY: { | ||
const bmqp_ctrlmsg::LeaderMessageSequence& lms = | ||
clusterMessage.choice().leaderAdvisory().sequenceNumber(); | ||
bsl::pair<AdvisoriesMap::iterator, bool> insertRc = | ||
advisories.insert(bsl::make_pair(lms, clusterMessage)); | ||
if (!insertRc.second) { | ||
BALL_LOG_WARN << clusterData.identity().description() | ||
<< ": When loading from cluster state ledger, " | ||
<< "discovered records with duplicate LSN [" | ||
<< lms << "]. Older record type: " | ||
<< advisories.at(lms).choice().selectionId() | ||
<< "; newer record type:" | ||
<< latestIter->header().recordType(); | ||
}; | ||
} break; // BREAK | ||
case MsgChoice::SELECTION_ID_QUEUE_ASSIGNMENT_ADVISORY: { | ||
const bmqp_ctrlmsg::LeaderMessageSequence& lms = | ||
clusterMessage.choice() | ||
.queueAssignmentAdvisory() | ||
.sequenceNumber(); | ||
bsl::pair<AdvisoriesMap::iterator, bool> insertRc = | ||
advisories.insert(bsl::make_pair(lms, clusterMessage)); | ||
if (!insertRc.second) { | ||
BALL_LOG_WARN << clusterData.identity().description() | ||
<< ": When loading from cluster state ledger, " | ||
<< "discovered records with duplicate LSN [" | ||
<< lms << "]. Older record type: " | ||
<< advisories.at(lms).choice().selectionId() | ||
<< "; newer record: " << clusterMessage; | ||
}; | ||
} break; // BREAK | ||
case MsgChoice::SELECTION_ID_QUEUE_UNASSIGNED_ADVISORY: { | ||
const bmqp_ctrlmsg::LeaderMessageSequence& lms = | ||
clusterMessage.choice() | ||
.queueUnassignedAdvisory() | ||
.sequenceNumber(); | ||
bsl::pair<AdvisoriesMap::iterator, bool> insertRc = | ||
advisories.insert(bsl::make_pair(lms, clusterMessage)); | ||
if (!insertRc.second) { | ||
BALL_LOG_WARN << clusterData.identity().description() | ||
<< ": When loading from cluster state ledger, " | ||
<< "discovered records with duplicate LSN [" | ||
<< lms << "]. Older record type: " | ||
<< advisories.at(lms).choice().selectionId() | ||
<< "; newer record: " << clusterMessage; | ||
}; | ||
} break; // BREAK | ||
case MsgChoice::SELECTION_ID_PARTITION_PRIMARY_ADVISORY: | ||
case MsgChoice::SELECTION_ID_LEADER_ADVISORY: | ||
case MsgChoice::SELECTION_ID_QUEUE_ASSIGNMENT_ADVISORY: | ||
case MsgChoice::SELECTION_ID_QUEUE_UNASSIGNED_ADVISORY: | ||
case MsgChoice::SELECTION_ID_QUEUE_UPDATE_ADVISORY: { | ||
const bmqp_ctrlmsg::LeaderMessageSequence& lms = | ||
clusterMessage.choice().queueUpdateAdvisory().sequenceNumber(); | ||
bsl::pair<AdvisoriesMap::iterator, bool> insertRc = | ||
advisories.insert(bsl::make_pair(lms, clusterMessage)); | ||
if (!insertRc.second) { | ||
BALL_LOG_WARN << clusterData.identity().description() | ||
<< ": When loading from cluster state ledger, " | ||
<< "discovered records with duplicate LSN [" | ||
<< lms << "]. Older record type: " | ||
<< advisories.at(lms).choice().selectionId() | ||
<< "; newer record: " << clusterMessage; | ||
}; | ||
} break; | ||
case MsgChoice::SELECTION_ID_LEADER_ADVISORY_COMMIT: { | ||
const bmqp_ctrlmsg::LeaderMessageSequence& lmsCommitted = | ||
clusterMessage.choice() | ||
.leaderAdvisoryCommit() | ||
.sequenceNumberCommitted(); | ||
|
||
AdvisoriesMap::const_iterator iter = advisories.find(lmsCommitted); | ||
if (iter == advisories.end()) { | ||
BALL_LOG_WARN << clusterData.identity().description() | ||
<< ": Recovered a commit in IncoreCSL for which" | ||
<< " a corresponding advisory was not found: " | ||
<< clusterMessage; | ||
break; // BREAK | ||
} | ||
// Finally, the advisory is applied to the state | ||
const bmqp_ctrlmsg::ClusterMessage& advisory = iter->second; | ||
BALL_LOG_INFO << "#CSL_RECOVERY " | ||
<< clusterData.identity().description() | ||
<< ": Applying a commit recovered from IncoreCSL. " | ||
<< "Commit: " | ||
<< clusterMessage.choice().leaderAdvisoryCommit() | ||
<< ", advisory: " << advisory << "."; | ||
apply(state, advisory, clusterData); | ||
advisories.erase(iter); | ||
<< ": Applying a recovered record from IncoreCSL: " | ||
<< clusterMessage << "."; | ||
apply(state, clusterMessage, clusterData); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This needs clarification (and comment). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider the case where the leader applies an advisory, receives enough acks, and commits the advisory, but then crashes before the followers have a chance to write the commit. One of the followers becomes the new leader. The new leader and the remaining followers will see this as an uncommitted advisory; they must carry out the last wish of the previous leader and commit this advisory. That is why upon [inline code reference missing] We apply uncommitted advisories as replica in [inline code reference missing]. Will add some comments in the code too to help clarify. |
||
} break; // BREAK | ||
case MsgChoice::SELECTION_ID_LEADER_ADVISORY_COMMIT: { | ||
} break; // BREAK | ||
case MsgChoice::SELECTION_ID_UNDEFINED: | ||
default: { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is `ClusterUtil::apply`, same name, different meaning. That one gets called on commit, is that correct?