Skip to content

Commit

Permalink
DAOS-17111 cart: Fix csm_alive_count
Browse files Browse the repository at this point in the history
In swim, csm_alive_count may underflow because some
cst->cst_state.sms_status changes in csm overlook the count. Moreover,
not counting SUSPECT members seems to be a mistake. Consider a
membership of three, {x, y, z}. If x enters a state where it can't
receive any SWIM messages, and it picks y in the next period, then it
will suspect y, causing csm_alive_count to drop from 3 to 2, which
prevents x from declaring an "outage". (In the subsequent period, x will
suspect z, causing csm_alive_count to drop from 2 to 1 quickly.) Since x
keeps pinging SUSPECT members, it seems reasonable to count them in and
expect them to send messages to x until they become DEAD.

This patch fixes the underflow, and counts SUSPECT members in addition
to ALIVE members in csm_alive_count (renamed to
csm_alive_or_suspect_count).

Signed-off-by: Li Wei <[email protected]>
  • Loading branch information
liw committed Feb 27, 2025
1 parent 30fc716 commit 571cd65
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 19 deletions.
62 changes: 44 additions & 18 deletions src/cart/crt_swim.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -179,6 +180,12 @@ crt_swim_membs_iterate(struct crt_swim_membs *csm, d_hash_traverse_cb_t cb, void
return d_hash_table_traverse(csm->csm_table, cb, arg);
}

static inline bool
crt_swim_status_alive_or_suspect(enum swim_member_status status)
{
return status == SWIM_MEMBER_ALIVE || status == SWIM_MEMBER_SUSPECT;
}

/* Move cst into the csm. */
static int
crt_swim_membs_add(struct crt_swim_membs *csm, struct crt_swim_target *cst)
Expand Down Expand Up @@ -214,6 +221,9 @@ crt_swim_membs_add(struct crt_swim_membs *csm, struct crt_swim_target *cst)
if (csm->csm_target == CRT_SWIM_TARGET_INVALID)
csm->csm_target = 0;

if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
csm->csm_alive_or_suspect_count++;

return 0;
}

Expand Down Expand Up @@ -256,6 +266,9 @@ crt_swim_membs_del(struct crt_swim_membs *csm, d_rank_t rank)
deleted = d_hash_rec_delete_at(csm->csm_table, &cst->cst_link);
D_ASSERT(deleted);

if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
csm->csm_alive_or_suspect_count--;

return cst;
}

Expand Down Expand Up @@ -952,12 +965,12 @@ static int crt_swim_set_member_state(struct swim_context *ctx,
crt_swim_csm_lock(csm);
cst = crt_swim_membs_find(csm, id);
if (cst != NULL && state->sms_incarnation >= cst->cst_state.sms_incarnation) {
if (cst->cst_state.sms_status != SWIM_MEMBER_ALIVE &&
state->sms_status == SWIM_MEMBER_ALIVE)
csm->csm_alive_count++;
else if (cst->cst_state.sms_status == SWIM_MEMBER_ALIVE &&
state->sms_status != SWIM_MEMBER_ALIVE)
csm->csm_alive_count--;
if (!crt_swim_status_alive_or_suspect(cst->cst_state.sms_status) &&
crt_swim_status_alive_or_suspect(state->sms_status))
csm->csm_alive_or_suspect_count++;
else if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status) &&
!crt_swim_status_alive_or_suspect(state->sms_status))
csm->csm_alive_or_suspect_count--;
state_prev = cst->cst_state;
cst->cst_state = *state;
rc = 0;
Expand Down Expand Up @@ -1057,7 +1070,7 @@ static int64_t crt_swim_progress_cb(crt_context_t crt_ctx, int64_t timeout_us, v
* The max_delay should be less suspicion timeout to guarantee
* the already suspected members will not be expired.
*/
if (csm->csm_alive_count > 2) {
if (csm->csm_alive_or_suspect_count > 2) {
uint64_t hlc1 = csm->csm_last_unpack_hlc;
uint64_t hlc2 = d_hlc_get();
uint64_t delay = d_hlc2msec(hlc2 - hlc1);
Expand Down Expand Up @@ -1150,7 +1163,7 @@ int crt_swim_init(int crt_ctx_idx)

csm->csm_crt_ctx_idx = crt_ctx_idx;
csm->csm_last_unpack_hlc = hlc;
csm->csm_alive_count = 0;
csm->csm_alive_or_suspect_count = 0;
csm->csm_nglitches = 0;
csm->csm_nmessages = 0;
/*
Expand Down Expand Up @@ -1350,31 +1363,42 @@ void crt_swim_disable_all(void)
old_ctx_idx, NULL);
}

struct crt_swim_suspend_arg {
struct crt_swim_membs *csm;
swim_id_t self_id;
};

static int
crt_swim_suspend_cb(d_list_t *link, void *arg)
crt_swim_suspend_cb(d_list_t *link, void *varg)
{
struct crt_swim_target *cst = crt_swim_target_obj(link);
swim_id_t *self_id = arg;
struct crt_swim_target *cst = crt_swim_target_obj(link);
struct crt_swim_suspend_arg *arg = varg;

if (cst->cst_id != *self_id)
if (cst->cst_id != arg->self_id) {
if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
arg->csm->csm_alive_or_suspect_count--;
cst->cst_state.sms_status = SWIM_MEMBER_INACTIVE;
}
return 0;
}

void crt_swim_suspend_all(void)
{
struct crt_grp_priv *grp_priv = crt_gdata.cg_grp->gg_primary_grp;
struct crt_swim_membs *csm = &grp_priv->gp_membs_swim;
swim_id_t self_id;
int rc;
struct crt_grp_priv *grp_priv = crt_gdata.cg_grp->gg_primary_grp;
struct crt_swim_membs *csm = &grp_priv->gp_membs_swim;
struct crt_swim_suspend_arg arg;
int rc;

if (!crt_gdata.cg_swim_inited)
return;

csm->csm_ctx->sc_glitch = 1;
self_id = swim_self_get(csm->csm_ctx);

arg.csm = csm;
arg.self_id = swim_self_get(csm->csm_ctx);

crt_swim_csm_lock(csm);
rc = crt_swim_membs_iterate(csm, crt_swim_suspend_cb, &self_id);
rc = crt_swim_membs_iterate(csm, crt_swim_suspend_cb, &arg);
D_ASSERTF(rc == 0, "suspend SWIM members: "DF_RC"\n", DP_RC(rc));
crt_swim_csm_unlock(csm);
}
Expand Down Expand Up @@ -1625,6 +1649,8 @@ crt_swim_rank_check(struct crt_grp_priv *grp_priv, d_rank_t rank, uint64_t incar
if (cst->cst_state.sms_incarnation < incarnation) {
state_prev = cst->cst_state;
cst->cst_state.sms_incarnation = incarnation;
if (!crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
csm->csm_alive_or_suspect_count++;
cst->cst_state.sms_status = SWIM_MEMBER_ALIVE;
state = cst->cst_state;
updated = true;
Expand Down
3 changes: 2 additions & 1 deletion src/cart/crt_swim.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2019-2022 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -47,7 +48,7 @@ struct crt_swim_membs {
struct swim_context *csm_ctx;
uint64_t csm_incarnation;
uint64_t csm_last_unpack_hlc;
uint64_t csm_alive_count;
uint64_t csm_alive_or_suspect_count;
int csm_crt_ctx_idx;
int csm_nglitches;
int csm_nmessages;
Expand Down

0 comments on commit 571cd65

Please sign in to comment.