Skip to content

Commit

Permalink
Special vdevs weren't special enough for embedded_logs
Browse files Browse the repository at this point in the history
People keep wanting to use part of their special device as a slog,
but manually partitioning off some space for it seems unnecessarily complex.

So let's just redo the embedded_log dance, but for special vdevs, and make
ZIL allocation prefer those (after a dedicated log device) if they exist.

Also plumbs in a vdev property (embedded_log_target, on by default) for
turning off this behavior on a given vdev in case it is not desirable.

Signed-off-by: Rich Ercolani <[email protected]>
  • Loading branch information
rincebrain committed Mar 13, 2024
1 parent 8f2f6cd commit 7d75cde
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 12 deletions.
22 changes: 20 additions & 2 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -6427,6 +6427,7 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops;
spa->spa_special_embedded_log_class->mc_ops = &zdb_metaslab_ops;

zcb->zcb_vd_obsolete_counts =
umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
Expand Down Expand Up @@ -6567,8 +6568,11 @@ zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)

for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class ==
spa_embedded_log_class(spa)) ?
ASSERT3P(msp->ms_group, ==, (
(msp->ms_group->mg_class ==
spa_embedded_log_class(spa)) ||
(msp->ms_group->mg_class ==
spa_special_embedded_log_class(spa))) ?
vd->vdev_log_mg : vd->vdev_mg);

/*
Expand Down Expand Up @@ -6799,6 +6803,8 @@ dump_block_stats(spa_t *spa)
zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
zcb->zcb_totalasize +=
metaslab_class_get_alloc(spa_embedded_log_class(spa));
zcb->zcb_totalasize +=
metaslab_class_get_alloc(spa_special_embedded_log_class(spa));
zcb->zcb_start = zcb->zcb_lastprint = gethrtime();
err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb);

Expand Down Expand Up @@ -6848,6 +6854,7 @@ dump_block_stats(spa_t *spa)
metaslab_class_get_alloc(spa_log_class(spa)) +
metaslab_class_get_alloc(spa_embedded_log_class(spa)) +
metaslab_class_get_alloc(spa_special_class(spa)) +
metaslab_class_get_alloc(spa_special_embedded_log_class(spa)) +
metaslab_class_get_alloc(spa_dedup_class(spa)) +
get_unflushed_alloc_space(spa);
total_found =
Expand Down Expand Up @@ -6930,6 +6937,17 @@ dump_block_stats(spa_t *spa)
"Embedded log class", (u_longlong_t)alloc,
100.0 * alloc / space);
}
if (spa_special_embedded_log_class(spa)->mc_allocator[0].mca_rotor
!= NULL) {
uint64_t alloc = metaslab_class_get_alloc(
spa_special_embedded_log_class(spa));
uint64_t space = metaslab_class_get_space(
spa_special_embedded_log_class(spa));

(void) printf("\t%-16s %14llu used: %5.2f%%\n",
"Special embedded log class", (u_longlong_t)alloc,
100.0 * alloc / space);
}

for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb->zcb_embedded_blocks[i] == 0)
Expand Down
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ typedef enum {
VDEV_PROP_RAIDZ_EXPANDING,
VDEV_PROP_SLOW_IO_N,
VDEV_PROP_SLOW_IO_T,
VDEV_PROP_ELOG,
VDEV_NUM_PROPS
} vdev_prop_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,7 @@ extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
extern metaslab_class_t *spa_embedded_log_class(spa_t *spa);
extern metaslab_class_t *spa_special_class(spa_t *spa);
extern metaslab_class_t *spa_special_embedded_log_class(spa_t *spa);
extern metaslab_class_t *spa_dedup_class(spa_t *spa);
extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size,
dmu_object_type_t objtype, uint_t level, uint_t special_smallblk);
Expand Down
1 change: 1 addition & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ struct spa {
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
metaslab_class_t *spa_special_embedded_log_class; /* "" special */
metaslab_class_t *spa_special_class; /* special allocation class */
metaslab_class_t *spa_dedup_class; /* dedup allocation class */
uint64_t spa_first_txg; /* first txg after spa_open() */
Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ struct vdev {
uint64_t vdev_ms_count; /* number of metaslabs */
metaslab_group_t *vdev_mg; /* metaslab group */
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
boolean_t use_embedded_log; /* use embedded slog mg */
metaslab_t **vdev_ms; /* metaslab array */
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
Expand Down
3 changes: 3 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING",
boolean_table, sfeatures);
zprop_register_index(VDEV_PROP_ELOG, "embedded_log_target", 1,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "ELOG",
boolean_table, sfeatures);

/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
Expand Down
22 changes: 22 additions & 0 deletions module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,15 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_class_t *mc = mg->mg_class;

/*
* If we're attempting to allocate from an embedded_log class,
* and we have it set to not use that on this vdev, don't.
*/
if ((mc == spa_special_embedded_log_class(spa) ||
mc == spa_embedded_log_class(spa)) &&
mg->mg_vd->use_embedded_log == B_FALSE) {
return (B_FALSE);
}
/*
* We can only consider skipping this metaslab group if it's
* in the normal metaslab class and there are other metaslab
Expand Down Expand Up @@ -5226,6 +5235,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
ASSERT(mg->mg_activation_count == 1);
vd = mg->mg_vd;


/*
* Don't allocate from faulted devices.
*/
Expand All @@ -5237,6 +5247,18 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
allocatable = vdev_allocatable(vd);
}

/*
* If we're trying a log allocation from an
* embedded_log allocation class, and we
* have turned off allocating those from this vdev,
* don't.
*/
if ((mc == spa_special_embedded_log_class(spa) ||
mc == spa_embedded_log_class(spa)) &&
((flags & METASLAB_ZIL) != 0) &&
vd->use_embedded_log == B_FALSE)
allocatable = B_FALSE;

/*
* Determine if the selected metaslab group is eligible
* for allocations. If we're ganging then don't allow
Expand Down
12 changes: 12 additions & 0 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,11 +367,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
alloc += metaslab_class_get_alloc(spa_special_class(spa));
alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
alloc += metaslab_class_get_alloc(spa_embedded_log_class(spa));
alloc += metaslab_class_get_alloc(
spa_special_embedded_log_class(spa));

size = metaslab_class_get_space(mc);
size += metaslab_class_get_space(spa_special_class(spa));
size += metaslab_class_get_space(spa_dedup_class(spa));
size += metaslab_class_get_space(spa_embedded_log_class(spa));
size += metaslab_class_get_space(
spa_special_embedded_log_class(spa));

spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
Expand Down Expand Up @@ -1634,6 +1638,7 @@ spa_activate(spa_t *spa, spa_mode_t mode)
spa->spa_log_class = metaslab_class_create(spa, msp);
spa->spa_embedded_log_class = metaslab_class_create(spa, msp);
spa->spa_special_class = metaslab_class_create(spa, msp);
spa->spa_special_embedded_log_class = metaslab_class_create(spa, msp);
spa->spa_dedup_class = metaslab_class_create(spa, msp);

/* Try to create a covering process */
Expand Down Expand Up @@ -1807,6 +1812,9 @@ spa_deactivate(spa_t *spa)
metaslab_class_destroy(spa->spa_special_class);
spa->spa_special_class = NULL;

metaslab_class_destroy(spa->spa_special_embedded_log_class);
spa->spa_special_embedded_log_class = NULL;

metaslab_class_destroy(spa->spa_dedup_class);
spa->spa_dedup_class = NULL;

Expand Down Expand Up @@ -8792,6 +8800,8 @@ spa_async_thread(void *arg)
old_space += metaslab_class_get_space(spa_dedup_class(spa));
old_space += metaslab_class_get_space(
spa_embedded_log_class(spa));
old_space += metaslab_class_get_space(
spa_special_embedded_log_class(spa));

spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);

Expand All @@ -8800,6 +8810,8 @@ spa_async_thread(void *arg)
new_space += metaslab_class_get_space(spa_dedup_class(spa));
new_space += metaslab_class_get_space(
spa_embedded_log_class(spa));
new_space += metaslab_class_get_space(
spa_special_embedded_log_class(spa));
mutex_exit(&spa_namespace_lock);

/*
Expand Down
24 changes: 18 additions & 6 deletions module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1262,7 +1262,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,

int config_changed = B_FALSE;

ASSERT(txg > spa_last_synced_txg(spa));
ASSERT3U(txg, >, spa_last_synced_txg(spa));

spa->spa_pending_vdev = NULL;

Expand All @@ -1279,11 +1279,13 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
/*
* Verify the metaslab classes.
*/
ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_embedded_log_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0);
ASSERT3U(metaslab_class_validate(spa_normal_class(spa)), ==, 0);
ASSERT3U(metaslab_class_validate(spa_log_class(spa)), ==, 0);
ASSERT3U(metaslab_class_validate(spa_embedded_log_class(spa)), ==, 0);
ASSERT3U(metaslab_class_validate(spa_special_class(spa)), ==, 0);
ASSERT3U(metaslab_class_validate(
spa_special_embedded_log_class(spa)), ==, 0);
ASSERT3U(metaslab_class_validate(spa_dedup_class(spa)), ==, 0);

spa_config_exit(spa, SCL_ALL, spa);

Expand Down Expand Up @@ -1851,6 +1853,10 @@ spa_get_slop_space(spa_t *spa)
metaslab_class_get_dspace(spa_embedded_log_class(spa));
slop -= MIN(embedded_log, slop >> 1);

uint64_t s_embedded_log =
metaslab_class_get_dspace(spa_special_embedded_log_class(spa));
slop -= MIN(s_embedded_log, slop >> 1);

/*
* Slop space should be at least spa_min_slop, but no more than half
* the entire pool.
Expand Down Expand Up @@ -1952,6 +1958,12 @@ spa_special_class(spa_t *spa)
return (spa->spa_special_class);
}

/*
 * Accessor for the pool's special embedded log metaslab class, i.e. the
 * class whose metaslab groups are created on special-bias vdevs to hold
 * embedded intent-log space.  Simply hands back the pointer stored in
 * the spa; no reference is taken.
 */
metaslab_class_t *
spa_special_embedded_log_class(spa_t *spa)
{
	metaslab_class_t *special_elog_mc = spa->spa_special_embedded_log_class;

	return (special_elog_mc);
}

metaslab_class_t *
spa_dedup_class(spa_t *spa)
{
Expand Down
35 changes: 31 additions & 4 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,9 @@ vdev_getops(const char *type)
metaslab_group_t *
vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
{
if (mc == spa_embedded_log_class(vd->vdev_spa) &&
vd->vdev_log_mg != NULL)
if ((mc == spa_embedded_log_class(vd->vdev_spa) ||
mc == spa_special_embedded_log_class(vd->vdev_spa)) &&
vd->vdev_log_mg != NULL && vd->use_embedded_log == B_TRUE)
return (vd->vdev_log_mg);
else
return (vd->vdev_mg);
Expand Down Expand Up @@ -1470,6 +1471,13 @@ vdev_metaslab_group_create(vdev_t *vd)
if (!vd->vdev_islog) {
vd->vdev_log_mg = metaslab_group_create(
spa_embedded_log_class(spa), vd, 1);
vd->use_embedded_log = B_TRUE;
}

if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL) {
vd->vdev_log_mg = metaslab_group_create(
spa_special_embedded_log_class(spa), vd, 1);
vd->use_embedded_log = B_TRUE;
}

/*
Expand Down Expand Up @@ -1555,7 +1563,8 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
* embedded slog by moving it from the regular to the log metaslab
* group.
*/
if (vd->vdev_mg->mg_class == spa_normal_class(spa) &&
if ((vd->vdev_mg->mg_class == spa_normal_class(spa) ||
vd->vdev_mg->mg_class == spa_special_class(spa)) &&
vd->vdev_ms_count > zfs_embedded_slog_min_ms &&
avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) {
uint64_t slog_msid = 0;
Expand Down Expand Up @@ -5998,11 +6007,18 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_slow_io_t = intval;
break;
case VDEV_PROP_ELOG:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->use_embedded_log = intval;
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
}
end:
end:
if (error != 0) {
intval = error;
vdev_prop_add_list(outnvl, propname, strval, intval, 0);
Expand Down Expand Up @@ -6318,6 +6334,17 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, NULL,
intval, src);
break;
case VDEV_PROP_ELOG:
intval = vd->use_embedded_log;

if (intval == vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;
else
src = ZPROP_SRC_LOCAL;
vdev_prop_add_list(outnvl, propname, NULL,
intval, src);
break;

case VDEV_PROP_FAILFAST:
src = ZPROP_SRC_LOCAL;
strval = NULL;
Expand Down
5 changes: 5 additions & 0 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -3858,6 +3858,11 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, NULL, allocator);
*slog = (error == 0);
if (error != 0) {
error = metaslab_alloc(spa, spa_special_embedded_log_class(spa),
size, new_bp, 1, txg, NULL, flags,
&io_alloc_list, NULL, allocator);
}
if (error != 0) {
error = metaslab_alloc(spa, spa_embedded_log_class(spa), size,
new_bp, 1, txg, NULL, flags,
Expand Down

0 comments on commit 7d75cde

Please sign in to comment.