Skip to content

Commit

Permalink
rework how ZVOLs are updated in response to DSL operations
Browse files Browse the repository at this point in the history
With this change all ZVOL updates are initiated from the SPA sync
context instead of a mix of the sync and open contexts.  The updates are
queued to be applied by a dedicated thread in the original order.  This
should ensure that ZVOLs always accurately reflect the corresponding
datasets.  ZFS ioctl operations wait on the mentioned thread to complete
its work.  Thus, the illusion of the synchronous ZVOL update is
preserved.  At the same time, the SPA sync thread never blocks on ZVOL
related operations avoiding problems like reported in bug 203864.

This change is based on earlier work in the same direction: D7179 and
D14669 by Anthoine Bourgeois.  D7179 tried to perform ZVOL operations
in the open context and that opened races between them.  D14669 uses a
design very similar to this change but with different implementation
details.

This change also heavily borrows from similar code in ZoL, but there are
many differences too.  See:
- openzfs/zfs@a0bd735
- openzfs/zfs#3681
- openzfs/zfs#2217

PR:		203864
MFC after:	5 weeks
Sponsored by:	CyberSecure
Differential Revision: https://reviews.freebsd.org/D23478


git-svn-id: svn+ssh://svn.freebsd.org/base/head@362047 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f
  • Loading branch information
avg committed Jun 11, 2020
1 parent 4b47bfa commit 0b5523c
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 72 deletions.
6 changes: 6 additions & 0 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,9 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
doca->doca_cred, tx);
}

#if defined(__FreeBSD__) && defined(_KERNEL)
zvol_create_minors(dp->dp_spa, doca->doca_name);
#endif
spa_history_log_internal_ds(ds, "create", tx, "");
dsl_dataset_rele(ds, FTAG);
dsl_dir_rele(pdd, FTAG);
Expand Down Expand Up @@ -1148,6 +1151,9 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)

VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
dsl_dataset_name(origin, namebuf);
#if defined(__FreeBSD__) && defined(_KERNEL)
zvol_create_minors(dp->dp_spa, doca->doca_clone);
#endif
spa_history_log_internal_ds(ds, "clone", tx,
"origin=%s (%llu)", namebuf, origin->ds_object);
dsl_dataset_rele(ds, FTAG);
Expand Down
8 changes: 8 additions & 0 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
#include <sys/bqueue.h>
#ifdef __FreeBSD__
#include <sys/zvol.h>
#endif

#ifdef __FreeBSD__
#undef dump_write
Expand Down Expand Up @@ -3445,6 +3448,11 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
drc->drc_newsnapobj =
dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
}

#if defined(__FreeBSD__) && defined(_KERNEL)
zvol_create_minors(dp->dp_spa, drc->drc_tofs);
#endif

/*
* Release the hold from dmu_recv_begin. This must be done before
* we return to open context, so that when we free the dataset's dnode,
Expand Down
23 changes: 5 additions & 18 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1572,6 +1572,9 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
dsl_props_set_sync_impl(ds->ds_prev,
ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
}
#if defined(__FreeBSD__) && defined(_KERNEL)
zvol_create_minors(dp->dp_spa, name);
#endif
dsl_dataset_rele(ds, FTAG);
}
}
Expand Down Expand Up @@ -1646,17 +1649,6 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
fnvlist_free(suspended);
}

#ifdef __FreeBSD__
#ifdef _KERNEL
if (error == 0) {
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
char *snapname = nvpair_name(pair);
zvol_create_minors(snapname);
}
}
#endif
#endif
return (error);
}

Expand Down Expand Up @@ -2535,7 +2527,7 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
snprintf(newname, ZFS_MAX_DATASET_NAME_LEN, "%s@%s",
ddrsa->ddrsa_fsname, ddrsa->ddrsa_newsnapname);
zfsvfs_update_fromname(oldname, newname);
zvol_rename_minors(oldname, newname);
zvol_rename_minors(dp->dp_spa, oldname, newname);
kmem_free(newname, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(oldname, ZFS_MAX_DATASET_NAME_LEN);
#endif
Expand Down Expand Up @@ -3087,9 +3079,6 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
}

#if defined(__FreeBSD__) && defined(_KERNEL)
/* Take the spa_namespace_lock early so zvol renames don't deadlock. */
mutex_enter(&spa_namespace_lock);

oldname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
newname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
#endif
Expand Down Expand Up @@ -3135,7 +3124,7 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
#if defined(__FreeBSD__) && defined(_KERNEL)
dsl_dataset_name(ds, newname);
zfsvfs_update_fromname(oldname, newname);
zvol_rename_minors(oldname, newname);
zvol_rename_minors(dp->dp_spa, oldname, newname);
#endif

/* move any clone references */
Expand Down Expand Up @@ -3177,8 +3166,6 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
}

#if defined(__FreeBSD__) && defined(_KERNEL)
mutex_exit(&spa_namespace_lock);

kmem_free(newname, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(oldname, ZFS_MAX_DATASET_NAME_LEN);
#endif
Expand Down
15 changes: 15 additions & 0 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zcp.h>
#if defined(__FreeBSD__) && defined(_KERNEL)
#include <sys/zvol.h>
#endif


int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
Expand Down Expand Up @@ -489,6 +493,14 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
tx));

#if defined(__FreeBSD__) && defined(_KERNEL)
char dsname[ZFS_MAX_DATASET_NAME_LEN];

dsl_dataset_name(ds, dsname);
zvol_remove_minors(dp->dp_spa, dsname);
#endif

dsl_dir_rele(ds->ds_dir, ds);
ds->ds_dir = NULL;
dmu_object_free_zapified(mos, obj, tx);
Expand Down Expand Up @@ -979,6 +991,9 @@ dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)

VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
dsl_destroy_head_sync_impl(ds, tx);
#if defined(__FreeBSD__) && defined(_KERNEL)
zvol_remove_minors(dp->dp_spa, ddha->ddha_name);
#endif
dsl_dataset_rele(ds, FTAG);
}

Expand Down
2 changes: 1 addition & 1 deletion sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -2093,7 +2093,7 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
#ifdef __FreeBSD__
#ifdef _KERNEL
zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname);
zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname, ddra->ddra_newname);
#endif
#endif

Expand Down
32 changes: 30 additions & 2 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2017 Datto Inc.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

/*
Expand Down Expand Up @@ -1280,6 +1281,24 @@ spa_activate(spa_t *spa, int mode)
*/
trim_thread_create(spa);

/*
* This taskq is used to perform zvol-minor-related tasks
* asynchronously. This has several advantages, including easy
* resolution of various deadlocks (zfsonlinux bug #3681).
*
* The taskq must be single threaded to ensure tasks are always
* processed in the order in which they were dispatched.
*
* A taskq per pool allows one to keep the pools independent.
* This way if one pool is suspended, it will not impact another.
*
* The preferred location to dispatch a zvol minor task is a sync
* task. In this context, there is easy access to the spa_t and minimal
* error handling is required because the sync task must succeed.
*/
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, minclsyspri,
1, INT_MAX, 0);

for (size_t i = 0; i < TXG_SIZE; i++) {
spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL,
ZIO_FLAG_CANFAIL);
Expand Down Expand Up @@ -1323,6 +1342,11 @@ spa_deactivate(spa_t *spa)

spa_evicting_os_wait(spa);

if (spa->spa_zvol_taskq) {
taskq_destroy(spa->spa_zvol_taskq);
spa->spa_zvol_taskq = NULL;
}

txg_list_destroy(&spa->spa_vdev_txg_list);

list_destroy(&spa->spa_config_dirty_list);
Expand Down Expand Up @@ -4614,7 +4638,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
#ifdef __FreeBSD__
#ifdef _KERNEL
if (firstopen)
zvol_create_minors(spa->spa_name);
zvol_create_minors(spa, spa->spa_name);
#endif
#endif
}
Expand Down Expand Up @@ -5970,7 +5994,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)

#ifdef __FreeBSD__
#ifdef _KERNEL
zvol_create_minors(pool);
zvol_create_minors(spa, pool);
#endif
#endif
return (0);
Expand Down Expand Up @@ -6119,6 +6143,10 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
spa_open_ref(spa, FTAG);
mutex_exit(&spa_namespace_lock);
spa_async_suspend(spa);
if (spa->spa_zvol_taskq) {
zvol_remove_minors(spa, spa_name(spa));
taskq_wait(spa->spa_zvol_taskq);
}
mutex_enter(&spa_namespace_lock);
spa_close(spa, FTAG);

Expand Down
3 changes: 3 additions & 0 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#ifndef _SYS_SPA_IMPL_H
Expand Down Expand Up @@ -399,6 +400,8 @@ struct spa {

hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */

taskq_t *spa_zvol_taskq; /* Taskq for minor management */

uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */
list_t spa_leaf_list; /* list of leaf vdevs */
Expand Down
10 changes: 5 additions & 5 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#ifndef _SYS_ZVOL_H
Expand All @@ -40,9 +41,6 @@ extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
extern int zvol_create_minor(const char *);
extern int zvol_remove_minor(const char *);
extern void zvol_remove_minors(const char *);
extern int zvol_set_volsize(const char *, uint64_t);

#ifdef illumos
Expand Down Expand Up @@ -72,8 +70,10 @@ extern void zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off,
#endif /* illumos */

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
extern int zvol_create_minors(const char *name);
extern void zvol_rename_minors(const char *oldname, const char *newname);
extern void zvol_create_minors(spa_t *spa, const char *name);
extern void zvol_remove_minors(spa_t *spa, const char *name);
extern void zvol_rename_minors(spa_t *spa, const char *oldname,
const char *newname);
#endif

#endif /* _KERNEL */
Expand Down
56 changes: 37 additions & 19 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1642,8 +1642,10 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc)
int error;
zfs_log_history(zc);
error = spa_destroy(zc->zc_name);
#ifndef __FreeBSD__
if (error == 0)
zvol_remove_minors(zc->zc_name);
#endif
return (error);
}

Expand Down Expand Up @@ -1694,8 +1696,10 @@ zfs_ioc_pool_export(zfs_cmd_t *zc)

zfs_log_history(zc);
error = spa_export(zc->zc_name, NULL, force, hardforce);
#ifndef __FreeBSD__
if (error == 0)
zvol_remove_minors(zc->zc_name);
#endif
return (error);
}

Expand Down Expand Up @@ -3395,13 +3399,23 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error == 0) {
error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
nvprops, outnvl);
#if defined(__FreeBSD__) && defined(_KERNEL)
/*
* Wait for ZVOL operations to settle down before destroying.
*/
if (error != 0) {
spa_t *spa;

if (spa_open(fsname, &spa, FTAG) == 0) {
taskqueue_drain_all(
spa->spa_zvol_taskq->tq_queue);
spa_close(spa, FTAG);
}
}
#endif
if (error != 0)
(void) dsl_destroy_head(fsname);
}
#ifdef __FreeBSD__
if (error == 0 && type == DMU_OST_ZVOL)
zvol_create_minors(fsname);
#endif
return (error);
}

Expand Down Expand Up @@ -3443,10 +3457,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
#ifdef __FreeBSD__
if (error == 0)
zvol_create_minors(fsname);
#endif
return (error);
}

Expand Down Expand Up @@ -3738,9 +3748,6 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
return (SET_ERROR(EXDEV));

zfs_unmount_snap(nvpair_name(pair));
#if defined(__FreeBSD__)
zvol_remove_minors(name);
#endif
}

return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
Expand Down Expand Up @@ -3924,10 +3931,8 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
else
err = dsl_destroy_head(zc->zc_name);
#ifndef __FreeBSD__
if (ost == DMU_OST_ZVOL && err == 0)
#ifdef __FreeBSD__
zvol_remove_minors(zc->zc_name);
#else
(void) zvol_remove_minor(zc->zc_name);
#endif
return (err);
Expand Down Expand Up @@ -4822,11 +4827,6 @@ zfs_ioc_recv(zfs_cmd_t *zc)
}
#endif

#ifdef __FreeBSD__
if (error == 0)
zvol_create_minors(tofs);
#endif

/*
* On error, restore the original props.
*/
Expand Down Expand Up @@ -6968,6 +6968,24 @@ zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
out:
nvlist_free(innvl);

#if defined(__FreeBSD__) && defined(_KERNEL)
/*
* Wait for ZVOL changes to get applied.
* NB: taskqueue_drain_all() does less than taskq_wait(),
* but enough for what we want.
* And there is no equivalent illumos API.
*/
if (error == 0) {
spa_t *spa;

if (spa_open(saved_poolname, &spa, FTAG) == 0) {
taskqueue_drain_all(
spa->spa_zvol_taskq->tq_queue);
spa_close(spa, FTAG);
}
}
#endif

#ifdef illumos
rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
if (error == 0 && rc != 0)
Expand Down
Loading

0 comments on commit 0b5523c

Please sign in to comment.