Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve ZVOL queue behavior #389

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <linux/xattr_compat.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/falloc.h>

/* zpl_inode.c */
extern const struct inode_operations zpl_inode_operations;
Expand Down
11 changes: 11 additions & 0 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -4144,6 +4144,17 @@ zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
ZFS_EXIT(zsb);
return (EINVAL);
}

/*
* Permissions aren't checked on Solaris because on this OS
* zfs_space() can only be called with an opened file handle.
* On Linux we can get here through truncate_range() which
* operates directly on inodes, so we need to check access rights.
*/
if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
ZFS_EXIT(zsb);
return (error);
}

off = bfp->l_start;
len = bfp->l_len; /* 0 means from off to end of file */
Expand Down
34 changes: 34 additions & 0 deletions module/zfs/zpl_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,39 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
return zpl_putpage(pp, wbc, pp->mapping);
}

static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
struct dentry *dentry = filp->f_path.dentry;
cred_t *cr = CRED();
flock64_t bf;
int error;

crhold(cr);

/*
* The only flag combination which matches the behavior of
* zfs_space() is (FALLOC_FL_PUNCH_HOLE). Any other flag
* combination is currently unsupported.
*/
if (mode & FALLOC_FL_KEEP_SIZE)
return (EOPNOTSUPP);
if (!(mode & FALLOC_FL_PUNCH_HOLE))
return (EOPNOTSUPP);

bf.l_type = F_WRLCK;
bf.l_whence = 0;
bf.l_start = offset;
bf.l_len = len;
bf.l_pid = 0;
error = -zfs_space(dentry->d_inode, F_FREESP, &bf, FWRITE, offset, cr);

crfree(cr);

ASSERT3S(error, <=, 0);
return (error);
}

const struct address_space_operations zpl_address_space_operations = {
.readpages = zpl_readpages,
.readpage = zpl_readpage,
Expand All @@ -410,6 +443,7 @@ const struct file_operations zpl_file_operations = {
.readdir = zpl_readdir,
.mmap = zpl_mmap,
.fsync = zpl_fsync,
.fallocate = zpl_fallocate,
};

const struct file_operations zpl_dir_file_operations = {
Expand Down
27 changes: 27 additions & 0 deletions module/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,32 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
return (error);
}

static void zpl_truncate_range(struct inode* inode, loff_t start, loff_t end)
{
cred_t *cr = CRED();
flock64_t bf;

ASSERT3S(start, <=, end);

/*
* zfs_freesp() will interpret (len == 0) as meaning "truncate until
* the end of the file". We don't want that.
*/
if (start == end)
return;

crhold(cr);

bf.l_type = F_WRLCK;
bf.l_whence = 0;
bf.l_start = start;
bf.l_len = end - start;
bf.l_pid = 0;
zfs_space(inode, F_FREESP, &bf, FWRITE, start, cr);

crfree(cr);
}

const struct inode_operations zpl_inode_operations = {
.create = zpl_create,
.link = zpl_link,
Expand All @@ -330,6 +356,7 @@ const struct inode_operations zpl_inode_operations = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
.truncate_range = zpl_truncate_range,
};

const struct inode_operations zpl_dir_inode_operations = {
Expand Down
74 changes: 69 additions & 5 deletions module/zfs/zvol.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,17 @@ zvol_write(void *arg)
dmu_tx_t *tx;
rl_t *rl;

if (req->cmd_flags & REQ_FLUSH)
zil_commit(zv->zv_zilog, ZVOL_OBJ);

/*
* Some requests are just for flush and nothing else.
*/
if (size == 0) {
blk_end_request(req, 0, size);
return;
}

rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);

tx = dmu_tx_create(zv->zv_objset);
Expand All @@ -550,17 +561,52 @@ zvol_write(void *arg)

error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
if (error == 0)
zvol_log_write(zv, tx, offset, size, rq_is_sync(req));
zvol_log_write(zv, tx, offset, size, req->cmd_flags & REQ_FUA);

dmu_tx_commit(tx);
zfs_range_unlock(rl);

if (rq_is_sync(req))
if (req->cmd_flags & REQ_FUA ||
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zv->zv_zilog, ZVOL_OBJ);

blk_end_request(req, -error, size);
}

static void
zvol_discard(void* arg)
{
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error;
rl_t *rl;

if (offset + size > zv->zv_volsize) {
blk_end_request(req, -EIO, size);
return;
}

if (size == 0) {
blk_end_request(req, 0, size);
return;
}

rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);

error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size);

/*
* TODO: maybe we should add the operation to the log.
*/

zfs_range_unlock(rl);

blk_end_request(req, -error, size);
}

/*
* Common read path running under the zvol taskq context. This function
* is responsible for copying the requested data out of the DMU and in to
Expand All @@ -578,6 +624,11 @@ zvol_read(void *arg)
int error;
rl_t *rl;

if (size == 0) {
blk_end_request(req, 0, size);
return;
}

rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);

error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
Expand Down Expand Up @@ -627,7 +678,7 @@ zvol_request(struct request_queue *q)
while ((req = blk_fetch_request(q)) != NULL) {
size = blk_rq_bytes(req);

if (blk_rq_pos(req) + blk_rq_sectors(req) >
if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) >
get_capacity(zv->zv_disk)) {
printk(KERN_INFO
"%s: bad access: block=%llu, count=%lu\n",
Expand Down Expand Up @@ -655,8 +706,11 @@ zvol_request(struct request_queue *q)
__blk_end_request(req, -EROFS, size);
break;
}

zvol_dispatch(zvol_write, req);

if (req->cmd_flags & REQ_DISCARD)
zvol_dispatch(zvol_discard, req);
else
zvol_dispatch(zvol_write, req);
break;
default:
printk(KERN_INFO "%s: unknown cmd: %d\n",
Expand Down Expand Up @@ -1061,6 +1115,9 @@ zvol_alloc(dev_t dev, const char *name)
zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock);
if (zv->zv_queue == NULL)
goto out_kmem;
zv->zv_queue->flush_flags = REQ_FLUSH | REQ_FUA;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
blk_queue_max_discard_sectors(zv->zv_queue, UINT_MAX);

zv->zv_disk = alloc_disk(ZVOL_MINORS);
if (zv->zv_disk == NULL)
Expand Down Expand Up @@ -1164,6 +1221,13 @@ __zvol_create_minor(const char *name)

set_capacity(zv->zv_disk, zv->zv_volsize >> 9);

blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX);
blk_queue_max_segments(zv->zv_queue, USHRT_MAX);
blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);

if (zil_replay_disable)
zil_destroy(dmu_objset_zil(os), B_FALSE);
else
Expand Down