diff --git a/include/sys/zpl.h b/include/sys/zpl.h index 0aacce8bdf43..8f89b76cecbf 100644 --- a/include/sys/zpl.h +++ b/include/sys/zpl.h @@ -30,6 +30,7 @@ #include #include #include +#include /* zpl_inode.c */ extern const struct inode_operations zpl_inode_operations; diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index b7f5daaaf00d..357effe35873 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -4144,6 +4144,17 @@ zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, ZFS_EXIT(zsb); return (EINVAL); } + + /* + * Permissions aren't checked on Solaris because on this OS + * zfs_space() can only be called with an opened file handle. + * On Linux we can get here through truncate_range() which + * operates directly on inodes, so we need to check access rights. + */ + if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { + ZFS_EXIT(zsb); + return (error); + } off = bfp->l_start; len = bfp->l_len; /* 0 means from off to end of file */ diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c index 0ef2c1558f0b..734d449958f2 100644 --- a/module/zfs/zpl_file.c +++ b/module/zfs/zpl_file.c @@ -394,6 +394,39 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) return zpl_putpage(pp, wbc, pp->mapping); } +static long +zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) +{ + struct dentry *dentry = filp->f_path.dentry; + cred_t *cr = CRED(); + flock64_t bf; + int error; + + crhold(cr); + + /* + * The only flag combination which matches the behavior of + * zfs_space() is (FALLOC_FL_PUNCH_HOLE). Any other flag + * combination is currently unsupported. + */ + if (mode & FALLOC_FL_KEEP_SIZE) + return (EOPNOTSUPP); + if (!(mode & FALLOC_FL_PUNCH_HOLE)) + return (EOPNOTSUPP); + + bf.l_type = F_WRLCK; + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + bf.l_pid = 0; + error = -zfs_space(dentry->d_inode, F_FREESP, &bf, FWRITE, offset, cr); + + crfree(cr); + + ASSERT3S(error, <=, 0); + return (error); +} + const struct address_space_operations zpl_address_space_operations = { .readpages = zpl_readpages, .readpage = zpl_readpage, @@ -410,6 +443,7 @@ const struct file_operations zpl_file_operations = { .readdir = zpl_readdir, .mmap = zpl_mmap, .fsync = zpl_fsync, + .fallocate = zpl_fallocate, }; const struct file_operations zpl_dir_file_operations = { diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 17acf3711237..e7b298e6c28a 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -315,6 +315,32 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return (error); } +static void zpl_truncate_range(struct inode* inode, loff_t start, loff_t end) +{ + cred_t *cr = CRED(); + flock64_t bf; + + ASSERT3S(start, <=, end); + + /* + * zfs_freesp() will interpret (len == 0) as meaning "truncate until + * the end of the file". We don't want that. + */ + if (start == end) + return; + + crhold(cr); + + bf.l_type = F_WRLCK; + bf.l_whence = 0; + bf.l_start = start; + bf.l_len = end - start; + bf.l_pid = 0; + zfs_space(inode, F_FREESP, &bf, FWRITE, start, cr); + + crfree(cr); +} + const struct inode_operations zpl_inode_operations = { .create = zpl_create, .link = zpl_link, @@ -330,6 +356,7 @@ const struct inode_operations zpl_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = zpl_xattr_list, + .truncate_range = zpl_truncate_range, }; const struct inode_operations zpl_dir_inode_operations = { diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 61e22b8ecfaf..f6d706f35399 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -534,6 +534,17 @@ zvol_write(void *arg) dmu_tx_t *tx; rl_t *rl; + if (req->cmd_flags & REQ_FLUSH) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + + /* + * Some requests are just for flush and nothing else. + */ + if (size == 0) { + blk_end_request(req, 0, size); + return; + } + rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); tx = dmu_tx_create(zv->zv_objset); @@ -550,17 +561,52 @@ zvol_write(void *arg) error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx); if (error == 0) - zvol_log_write(zv, tx, offset, size, rq_is_sync(req)); + zvol_log_write(zv, tx, offset, size, req->cmd_flags & REQ_FUA); dmu_tx_commit(tx); zfs_range_unlock(rl); - if (rq_is_sync(req)) + if (req->cmd_flags & REQ_FUA || + zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zv->zv_zilog, ZVOL_OBJ); blk_end_request(req, -error, size); } +static void +zvol_discard(void* arg) +{ + struct request *req = (struct request *)arg; + struct request_queue *q = req->q; + zvol_state_t *zv = q->queuedata; + uint64_t offset = blk_rq_pos(req) << 9; + uint64_t size = blk_rq_bytes(req); + int error; + rl_t *rl; + + if (offset + size > zv->zv_volsize) { + blk_end_request(req, -EIO, size); + return; + } + + if (size == 0) { + blk_end_request(req, 0, size); + return; + } + + rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); + + error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size); + + /* + * TODO: maybe we should add the operation to the log. + */ + + zfs_range_unlock(rl); + + blk_end_request(req, -error, size); +} + /* * Common read path running under the zvol taskq context. This function * is responsible for copying the requested data out of the DMU and in to @@ -578,6 +624,11 @@ zvol_read(void *arg) int error; rl_t *rl; + if (size == 0) { + blk_end_request(req, 0, size); + return; + } + rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req); @@ -627,7 +678,7 @@ zvol_request(struct request_queue *q) while ((req = blk_fetch_request(q)) != NULL) { size = blk_rq_bytes(req); - if (blk_rq_pos(req) + blk_rq_sectors(req) > + if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) > get_capacity(zv->zv_disk)) { printk(KERN_INFO "%s: bad access: block=%llu, count=%lu\n", @@ -655,8 +706,11 @@ zvol_request(struct request_queue *q) __blk_end_request(req, -EROFS, size); break; } - - zvol_dispatch(zvol_write, req); + + if (req->cmd_flags & REQ_DISCARD) + zvol_dispatch(zvol_discard, req); + else + zvol_dispatch(zvol_write, req); break; default: printk(KERN_INFO "%s: unknown cmd: %d\n", @@ -1061,6 +1115,9 @@ zvol_alloc(dev_t dev, const char *name) zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock); if (zv->zv_queue == NULL) goto out_kmem; + zv->zv_queue->flush_flags = REQ_FLUSH | REQ_FUA; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue); + blk_queue_max_discard_sectors(zv->zv_queue, UINT_MAX); zv->zv_disk = alloc_disk(ZVOL_MINORS); if (zv->zv_disk == NULL) @@ -1164,6 +1221,13 @@ __zvol_create_minor(const char *name) set_capacity(zv->zv_disk, zv->zv_volsize >> 9); + blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX); + blk_queue_max_segments(zv->zv_queue, USHRT_MAX); + blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); + blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); + blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue); + if (zil_replay_disable) zil_destroy(dmu_objset_zil(os), B_FALSE); else