From def1808963fec215d1c54e5553becc2371a61528 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Fri, 15 Nov 2024 16:57:49 -0500 Subject: [PATCH] git subrepo pull --force sys/contrib/subrepo-openzfs subrepo: subdir: "sys/contrib/subrepo-openzfs" merged: "889234a31793" upstream: origin: "https://github.com/CTSRD-CHERI/zfs.git" branch: "cheri-hybrid" commit: "889234a31793" git-subrepo: version: "0.4.6" origin: "???" commit: "???" --- .../.github/workflows/zfs-linux-tests.yml | 4 +- sys/contrib/subrepo-openzfs/.gitrepo | 4 +- sys/contrib/subrepo-openzfs/.mailmap | 6 + sys/contrib/subrepo-openzfs/AUTHORS | 13 + sys/contrib/subrepo-openzfs/README.md | 2 +- sys/contrib/subrepo-openzfs/cmd/arc_summary | 147 +- sys/contrib/subrepo-openzfs/cmd/zdb/zdb.c | 33 +- .../subrepo-openzfs/cmd/zfs/zfs_main.c | 428 ++- sys/contrib/subrepo-openzfs/cmd/zilstat.in | 3 + .../subrepo-openzfs/cmd/zpool/zpool_main.c | 2507 ++++++++++++++--- .../cmd/zstream/zstream_redup.c | 9 - sys/contrib/subrepo-openzfs/cmd/ztest.c | 37 +- sys/contrib/subrepo-openzfs/config/Rules.am | 2 +- .../contrib/bash_completion.d/.gitignore | 1 + .../contrib/bash_completion.d/Makefile.am | 13 +- .../contrib/debian/openzfs-zfsutils.install | 1 + sys/contrib/subrepo-openzfs/include/libzfs.h | 28 +- .../subrepo-openzfs/include/libzfs_core.h | 3 + .../include/os/freebsd/spl/sys/ccompat.h | 42 - .../include/os/freebsd/spl/sys/kmem.h | 1 + .../include/os/freebsd/spl/sys/policy.h | 6 - .../include/os/freebsd/spl/sys/proc.h | 3 - .../include/os/freebsd/spl/sys/random.h | 4 +- .../include/os/freebsd/spl/sys/vm.h | 16 - .../include/os/freebsd/spl/sys/vnode.h | 6 +- .../os/freebsd/zfs/sys/zfs_vfsops_os.h | 77 - .../include/os/linux/spl/sys/kmem_cache.h | 5 +- sys/contrib/subrepo-openzfs/include/sys/abd.h | 7 + sys/contrib/subrepo-openzfs/include/sys/arc.h | 11 + .../subrepo-openzfs/include/sys/arc_impl.h | 4 +- .../subrepo-openzfs/include/sys/dbuf.h | 16 +- sys/contrib/subrepo-openzfs/include/sys/ddt.h | 9 +- .../subrepo-openzfs/include/sys/ddt_impl.h | 1 + sys/contrib/subrepo-openzfs/include/sys/dmu.h | 8 + .../subrepo-openzfs/include/sys/dnode.h | 5 + .../subrepo-openzfs/include/sys/fs/zfs.h | 28 + .../subrepo-openzfs/include/sys/sa_impl.h | 1 - sys/contrib/subrepo-openzfs/include/sys/spa.h | 6 +- .../subrepo-openzfs/include/sys/spa_impl.h | 4 + sys/contrib/subrepo-openzfs/include/sys/zap.h | 1 + .../subrepo-openzfs/include/sys/zfs_context.h | 1 + sys/contrib/subrepo-openzfs/include/sys/zil.h | 18 + sys/contrib/subrepo-openzfs/include/sys/zio.h | 1 + .../subrepo-openzfs/include/sys/zvol_impl.h | 18 +- .../lib/libshare/os/freebsd/nfs.c | 71 +- .../lib/libspl/include/statcommon.h | 4 + .../subrepo-openzfs/lib/libspl/timestamp.c | 42 + .../subrepo-openzfs/lib/libuutil/libuutil.abi | 14 + .../subrepo-openzfs/lib/libzfs/libzfs.abi | 127 +- .../subrepo-openzfs/lib/libzfs/libzfs_impl.h | 3 + .../subrepo-openzfs/lib/libzfs/libzfs_pool.c | 232 +- .../subrepo-openzfs/lib/libzfs/libzfs_util.c | 168 ++ .../lib/libzfs_core/libzfs_core.abi | 27 + .../lib/libzfs_core/libzfs_core.c | 20 + sys/contrib/subrepo-openzfs/man/Makefile.am | 1 + sys/contrib/subrepo-openzfs/man/man4/zfs.4 | 7 + .../subrepo-openzfs/man/man7/vdevprops.7 | 10 +- .../subrepo-openzfs/man/man7/zfsprops.7 | 14 +- .../subrepo-openzfs/man/man7/zpoolprops.7 | 31 +- .../subrepo-openzfs/man/man8/zfs-list.8 | 161 ++ .../subrepo-openzfs/man/man8/zfs-mount.8 | 6 + .../subrepo-openzfs/man/man8/zfs-set.8 | 51 + sys/contrib/subrepo-openzfs/man/man8/zfs.8 | 5 + .../subrepo-openzfs/man/man8/zpool-get.8 | 17 + 
.../subrepo-openzfs/man/man8/zpool-list.8 | 107 + .../subrepo-openzfs/man/man8/zpool-prefetch.8 | 46 + .../subrepo-openzfs/man/man8/zpool-status.8 | 185 +- sys/contrib/subrepo-openzfs/man/man8/zpool.8 | 10 +- sys/contrib/subrepo-openzfs/module/Kbuild.in | 1 + .../module/os/freebsd/spl/spl_kmem.c | 8 - .../module/os/freebsd/spl/spl_policy.c | 52 +- .../module/os/freebsd/spl/spl_taskq.c | 5 - .../module/os/freebsd/spl/spl_vfs.c | 12 +- .../module/os/freebsd/spl/spl_zone.c | 4 +- .../module/os/freebsd/zfs/abd_os.c | 2 +- .../module/os/freebsd/zfs/arc_os.c | 19 +- .../module/os/freebsd/zfs/crypto_os.c | 249 -- .../module/os/freebsd/zfs/dmu_os.c | 27 +- .../module/os/freebsd/zfs/event_os.c | 19 - .../module/os/freebsd/zfs/kmod_core.c | 4 - .../module/os/freebsd/zfs/vdev_geom.c | 4 - .../module/os/freebsd/zfs/zfs_ctldir.c | 28 +- .../module/os/freebsd/zfs/zfs_dir.c | 4 +- .../module/os/freebsd/zfs/zfs_file_os.c | 10 +- .../module/os/freebsd/zfs/zfs_ioctl_os.c | 4 - .../module/os/freebsd/zfs/zfs_vfsops.c | 49 +- .../module/os/freebsd/zfs/zfs_vnops_os.c | 299 +- .../module/os/freebsd/zfs/zfs_znode.c | 19 +- .../module/os/freebsd/zfs/zvol_os.c | 18 +- .../module/os/linux/spl/spl-kmem-cache.c | 8 + .../module/os/linux/spl/spl-zlib.c | 2 +- .../module/os/linux/zfs/abd_os.c | 6 +- .../module/os/linux/zfs/arc_os.c | 88 +- .../module/os/linux/zfs/zfs_vfsops.c | 18 +- .../module/os/linux/zfs/zfs_znode.c | 3 +- .../module/os/linux/zfs/zvol_os.c | 23 +- .../module/zcommon/zpool_prop.c | 20 +- sys/contrib/subrepo-openzfs/module/zfs/abd.c | 25 + sys/contrib/subrepo-openzfs/module/zfs/arc.c | 230 +- sys/contrib/subrepo-openzfs/module/zfs/dbuf.c | 35 +- sys/contrib/subrepo-openzfs/module/zfs/ddt.c | 160 +- .../subrepo-openzfs/module/zfs/ddt_stats.c | 79 +- .../subrepo-openzfs/module/zfs/ddt_zap.c | 7 + sys/contrib/subrepo-openzfs/module/zfs/dmu.c | 281 +- .../subrepo-openzfs/module/zfs/dmu_tx.c | 5 +- .../subrepo-openzfs/module/zfs/dnode.c | 34 +- .../subrepo-openzfs/module/zfs/lz4_zfs.c | 3 +- sys/contrib/subrepo-openzfs/module/zfs/sa.c | 88 +- sys/contrib/subrepo-openzfs/module/zfs/spa.c | 75 +- .../subrepo-openzfs/module/zfs/spa_misc.c | 29 +- sys/contrib/subrepo-openzfs/module/zfs/vdev.c | 18 + .../subrepo-openzfs/module/zfs/vdev_label.c | 123 +- .../subrepo-openzfs/module/zfs/zap_micro.c | 16 + .../subrepo-openzfs/module/zfs/zfs_ioctl.c | 98 +- .../subrepo-openzfs/module/zfs/zfs_log.c | 2 +- .../subrepo-openzfs/module/zfs/zfs_replay.c | 4 +- sys/contrib/subrepo-openzfs/module/zfs/zil.c | 40 +- sys/contrib/subrepo-openzfs/module/zfs/zio.c | 104 +- .../subrepo-openzfs/module/zfs/zio_compress.c | 28 +- sys/contrib/subrepo-openzfs/module/zfs/zvol.c | 116 +- .../subrepo-openzfs/rpm/generic/zfs.spec.in | 1 + .../subrepo-openzfs/tests/runfiles/common.run | 17 +- .../subrepo-openzfs/tests/runfiles/linux.run | 2 +- .../tests/test-runner/bin/test-runner.py.in | 2 +- .../tests/test-runner/bin/zts-report.py.in | 7 +- .../tests/zfs-tests/cmd/mmap_seek.c | 10 + .../tests/zfs-tests/include/commands.cfg | 1 + .../tests/zfs-tests/include/tunables.cfg | 7 +- .../tests/zfs-tests/tests/Makefile.am | 11 + .../block_cloning_rlimit_fsize.ksh | 64 + .../functional/cli_root/json/cleanup.ksh | 31 + .../functional/cli_root/json/json_sanity.ksh | 57 + .../tests/functional/cli_root/json/setup.ksh | 50 + .../cli_root/zfs_clone/zfs_clone_010_pos.ksh | 10 +- .../cli_root/zfs_copies/zfs_copies.kshlib | 20 +- .../cli_root/zpool_get/vdev_get.cfg | 3 + .../cli_root/zpool_get/zpool_get.cfg | 2 + .../cli_root/zpool_prefetch/cleanup.ksh | 30 + 
.../cli_root/zpool_prefetch/setup.ksh | 32 + .../zpool_prefetch/zpool_prefetch_001_pos.ksh | 128 + .../zpool_set/user_property_001_pos.ksh | 16 +- .../zpool_set/user_property_002_neg.ksh | 28 +- .../tests/functional/cp_files/cp_stress.ksh | 9 +- .../tests/functional/cp_files/seekflood.c | 7 + .../tests/functional/dedup/cleanup.ksh | 29 + .../tests/functional/dedup/dedup_quota.ksh | 223 ++ .../tests/functional/dedup/setup.ksh | 31 + .../tests/functional/history/history.cfg | 6 +- .../tests/functional/io/io_uring.ksh | 6 +- .../upgrade/upgrade_projectquota_002_pos.ksh | 80 + .../zvol/zvol_misc/zvol_misc_fua.ksh | 9 - .../zvol/zvol_misc/zvol_misc_trim.ksh | 9 - 152 files changed, 6489 insertions(+), 1953 deletions(-) create mode 100644 sys/contrib/subrepo-openzfs/man/man8/zpool-prefetch.8 create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/cleanup.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/setup.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/cleanup.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/setup.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/cleanup.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/setup.ksh create mode 100755 sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_002_pos.ksh diff --git a/sys/contrib/subrepo-openzfs/.github/workflows/zfs-linux-tests.yml b/sys/contrib/subrepo-openzfs/.github/workflows/zfs-linux-tests.yml index 753f3cd0214e..74f94ab8f2a4 100644 --- a/sys/contrib/subrepo-openzfs/.github/workflows/zfs-linux-tests.yml +++ b/sys/contrib/subrepo-openzfs/.github/workflows/zfs-linux-tests.yml @@ -37,7 +37,7 @@ jobs: - uses: actions/upload-artifact@v4 if: failure() with: - name: Zpool-logs-${{ inputs.os }} + name: Zloop-logs-${{ inputs.os }} path: | /var/tmp/zloop/*/ !/var/tmp/zloop/*/vdev/ @@ -46,7 +46,7 @@ jobs: - uses: actions/upload-artifact@v4 if: failure() with: - name: Zpool-files-${{ inputs.os }} + name: Zloop-files-${{ inputs.os }} path: | /var/tmp/zloop/*/vdev/ retention-days: 14 diff --git a/sys/contrib/subrepo-openzfs/.gitrepo b/sys/contrib/subrepo-openzfs/.gitrepo index c97366fce699..36dcef9f4584 100644 --- a/sys/contrib/subrepo-openzfs/.gitrepo +++ b/sys/contrib/subrepo-openzfs/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/CTSRD-CHERI/zfs.git branch = cheri-hybrid - commit = 5dc4c8bd546afd79d88af74a2c369ace8d75f76f - parent = f9a9fc7f3e6166efff6beac001c2b37a4620df52 + commit = 889234a31793916a18f7d701107f370651e89311 + parent = 13faa8e5d596db1d82d8173dc543631e488aca50 method = merge cmdver = 0.4.6 diff --git a/sys/contrib/subrepo-openzfs/.mailmap b/sys/contrib/subrepo-openzfs/.mailmap index 32bdb5209613..7e17d82d7352 100644 --- a/sys/contrib/subrepo-openzfs/.mailmap +++ b/sys/contrib/subrepo-openzfs/.mailmap @@ -77,7 +77,10 @@ Yanping Gao 
Youzhong Yang # Signed-off-by: overriding Author: +Ryan +Qiuhao Chen Yuxin Wang +Zhenlei Huang # Commits from strange places, long ago Brian Behlendorf @@ -95,6 +98,7 @@ Alek Pinchuk Alexander Lobakin Alexey Smirnoff Allen Holl <65494904+allen-4@users.noreply.github.com> +Alphan Yılmaz Ameer Hamza <106930537+ixhamza@users.noreply.github.com> Andrew J. Hesford <48421688+ahesford@users.noreply.github.com>> Andrew Sun @@ -102,6 +106,7 @@ Aron Xu Arun KV <65647132+arun-kv@users.noreply.github.com> Ben Wolsieffer bernie1995 <42413912+bernie1995@users.noreply.github.com> +Bojan Novković <72801811+bnovkov@users.noreply.github.com> Boris Protopopov Brad Forschinger Brandon Thetford @@ -193,6 +198,7 @@ Stefan Lendl <1321542+stfl@users.noreply.github.com> Thomas Bertschinger <101425190+bertschinger@users.noreply.github.com> Thomas Geppert Tim Crawford +Todd Seidelmann <18294602+seidelma@users.noreply.github.com> Tom Matthews Tony Perkins <62951051+tony-zfs@users.noreply.github.com> Torsten Wörtwein diff --git a/sys/contrib/subrepo-openzfs/AUTHORS b/sys/contrib/subrepo-openzfs/AUTHORS index d7d55f42d2e7..09814b435311 100644 --- a/sys/contrib/subrepo-openzfs/AUTHORS +++ b/sys/contrib/subrepo-openzfs/AUTHORS @@ -46,6 +46,7 @@ CONTRIBUTORS: Alex Zhuravlev Allan Jude Allen Holl + Alphan Yılmaz alteriks Alyssa Ross Ameer Hamza @@ -99,6 +100,7 @@ CONTRIBUTORS: bernie1995 Bill McGonigle Bill Pijewski + Bojan Novković Boris Protopopov Brad Forschinger Brad Lewis @@ -168,6 +170,7 @@ CONTRIBUTORS: Daniel Hoffman Daniel Kobras Daniel Kolesa + Daniel Perry Daniel Reichelt Daniel Stevenson Daniel Verite @@ -187,6 +190,7 @@ CONTRIBUTORS: Dennis R. Friedrichsen Denys Rtveliashvili Derek Dai + Derek Schrock Dex Wood DHE Didier Roche @@ -245,6 +249,7 @@ CONTRIBUTORS: Gionatan Danti Giuseppe Di Natale Glenn Washburn + glibg10b gofaster Gordan Bobic Gordon Bergling @@ -410,6 +415,7 @@ CONTRIBUTORS: Mart Frauenlob Martin Matuska Martin Rüegg + Martin Wagner Massimo Maggi Mateusz Guzik Mateusz Piotrowski <0mp@FreeBSD.org> @@ -488,6 +494,7 @@ CONTRIBUTORS: Peng Peter Ashford Peter Dave Hello + Peter Doherty Peter Levine Peter Wirdemo Petros Koutoupis @@ -501,6 +508,7 @@ CONTRIBUTORS: Prasad Joshi privb0x23 P.SCH + Qiuhao Chen Quartz Quentin Zdanis Rafael Kitover @@ -532,6 +540,7 @@ CONTRIBUTORS: Roman Strashkin Ross Williams Ruben Kerkhof + Ryan Ryan Hirasaki Ryan Lahfa Ryan Libby @@ -556,6 +565,7 @@ CONTRIBUTORS: Sen Haerens Serapheim Dimitropoulos Seth Forshee + Seth Troisi Shaan Nobee Shampavman Shaun Tancheff @@ -602,6 +612,7 @@ CONTRIBUTORS: Tim Schumacher Tino Reichardt Tobin Harding + Todd Seidelmann Tom Caputi Tom Matthews Tomohiro Kusumi @@ -653,6 +664,8 @@ CONTRIBUTORS: Zachary Bedell Zach Dykstra zgock + Zhao Yongming + Zhenlei Huang Zhu Chuang Érico Nogueira Đoàn Trần Công Danh diff --git a/sys/contrib/subrepo-openzfs/README.md b/sys/contrib/subrepo-openzfs/README.md index af244c1fff14..a90736bb56b7 100644 --- a/sys/contrib/subrepo-openzfs/README.md +++ b/sys/contrib/subrepo-openzfs/README.md @@ -32,4 +32,4 @@ For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197` # Supported Kernels * The `META` file contains the officially recognized supported Linux kernel versions. - * Supported FreeBSD versions are any supported branches and releases starting from 12.4-RELEASE. + * Supported FreeBSD versions are any supported branches and releases starting from 13.0-RELEASE. 
diff --git a/sys/contrib/subrepo-openzfs/cmd/arc_summary b/sys/contrib/subrepo-openzfs/cmd/arc_summary index 100fb1987a8b..2d0bfddd93a9 100755 --- a/sys/contrib/subrepo-openzfs/cmd/arc_summary +++ b/sys/contrib/subrepo-openzfs/cmd/arc_summary @@ -260,33 +260,34 @@ def draw_graph(kstats_dict): arc_stats = isolate_section('arcstats', kstats_dict) GRAPH_INDENT = ' '*4 - GRAPH_WIDTH = 60 + GRAPH_WIDTH = 70 + arc_max = int(arc_stats['c_max']) arc_size = f_bytes(arc_stats['size']) - arc_perc = f_perc(arc_stats['size'], arc_stats['c_max']) - mfu_size = f_bytes(arc_stats['mfu_size']) - mru_size = f_bytes(arc_stats['mru_size']) - meta_size = f_bytes(arc_stats['arc_meta_used']) - dnode_limit = f_bytes(arc_stats['arc_dnode_limit']) + arc_perc = f_perc(arc_stats['size'], arc_max) + data_size = f_bytes(arc_stats['data_size']) + meta_size = f_bytes(arc_stats['metadata_size']) dnode_size = f_bytes(arc_stats['dnode_size']) - info_form = ('ARC: {0} ({1}) MFU: {2} MRU: {3} META: {4} ' - 'DNODE {5} ({6})') - info_line = info_form.format(arc_size, arc_perc, mfu_size, mru_size, - meta_size, dnode_size, dnode_limit) + info_form = ('ARC: {0} ({1}) Data: {2} Meta: {3} Dnode: {4}') + info_line = info_form.format(arc_size, arc_perc, data_size, meta_size, + dnode_size) info_spc = ' '*int((GRAPH_WIDTH-len(info_line))/2) info_line = GRAPH_INDENT+info_spc+info_line graph_line = GRAPH_INDENT+'+'+('-'*(GRAPH_WIDTH-2))+'+' - mfu_perc = float(int(arc_stats['mfu_size'])/int(arc_stats['c_max'])) - mru_perc = float(int(arc_stats['mru_size'])/int(arc_stats['c_max'])) - arc_perc = float(int(arc_stats['size'])/int(arc_stats['c_max'])) + arc_perc = float(int(arc_stats['size'])/arc_max) + data_perc = float(int(arc_stats['data_size'])/arc_max) + meta_perc = float(int(arc_stats['metadata_size'])/arc_max) + dnode_perc = float(int(arc_stats['dnode_size'])/arc_max) total_ticks = float(arc_perc)*GRAPH_WIDTH - mfu_ticks = mfu_perc*GRAPH_WIDTH - mru_ticks = mru_perc*GRAPH_WIDTH - other_ticks = total_ticks-(mfu_ticks+mru_ticks) + data_ticks = data_perc*GRAPH_WIDTH + meta_ticks = meta_perc*GRAPH_WIDTH + dnode_ticks = dnode_perc*GRAPH_WIDTH + other_ticks = total_ticks-(data_ticks+meta_ticks+dnode_ticks) - core_form = 'F'*int(mfu_ticks)+'R'*int(mru_ticks)+'O'*int(other_ticks) + core_form = 'D'*int(data_ticks)+'M'*int(meta_ticks)+'N'*int(dnode_ticks)+\ + 'O'*int(other_ticks) core_spc = ' '*(GRAPH_WIDTH-(2+len(core_form))) core_line = GRAPH_INDENT+'|'+core_form+core_spc+'|' @@ -536,56 +537,87 @@ def section_arc(kstats_dict): arc_stats = isolate_section('arcstats', kstats_dict) - throttle = arc_stats['memory_throttle_count'] - - if throttle == '0': - health = 'HEALTHY' - else: - health = 'THROTTLED' - - prt_1('ARC status:', health) - prt_i1('Memory throttle count:', throttle) - print() - + memory_all = arc_stats['memory_all_bytes'] + memory_free = arc_stats['memory_free_bytes'] + memory_avail = arc_stats['memory_available_bytes'] arc_size = arc_stats['size'] arc_target_size = arc_stats['c'] arc_max = arc_stats['c_max'] arc_min = arc_stats['c_min'] + dnode_limit = arc_stats['arc_dnode_limit'] + + print('ARC status:') + prt_i1('Total memory size:', f_bytes(memory_all)) + prt_i2('Min target size:', f_perc(arc_min, memory_all), f_bytes(arc_min)) + prt_i2('Max target size:', f_perc(arc_max, memory_all), f_bytes(arc_max)) + prt_i2('Target size (adaptive):', + f_perc(arc_size, arc_max), f_bytes(arc_target_size)) + prt_i2('Current size:', f_perc(arc_size, arc_max), f_bytes(arc_size)) + prt_i1('Free memory size:', f_bytes(memory_free)) + 
prt_i1('Available memory size:', f_bytes(memory_avail)) + print() + + compressed_size = arc_stats['compressed_size'] + overhead_size = arc_stats['overhead_size'] + bonus_size = arc_stats['bonus_size'] + dnode_size = arc_stats['dnode_size'] + dbuf_size = arc_stats['dbuf_size'] + hdr_size = arc_stats['hdr_size'] + l2_hdr_size = arc_stats['l2_hdr_size'] + abd_chunk_waste_size = arc_stats['abd_chunk_waste_size'] + + prt_1('ARC structural breakdown (current size):', f_bytes(arc_size)) + prt_i2('Compressed size:', + f_perc(compressed_size, arc_size), f_bytes(compressed_size)) + prt_i2('Overhead size:', + f_perc(overhead_size, arc_size), f_bytes(overhead_size)) + prt_i2('Bonus size:', + f_perc(bonus_size, arc_size), f_bytes(bonus_size)) + prt_i2('Dnode size:', + f_perc(dnode_size, arc_size), f_bytes(dnode_size)) + prt_i2('Dbuf size:', + f_perc(dbuf_size, arc_size), f_bytes(dbuf_size)) + prt_i2('Header size:', + f_perc(hdr_size, arc_size), f_bytes(hdr_size)) + prt_i2('L2 header size:', + f_perc(l2_hdr_size, arc_size), f_bytes(l2_hdr_size)) + prt_i2('ABD chunk waste size:', + f_perc(abd_chunk_waste_size, arc_size), f_bytes(abd_chunk_waste_size)) + print() + meta = arc_stats['meta'] pd = arc_stats['pd'] pm = arc_stats['pm'] + data_size = arc_stats['data_size'] + metadata_size = arc_stats['metadata_size'] anon_data = arc_stats['anon_data'] anon_metadata = arc_stats['anon_metadata'] mfu_data = arc_stats['mfu_data'] mfu_metadata = arc_stats['mfu_metadata'] + mfu_edata = arc_stats['mfu_evictable_data'] + mfu_emetadata = arc_stats['mfu_evictable_metadata'] mru_data = arc_stats['mru_data'] mru_metadata = arc_stats['mru_metadata'] + mru_edata = arc_stats['mru_evictable_data'] + mru_emetadata = arc_stats['mru_evictable_metadata'] mfug_data = arc_stats['mfu_ghost_data'] mfug_metadata = arc_stats['mfu_ghost_metadata'] mrug_data = arc_stats['mru_ghost_data'] mrug_metadata = arc_stats['mru_ghost_metadata'] unc_data = arc_stats['uncached_data'] unc_metadata = arc_stats['uncached_metadata'] - bonus_size = arc_stats['bonus_size'] - dnode_limit = arc_stats['arc_dnode_limit'] - dnode_size = arc_stats['dnode_size'] - dbuf_size = arc_stats['dbuf_size'] - hdr_size = arc_stats['hdr_size'] - l2_hdr_size = arc_stats['l2_hdr_size'] - abd_chunk_waste_size = arc_stats['abd_chunk_waste_size'] - target_size_ratio = '{0}:1'.format(int(arc_max) // int(arc_min)) - - prt_2('ARC size (current):', - f_perc(arc_size, arc_max), f_bytes(arc_size)) - prt_i2('Target size (adaptive):', - f_perc(arc_target_size, arc_max), f_bytes(arc_target_size)) - prt_i2('Min size (hard limit):', - f_perc(arc_min, arc_max), f_bytes(arc_min)) - prt_i2('Max size (high water):', - target_size_ratio, f_bytes(arc_max)) caches_size = int(anon_data)+int(anon_metadata)+\ int(mfu_data)+int(mfu_metadata)+int(mru_data)+int(mru_metadata)+\ int(unc_data)+int(unc_metadata) + + prt_1('ARC types breakdown (compressed + overhead):', f_bytes(caches_size)) + prt_i2('Data size:', + f_perc(data_size, caches_size), f_bytes(data_size)) + prt_i2('Metadata size:', + f_perc(metadata_size, caches_size), f_bytes(metadata_size)) + print() + + prt_1('ARC states breakdown (compressed + overhead):', f_bytes(caches_size)) prt_i2('Anonymous data size:', f_perc(anon_data, caches_size), f_bytes(anon_data)) prt_i2('Anonymous metadata size:', @@ -596,43 +628,37 @@ def section_arc(kstats_dict): f_bytes(v / 65536 * caches_size / 65536)) prt_i2('MFU data size:', f_perc(mfu_data, caches_size), f_bytes(mfu_data)) + prt_i2('MFU evictable data size:', + f_perc(mfu_edata, caches_size),
f_bytes(mfu_edata)) prt_i1('MFU ghost data size:', f_bytes(mfug_data)) v = (s-int(pm))*int(meta)/s prt_i2('MFU metadata target:', f_perc(v, s), f_bytes(v / 65536 * caches_size / 65536)) prt_i2('MFU metadata size:', f_perc(mfu_metadata, caches_size), f_bytes(mfu_metadata)) + prt_i2('MFU evictable metadata size:', + f_perc(mfu_emetadata, caches_size), f_bytes(mfu_emetadata)) prt_i1('MFU ghost metadata size:', f_bytes(mfug_metadata)) v = int(pd)*(s-int(meta))/s prt_i2('MRU data target:', f_perc(v, s), f_bytes(v / 65536 * caches_size / 65536)) prt_i2('MRU data size:', f_perc(mru_data, caches_size), f_bytes(mru_data)) + prt_i2('MRU evictable data size:', + f_perc(mru_edata, caches_size), f_bytes(mru_edata)) prt_i1('MRU ghost data size:', f_bytes(mrug_data)) v = int(pm)*int(meta)/s prt_i2('MRU metadata target:', f_perc(v, s), f_bytes(v / 65536 * caches_size / 65536)) prt_i2('MRU metadata size:', f_perc(mru_metadata, caches_size), f_bytes(mru_metadata)) + prt_i2('MRU evictable metadata size:', + f_perc(mru_emetadata, caches_size), f_bytes(mru_emetadata)) prt_i1('MRU ghost metadata size:', f_bytes(mrug_metadata)) prt_i2('Uncached data size:', f_perc(unc_data, caches_size), f_bytes(unc_data)) prt_i2('Uncached metadata size:', f_perc(unc_metadata, caches_size), f_bytes(unc_metadata)) - prt_i2('Bonus size:', - f_perc(bonus_size, arc_size), f_bytes(bonus_size)) - prt_i2('Dnode cache target:', - f_perc(dnode_limit, arc_max), f_bytes(dnode_limit)) - prt_i2('Dnode cache size:', - f_perc(dnode_size, dnode_limit), f_bytes(dnode_size)) - prt_i2('Dbuf size:', - f_perc(dbuf_size, arc_size), f_bytes(dbuf_size)) - prt_i2('Header size:', - f_perc(hdr_size, arc_size), f_bytes(hdr_size)) - prt_i2('L2 header size:', - f_perc(l2_hdr_size, arc_size), f_bytes(l2_hdr_size)) - prt_i2('ABD chunk waste size:', - f_perc(abd_chunk_waste_size, arc_size), f_bytes(abd_chunk_waste_size)) print() print('ARC hash breakdown:') @@ -647,6 +673,9 @@ def section_arc(kstats_dict): print() print('ARC misc:') + prt_i1('Memory throttles:', arc_stats['memory_throttle_count']) + prt_i1('Memory direct reclaims:', arc_stats['memory_direct_count']) + prt_i1('Memory indirect reclaims:', arc_stats['memory_indirect_count']) prt_i1('Deleted:', f_hits(arc_stats['deleted'])) prt_i1('Mutex misses:', f_hits(arc_stats['mutex_miss'])) prt_i1('Eviction skips:', f_hits(arc_stats['evict_skip'])) diff --git a/sys/contrib/subrepo-openzfs/cmd/zdb/zdb.c b/sys/contrib/subrepo-openzfs/cmd/zdb/zdb.c index 6063dc3b7cd3..dec70c60cec1 100644 --- a/sys/contrib/subrepo-openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/subrepo-openzfs/cmd/zdb/zdb.c @@ -1131,8 +1131,8 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size) !!(zap_getflags(zc.zc_zap) & ZAP_FLAG_UINT64_KEY); if (key64) - (void) printf("\t\t0x%010llx = ", - (u_longlong_t)*(uint64_t *)attr.za_name); + (void) printf("\t\t0x%010lx = ", + *(uint64_t *)attr.za_name); else (void) printf("\t\t%s = ", attr.za_name); @@ -1985,8 +1985,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class) (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", name, (u_longlong_t)count, - (u_longlong_t)(dspace / count), - (u_longlong_t)(mspace / count)); + (u_longlong_t)dspace, + (u_longlong_t)mspace); if (dump_opt['D'] < 3) return; @@ -2104,8 +2104,13 @@ dump_brt(spa_t *spa) for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { - uint64_t offset = *(uint64_t *)za.za_name; - uint64_t refcnt = za.za_first_integer; + uint64_t refcnt; + 
VERIFY0(zap_lookup_uint64(brt->brt_mos, + brtvd->bv_mos_entries, + (const uint64_t *)za.za_name, 1, + za.za_integer_length, za.za_num_integers, &refcnt)); + + uint64_t offset = *(const uint64_t *)za.za_name; snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid, (u_longlong_t)offset); @@ -8358,7 +8363,7 @@ zdb_dump_block(char *label, void *buf, uint64_t size, int flags) (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); -#ifdef _LITTLE_ENDIAN +#ifdef _ZFS_LITTLE_ENDIAN /* correct the endianness */ do_bswap = !do_bswap; #endif @@ -8927,6 +8932,19 @@ zdb_numeric(char *str) return (B_TRUE); } +static int +dummy_get_file_info(dmu_object_type_t bonustype, const void *data, + zfs_file_info_t *zoi) +{ + (void) data, (void) zoi; + + if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) + return (ENOENT); + + (void) fprintf(stderr, "dummy_get_file_info: not implemented"); + abort(); +} + int main(int argc, char **argv) { @@ -9242,6 +9260,7 @@ main(int argc, char **argv) libzfs_core_fini(); } + dmu_objset_register_type(DMU_OST_ZFS, dummy_get_file_info); kernel_init(SPA_MODE_READ); kernel_init_done = B_TRUE; diff --git a/sys/contrib/subrepo-openzfs/cmd/zfs/zfs_main.c b/sys/contrib/subrepo-openzfs/cmd/zfs/zfs_main.c index b77917764c86..34c693fbcb0f 100644 --- a/sys/contrib/subrepo-openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/subrepo-openzfs/cmd/zfs/zfs_main.c @@ -134,6 +134,10 @@ static int zfs_do_unzone(int argc, char **argv); static int zfs_do_help(int argc, char **argv); +enum zfs_options { + ZFS_OPTION_JSON_NUMS_AS_INT = 1024 +}; + /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. */ @@ -272,6 +276,8 @@ static zfs_command_t command_table[] = { #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) +#define MAX_CMD_LEN 256 + zfs_command_t *current_command; static const char * @@ -292,7 +298,7 @@ get_usage(zfs_help_t idx) "<filesystem|volume>@<snap>[%<snap>][,...]\n" "\tdestroy <filesystem|volume>#<bookmark>\n")); case HELP_GET: - return (gettext("\tget [-rHp] [-d max] " + return (gettext("\tget [-rHp] [-j [--json-int]] [-d max] " "[-o \"all\" | field[,...]]\n" "\t [-t type[,...]] [-s source[,...]]\n" "\t <\"all\" | property[,...]> " "<filesystem|volume|snapshot|bookmark> ...\n")); @@ -304,11 +310,12 @@ return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_LIST: - return (gettext("\tlist [-Hp] [-r|-d max] [-o property[,...]] " "[-s property]...\n\t [-S property]... [-t type[,...]] " "[filesystem|volume|snapshot] ...\n")); + return (gettext("\tlist [-Hp] [-j [--json-int]] [-r|-d max] " "[-o property[,...]] [-s property]...\n\t " "[-S property]... [-t type[,...]] " "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: - return (gettext("\tmount\n" + return (gettext("\tmount [-j]\n" "\tmount [-flvO] [-o opts] <-a|-R filesystem|" "filesystem>\n")); case HELP_PROMOTE: @@ -420,7 +427,7 @@ "\t <filesystem|volume>\n" "\tchange-key -i [-l] <filesystem|volume>\n")); case HELP_VERSION: - return (gettext("\tversion\n")); + return (gettext("\tversion [-j]\n")); case HELP_REDACT: return (gettext("\tredact <snapshot> <bookmark> " "<redaction_snapshot> ...\n")); @@ -1885,7 +1892,89 @@ is_recvd_column(zprop_get_cbdata_t *cbp) } /* - * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] + * Generates an nvlist with output version for every command based on params. + * Purpose of this is to add a version of JSON output, considering the schema + * format might be updated for each command in future.
+ * + * Schema: + * + * "output_version": { + * "command": string, + * "vers_major": integer, + * "vers_minor": integer, + * } + */ +static nvlist_t * +zfs_json_schema(int maj_v, int min_v) +{ + nvlist_t *sch = NULL; + nvlist_t *ov = NULL; + char cmd[MAX_CMD_LEN]; + snprintf(cmd, MAX_CMD_LEN, "zfs %s", current_command->name); + + sch = fnvlist_alloc(); + ov = fnvlist_alloc(); + fnvlist_add_string(ov, "command", cmd); + fnvlist_add_uint32(ov, "vers_major", maj_v); + fnvlist_add_uint32(ov, "vers_minor", min_v); + fnvlist_add_nvlist(sch, "output_version", ov); + fnvlist_free(ov); + return (sch); +} + +static void +fill_dataset_info(nvlist_t *list, zfs_handle_t *zhp, boolean_t as_int) +{ + char createtxg[ZFS_MAXPROPLEN]; + zfs_type_t type = zfs_get_type(zhp); + nvlist_add_string(list, "name", zfs_get_name(zhp)); + + switch (type) { + case ZFS_TYPE_FILESYSTEM: + fnvlist_add_string(list, "type", "FILESYSTEM"); + break; + case ZFS_TYPE_VOLUME: + fnvlist_add_string(list, "type", "VOLUME"); + break; + case ZFS_TYPE_SNAPSHOT: + fnvlist_add_string(list, "type", "SNAPSHOT"); + break; + case ZFS_TYPE_POOL: + fnvlist_add_string(list, "type", "POOL"); + break; + case ZFS_TYPE_BOOKMARK: + fnvlist_add_string(list, "type", "BOOKMARK"); + break; + default: + fnvlist_add_string(list, "type", "UNKNOWN"); + break; + } + + if (type != ZFS_TYPE_POOL) + fnvlist_add_string(list, "pool", zfs_get_pool_name(zhp)); + + if (as_int) { + fnvlist_add_uint64(list, "createtxg", zfs_prop_get_int(zhp, + ZFS_PROP_CREATETXG)); + } else { + if (zfs_prop_get(zhp, ZFS_PROP_CREATETXG, createtxg, + sizeof (createtxg), NULL, NULL, 0, B_TRUE) == 0) + fnvlist_add_string(list, "createtxg", createtxg); + } + + if (type == ZFS_TYPE_SNAPSHOT) { + char *ds, *snap; + ds = snap = strdup(zfs_get_name(zhp)); + ds = strsep(&snap, "@"); + fnvlist_add_string(list, "dataset", ds); + fnvlist_add_string(list, "snapshot_name", snap); + free(ds); + } +} + +/* + * zfs get [-rHp] [-j [--json-int]] [-o all | field[,field]...] + * [-s source[,source]...] * < all | property[,property]... > < fs | snap | vol > ... * * -r recurse over any child datasets @@ -1898,6 +1987,8 @@ is_recvd_column(zprop_get_cbdata_t *cbp) * "local,default,inherited,received,temporary,none". Default is * all six. * -p Display values in parsable (literal) format. + * -j Display output in JSON format. + * --json-int Display numbers as integers instead of strings. * * Prints properties for the given datasets. The user can control which * columns to display as well as which property types to allow. 
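The "output_version" header above renders in the final output as, e.g., {"output_version": {"command": "zfs get", "vers_major": 0, "vers_minor": 1}} (both zfs get -j and zfs list -j pass 0/1 below). A minimal standalone sketch of the same nvlist construction, assuming only libnvpair from OpenZFS (link with -lnvpair); the command string is illustrative, and the actual JSON rendering happens later in zcmd_print_json(), added to libzfs_util.c per the diffstat above:

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	/* Mirror zfs_json_schema(0, 1): one "output_version" sub-nvlist. */
	nvlist_t *sch = fnvlist_alloc();
	nvlist_t *ov = fnvlist_alloc();

	fnvlist_add_string(ov, "command", "zfs get");	/* illustrative */
	fnvlist_add_uint32(ov, "vers_major", 0);
	fnvlist_add_uint32(ov, "vers_minor", 1);
	fnvlist_add_nvlist(sch, "output_version", ov);	/* copies ov */
	fnvlist_free(ov);

	/* Debug dump of the nvlist; not the JSON that zcmd_print_json emits. */
	nvlist_print(stdout, sch);
	fnvlist_free(sch);
	return (0);
}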
@@ -1917,9 +2008,21 @@ get_callback(zfs_handle_t *zhp, void *data) nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; nvlist_t *propval; + nvlist_t *item, *d, *props; + item = d = props = NULL; const char *strval; const char *sourceval; boolean_t received = is_recvd_column(cbp); + int err = 0; + + if (cbp->cb_json) { + d = fnvlist_lookup_nvlist(cbp->cb_jsobj, "datasets"); + if (d == NULL) { + fprintf(stderr, "datasets obj not found.\n"); + exit(1); + } + props = fnvlist_alloc(); + } for (; pl != NULL; pl = pl->pl_next) { char *recvdval = NULL; @@ -1954,9 +2057,9 @@ get_callback(zfs_handle_t *zhp, void *data) cbp->cb_literal) == 0)) recvdval = rbuf; - zprop_print_one_property(zfs_get_name(zhp), cbp, + err = zprop_collect_property(zfs_get_name(zhp), cbp, zfs_prop_to_name(pl->pl_prop), - buf, sourcetype, source, recvdval); + buf, sourcetype, source, recvdval, props); } else if (zfs_prop_userquota(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; @@ -1966,8 +2069,9 @@ get_callback(zfs_handle_t *zhp, void *data) (void) strlcpy(buf, "-", sizeof (buf)); } - zprop_print_one_property(zfs_get_name(zhp), cbp, - pl->pl_user_prop, buf, sourcetype, source, NULL); + err = zprop_collect_property(zfs_get_name(zhp), cbp, + pl->pl_user_prop, buf, sourcetype, source, NULL, + props); } else if (zfs_prop_written(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; @@ -1977,8 +2081,9 @@ get_callback(zfs_handle_t *zhp, void *data) (void) strlcpy(buf, "-", sizeof (buf)); } - zprop_print_one_property(zfs_get_name(zhp), cbp, - pl->pl_user_prop, buf, sourcetype, source, NULL); + err = zprop_collect_property(zfs_get_name(zhp), cbp, + pl->pl_user_prop, buf, sourcetype, source, NULL, + props); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { @@ -2010,9 +2115,24 @@ get_callback(zfs_handle_t *zhp, void *data) cbp->cb_literal) == 0)) recvdval = rbuf; - zprop_print_one_property(zfs_get_name(zhp), cbp, + err = zprop_collect_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, strval, sourcetype, - source, recvdval); + source, recvdval, props); + } + if (err != 0) + return (err); + } + + if (cbp->cb_json) { + if (!nvlist_empty(props)) { + item = fnvlist_alloc(); + fill_dataset_info(item, zhp, cbp->cb_json_as_int); + fnvlist_add_nvlist(item, "properties", props); + fnvlist_add_nvlist(d, zfs_get_name(zhp), item); + fnvlist_free(props); + fnvlist_free(item); + } else { + fnvlist_free(props); } } @@ -2029,6 +2149,7 @@ zfs_do_get(int argc, char **argv) int ret = 0; int limit = 0; zprop_list_t fake_name = { 0 }; + nvlist_t *data; /* * Set up default columns and sources. 
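For orientation, get_callback() above nests its results as datasets -> dataset name -> {name, type, pool, createtxg, properties -> property -> {...}}. A freestanding sketch of that shape, under the assumption that each property entry carries value/source-style keys (the exact per-property keys come from zprop_collect_property()/zprop_nvlist_one_property() in libzfs, which this hunk only calls); the pool and property values are hypothetical:

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *root = fnvlist_alloc();
	nvlist_t *datasets = fnvlist_alloc();
	nvlist_t *item = fnvlist_alloc();
	nvlist_t *props = fnvlist_alloc();
	nvlist_t *propval = fnvlist_alloc();

	/* One property entry; the key names here are assumptions. */
	fnvlist_add_string(propval, "value", "1.00x");
	fnvlist_add_string(propval, "source", "-");
	fnvlist_add_nvlist(props, "compressratio", propval);

	/* One dataset entry, keyed by dataset name as get_callback() does. */
	fnvlist_add_string(item, "name", "tank/home");	/* hypothetical */
	fnvlist_add_string(item, "type", "FILESYSTEM");
	fnvlist_add_nvlist(item, "properties", props);
	fnvlist_add_nvlist(datasets, "tank/home", item);
	fnvlist_add_nvlist(root, "datasets", datasets);

	/* fnvlist_add_nvlist copies its argument, so every list is freed. */
	nvlist_print(stdout, root);

	fnvlist_free(propval);
	fnvlist_free(props);
	fnvlist_free(item);
	fnvlist_free(datasets);
	fnvlist_free(root);
	return (0);
}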
@@ -2040,8 +2161,14 @@ zfs_do_get(int argc, char **argv) cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_DATASET; + struct option long_options[] = { + {"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) { + while ((c = getopt_long(argc, argv, ":d:o:s:jrt:Hp", long_options, + NULL)) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; @@ -2055,6 +2182,17 @@ zfs_do_get(int argc, char **argv) case 'H': cb.cb_scripted = B_TRUE; break; + case 'j': + cb.cb_json = B_TRUE; + cb.cb_jsobj = zfs_json_schema(0, 1); + data = fnvlist_alloc(); + fnvlist_add_nvlist(cb.cb_jsobj, "datasets", data); + fnvlist_free(data); + break; + case ZFS_OPTION_JSON_NUMS_AS_INT: + cb.cb_json_as_int = B_TRUE; + cb.cb_literal = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2178,7 +2316,6 @@ found2:; found3:; } break; - case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -2195,6 +2332,12 @@ found3:; usage(B_FALSE); } + if (!cb.cb_json && cb.cb_json_as_int) { + (void) fprintf(stderr, gettext("'--json-int' only works with" + " '-j' option\n")); + usage(B_FALSE); + } + fields = argv[0]; /* @@ -2235,6 +2378,11 @@ found3:; ret = zfs_for_each(argc, argv, flags, types, NULL, &cb.cb_proplist, limit, get_callback, &cb); + if (ret == 0 && cb.cb_json) + zcmd_print_json(cb.cb_jsobj); + else if (ret != 0 && cb.cb_json) + nvlist_free(cb.cb_jsobj); + if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else @@ -3442,6 +3590,9 @@ typedef struct list_cbdata { boolean_t cb_literal; boolean_t cb_scripted; zprop_list_t *cb_proplist; + boolean_t cb_json; + nvlist_t *cb_jsobj; + boolean_t cb_json_as_int; } list_cbdata_t; /* @@ -3512,10 +3663,11 @@ zfs_list_avail_color(zfs_handle_t *zhp) /* * Given a dataset and a list of fields, print out all the properties according - * to the described layout. + * to the described layout, or return an nvlist containing all the fields, later + * to be printed out as JSON object. 
*/ static void -print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) +collect_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; @@ -3524,9 +3676,23 @@ nvlist_t *propval; const char *propstr; boolean_t right_justify; + nvlist_t *item, *d, *props; + item = d = props = NULL; + zprop_source_t sourcetype = ZPROP_SRC_NONE; + char source[ZFS_MAX_DATASET_NAME_LEN]; + if (cb->cb_json) { + d = fnvlist_lookup_nvlist(cb->cb_jsobj, "datasets"); + if (d == NULL) { + fprintf(stderr, "datasets obj not found.\n"); + exit(1); + } + item = fnvlist_alloc(); + props = fnvlist_alloc(); + fill_dataset_info(item, zhp, cb->cb_json_as_int); + } for (; pl != NULL; pl = pl->pl_next) { - if (!first) { + if (!cb->cb_json && !first) { if (cb->cb_scripted) (void) putchar('\t'); else @@ -3542,69 +3708,112 @@ right_justify = zfs_prop_align_right(pl->pl_prop); } else if (pl->pl_prop != ZPROP_USERPROP) { if (zfs_prop_get(zhp, pl->pl_prop, property, - sizeof (property), NULL, NULL, 0, - cb->cb_literal) != 0) + sizeof (property), &sourcetype, source, + sizeof (source), cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { + sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, - property, sizeof (property), cb->cb_literal) != 0) + property, sizeof (property), cb->cb_literal) != 0) { + sourcetype = ZPROP_SRC_NONE; propstr = "-"; - else + } else { propstr = property; + } right_justify = B_TRUE; } else if (zfs_prop_written(pl->pl_user_prop)) { + sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_written(zhp, pl->pl_user_prop, - property, sizeof (property), cb->cb_literal) != 0) + property, sizeof (property), cb->cb_literal) != 0) { + sourcetype = ZPROP_SRC_NONE; propstr = "-"; - else + } else { propstr = property; + } right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, - pl->pl_user_prop, &propval) != 0) + pl->pl_user_prop, &propval) != 0) { propstr = "-"; - else + } else { propstr = fnvlist_lookup_string(propval, ZPROP_VALUE); + strlcpy(source, + fnvlist_lookup_string(propval, + ZPROP_SOURCE), ZFS_MAX_DATASET_NAME_LEN); + if (strcmp(source, + zfs_get_name(zhp)) == 0) { + sourcetype = ZPROP_SRC_LOCAL; + } else if (strcmp(source, + ZPROP_SOURCE_VAL_RECVD) == 0) { + sourcetype = ZPROP_SRC_RECEIVED; + } else { + sourcetype = ZPROP_SRC_INHERITED; + } + } right_justify = B_FALSE; } - /* - * zfs_list_avail_color() needs ZFS_PROP_AVAILABLE + USED - * - so we need another for() search for the USED part - * - when no colors wanted, we can skip the whole thing - */ - if (use_color() && pl->pl_prop == ZFS_PROP_AVAILABLE) { - zprop_list_t *pl2 = cb->cb_proplist; - for (; pl2 != NULL; pl2 = pl2->pl_next) { - if (pl2->pl_prop == ZFS_PROP_USED) { - color_start(zfs_list_avail_color(zhp)); - /* found it, no need for more loops */ - break; + if (cb->cb_json) { + if (pl->pl_prop == ZFS_PROP_NAME) + continue; + if (zprop_nvlist_one_property( + zfs_prop_to_name(pl->pl_prop), propstr, + sourcetype, source, NULL, props, + cb->cb_json_as_int) != 0) + nomem(); + } else { + /* + * zfs_list_avail_color() needs + * ZFS_PROP_AVAILABLE + USED, so we need another + * for() search for the USED part; when no colors + * are wanted, we can skip the whole thing + */ + if (use_color() && pl->pl_prop == ZFS_PROP_AVAILABLE) { + zprop_list_t *pl2 = cb->cb_proplist;
for (; pl2 != NULL; pl2 = pl2->pl_next) { + if (pl2->pl_prop == ZFS_PROP_USED) { + color_start( + zfs_list_avail_color(zhp)); + /* + * found it, no need for more + * loops + */ + break; + } } } - } - /* - * If this is being called in scripted mode, or if this is the - * last column and it is left-justified, don't include a width - * format specifier. - */ - if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) - (void) fputs(propstr, stdout); - else if (right_justify) - (void) printf("%*s", (int)pl->pl_width, propstr); - else - (void) printf("%-*s", (int)pl->pl_width, propstr); + /* + * If this is being called in scripted mode, or if + * this is the last column and it is left-justified, + * don't include a width format specifier. + */ + if (cb->cb_scripted || (pl->pl_next == NULL && + !right_justify)) + (void) fputs(propstr, stdout); + else if (right_justify) { + (void) printf("%*s", (int)pl->pl_width, + propstr); + } else { + (void) printf("%-*s", (int)pl->pl_width, + propstr); + } - if (pl->pl_prop == ZFS_PROP_AVAILABLE) - color_end(); + if (pl->pl_prop == ZFS_PROP_AVAILABLE) + color_end(); + } } - - (void) putchar('\n'); + if (cb->cb_json) { + fnvlist_add_nvlist(item, "properties", props); + fnvlist_add_nvlist(d, zfs_get_name(zhp), item); + fnvlist_free(props); + fnvlist_free(item); + } else + (void) putchar('\n'); } /* @@ -3616,12 +3825,12 @@ list_callback(zfs_handle_t *zhp, void *data) list_cbdata_t *cbp = data; if (cbp->cb_first) { - if (!cbp->cb_scripted) + if (!cbp->cb_scripted && !cbp->cb_json) print_header(cbp); cbp->cb_first = B_FALSE; } - print_dataset(zhp, cbp); + collect_dataset(zhp, cbp); return (0); } @@ -3640,9 +3849,16 @@ zfs_do_list(int argc, char **argv) int ret = 0; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; + nvlist_t *data = NULL; + + struct option long_options[] = { + {"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "HS:d:o:prs:t:")) != -1) { + while ((c = getopt_long(argc, argv, "jHS:d:o:prs:t:", long_options, + NULL)) != -1) { switch (c) { case 'o': fields = optarg; @@ -3657,6 +3873,17 @@ zfs_do_list(int argc, char **argv) case 'r': flags |= ZFS_ITER_RECURSE; break; + case 'j': + cb.cb_json = B_TRUE; + cb.cb_jsobj = zfs_json_schema(0, 1); + data = fnvlist_alloc(); + fnvlist_add_nvlist(cb.cb_jsobj, "datasets", data); + fnvlist_free(data); + break; + case ZFS_OPTION_JSON_NUMS_AS_INT: + cb.cb_json_as_int = B_TRUE; + cb.cb_literal = B_TRUE; + break; case 'H': cb.cb_scripted = B_TRUE; break; @@ -3730,6 +3957,12 @@ found3:; argc -= optind; argv += optind; + if (!cb.cb_json && cb.cb_json_as_int) { + (void) fprintf(stderr, gettext("'--json-int' only works with" + " '-j' option\n")); + usage(B_FALSE); + } + /* * If "-o space" and no types were specified, don't display snapshots. 
*/ @@ -3769,6 +4002,11 @@ found3:; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, limit, list_callback, &cb); + if (ret == 0 && cb.cb_json) + zcmd_print_json(cb.cb_jsobj); + else if (ret != 0 && cb.cb_json) + nvlist_free(cb.cb_jsobj); + zprop_free_list(cb.cb_proplist); zfs_free_sort_columns(sortcol); @@ -7189,14 +7427,17 @@ share_mount(int op, int argc, char **argv) int do_all = 0; int recursive = 0; boolean_t verbose = B_FALSE; + boolean_t json = B_FALSE; int c, ret = 0; char *options = NULL; int flags = 0; + nvlist_t *jsobj, *data, *item; const uint_t mount_nthr = 512; uint_t nthr; + jsobj = data = item = NULL; /* check options */ - while ((c = getopt(argc, argv, op == OP_MOUNT ? ":aRlvo:Of" : "al")) + while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ajRlvo:Of" : "al")) != -1) { switch (c) { case 'a': @@ -7211,6 +7452,11 @@ share_mount(int op, int argc, char **argv) case 'l': flags |= MS_CRYPT; break; + case 'j': + json = B_TRUE; + jsobj = zfs_json_schema(0, 1); + data = fnvlist_alloc(); + break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " @@ -7245,6 +7491,11 @@ share_mount(int op, int argc, char **argv) argc -= optind; argv += optind; + if (json && argc != 0) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + /* check number of arguments */ if (do_all || recursive) { enum sa_protocol protocol = SA_NO_PROTOCOL; @@ -7348,12 +7599,30 @@ share_mount(int op, int argc, char **argv) if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || strchr(entry.mnt_special, '@') != NULL) continue; - - (void) printf("%-30s %s\n", entry.mnt_special, - entry.mnt_mountp); + if (json) { + item = fnvlist_alloc(); + fnvlist_add_string(item, "filesystem", + entry.mnt_special); + fnvlist_add_string(item, "mountpoint", + entry.mnt_mountp); + fnvlist_add_nvlist(data, entry.mnt_special, + item); + fnvlist_free(item); + } else { + (void) printf("%-30s %s\n", entry.mnt_special, + entry.mnt_mountp); + } } (void) fclose(mnttab); + if (json) { + fnvlist_add_nvlist(jsobj, "datasets", data); + if (nvlist_empty(data)) + fnvlist_free(jsobj); + else + zcmd_print_json(jsobj); + fnvlist_free(data); + } } else { zfs_handle_t *zhp; @@ -8811,8 +9080,39 @@ found:; static int zfs_do_version(int argc, char **argv) { - (void) argc, (void) argv; - return (zfs_version_print() != 0); + int c; + nvlist_t *jsobj = NULL, *zfs_ver = NULL; + boolean_t json = B_FALSE; + while ((c = getopt(argc, argv, "j")) != -1) { + switch (c) { + case 'j': + json = B_TRUE; + jsobj = zfs_json_schema(0, 1); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + if (argc != 0) { + (void) fprintf(stderr, "too many arguments\n"); + usage(B_FALSE); + } + + if (json) { + zfs_ver = zfs_version_nvlist(); + if (zfs_ver) { + fnvlist_add_nvlist(jsobj, "zfs_version", zfs_ver); + zcmd_print_json(jsobj); + fnvlist_free(zfs_ver); + return (0); + } else + return (-1); + } else + return (zfs_version_print() != 0); } /* Display documentation */ diff --git a/sys/contrib/subrepo-openzfs/cmd/zilstat.in b/sys/contrib/subrepo-openzfs/cmd/zilstat.in index e8678e20cafa..6be7f83936d7 100755 --- a/sys/contrib/subrepo-openzfs/cmd/zilstat.in +++ b/sys/contrib/subrepo-openzfs/cmd/zilstat.in @@ -43,6 +43,9 @@ cols = { "obj": [12, -1, "objset"], "cc": [5, 1000, "zil_commit_count"], "cwc": [5, 1000, "zil_commit_writer_count"], + "cec": [5, 1000, "zil_commit_error_count"], + "csc": [5, 1000, "zil_commit_stall_count"], + 
"cSc": [5, 1000, "zil_commit_suspend_count"], "ic": [5, 1000, "zil_itx_count"], "iic": [5, 1000, "zil_itx_indirect_count"], "iib": [5, 1024, "zil_itx_indirect_bytes"], diff --git a/sys/contrib/subrepo-openzfs/cmd/zpool/zpool_main.c b/sys/contrib/subrepo-openzfs/cmd/zpool/zpool_main.c index 57170c8ae717..620746f8e7bb 100644 --- a/sys/contrib/subrepo-openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/subrepo-openzfs/cmd/zpool/zpool_main.c @@ -32,7 +32,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K * Copyright (c) 2021, Colm Buckley - * Copyright (c) 2021, Klara Inc. + * Copyright (c) 2021, 2023, Klara Inc. * Copyright [2021] Hewlett Packard Enterprise Development LP */ @@ -66,7 +66,7 @@ #include #include #include - +#include #include #include @@ -90,6 +90,7 @@ static int zpool_do_remove(int, char **); static int zpool_do_labelclear(int, char **); static int zpool_do_checkpoint(int, char **); +static int zpool_do_prefetch(int, char **); static int zpool_do_list(int, char **); static int zpool_do_iostat(int, char **); @@ -138,7 +139,10 @@ enum zpool_options { ZPOOL_OPTION_POWER = 1024, ZPOOL_OPTION_ALLOW_INUSE, ZPOOL_OPTION_ALLOW_REPLICATION_MISMATCH, - ZPOOL_OPTION_ALLOW_ASHIFT_MISMATCH + ZPOOL_OPTION_ALLOW_ASHIFT_MISMATCH, + ZPOOL_OPTION_POOL_KEY_GUID, + ZPOOL_OPTION_JSON_NUMS_AS_INT, + ZPOOL_OPTION_JSON_FLAT_VDEVS }; /* @@ -176,6 +180,7 @@ typedef enum { HELP_LIST, HELP_OFFLINE, HELP_ONLINE, + HELP_PREFETCH, HELP_REPLACE, HELP_REMOVE, HELP_INITIALIZE, @@ -272,6 +277,86 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][15] = { NULL}, }; +static const char *pool_scan_func_str[] = { + "NONE", + "SCRUB", + "RESILVER", + "ERRORSCRUB" +}; + +static const char *pool_scan_state_str[] = { + "NONE", + "SCANNING", + "FINISHED", + "CANCELED", + "ERRORSCRUBBING" +}; + +static const char *vdev_rebuild_state_str[] = { + "NONE", + "ACTIVE", + "CANCELED", + "COMPLETE" +}; + +static const char *checkpoint_state_str[] = { + "NONE", + "EXISTS", + "DISCARDING" +}; + +static const char *vdev_state_str[] = { + "UNKNOWN", + "CLOSED", + "OFFLINE", + "REMOVED", + "CANT_OPEN", + "FAULTED", + "DEGRADED", + "ONLINE" +}; + +static const char *vdev_aux_str[] = { + "NONE", + "OPEN_FAILED", + "CORRUPT_DATA", + "NO_REPLICAS", + "BAD_GUID_SUM", + "TOO_SMALL", + "BAD_LABEL", + "VERSION_NEWER", + "VERSION_OLDER", + "UNSUP_FEAT", + "SPARED", + "ERR_EXCEEDED", + "IO_FAILURE", + "BAD_LOG", + "EXTERNAL", + "SPLIT_POOL", + "BAD_ASHIFT", + "EXTERNAL_PERSIST", + "ACTIVE", + "CHILDREN_OFFLINE", + "ASHIFT_TOO_BIG" +}; + +static const char *vdev_init_state_str[] = { + "NONE", + "ACTIVE", + "CANCELED", + "SUSPENDED", + "COMPLETE" +}; + +static const char *vdev_trim_state_str[] = { + "NONE", + "ACTIVE", + "CANCELED", + "SUSPENDED", + "COMPLETE" +}; + +#define ZFS_NICE_TIMESTAMP 100 /* * Given a cb->cb_flags with a histogram bit set, return the iostat_type. 
@@ -307,6 +392,7 @@ static zpool_command_t command_table[] = { { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, { NULL }, { "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT }, + { "prefetch", zpool_do_prefetch, HELP_PREFETCH }, { NULL }, { "list", zpool_do_list, HELP_LIST }, { "iostat", zpool_do_iostat, HELP_IOSTAT }, @@ -346,6 +432,8 @@ static zpool_command_t command_table[] = { #define VDEV_ALLOC_CLASS_LOGS "logs" +#define MAX_CMD_LEN 256 + static zpool_command_t *current_command; static zfs_type_t current_prop_type = (ZFS_TYPE_POOL | ZFS_TYPE_VDEV); static char history_str[HIS_MAX_RECORD_LEN]; @@ -395,9 +483,12 @@ get_usage(zpool_help_t idx) case HELP_LABELCLEAR: return (gettext("\tlabelclear [-f] <vdev>\n")); case HELP_LIST: - return (gettext("\tlist [-gHLpPv] [-o property[,...]] " "[-T d|u] [pool] ... \n" "\t [interval [count]]\n")); + return (gettext("\tlist [-gHLpPv] [-o property[,...]] [-j " "[--json-int, --json-pool-key-guid]] ...\n" "\t [-T d|u] [pool] [interval [count]]\n")); + case HELP_PREFETCH: + return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n" "\t -t ddt <pool>\n")); case HELP_OFFLINE: return (gettext("\toffline [--power]|[[-f][-t]] " "<pool> <device> ...\n")); @@ -422,9 +513,11 @@ get_usage(zpool_help_t idx) return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] " "<pool> [<device> ...]\n")); case HELP_STATUS: - return (gettext("\tstatus [--power] [-c [script1,script2,...]] " "[-DegiLpPstvx] [-T d|u] [pool] ...\n" "\t [interval [count]]\n")); + return (gettext("\tstatus [--power] [-j [--json-int, " "--json-flat-vdevs, ...\n" "\t --json-pool-key-guid]] [-c [script1,script2,...]] " "[-DegiLpPstvx] ...\n" "\t [-T d|u] [pool] [interval [count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" "\tupgrade -v\n" @@ -432,7 +525,9 @@ get_usage(zpool_help_t idx) case HELP_EVENTS: return (gettext("\tevents [-vHf [pool] | -c]\n")); case HELP_GET: - return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] " + return (gettext("\tget [-Hp] [-j [--json-int, " + "--json-pool-key-guid]] ...\n" + "\t [-o \"all\" | field[,...]] " "<\"all\" | property[,...]> <pool> ...\n")); case HELP_SET: return (gettext("\tset <property=value> <pool>\n" "\t <vdev_property=value> <pool> <vdev>\n")); @@ -446,7 +541,7 @@ case HELP_SYNC: return (gettext("\tsync [pool] ...\n")); case HELP_VERSION: - return (gettext("\tversion\n")); + return (gettext("\tversion [-j]\n")); case HELP_WAIT: return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] " "<pool> [interval]\n")); @@ -890,6 +985,264 @@ print_spare_list(nvlist_t *nv, int indent) } } +typedef struct spare_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} spare_cbdata_t; + +static boolean_t +find_vdev(nvlist_t *nv, uint64_t search) +{ + uint64_t guid; + nvlist_t **child; + uint_t c, children; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && + search == guid) + return (B_TRUE); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (find_vdev(child[c], search)) + return (B_TRUE); + } + + return (B_FALSE); + } + +static int +find_spare(zpool_handle_t *zhp, void *data) +{ + spare_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + if (find_vdev(nvroot, cbp->cb_guid)) { + cbp->cb_zhp = zhp; + return (1); + } + + zpool_close(zhp); + return (0); +} + +static void +nice_num_str_nvlist(nvlist_t *item, const char *key, uint64_t value, + boolean_t literal, boolean_t as_int, int format) +{ + char buf[256]; + if (literal) { + if (!as_int)
+ snprintf(buf, 256, "%llu", (u_longlong_t)value); + } else { + switch (format) { + case ZFS_NICENUM_1024: + zfs_nicenum_format(value, buf, 256, ZFS_NICENUM_1024); + break; + case ZFS_NICENUM_BYTES: + zfs_nicenum_format(value, buf, 256, ZFS_NICENUM_BYTES); + break; + case ZFS_NICENUM_TIME: + zfs_nicenum_format(value, buf, 256, ZFS_NICENUM_TIME); + break; + case ZFS_NICE_TIMESTAMP: + format_timestamp(value, buf, 256); + break; + default: + fprintf(stderr, "Invalid number format"); + exit(1); + } + } + if (as_int) + fnvlist_add_uint64(item, key, value); + else + fnvlist_add_string(item, key, buf); +} + +/* + * Generates an nvlist with output version for every command based on params. + * Purpose of this is to add a version of JSON output, considering the schema + * format might be updated for each command in future. + * + * Schema: + * + * "output_version": { + * "command": string, + * "vers_major": integer, + * "vers_minor": integer, + * } + */ +static nvlist_t * +zpool_json_schema(int maj_v, int min_v) +{ + char cmd[MAX_CMD_LEN]; + nvlist_t *sch = fnvlist_alloc(); + nvlist_t *ov = fnvlist_alloc(); + + snprintf(cmd, MAX_CMD_LEN, "zpool %s", current_command->name); + fnvlist_add_string(ov, "command", cmd); + fnvlist_add_uint32(ov, "vers_major", maj_v); + fnvlist_add_uint32(ov, "vers_minor", min_v); + fnvlist_add_nvlist(sch, "output_version", ov); + fnvlist_free(ov); + return (sch); +} + +static void +fill_pool_info(nvlist_t *list, zpool_handle_t *zhp, boolean_t addtype, + boolean_t as_int) +{ + nvlist_t *config = zpool_get_config(zhp, NULL); + uint64_t guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID); + uint64_t txg = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG); + + fnvlist_add_string(list, "name", zpool_get_name(zhp)); + if (addtype) + fnvlist_add_string(list, "type", "POOL"); + fnvlist_add_string(list, "state", zpool_get_state_str(zhp)); + if (as_int) { + if (guid) + fnvlist_add_uint64(list, ZPOOL_CONFIG_POOL_GUID, guid); + if (txg) + fnvlist_add_uint64(list, ZPOOL_CONFIG_POOL_TXG, txg); + fnvlist_add_uint64(list, "spa_version", SPA_VERSION); + fnvlist_add_uint64(list, "zpl_version", ZPL_VERSION); + } else { + char value[ZFS_MAXPROPLEN]; + if (guid) { + snprintf(value, ZFS_MAXPROPLEN, "%llu", + (u_longlong_t)guid); + fnvlist_add_string(list, ZPOOL_CONFIG_POOL_GUID, value); + } + if (txg) { + snprintf(value, ZFS_MAXPROPLEN, "%llu", + (u_longlong_t)txg); + fnvlist_add_string(list, ZPOOL_CONFIG_POOL_TXG, value); + } + fnvlist_add_string(list, "spa_version", SPA_VERSION_STRING); + fnvlist_add_string(list, "zpl_version", ZPL_VERSION_STRING); + } +} + +static void +used_by_other(zpool_handle_t *zhp, nvlist_t *nvdev, nvlist_t *list) +{ + spare_cbdata_t spare_cb; + verify(nvlist_lookup_uint64(nvdev, ZPOOL_CONFIG_GUID, + &spare_cb.cb_guid) == 0); + if (zpool_iter(g_zfs, find_spare, &spare_cb) == 1) { + if (strcmp(zpool_get_name(spare_cb.cb_zhp), + zpool_get_name(zhp)) != 0) { + fnvlist_add_string(list, "used_by", + zpool_get_name(spare_cb.cb_zhp)); + } + zpool_close(spare_cb.cb_zhp); + } +} + +static void +fill_vdev_info(nvlist_t *list, zpool_handle_t *zhp, char *name, + boolean_t addtype, boolean_t as_int) +{ + boolean_t l2c = B_FALSE; + const char *path, *phys, *devid, *bias = NULL; + uint64_t hole = 0, log = 0, spare = 0; + vdev_stat_t *vs; + uint_t c; + nvlist_t *nvdev; + nvlist_t *nvdev_parent = NULL; + char *_name; + + if (strcmp(name, zpool_get_name(zhp)) != 0) + _name = name; + else + _name = (char *)"root-0"; + + nvdev = zpool_find_vdev(zhp, _name, NULL, &l2c, NULL); + 
+ fnvlist_add_string(list, "name", name); + if (addtype) + fnvlist_add_string(list, "type", "VDEV"); + if (nvdev) { + const char *type = fnvlist_lookup_string(nvdev, + ZPOOL_CONFIG_TYPE); + if (type) + fnvlist_add_string(list, "vdev_type", type); + uint64_t guid = fnvlist_lookup_uint64(nvdev, ZPOOL_CONFIG_GUID); + if (guid) { + if (as_int) { + fnvlist_add_uint64(list, "guid", guid); + } else { + char buf[ZFS_MAXPROPLEN]; + snprintf(buf, ZFS_MAXPROPLEN, "%llu", + (u_longlong_t)guid); + fnvlist_add_string(list, "guid", buf); + } + } + if (nvlist_lookup_string(nvdev, ZPOOL_CONFIG_PATH, &path) == 0) + fnvlist_add_string(list, "path", path); + if (nvlist_lookup_string(nvdev, ZPOOL_CONFIG_PHYS_PATH, + &phys) == 0) + fnvlist_add_string(list, "phys_path", phys); + if (nvlist_lookup_string(nvdev, ZPOOL_CONFIG_DEVID, + &devid) == 0) + fnvlist_add_string(list, "devid", devid); + (void) nvlist_lookup_uint64(nvdev, ZPOOL_CONFIG_IS_LOG, &log); + (void) nvlist_lookup_uint64(nvdev, ZPOOL_CONFIG_IS_SPARE, + &spare); + (void) nvlist_lookup_uint64(nvdev, ZPOOL_CONFIG_IS_HOLE, &hole); + if (hole) + fnvlist_add_string(list, "class", VDEV_TYPE_HOLE); + else if (l2c) + fnvlist_add_string(list, "class", VDEV_TYPE_L2CACHE); + else if (spare) + fnvlist_add_string(list, "class", VDEV_TYPE_SPARE); + else if (log) + fnvlist_add_string(list, "class", VDEV_TYPE_LOG); + else { + (void) nvlist_lookup_string(nvdev, + ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); + if (bias != NULL) + fnvlist_add_string(list, "class", bias); + else { + nvdev_parent = NULL; + nvdev_parent = zpool_find_parent_vdev(zhp, + _name, NULL, NULL, NULL); + + /* + * With a mirrored special device, the parent + * "mirror" vdev will have + * ZPOOL_CONFIG_ALLOCATION_BIAS set to "special" + * not the leaf vdevs. If we're a leaf vdev + * in that case we need to look at our parent + * to see if they're "special" to know if we + * are "special" too. 
+ */ + if (nvdev_parent) { + (void) nvlist_lookup_string( + nvdev_parent, + ZPOOL_CONFIG_ALLOCATION_BIAS, + &bias); + } + if (bias != NULL) + fnvlist_add_string(list, "class", bias); + else + fnvlist_add_string(list, "class", + "normal"); + } + } + if (nvlist_lookup_uint64_array(nvdev, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0) { + fnvlist_add_string(list, "state", + vdev_state_str[vs->vs_state]); + } + } +} + static boolean_t prop_list_contains_feature(nvlist_t *proplist) { @@ -2228,51 +2581,6 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max, return (max); } -typedef struct spare_cbdata { - uint64_t cb_guid; - zpool_handle_t *cb_zhp; -} spare_cbdata_t; - -static boolean_t -find_vdev(nvlist_t *nv, uint64_t search) -{ - uint64_t guid; - nvlist_t **child; - uint_t c, children; - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && - search == guid) - return (B_TRUE); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev(child[c], search)) - return (B_TRUE); - } - - return (B_FALSE); -} - -static int -find_spare(zpool_handle_t *zhp, void *data) -{ - spare_cbdata_t *cbp = data; - nvlist_t *config, *nvroot; - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - if (find_vdev(nvroot, cbp->cb_guid)) { - cbp->cb_zhp = zhp; - return (1); - } - - zpool_close(zhp); - return (0); -} - typedef struct status_cbdata { int cb_count; int cb_name_flags; @@ -2290,6 +2598,11 @@ typedef struct status_cbdata { boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; boolean_t cb_print_power; + boolean_t cb_json; + boolean_t cb_flat_vdevs; + nvlist_t *cb_jsobj; + boolean_t cb_json_as_int; + boolean_t cb_json_pool_key_guid; } status_cbdata_t; /* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. 
*/
@@ -2302,6 +2615,46 @@ is_blank_str(const char *str)
 	return (B_TRUE);
 }
 
+static void
+zpool_nvlist_cmd(vdev_cmd_data_list_t *vcdl, const char *pool, const char *path,
+    nvlist_t *item)
+{
+	vdev_cmd_data_t *data;
+	int i, j, k = 1;
+	char tmp[256];
+	const char *val;
+
+	for (i = 0; i < vcdl->count; i++) {
+		if ((strcmp(vcdl->data[i].path, path) != 0) ||
+		    (strcmp(vcdl->data[i].pool, pool) != 0))
+			continue;
+
+		data = &vcdl->data[i];
+		for (j = 0; j < vcdl->uniq_cols_cnt; j++) {
+			val = NULL;
+			for (int k = 0; k < data->cols_cnt; k++) {
+				if (strcmp(data->cols[k],
+				    vcdl->uniq_cols[j]) == 0) {
+					val = data->lines[k];
+					break;
+				}
+			}
+			if (val == NULL || is_blank_str(val))
+				val = "-";
+			fnvlist_add_string(item, vcdl->uniq_cols[j], val);
+		}
+
+		for (j = data->cols_cnt; j < data->lines_cnt; j++) {
+			if (data->lines[j]) {
+				snprintf(tmp, 256, "extra_%d", k++);
+				fnvlist_add_string(item, tmp,
+				    data->lines[j]);
+			}
+		}
+		break;
+	}
+}
+
 /* Print command output lines for specific vdev in a specific pool */
 static void
 zpool_print_cmd(vdev_cmd_data_list_t *vcdl, const char *pool, const char *path)
@@ -3017,6 +3370,7 @@ show_import(nvlist_t *config, boolean_t report_error)
 	uint_t vsc;
 	const char *comment;
 	const char *indent;
+	char buf[2048];
 	status_cbdata_t cb = { 0 };
 
 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
@@ -3122,7 +3476,8 @@ show_import(nvlist_t *config, boolean_t report_error)
 		printf_color(ANSI_YELLOW, gettext("The pool uses the following "
 		    "feature(s) not supported on this system:\n"));
 		color_start(ANSI_YELLOW);
-		zpool_print_unsup_feat(config);
+		zpool_collect_unsup_feat(config, buf, 2048);
+		(void) printf("%s", buf);
 		color_end();
 		break;
 
@@ -3134,7 +3489,8 @@ show_import(nvlist_t *config, boolean_t report_error)
 		    "\t%sfeature(s) not supported on this system:\n"),
 		    indent, indent);
 		color_start(ANSI_YELLOW);
-		zpool_print_unsup_feat(config);
+		zpool_collect_unsup_feat(config, buf, 2048);
+		(void) printf("%s", buf);
 		color_end();
 		break;
 
@@ -3827,6 +4183,72 @@ zpool_do_checkpoint(int argc, char **argv)
 
 #define	CHECKPOINT_OPT	1024
 
+/*
+ * zpool prefetch <type> [<type opts>] <pool>
+ *
+ * Prefetches a particular type of data in the specified pool.
+ */
+int
+zpool_do_prefetch(int argc, char **argv)
+{
+	int c;
+	char *poolname;
+	char *typestr = NULL;
+	zpool_prefetch_type_t type;
+	zpool_handle_t *zhp;
+	int err = 0;
+
+	while ((c = getopt(argc, argv, "t:")) != -1) {
+		switch (c) {
+		case 't':
+			typestr = optarg;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	argc--;
+	argv++;
+
+	if (typestr != NULL && strcmp(typestr, "ddt") == 0) {
+		type = ZPOOL_PREFETCH_DDT;
+	} else {
+		(void) fprintf(stderr, gettext("unsupported prefetch type\n"));
+		usage(B_FALSE);
+	}
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	err = zpool_prefetch(zhp, type);
+
+	zpool_close(zhp);
+
+	return (err);
+}
+
 /*
  * zpool import [-d dir] [-D]
  *        import [-o mntopts] [-o prop=value] ...
[-R root] [-D] [-l] @@ -6303,9 +6725,13 @@ typedef struct list_cbdata { boolean_t cb_verbose; int cb_name_flags; int cb_namewidth; + boolean_t cb_json; boolean_t cb_scripted; zprop_list_t *cb_proplist; boolean_t cb_literal; + nvlist_t *cb_jsobj; + boolean_t cb_json_as_int; + boolean_t cb_json_pool_key_guid; } list_cbdata_t; @@ -6366,7 +6792,7 @@ print_header(list_cbdata_t *cb) * to the described layout. Used by zpool_do_list(). */ static void -print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) +collect_pool(zpool_handle_t *zhp, list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; @@ -6374,6 +6800,20 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) const char *propstr; boolean_t right_justify; size_t width; + zprop_source_t sourcetype = ZPROP_SRC_NONE; + nvlist_t *item, *d, *props; + item = d = props = NULL; + + if (cb->cb_json) { + item = fnvlist_alloc(); + props = fnvlist_alloc(); + d = fnvlist_lookup_nvlist(cb->cb_jsobj, "pools"); + if (d == NULL) { + fprintf(stderr, "pools obj not found.\n"); + exit(1); + } + fill_pool_info(item, zhp, B_TRUE, cb->cb_json_as_int); + } for (; pl != NULL; pl = pl->pl_next) { @@ -6386,7 +6826,7 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) width = cb->cb_namewidth; } - if (!first) { + if (!cb->cb_json && !first) { if (cb->cb_scripted) (void) fputc('\t', stdout); else @@ -6398,7 +6838,8 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) right_justify = B_FALSE; if (pl->pl_prop != ZPROP_USERPROP) { if (zpool_get_prop(zhp, pl->pl_prop, property, - sizeof (property), NULL, cb->cb_literal) != 0) + sizeof (property), &sourcetype, + cb->cb_literal) != 0) propstr = "-"; else propstr = property; @@ -6409,33 +6850,61 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) zpool_prop_get_feature(zhp, pl->pl_user_prop, property, sizeof (property)) == 0) { propstr = property; + sourcetype = ZPROP_SRC_LOCAL; } else if (zfs_prop_user(pl->pl_user_prop) && zpool_get_userprop(zhp, pl->pl_user_prop, property, - sizeof (property), NULL) == 0) { + sizeof (property), &sourcetype) == 0) { propstr = property; } else { propstr = "-"; } - /* - * If this is being called in scripted mode, or if this is the - * last column and it is left-justified, don't include a width - * format specifier. - */ - if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) - (void) fputs(propstr, stdout); - else if (right_justify) - (void) printf("%*s", (int)width, propstr); - else - (void) printf("%-*s", (int)width, propstr); + if (cb->cb_json) { + if (pl->pl_prop == ZPOOL_PROP_NAME) + continue; + (void) zprop_nvlist_one_property( + zpool_prop_to_name(pl->pl_prop), propstr, + sourcetype, NULL, NULL, props, cb->cb_json_as_int); + } else { + /* + * If this is being called in scripted mode, or if this + * is the last column and it is left-justified, don't + * include a width format specifier. 
+ */ + if (cb->cb_scripted || (pl->pl_next == NULL && + !right_justify)) + (void) fputs(propstr, stdout); + else if (right_justify) + (void) printf("%*s", (int)width, propstr); + else + (void) printf("%-*s", (int)width, propstr); + } } - (void) fputc('\n', stdout); + if (cb->cb_json) { + fnvlist_add_nvlist(item, "properties", props); + if (cb->cb_json_pool_key_guid) { + char pool_guid[256]; + uint64_t guid = fnvlist_lookup_uint64( + zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_POOL_GUID); + snprintf(pool_guid, 256, "%llu", + (u_longlong_t)guid); + fnvlist_add_nvlist(d, pool_guid, item); + } else { + fnvlist_add_nvlist(d, zpool_get_name(zhp), + item); + } + fnvlist_free(props); + fnvlist_free(item); + } else + (void) fputc('\n', stdout); } static void -print_one_column(zpool_prop_t prop, uint64_t value, const char *str, - boolean_t scripted, boolean_t valid, enum zfs_nicenum_format format) +collect_vdev_prop(zpool_prop_t prop, uint64_t value, const char *str, + boolean_t scripted, boolean_t valid, enum zfs_nicenum_format format, + boolean_t json, nvlist_t *nvl, boolean_t as_int) { char propval[64]; boolean_t fixed; @@ -6446,6 +6915,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str, case ZPOOL_PROP_EXPANDSZ: case ZPOOL_PROP_CHECKPOINT: case ZPOOL_PROP_DEDUPRATIO: + case ZPOOL_PROP_DEDUPCACHED: if (value == 0) (void) strlcpy(propval, "-", sizeof (propval)); else @@ -6484,10 +6954,15 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str, if (!valid) (void) strlcpy(propval, "-", sizeof (propval)); - if (scripted) - (void) printf("\t%s", propval); - else - (void) printf(" %*s", (int)width, propval); + if (json) { + zprop_nvlist_one_property(zpool_prop_to_name(prop), propval, + ZPROP_SRC_NONE, NULL, NULL, nvl, as_int); + } else { + if (scripted) + (void) printf("\t%s", propval); + else + (void) printf(" %*s", (int)width, propval); + } } /* @@ -6495,15 +6970,17 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str, * not compatible with '-o' option */ static void -print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, - list_cbdata_t *cb, int depth, boolean_t isspare) +collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, + list_cbdata_t *cb, int depth, boolean_t isspare, nvlist_t *item) { nvlist_t **child; vdev_stat_t *vs; - uint_t c, children; + uint_t c, children = 0; char *vname; boolean_t scripted = cb->cb_scripted; uint64_t islog = B_FALSE; + nvlist_t *props, *ent, *ch, *obj, *l2c, *sp; + props = ent = ch = obj = sp = l2c = NULL; const char *dashes = "%-*s - - - - " "- - - - -\n"; @@ -6524,13 +7001,21 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, if (strcmp(name, VDEV_TYPE_INDIRECT) == 0) return; - if (scripted) - (void) printf("\t%s", name); - else if (strlen(name) + depth > cb->cb_namewidth) - (void) printf("%*s%s", depth, "", name); - else - (void) printf("%*s%s%*s", depth, "", name, - (int)(cb->cb_namewidth - strlen(name) - depth), ""); + if (cb->cb_json) { + props = fnvlist_alloc(); + ent = fnvlist_alloc(); + fill_vdev_info(ent, zhp, (char *)name, B_FALSE, + cb->cb_json_as_int); + } else { + if (scripted) + (void) printf("\t%s", name); + else if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - + depth), ""); + } /* * Print the properties for the individual vdevs. 
Some @@ -6538,30 +7023,39 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, * 'toplevel' boolean value is passed to the print_one_column() * to indicate that the value is valid. */ - if (VDEV_STAT_VALID(vs_pspace, c) && vs->vs_pspace) - print_one_column(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL, - scripted, B_TRUE, format); - else - print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, NULL, - scripted, toplevel, format); - print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL, - scripted, toplevel, format); - print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, - NULL, scripted, toplevel, format); - print_one_column(ZPOOL_PROP_CHECKPOINT, - vs->vs_checkpoint_space, NULL, scripted, toplevel, format); - print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, NULL, - scripted, B_TRUE, format); - print_one_column(ZPOOL_PROP_FRAGMENTATION, + if (VDEV_STAT_VALID(vs_pspace, c) && vs->vs_pspace) { + collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL, + scripted, B_TRUE, format, cb->cb_json, props, + cb->cb_json_as_int); + } else { + collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_space, NULL, + scripted, toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + } + collect_vdev_prop(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL, + scripted, toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + collect_vdev_prop(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, + NULL, scripted, toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + collect_vdev_prop(ZPOOL_PROP_CHECKPOINT, + vs->vs_checkpoint_space, NULL, scripted, toplevel, format, + cb->cb_json, props, cb->cb_json_as_int); + collect_vdev_prop(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, NULL, + scripted, B_TRUE, format, cb->cb_json, props, + cb->cb_json_as_int); + collect_vdev_prop(ZPOOL_PROP_FRAGMENTATION, vs->vs_fragmentation, NULL, scripted, (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel), - format); + format, cb->cb_json, props, cb->cb_json_as_int); cap = (vs->vs_space == 0) ? 
0 : (vs->vs_alloc * 10000 / vs->vs_space); - print_one_column(ZPOOL_PROP_CAPACITY, cap, NULL, - scripted, toplevel, format); - print_one_column(ZPOOL_PROP_DEDUPRATIO, 0, NULL, - scripted, toplevel, format); + collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap, NULL, + scripted, toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + collect_vdev_prop(ZPOOL_PROP_DEDUPRATIO, 0, NULL, + scripted, toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); state = zpool_state_to_name(vs->vs_state, vs->vs_aux); if (isspare) { if (vs->vs_aux == VDEV_AUX_SPARED) @@ -6569,14 +7063,28 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, else if (vs->vs_state == VDEV_STATE_HEALTHY) state = "AVAIL"; } - print_one_column(ZPOOL_PROP_HEALTH, 0, state, scripted, - B_TRUE, format); - (void) fputc('\n', stdout); + collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state, scripted, + B_TRUE, format, cb->cb_json, props, cb->cb_json_as_int); + + if (cb->cb_json) { + fnvlist_add_nvlist(ent, "properties", props); + fnvlist_free(props); + } else + (void) fputc('\n', stdout); } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) + &child, &children) != 0) { + if (cb->cb_json) { + fnvlist_add_nvlist(item, name, ent); + fnvlist_free(ent); + } return; + } + + if (cb->cb_json) { + ch = fnvlist_alloc(); + } /* list the normal vdevs first */ for (c = 0; c < children; c++) { @@ -6595,14 +7103,28 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); - print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE); + + if (name == NULL || cb->cb_json != B_TRUE) + collect_list_stats(zhp, vname, child[c], cb, depth + 2, + B_FALSE, item); + else if (cb->cb_json) { + collect_list_stats(zhp, vname, child[c], cb, depth + 2, + B_FALSE, ch); + } free(vname); } + if (cb->cb_json) { + if (!nvlist_empty(ch)) + fnvlist_add_nvlist(ent, "vdevs", ch); + fnvlist_free(ch); + } + /* list the classes: 'logs', 'dedup', and 'special' */ for (uint_t n = 0; n < ARRAY_SIZE(class_name); n++) { boolean_t printed = B_FALSE; - + if (cb->cb_json) + obj = fnvlist_alloc(); for (c = 0; c < children; c++) { const char *bias = NULL; const char *type = NULL; @@ -6621,7 +7143,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0) continue; - if (!printed) { + if (!printed && !cb->cb_json) { /* LINTED E_SEC_PRINTF_VAR_FMT */ (void) printf(dashes, cb->cb_namewidth, class_name[n]); @@ -6629,36 +7151,64 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, } vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); - print_list_stats(zhp, vname, child[c], cb, depth + 2, - B_FALSE); + collect_list_stats(zhp, vname, child[c], cb, depth + 2, + B_FALSE, obj); free(vname); } + if (cb->cb_json) { + if (!nvlist_empty(obj)) + fnvlist_add_nvlist(item, class_name[n], obj); + fnvlist_free(obj); + } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0 && children > 0) { - /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, "cache"); + if (cb->cb_json) { + l2c = fnvlist_alloc(); + } else { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "cache"); + } for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags); - print_list_stats(zhp, vname, child[c], cb, depth + 2, - B_FALSE); + 
collect_list_stats(zhp, vname, child[c], cb, depth + 2, + B_FALSE, l2c); free(vname); } + if (cb->cb_json) { + if (!nvlist_empty(l2c)) + fnvlist_add_nvlist(item, "l2cache", l2c); + fnvlist_free(l2c); + } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0 && children > 0) { - /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, "spare"); + if (cb->cb_json) { + sp = fnvlist_alloc(); + } else { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "spare"); + } for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags); - print_list_stats(zhp, vname, child[c], cb, depth + 2, - B_TRUE); + collect_list_stats(zhp, vname, child[c], cb, depth + 2, + B_TRUE, sp); free(vname); } + if (cb->cb_json) { + if (!nvlist_empty(sp)) + fnvlist_add_nvlist(item, "spares", sp); + fnvlist_free(sp); + } + } + + if (name != NULL && cb->cb_json) { + fnvlist_add_nvlist(item, name, ent); + fnvlist_free(ent); } } @@ -6668,17 +7218,44 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, static int list_callback(zpool_handle_t *zhp, void *data) { + nvlist_t *p, *d, *nvdevs; + uint64_t guid; + char pool_guid[256]; + const char *pool_name = zpool_get_name(zhp); list_cbdata_t *cbp = data; + p = d = nvdevs = NULL; - print_pool(zhp, cbp); + collect_pool(zhp, cbp); if (cbp->cb_verbose) { nvlist_t *config, *nvroot; - config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - print_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE); + if (cbp->cb_json) { + d = fnvlist_lookup_nvlist(cbp->cb_jsobj, + "pools"); + if (cbp->cb_json_pool_key_guid) { + guid = fnvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_GUID); + snprintf(pool_guid, 256, "%llu", + (u_longlong_t)guid); + p = fnvlist_lookup_nvlist(d, pool_guid); + } else { + p = fnvlist_lookup_nvlist(d, pool_name); + } + nvdevs = fnvlist_alloc(); + } + collect_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE, nvdevs); + if (cbp->cb_json) { + fnvlist_add_nvlist(p, "vdevs", nvdevs); + if (cbp->cb_json_pool_key_guid) + fnvlist_add_nvlist(d, pool_guid, p); + else + fnvlist_add_nvlist(d, pool_name, p); + fnvlist_add_nvlist(cbp->cb_jsobj, "pools", d); + fnvlist_free(nvdevs); + } } return (0); @@ -6718,6 +7295,9 @@ get_namewidth_list(zpool_handle_t *zhp, void *data) * -p Display values in parsable (exact) format. * -P Display full path for vdev name. * -T Display a timestamp in date(1) or Unix format + * -j Display the output in JSON format + * --json-int Display the numbers as integer instead of strings. + * --json-pool-key-guid Set pool GUID as key for pool objects. * * List all pools in the system, whether or not they're healthy. Output space * statistics for each one, as well as health status summary. 
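
As a sketch of how the new JSON flags compose (the pool name, GUID, and
elided properties below are illustrative, not captured from a real pool):

    # zpool list -j --json-int --json-pool-key-guid
    {"output_version": {"command": "zpool list", "vers_major": 0,
        "vers_minor": 1},
     "pools": {"9413787538247115218": {"name": "tank", "type": "POOL",
        "state": "ONLINE", "properties": {...}}}}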
@@ -6736,10 +7316,19 @@ zpool_do_list(int argc, char **argv) unsigned long count = 0; zpool_list_t *list; boolean_t first = B_TRUE; + nvlist_t *data = NULL; current_prop_type = ZFS_TYPE_POOL; + struct option long_options[] = { + {"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT}, + {"json-pool-key-guid", no_argument, NULL, + ZPOOL_OPTION_POOL_KEY_GUID}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, ":gHLo:pPT:v")) != -1) { + while ((c = getopt_long(argc, argv, ":gjHLo:pPT:v", long_options, + NULL)) != -1) { switch (c) { case 'g': cb.cb_name_flags |= VDEV_NAME_GUID; @@ -6759,6 +7348,16 @@ zpool_do_list(int argc, char **argv) case 'p': cb.cb_literal = B_TRUE; break; + case 'j': + cb.cb_json = B_TRUE; + break; + case ZPOOL_OPTION_JSON_NUMS_AS_INT: + cb.cb_json_as_int = B_TRUE; + cb.cb_literal = B_TRUE; + break; + case ZPOOL_OPTION_POOL_KEY_GUID: + cb.cb_json_pool_key_guid = B_TRUE; + break; case 'T': get_timestamp_arg(*optarg); break; @@ -6781,6 +7380,18 @@ zpool_do_list(int argc, char **argv) argc -= optind; argv += optind; + if (!cb.cb_json && cb.cb_json_as_int) { + (void) fprintf(stderr, gettext("'--json-int' only works with" + " '-j' option\n")); + usage(B_FALSE); + } + + if (!cb.cb_json && cb.cb_json_pool_key_guid) { + (void) fprintf(stderr, gettext("'json-pool-key-guid' only" + " works with '-j' option\n")); + usage(B_FALSE); + } + get_interval_count(&argc, argv, &interval, &count); if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0) @@ -6794,18 +7405,43 @@ zpool_do_list(int argc, char **argv) if (pool_list_count(list) == 0) break; + if (cb.cb_json) { + cb.cb_jsobj = zpool_json_schema(0, 1); + data = fnvlist_alloc(); + fnvlist_add_nvlist(cb.cb_jsobj, "pools", data); + fnvlist_free(data); + } + cb.cb_namewidth = 0; (void) pool_list_iter(list, B_FALSE, get_namewidth_list, &cb); - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); + if (timestamp_fmt != NODATE) { + if (cb.cb_json) { + if (cb.cb_json_as_int) { + fnvlist_add_uint64(cb.cb_jsobj, "time", + time(NULL)); + } else { + char ts[128]; + get_timestamp(timestamp_fmt, ts, 128); + fnvlist_add_string(cb.cb_jsobj, "time", + ts); + } + } else + print_timestamp(timestamp_fmt); + } - if (!cb.cb_scripted && (first || cb.cb_verbose)) { + if (!cb.cb_scripted && (first || cb.cb_verbose) && + !cb.cb_json) { print_header(&cb); first = B_FALSE; } ret = pool_list_iter(list, B_TRUE, list_callback, &cb); + if (ret == 0 && cb.cb_json) + zcmd_print_json(cb.cb_jsobj); + else if (ret != 0 && cb.cb_json) + nvlist_free(cb.cb_jsobj); + if (interval == 0) break; @@ -6818,7 +7454,8 @@ zpool_do_list(int argc, char **argv) (void) fsleep(interval); } - if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) { + if (argc == 0 && !cb.cb_scripted && !cb.cb_json && + pool_list_count(list) == 0) { (void) printf(gettext("no pools available\n")); ret = 0; } @@ -8429,6 +9066,807 @@ check_rebuilding(nvlist_t *nvroot, uint64_t *rebuild_end_time) return (rebuilding); } +static void +vdev_stats_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, + int depth, boolean_t isspare, char *parent, nvlist_t *item) +{ + nvlist_t *vds, **child, *ch = NULL; + uint_t vsc, children; + vdev_stat_t *vs; + char *vname; + uint64_t notpresent; + const char *type, *path; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + children = 0; + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + verify(nvlist_lookup_string(nv, 
ZPOOL_CONFIG_TYPE, &type) == 0);
+	if (strcmp(type, VDEV_TYPE_INDIRECT) == 0)
+		return;
+
+	if (cb->cb_print_unhealthy && depth > 0 &&
+	    for_each_vdev_in_nvlist(nv, vdev_health_check_cb, cb) == 0) {
+		return;
+	}
+	vname = zpool_vdev_name(g_zfs, zhp, nv,
+	    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
+	vds = fnvlist_alloc();
+	fill_vdev_info(vds, zhp, vname, B_FALSE, cb->cb_json_as_int);
+	if (cb->cb_flat_vdevs && parent != NULL) {
+		fnvlist_add_string(vds, "parent", parent);
+	}
+
+	if (isspare) {
+		if (vs->vs_aux == VDEV_AUX_SPARED) {
+			fnvlist_add_string(vds, "state", "INUSE");
+			used_by_other(zhp, nv, vds);
+		} else if (vs->vs_state == VDEV_STATE_HEALTHY)
+			fnvlist_add_string(vds, "state", "AVAIL");
+	} else {
+		if (vs->vs_alloc) {
+			nice_num_str_nvlist(vds, "alloc_space", vs->vs_alloc,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_space) {
+			nice_num_str_nvlist(vds, "total_space", vs->vs_space,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_dspace) {
+			nice_num_str_nvlist(vds, "def_space", vs->vs_dspace,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_rsize) {
+			nice_num_str_nvlist(vds, "rep_dev_size", vs->vs_rsize,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_esize) {
+			nice_num_str_nvlist(vds, "ex_dev_size", vs->vs_esize,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_self_healed) {
+			nice_num_str_nvlist(vds, "self_healed",
+			    vs->vs_self_healed, cb->cb_literal,
+			    cb->cb_json_as_int, ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_pspace) {
+			nice_num_str_nvlist(vds, "phys_space", vs->vs_pspace,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_BYTES);
+		}
+		nice_num_str_nvlist(vds, "read_errors", vs->vs_read_errors,
+		    cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024);
+		nice_num_str_nvlist(vds, "write_errors", vs->vs_write_errors,
+		    cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024);
+		nice_num_str_nvlist(vds, "checksum_errors",
+		    vs->vs_checksum_errors, cb->cb_literal,
+		    cb->cb_json_as_int, ZFS_NICENUM_1024);
+		if (vs->vs_scan_processed) {
+			nice_num_str_nvlist(vds, "scan_processed",
+			    vs->vs_scan_processed, cb->cb_literal,
+			    cb->cb_json_as_int, ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_checkpoint_space) {
+			nice_num_str_nvlist(vds, "checkpoint_space",
+			    vs->vs_checkpoint_space, cb->cb_literal,
+			    cb->cb_json_as_int, ZFS_NICENUM_BYTES);
+		}
+		if (vs->vs_resilver_deferred) {
+			nice_num_str_nvlist(vds, "resilver_deferred",
+			    vs->vs_resilver_deferred, B_TRUE,
+			    cb->cb_json_as_int, ZFS_NICENUM_1024);
+		}
+		if (children == 0) {
+			nice_num_str_nvlist(vds, "slow_ios", vs->vs_slow_ios,
+			    cb->cb_literal, cb->cb_json_as_int,
+			    ZFS_NICENUM_1024);
+		}
+		if (cb->cb_print_power) {
+			if (children == 0) {
+				/* Only leaf vdevs have physical slots */
+				switch (zpool_power_current_state(zhp, (char *)
+				    fnvlist_lookup_string(nv,
+				    ZPOOL_CONFIG_PATH))) {
+				case 0:
+					fnvlist_add_string(vds, "power_state",
+					    "off");
+					break;
+				case 1:
+					fnvlist_add_string(vds, "power_state",
+					    "on");
+					break;
+				default:
+					fnvlist_add_string(vds, "power_state",
+					    "-");
+				}
+			} else {
+				fnvlist_add_string(vds, "power_state", "-");
+			}
+		}
+	}
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+	    &notpresent) == 0) {
+		nice_num_str_nvlist(vds, ZPOOL_CONFIG_NOT_PRESENT,
+		    1, B_TRUE, cb->cb_json_as_int, ZFS_NICENUM_BYTES);
+		fnvlist_add_string(vds, "was",
+		    fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH));
+	} else if (vs->vs_aux != VDEV_AUX_NONE) {
+		fnvlist_add_string(vds, "aux",
vdev_aux_str[vs->vs_aux]); + } else if (children == 0 && !isspare && + getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") == NULL && + VDEV_STAT_VALID(vs_physical_ashift, vsc) && + vs->vs_configured_ashift < vs->vs_physical_ashift) { + nice_num_str_nvlist(vds, "configured_ashift", + vs->vs_configured_ashift, B_TRUE, cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(vds, "physical_ashift", + vs->vs_physical_ashift, B_TRUE, cb->cb_json_as_int, + ZFS_NICENUM_1024); + } + if (vs->vs_scan_removing != 0) { + nice_num_str_nvlist(vds, "removing", vs->vs_scan_removing, + B_TRUE, cb->cb_json_as_int, ZFS_NICENUM_1024); + } else if (VDEV_STAT_VALID(vs_noalloc, vsc) && vs->vs_noalloc != 0) { + nice_num_str_nvlist(vds, "noalloc", vs->vs_noalloc, + B_TRUE, cb->cb_json_as_int, ZFS_NICENUM_1024); + } + + if (cb->vcdl != NULL) { + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { + zpool_nvlist_cmd(cb->vcdl, zpool_get_name(zhp), + path, vds); + } + } + + if (children == 0) { + if (cb->cb_print_vdev_init) { + if (vs->vs_initialize_state != 0) { + uint64_t st = vs->vs_initialize_state; + fnvlist_add_string(vds, "init_state", + vdev_init_state_str[st]); + nice_num_str_nvlist(vds, "initialized", + vs->vs_initialize_bytes_done, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(vds, "to_initialize", + vs->vs_initialize_bytes_est, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(vds, "init_time", + vs->vs_initialize_action_time, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(vds, "init_errors", + vs->vs_initialize_errors, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + } else { + fnvlist_add_string(vds, "init_state", + "UNINITIALIZED"); + } + } + if (cb->cb_print_vdev_trim) { + if (vs->vs_trim_notsup == 0) { + if (vs->vs_trim_state != 0) { + uint64_t st = vs->vs_trim_state; + fnvlist_add_string(vds, "trim_state", + vdev_trim_state_str[st]); + nice_num_str_nvlist(vds, "trimmed", + vs->vs_trim_bytes_done, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(vds, "to_trim", + vs->vs_trim_bytes_est, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(vds, "trim_time", + vs->vs_trim_action_time, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(vds, "trim_errors", + vs->vs_trim_errors, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + } else + fnvlist_add_string(vds, "trim_state", + "UNTRIMMED"); + } + nice_num_str_nvlist(vds, "trim_notsup", + vs->vs_trim_notsup, B_TRUE, + cb->cb_json_as_int, ZFS_NICENUM_1024); + } + } else { + ch = fnvlist_alloc(); + } + + if (cb->cb_flat_vdevs && children == 0) { + fnvlist_add_nvlist(item, vname, vds); + } + + for (int c = 0; c < children; c++) { + uint64_t islog = B_FALSE, ishole = B_FALSE; + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &islog); + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, + &ishole); + if (islog || ishole) + continue; + if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) + continue; + if (cb->cb_flat_vdevs) { + vdev_stats_nvlist(zhp, cb, child[c], depth + 2, isspare, + vname, item); + } + vdev_stats_nvlist(zhp, cb, child[c], depth + 2, isspare, + vname, ch); + } + + if (ch != NULL) { + if (!nvlist_empty(ch)) + fnvlist_add_nvlist(vds, "vdevs", ch); + fnvlist_free(ch); + } + fnvlist_add_nvlist(item, vname, vds); + fnvlist_free(vds); + free(vname); +} + +static void 
+class_vdevs_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, + const char *class, nvlist_t *item) +{ + uint_t c, children; + nvlist_t **child; + nvlist_t *class_obj = NULL; + + if (!cb->cb_flat_vdevs) + class_obj = fnvlist_alloc(); + + assert(zhp != NULL || !cb->cb_verbose); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, + &children) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + const char *bias = NULL; + const char *type = NULL; + char *name = zpool_vdev_name(g_zfs, zhp, child[c], + cb->cb_name_flags | VDEV_NAME_TYPE_ID); + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + + if (is_log) { + bias = (char *)VDEV_ALLOC_CLASS_LOGS; + } else { + (void) nvlist_lookup_string(child[c], + ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); + (void) nvlist_lookup_string(child[c], + ZPOOL_CONFIG_TYPE, &type); + } + + if (bias == NULL || strcmp(bias, class) != 0) + continue; + if (!is_log && strcmp(type, VDEV_TYPE_INDIRECT) == 0) + continue; + + if (cb->cb_flat_vdevs) { + vdev_stats_nvlist(zhp, cb, child[c], 2, B_FALSE, + NULL, item); + } else { + vdev_stats_nvlist(zhp, cb, child[c], 2, B_FALSE, + NULL, class_obj); + } + free(name); + } + if (!cb->cb_flat_vdevs) { + if (!nvlist_empty(class_obj)) + fnvlist_add_nvlist(item, class, class_obj); + fnvlist_free(class_obj); + } +} + +static void +l2cache_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, + nvlist_t *item) +{ + nvlist_t *l2c = NULL, **l2cache; + uint_t nl2cache; + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + if (nl2cache == 0) + return; + if (!cb->cb_flat_vdevs) + l2c = fnvlist_alloc(); + for (int i = 0; i < nl2cache; i++) { + if (cb->cb_flat_vdevs) { + vdev_stats_nvlist(zhp, cb, l2cache[i], 2, + B_FALSE, NULL, item); + } else { + vdev_stats_nvlist(zhp, cb, l2cache[i], 2, + B_FALSE, NULL, l2c); + } + } + } + if (!cb->cb_flat_vdevs) { + if (!nvlist_empty(l2c)) + fnvlist_add_nvlist(item, "l2cache", l2c); + fnvlist_free(l2c); + } +} + +static void +spares_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, + nvlist_t *item) +{ + nvlist_t *sp = NULL, **spares; + uint_t nspares; + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + if (nspares == 0) + return; + if (!cb->cb_flat_vdevs) + sp = fnvlist_alloc(); + for (int i = 0; i < nspares; i++) { + if (cb->cb_flat_vdevs) { + vdev_stats_nvlist(zhp, cb, spares[i], 2, B_TRUE, + NULL, item); + } else { + vdev_stats_nvlist(zhp, cb, spares[i], 2, B_TRUE, + NULL, sp); + } + } + } + if (!cb->cb_flat_vdevs) { + if (!nvlist_empty(sp)) + fnvlist_add_nvlist(item, "spares", sp); + fnvlist_free(sp); + } +} + +static void +errors_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *item) +{ + uint64_t nerr; + nvlist_t *config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, + &nerr) == 0) { + nice_num_str_nvlist(item, ZPOOL_CONFIG_ERRCOUNT, nerr, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024); + if (nerr != 0 && cb->cb_verbose) { + nvlist_t *nverrlist = NULL; + if (zpool_get_errlog(zhp, &nverrlist) == 0) { + int i = 0; + int count = 0; + size_t len = MAXPATHLEN * 2; + nvpair_t *elem = NULL; + + for (nvpair_t *pair = + nvlist_next_nvpair(nverrlist, NULL); + pair != NULL; + pair = nvlist_next_nvpair(nverrlist, pair)) + count++; + char **errl = (char **)malloc( + count * sizeof (char *)); + + while ((elem = nvlist_next_nvpair(nverrlist, + elem)) != NULL) { + nvlist_t *nv; + 
uint64_t dsobj, obj; + + verify(nvpair_value_nvlist(elem, + &nv) == 0); + verify(nvlist_lookup_uint64(nv, + ZPOOL_ERR_DATASET, &dsobj) == 0); + verify(nvlist_lookup_uint64(nv, + ZPOOL_ERR_OBJECT, &obj) == 0); + errl[i] = safe_malloc(len); + zpool_obj_to_path(zhp, dsobj, obj, + errl[i++], len); + } + nvlist_free(nverrlist); + fnvlist_add_string_array(item, "errlist", + (const char **)errl, count); + for (int i = 0; i < count; ++i) + free(errl[i]); + free(errl); + } else + fnvlist_add_string(item, "errlist", + strerror(errno)); + } + } +} + +static void +ddt_stats_nvlist(ddt_stat_t *dds, status_cbdata_t *cb, nvlist_t *item) +{ + nice_num_str_nvlist(item, "blocks", dds->dds_blocks, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024); + nice_num_str_nvlist(item, "logical_size", dds->dds_lsize, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(item, "physical_size", dds->dds_psize, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(item, "deflated_size", dds->dds_dsize, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(item, "ref_blocks", dds->dds_ref_blocks, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024); + nice_num_str_nvlist(item, "ref_lsize", dds->dds_ref_lsize, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(item, "ref_psize", dds->dds_ref_psize, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(item, "ref_dsize", dds->dds_ref_dsize, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); +} + +static void +dedup_stats_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *item) +{ + nvlist_t *config; + if (cb->cb_dedup_stats) { + ddt_histogram_t *ddh; + ddt_stat_t *dds; + ddt_object_t *ddo; + nvlist_t *ddt_stat, *ddt_obj, *dedup; + uint_t c; + uint64_t cspace_prop; + + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_uint64_array(config, + ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t **)&ddo, &c) != 0) + return; + + dedup = fnvlist_alloc(); + ddt_obj = fnvlist_alloc(); + nice_num_str_nvlist(dedup, "obj_count", ddo->ddo_count, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024); + if (ddo->ddo_count == 0) { + fnvlist_add_nvlist(dedup, ZPOOL_CONFIG_DDT_OBJ_STATS, + ddt_obj); + fnvlist_add_nvlist(item, "dedup_stats", dedup); + fnvlist_free(ddt_obj); + fnvlist_free(dedup); + return; + } else { + nice_num_str_nvlist(dedup, "dspace", ddo->ddo_dspace, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(dedup, "mspace", ddo->ddo_mspace, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + /* + * Squash cached size into in-core size to handle race. + * Only include cached size if it is available. 
+ */ + cspace_prop = zpool_get_prop_int(zhp, + ZPOOL_PROP_DEDUPCACHED, NULL); + cspace_prop = MIN(cspace_prop, ddo->ddo_mspace); + nice_num_str_nvlist(dedup, "cspace", cspace_prop, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + } + + ddt_stat = fnvlist_alloc(); + if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, + (uint64_t **)&dds, &c) == 0) { + nvlist_t *total = fnvlist_alloc(); + if (dds->dds_blocks == 0) + fnvlist_add_string(total, "blocks", "0"); + else + ddt_stats_nvlist(dds, cb, total); + fnvlist_add_nvlist(ddt_stat, "total", total); + fnvlist_free(total); + } + if (nvlist_lookup_uint64_array(config, + ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t **)&ddh, &c) == 0) { + nvlist_t *hist = fnvlist_alloc(); + nvlist_t *entry = NULL; + char buf[16]; + for (int h = 0; h < 64; h++) { + if (ddh->ddh_stat[h].dds_blocks != 0) { + entry = fnvlist_alloc(); + ddt_stats_nvlist(&ddh->ddh_stat[h], cb, + entry); + snprintf(buf, 16, "%d", h); + fnvlist_add_nvlist(hist, buf, entry); + fnvlist_free(entry); + } + } + if (!nvlist_empty(hist)) + fnvlist_add_nvlist(ddt_stat, "histogram", hist); + fnvlist_free(hist); + } + + if (!nvlist_empty(ddt_obj)) { + fnvlist_add_nvlist(dedup, ZPOOL_CONFIG_DDT_OBJ_STATS, + ddt_obj); + } + fnvlist_free(ddt_obj); + if (!nvlist_empty(ddt_stat)) { + fnvlist_add_nvlist(dedup, ZPOOL_CONFIG_DDT_STATS, + ddt_stat); + } + fnvlist_free(ddt_stat); + if (!nvlist_empty(dedup)) + fnvlist_add_nvlist(item, "dedup_stats", dedup); + fnvlist_free(dedup); + } +} + +static void +raidz_expand_status_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, + nvlist_t *nvroot, nvlist_t *item) +{ + uint_t c; + pool_raidz_expand_stat_t *pres = NULL; + if (nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_RAIDZ_EXPAND_STATS, (uint64_t **)&pres, &c) == 0) { + nvlist_t **child; + uint_t children; + nvlist_t *nv = fnvlist_alloc(); + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + assert(pres->pres_expanding_vdev < children); + char *name = + zpool_vdev_name(g_zfs, zhp, + child[pres->pres_expanding_vdev], 0); + fill_vdev_info(nv, zhp, name, B_FALSE, cb->cb_json_as_int); + fnvlist_add_string(nv, "state", + pool_scan_state_str[pres->pres_state]); + nice_num_str_nvlist(nv, "expanding_vdev", + pres->pres_expanding_vdev, B_TRUE, cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(nv, "start_time", pres->pres_start_time, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "end_time", pres->pres_end_time, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "to_reflow", pres->pres_to_reflow, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "reflowed", pres->pres_reflowed, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "waiting_for_resilver", + pres->pres_waiting_for_resilver, B_TRUE, + cb->cb_json_as_int, ZFS_NICENUM_1024); + fnvlist_add_nvlist(item, ZPOOL_CONFIG_RAIDZ_EXPAND_STATS, nv); + fnvlist_free(nv); + free(name); + } +} + +static void +checkpoint_status_nvlist(nvlist_t *nvroot, status_cbdata_t *cb, + nvlist_t *item) +{ + uint_t c; + pool_checkpoint_stat_t *pcs = NULL; + if (nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c) == 0) { + nvlist_t *nv = fnvlist_alloc(); + fnvlist_add_string(nv, "state", + checkpoint_state_str[pcs->pcs_state]); + nice_num_str_nvlist(nv, "start_time", + pcs->pcs_start_time, cb->cb_literal, cb->cb_json_as_int, + 
ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "space", + pcs->pcs_space, cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + fnvlist_add_nvlist(item, ZPOOL_CONFIG_CHECKPOINT_STATS, nv); + fnvlist_free(nv); + } +} + +static void +removal_status_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, + nvlist_t *nvroot, nvlist_t *item) +{ + uint_t c; + pool_removal_stat_t *prs = NULL; + if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_REMOVAL_STATS, + (uint64_t **)&prs, &c) == 0) { + if (prs->prs_state != DSS_NONE) { + nvlist_t **child; + uint_t children; + verify(nvlist_lookup_nvlist_array(nvroot, + ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); + assert(prs->prs_removing_vdev < children); + char *vdev_name = zpool_vdev_name(g_zfs, zhp, + child[prs->prs_removing_vdev], B_TRUE); + nvlist_t *nv = fnvlist_alloc(); + fill_vdev_info(nv, zhp, vdev_name, B_FALSE, + cb->cb_json_as_int); + fnvlist_add_string(nv, "state", + pool_scan_state_str[prs->prs_state]); + nice_num_str_nvlist(nv, "removing_vdev", + prs->prs_removing_vdev, B_TRUE, cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(nv, "start_time", + prs->prs_start_time, cb->cb_literal, + cb->cb_json_as_int, ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "end_time", prs->prs_end_time, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "to_copy", prs->prs_to_copy, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "copied", prs->prs_copied, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "mapping_memory", + prs->prs_mapping_memory, cb->cb_literal, + cb->cb_json_as_int, ZFS_NICENUM_BYTES); + fnvlist_add_nvlist(item, + ZPOOL_CONFIG_REMOVAL_STATS, nv); + fnvlist_free(nv); + free(vdev_name); + } + } +} + +static void +scan_status_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, + nvlist_t *nvroot, nvlist_t *item) +{ + pool_scan_stat_t *ps = NULL; + uint_t c; + nvlist_t *scan = fnvlist_alloc(); + nvlist_t **child; + uint_t children; + + if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, + (uint64_t **)&ps, &c) == 0) { + fnvlist_add_string(scan, "function", + pool_scan_func_str[ps->pss_func]); + fnvlist_add_string(scan, "state", + pool_scan_state_str[ps->pss_state]); + nice_num_str_nvlist(scan, "start_time", ps->pss_start_time, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(scan, "end_time", ps->pss_end_time, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(scan, "to_examine", ps->pss_to_examine, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(scan, "examined", ps->pss_examined, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(scan, "skipped", ps->pss_skipped, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(scan, "processed", ps->pss_processed, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(scan, "errors", ps->pss_errors, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_1024); + nice_num_str_nvlist(scan, "bytes_per_scan", ps->pss_pass_exam, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(scan, "pass_start", ps->pss_pass_start, + B_TRUE, cb->cb_json_as_int, ZFS_NICENUM_1024); + nice_num_str_nvlist(scan, "scrub_pause", + ps->pss_pass_scrub_pause, cb->cb_literal, + cb->cb_json_as_int, ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(scan, "scrub_spent_paused", + ps->pss_pass_scrub_spent_paused, + 
B_TRUE, cb->cb_json_as_int, ZFS_NICENUM_1024); + nice_num_str_nvlist(scan, "issued_bytes_per_scan", + ps->pss_pass_issued, cb->cb_literal, + cb->cb_json_as_int, ZFS_NICENUM_BYTES); + nice_num_str_nvlist(scan, "issued", ps->pss_issued, + cb->cb_literal, cb->cb_json_as_int, ZFS_NICENUM_BYTES); + if (ps->pss_error_scrub_func == POOL_SCAN_ERRORSCRUB && + ps->pss_error_scrub_start > ps->pss_start_time) { + fnvlist_add_string(scan, "err_scrub_func", + pool_scan_func_str[ps->pss_error_scrub_func]); + fnvlist_add_string(scan, "err_scrub_state", + pool_scan_state_str[ps->pss_error_scrub_state]); + nice_num_str_nvlist(scan, "err_scrub_start_time", + ps->pss_error_scrub_start, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(scan, "err_scrub_end_time", + ps->pss_error_scrub_end, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(scan, "err_scrub_examined", + ps->pss_error_scrub_examined, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(scan, "err_scrub_to_examine", + ps->pss_error_scrub_to_be_examined, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(scan, "err_scrub_pause", + ps->pss_pass_error_scrub_pause, + B_TRUE, cb->cb_json_as_int, ZFS_NICENUM_1024); + } + } + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + vdev_rebuild_stat_t *vrs; + uint_t i; + char *name; + nvlist_t *nv; + nvlist_t *rebuild = fnvlist_alloc(); + uint64_t st; + for (uint_t c = 0; c < children; c++) { + if (nvlist_lookup_uint64_array(child[c], + ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, + &i) == 0) { + if (vrs->vrs_state != VDEV_REBUILD_NONE) { + nv = fnvlist_alloc(); + name = zpool_vdev_name(g_zfs, zhp, + child[c], VDEV_NAME_TYPE_ID); + fill_vdev_info(nv, zhp, name, B_FALSE, + cb->cb_json_as_int); + st = vrs->vrs_state; + fnvlist_add_string(nv, "state", + vdev_rebuild_state_str[st]); + nice_num_str_nvlist(nv, "start_time", + vrs->vrs_start_time, cb->cb_literal, + cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "end_time", + vrs->vrs_end_time, cb->cb_literal, + cb->cb_json_as_int, + ZFS_NICE_TIMESTAMP); + nice_num_str_nvlist(nv, "scan_time", + vrs->vrs_scan_time_ms * 1000000, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_TIME); + nice_num_str_nvlist(nv, "scanned", + vrs->vrs_bytes_scanned, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "issued", + vrs->vrs_bytes_issued, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "rebuilt", + vrs->vrs_bytes_rebuilt, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "to_scan", + vrs->vrs_bytes_est, cb->cb_literal, + cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "errors", + vrs->vrs_errors, cb->cb_literal, + cb->cb_json_as_int, + ZFS_NICENUM_1024); + nice_num_str_nvlist(nv, "pass_time", + vrs->vrs_pass_time_ms * 1000000, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_TIME); + nice_num_str_nvlist(nv, "pass_scanned", + vrs->vrs_pass_bytes_scanned, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "pass_issued", + vrs->vrs_pass_bytes_issued, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + nice_num_str_nvlist(nv, "pass_skipped", + vrs->vrs_pass_bytes_skipped, + cb->cb_literal, cb->cb_json_as_int, + ZFS_NICENUM_BYTES); + fnvlist_add_nvlist(rebuild, name, nv); + free(name); + } + } + } + if 
(!nvlist_empty(rebuild)) + fnvlist_add_nvlist(scan, "rebuild_stats", rebuild); + fnvlist_free(rebuild); + } + + if (!nvlist_empty(scan)) + fnvlist_add_nvlist(item, ZPOOL_CONFIG_SCAN_STATS, scan); + fnvlist_free(scan); +} + /* * Print the scan status. */ @@ -8792,13 +10230,17 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache, } static void -print_dedup_stats(nvlist_t *config) +print_dedup_stats(zpool_handle_t *zhp, nvlist_t *config, boolean_t literal) { ddt_histogram_t *ddh; ddt_stat_t *dds; ddt_object_t *ddo; uint_t c; - char dspace[6], mspace[6]; + /* Extra space provided for literal display */ + char dspace[32], mspace[32], cspace[32]; + uint64_t cspace_prop; + enum zfs_nicenum_format format; + zprop_source_t src; /* * If the pool was faulted then we may not have been able to @@ -8816,295 +10258,217 @@ print_dedup_stats(nvlist_t *config) return; } - zfs_nicebytes(ddo->ddo_dspace, dspace, sizeof (dspace)); - zfs_nicebytes(ddo->ddo_mspace, mspace, sizeof (mspace)); - (void) printf("DDT entries %llu, size %s on disk, %s in core\n", - (u_longlong_t)ddo->ddo_count, - dspace, - mspace); - - verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, - (uint64_t **)&dds, &c) == 0); - verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, - (uint64_t **)&ddh, &c) == 0); - zpool_dump_ddt(dds, ddh); -} - -/* - * Display a summary of pool status. Displays a summary such as: - * - * pool: tank - * status: DEGRADED - * reason: One or more devices ... - * see: https://openzfs.github.io/openzfs-docs/msg/ZFS-xxxx-01 - * config: - * mirror DEGRADED - * c1t0d0 OK - * c2t0d0 UNAVAIL - * - * When given the '-v' option, we print out the complete config. If the '-e' - * option is specified, then we print out error rate information as well. - */ -static int -status_callback(zpool_handle_t *zhp, void *data) -{ - status_cbdata_t *cbp = data; - nvlist_t *config, *nvroot; - const char *msgid; - zpool_status_t reason; - zpool_errata_t errata; - const char *health; - uint_t c; - vdev_stat_t *vs; - - config = zpool_get_config(zhp, NULL); - reason = zpool_get_status(zhp, &msgid, &errata); - - cbp->cb_count++; - /* - * If we were given 'zpool status -x', only report those pools with - * problems. + * Squash cached size into in-core size to handle race. + * Only include cached size if it is available. */ - if (cbp->cb_explain && - (reason == ZPOOL_STATUS_OK || - reason == ZPOOL_STATUS_VERSION_OLDER || - reason == ZPOOL_STATUS_FEAT_DISABLED || - reason == ZPOOL_STATUS_COMPATIBILITY_ERR || - reason == ZPOOL_STATUS_INCOMPATIBLE_FEAT)) { - if (!cbp->cb_allpools) { - (void) printf(gettext("pool '%s' is healthy\n"), - zpool_get_name(zhp)); - if (cbp->cb_first) - cbp->cb_first = B_FALSE; - } - return (0); - } - - if (cbp->cb_first) - cbp->cb_first = B_FALSE; - else - (void) printf("\n"); - - nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) == 0); - - health = zpool_get_state_str(zhp); + cspace_prop = zpool_get_prop_int(zhp, ZPOOL_PROP_DEDUPCACHED, &src); + cspace_prop = MIN(cspace_prop, ddo->ddo_mspace); + format = literal ? 
ZFS_NICENUM_RAW : ZFS_NICENUM_1024; + zfs_nicenum_format(cspace_prop, cspace, sizeof (cspace), format); + zfs_nicenum_format(ddo->ddo_dspace, dspace, sizeof (dspace), format); + zfs_nicenum_format(ddo->ddo_mspace, mspace, sizeof (mspace), format); + (void) printf("DDT entries %llu, size %s on disk, %s in core", + (u_longlong_t)ddo->ddo_count, + dspace, + mspace); + if (src != ZPROP_SRC_DEFAULT) { + (void) printf(", %s cached (%.02f%%)", + cspace, + (double)cspace_prop / (double)ddo->ddo_mspace * 100.0); + } + (void) printf("\n"); - printf(" "); - printf_color(ANSI_BOLD, gettext("pool:")); - printf(" %s\n", zpool_get_name(zhp)); - fputc(' ', stdout); - printf_color(ANSI_BOLD, gettext("state: ")); + verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, + (uint64_t **)&dds, &c) == 0); + verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, + (uint64_t **)&ddh, &c) == 0); + zpool_dump_ddt(dds, ddh); +} - printf_color(health_str_to_color(health), "%s", health); +#define ST_SIZE 4096 +#define AC_SIZE 2048 - fputc('\n', stdout); +static void +print_status_reason(zpool_handle_t *zhp, status_cbdata_t *cbp, + zpool_status_t reason, zpool_errata_t errata, nvlist_t *item) +{ + char status[ST_SIZE]; + char action[AC_SIZE]; + memset(status, 0, ST_SIZE); + memset(action, 0, AC_SIZE); switch (reason) { case ZPOOL_STATUS_MISSING_DEV_R: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices could " + snprintf(status, ST_SIZE, gettext("One or more devices could " "not be opened. Sufficient replicas exist for\n\tthe pool " "to continue functioning in a degraded state.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Attach the missing device " + snprintf(action, AC_SIZE, gettext("Attach the missing device " "and online it using 'zpool online'.\n")); break; case ZPOOL_STATUS_MISSING_DEV_NR: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices could " + snprintf(status, ST_SIZE, gettext("One or more devices could " "not be opened. There are insufficient\n\treplicas for the" " pool to continue functioning.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Attach the missing device " + snprintf(action, AC_SIZE, gettext("Attach the missing device " "and online it using 'zpool online'.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices could " + snprintf(status, ST_SIZE, gettext("One or more devices could " "not be used because the label is missing or\n\tinvalid. " "Sufficient replicas exist for the pool to continue\n\t" "functioning in a degraded state.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Replace the device using " + snprintf(action, AC_SIZE, gettext("Replace the device using " "'zpool replace'.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_NR: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices could " + snprintf(status, ST_SIZE, gettext("One or more devices could " "not be used because the label is missing \n\tor invalid. 
" "There are insufficient replicas for the pool to " "continue\n\tfunctioning.\n")); zpool_explain_recover(zpool_get_handle(zhp), - zpool_get_name(zhp), reason, config); + zpool_get_name(zhp), reason, zpool_get_config(zhp, NULL), + action, AC_SIZE); break; case ZPOOL_STATUS_FAILING_DEV: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices has " + snprintf(status, ST_SIZE, gettext("One or more devices has " "experienced an unrecoverable error. An\n\tattempt was " "made to correct the error. Applications are " "unaffected.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Determine if the " + snprintf(action, AC_SIZE, gettext("Determine if the " "device needs to be replaced, and clear the errors\n\tusing" " 'zpool clear' or replace the device with 'zpool " "replace'.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices has " + snprintf(status, ST_SIZE, gettext("One or more devices has " "been taken offline by the administrator.\n\tSufficient " "replicas exist for the pool to continue functioning in " "a\n\tdegraded state.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Online the device " + snprintf(action, AC_SIZE, gettext("Online the device " "using 'zpool online' or replace the device with\n\t'zpool " "replace'.\n")); break; case ZPOOL_STATUS_REMOVED_DEV: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices has " + snprintf(status, ST_SIZE, gettext("One or more devices has " "been removed by the administrator.\n\tSufficient " "replicas exist for the pool to continue functioning in " "a\n\tdegraded state.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Online the device " + snprintf(action, AC_SIZE, gettext("Online the device " "using zpool online' or replace the device with\n\t'zpool " "replace'.\n")); break; case ZPOOL_STATUS_RESILVERING: case ZPOOL_STATUS_REBUILDING: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices is " + snprintf(status, ST_SIZE, gettext("One or more devices is " "currently being resilvered. The pool will\n\tcontinue " "to function, possibly in a degraded state.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Wait for the resilver to " + snprintf(action, AC_SIZE, gettext("Wait for the resilver to " "complete.\n")); break; case ZPOOL_STATUS_REBUILD_SCRUB: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices have " + snprintf(status, ST_SIZE, gettext("One or more devices have " "been sequentially resilvered, scrubbing\n\tthe pool " "is recommended.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Use 'zpool scrub' to " + snprintf(action, AC_SIZE, gettext("Use 'zpool scrub' to " "verify all data checksums.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices has " + snprintf(status, ST_SIZE, gettext("One or more devices has " "experienced an error resulting in data\n\tcorruption. 
" "Applications may be affected.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Restore the file in question" + snprintf(action, AC_SIZE, gettext("Restore the file in question" " if possible. Otherwise restore the\n\tentire pool from " "backup.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("The pool metadata is " + snprintf(status, ST_SIZE, gettext("The pool metadata is " "corrupted and the pool cannot be opened.\n")); zpool_explain_recover(zpool_get_handle(zhp), - zpool_get_name(zhp), reason, config); + zpool_get_name(zhp), reason, zpool_get_config(zhp, NULL), + action, AC_SIZE); break; case ZPOOL_STATUS_VERSION_OLDER: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("The pool is formatted using " + snprintf(status, ST_SIZE, gettext("The pool is formatted using " "a legacy on-disk format. The pool can\n\tstill be used, " "but some features are unavailable.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Upgrade the pool using " + snprintf(action, AC_SIZE, gettext("Upgrade the pool using " "'zpool upgrade'. Once this is done, the\n\tpool will no " "longer be accessible on software that does not support\n\t" "feature flags.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("The pool has been upgraded " + snprintf(status, ST_SIZE, gettext("The pool has been upgraded " "to a newer, incompatible on-disk version.\n\tThe pool " "cannot be accessed on this system.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Access the pool from a " + snprintf(action, AC_SIZE, gettext("Access the pool from a " "system running more recent software, or\n\trestore the " "pool from backup.\n")); break; case ZPOOL_STATUS_FEAT_DISABLED: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("Some supported and " + snprintf(status, ST_SIZE, gettext("Some supported and " "requested features are not enabled on the pool.\n\t" "The pool can still be used, but some features are " "unavailable.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Enable all features using " + snprintf(action, AC_SIZE, gettext("Enable all features using " "'zpool upgrade'. Once this is done,\n\tthe pool may no " "longer be accessible by software that does not support\n\t" "the features. See zpool-features(7) for details.\n")); break; case ZPOOL_STATUS_COMPATIBILITY_ERR: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("This pool has a " + snprintf(status, ST_SIZE, gettext("This pool has a " "compatibility list specified, but it could not be\n\t" "read/parsed at this time. 
The pool can still be used, " "but this\n\tshould be investigated.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Check the value of the " + snprintf(action, AC_SIZE, gettext("Check the value of the " "'compatibility' property against the\n\t" "appropriate file in " ZPOOL_SYSCONF_COMPAT_D " or " ZPOOL_DATA_COMPAT_D ".\n")); break; case ZPOOL_STATUS_INCOMPATIBLE_FEAT: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more features " + snprintf(status, ST_SIZE, gettext("One or more features " "are enabled on the pool despite not being\n\t" "requested by the 'compatibility' property.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Consider setting " + snprintf(action, AC_SIZE, gettext("Consider setting " "'compatibility' to an appropriate value, or\n\t" "adding needed features to the relevant file in\n\t" ZPOOL_SYSCONF_COMPAT_D " or " ZPOOL_DATA_COMPAT_D ".\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("The pool cannot be accessed " + snprintf(status, ST_SIZE, gettext("The pool cannot be accessed " "on this system because it uses the\n\tfollowing feature(s)" " not supported on this system:\n")); - zpool_print_unsup_feat(config); - (void) printf("\n"); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Access the pool from a " + zpool_collect_unsup_feat(zpool_get_config(zhp, NULL), status, + 1024); + snprintf(action, AC_SIZE, gettext("Access the pool from a " "system that supports the required feature(s),\n\tor " "restore the pool from backup.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("The pool can only be " + snprintf(status, ST_SIZE, gettext("The pool can only be " "accessed in read-only mode on this system. It\n\tcannot be" " accessed in read-write mode because it uses the " "following\n\tfeature(s) not supported on this system:\n")); - zpool_print_unsup_feat(config); - (void) printf("\n"); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("The pool cannot be accessed " + zpool_collect_unsup_feat(zpool_get_config(zhp, NULL), status, + 1024); + snprintf(action, AC_SIZE, gettext("The pool cannot be accessed " "in read-write mode. Import the pool with\n" "\t\"-o readonly=on\", access the pool from a system that " "supports the\n\trequired feature(s), or restore the " @@ -9112,106 +10476,90 @@ status_callback(zpool_handle_t *zhp, void *data) break; case ZPOOL_STATUS_FAULTED_DEV_R: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices are " + snprintf(status, ST_SIZE, gettext("One or more devices are " "faulted in response to persistent errors.\n\tSufficient " "replicas exist for the pool to continue functioning " "in a\n\tdegraded state.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Replace the faulted device, " + snprintf(action, AC_SIZE, gettext("Replace the faulted device, " "or use 'zpool clear' to mark the device\n\trepaired.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_NR: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices are " + snprintf(status, ST_SIZE, gettext("One or more devices are " "faulted in response to persistent errors. 
There are " "insufficient replicas for the pool to\n\tcontinue " "functioning.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Destroy and re-create the " + snprintf(action, AC_SIZE, gettext("Destroy and re-create the " "pool from a backup source. Manually marking the device\n" "\trepaired using 'zpool clear' may allow some data " "to be recovered.\n")); break; case ZPOOL_STATUS_IO_FAILURE_MMP: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("The pool is suspended " + snprintf(status, ST_SIZE, gettext("The pool is suspended " "because multihost writes failed or were delayed;\n\t" "another system could import the pool undetected.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Make sure the pool's devices" + snprintf(action, AC_SIZE, gettext("Make sure the pool's devices" " are connected, then reboot your system and\n\timport the " "pool or run 'zpool clear' to resume the pool.\n")); break; case ZPOOL_STATUS_IO_FAILURE_WAIT: case ZPOOL_STATUS_IO_FAILURE_CONTINUE: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices are " + snprintf(status, ST_SIZE, gettext("One or more devices are " "faulted in response to IO failures.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Make sure the affected " + snprintf(action, AC_SIZE, gettext("Make sure the affected " "devices are connected, then run 'zpool clear'.\n")); break; case ZPOOL_STATUS_BAD_LOG: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("An intent log record " + snprintf(status, ST_SIZE, gettext("An intent log record " "could not be read.\n" "\tWaiting for administrator intervention to fix the " "faulted pool.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Either restore the affected " + snprintf(action, AC_SIZE, gettext("Either restore the affected " "device(s) and run 'zpool online',\n" "\tor ignore the intent log records by running " "'zpool clear'.\n")); break; case ZPOOL_STATUS_NON_NATIVE_ASHIFT: - (void) printf(gettext("status: One or more devices are " + snprintf(status, ST_SIZE, gettext("One or more devices are " "configured to use a non-native block size.\n" "\tExpect reduced performance.\n")); - (void) printf(gettext("action: Replace affected devices with " - "devices that support the\n\tconfigured block size, or " - "migrate data to a properly configured\n\tpool.\n")); + snprintf(action, AC_SIZE, gettext("Replace affected devices " + "with devices that support the\n\tconfigured block size, " + "or migrate data to a properly configured\n\tpool.\n")); break; case ZPOOL_STATUS_HOSTID_MISMATCH: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("Mismatch between pool hostid" + snprintf(status, ST_SIZE, gettext("Mismatch between pool hostid" " and system hostid on imported pool.\n\tThis pool was " "previously imported into a system with a different " "hostid,\n\tand then was verbatim imported into this " "system.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("Export this pool on all " + snprintf(action, AC_SIZE, gettext("Export this pool on all " "systems on which it is imported.\n" "\tThen import it to correct the mismatch.\n")); break; case ZPOOL_STATUS_ERRATA: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("Errata #%d 
detected.\n"), + snprintf(status, ST_SIZE, gettext("Errata #%d detected.\n"), errata); - switch (errata) { case ZPOOL_ERRATA_NONE: break; case ZPOOL_ERRATA_ZOL_2094_SCRUB: - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("To correct the issue" + snprintf(action, AC_SIZE, gettext("To correct the issue" " run 'zpool scrub'.\n")); break; case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: - (void) printf(gettext("\tExisting encrypted datasets " - "contain an on-disk incompatibility\n\twhich " - "needs to be corrected.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("To correct the issue" + (void) strlcat(status, gettext("\tExisting encrypted " + "datasets contain an on-disk incompatibility\n\t " + "which needs to be corrected.\n"), ST_SIZE); + snprintf(action, AC_SIZE, gettext("To correct the issue" " backup existing encrypted datasets to new\n\t" "encrypted datasets and destroy the old ones. " "'zfs mount -o ro' can\n\tbe used to temporarily " @@ -9219,12 +10567,12 @@ status_callback(zpool_handle_t *zhp, void *data) break; case ZPOOL_ERRATA_ZOL_8308_ENCRYPTION: - (void) printf(gettext("\tExisting encrypted snapshots " - "and bookmarks contain an on-disk\n\tincompat" - "ibility. This may cause on-disk corruption if " - "they are used\n\twith 'zfs recv'.\n")); - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("To correct the" + (void) strlcat(status, gettext("\tExisting encrypted " + "snapshots and bookmarks contain an on-disk\n\t" + "incompatibility. This may cause on-disk " + "corruption if they are used\n\twith " + "'zfs recv'.\n"), ST_SIZE); + snprintf(action, AC_SIZE, gettext("To correct the" "issue, enable the bookmark_v2 feature. No " "additional\n\taction is needed if there are no " "encrypted snapshots or bookmarks.\n\tIf preserving" @@ -9250,6 +10598,210 @@ status_callback(zpool_handle_t *zhp, void *data) assert(reason == ZPOOL_STATUS_OK); } + if (status[0] != 0) { + if (cbp->cb_json) + fnvlist_add_string(item, "status", status); + else { + printf_color(ANSI_BOLD, gettext("status: ")); + printf_color(ANSI_YELLOW, status); + } + } + + if (action[0] != 0) { + if (cbp->cb_json) + fnvlist_add_string(item, "action", action); + else { + printf_color(ANSI_BOLD, gettext("action: ")); + printf_color(ANSI_YELLOW, action); + } + } +} + +static int +status_callback_json(zpool_handle_t *zhp, void *data) +{ + status_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + const char *msgid; + char pool_guid[256]; + char msgbuf[256]; + uint64_t guid; + zpool_status_t reason; + zpool_errata_t errata; + uint_t c; + vdev_stat_t *vs; + nvlist_t *item, *d, *load_info, *vds; + item = d = NULL; + + /* If dedup stats were requested, also fetch dedupcached. */ + if (cbp->cb_dedup_stats > 1) + zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME); + reason = zpool_get_status(zhp, &msgid, &errata); + /* + * If we were given 'zpool status -x', only report those pools with + * problems. 
+ */ + if (cbp->cb_explain && + (reason == ZPOOL_STATUS_OK || + reason == ZPOOL_STATUS_VERSION_OLDER || + reason == ZPOOL_STATUS_FEAT_DISABLED || + reason == ZPOOL_STATUS_COMPATIBILITY_ERR || + reason == ZPOOL_STATUS_INCOMPATIBLE_FEAT)) { + return (0); + } + + d = fnvlist_lookup_nvlist(cbp->cb_jsobj, "pools"); + item = fnvlist_alloc(); + vds = fnvlist_alloc(); + fill_pool_info(item, zhp, B_FALSE, cbp->cb_json_as_int); + config = zpool_get_config(zhp, NULL); + + if (config != NULL) { + nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); + verify(nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); + if (cbp->cb_json_pool_key_guid) { + guid = fnvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_GUID); + snprintf(pool_guid, 256, "%llu", (u_longlong_t)guid); + } + cbp->cb_count++; + + print_status_reason(zhp, cbp, reason, errata, item); + if (msgid != NULL) { + snprintf(msgbuf, 256, + "https://openzfs.github.io/openzfs-docs/msg/%s", + msgid); + fnvlist_add_string(item, "msgid", msgid); + fnvlist_add_string(item, "moreinfo", msgbuf); + } + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + &load_info) == 0) { + fnvlist_add_nvlist(item, ZPOOL_CONFIG_LOAD_INFO, + load_info); + } + + scan_status_nvlist(zhp, cbp, nvroot, item); + removal_status_nvlist(zhp, cbp, nvroot, item); + checkpoint_status_nvlist(nvroot, cbp, item); + raidz_expand_status_nvlist(zhp, cbp, nvroot, item); + vdev_stats_nvlist(zhp, cbp, nvroot, 0, B_FALSE, NULL, vds); + if (cbp->cb_flat_vdevs) { + class_vdevs_nvlist(zhp, cbp, nvroot, + VDEV_ALLOC_BIAS_DEDUP, vds); + class_vdevs_nvlist(zhp, cbp, nvroot, + VDEV_ALLOC_BIAS_SPECIAL, vds); + class_vdevs_nvlist(zhp, cbp, nvroot, + VDEV_ALLOC_CLASS_LOGS, vds); + l2cache_nvlist(zhp, cbp, nvroot, vds); + spares_nvlist(zhp, cbp, nvroot, vds); + + fnvlist_add_nvlist(item, "vdevs", vds); + fnvlist_free(vds); + } else { + fnvlist_add_nvlist(item, "vdevs", vds); + fnvlist_free(vds); + + class_vdevs_nvlist(zhp, cbp, nvroot, + VDEV_ALLOC_BIAS_DEDUP, item); + class_vdevs_nvlist(zhp, cbp, nvroot, + VDEV_ALLOC_BIAS_SPECIAL, item); + class_vdevs_nvlist(zhp, cbp, nvroot, + VDEV_ALLOC_CLASS_LOGS, item); + l2cache_nvlist(zhp, cbp, nvroot, item); + spares_nvlist(zhp, cbp, nvroot, item); + } + dedup_stats_nvlist(zhp, cbp, item); + errors_nvlist(zhp, cbp, item); + } + if (cbp->cb_json_pool_key_guid) { + fnvlist_add_nvlist(d, pool_guid, item); + } else { + fnvlist_add_nvlist(d, zpool_get_name(zhp), + item); + } + fnvlist_free(item); + return (0); +} + +/* + * Display a summary of pool status. Displays a summary such as: + * + * pool: tank + * status: DEGRADED + * reason: One or more devices ... + * see: https://openzfs.github.io/openzfs-docs/msg/ZFS-xxxx-01 + * config: + * mirror DEGRADED + * c1t0d0 OK + * c2t0d0 UNAVAIL + * + * When given the '-v' option, we print out the complete config. If the '-e' + * option is specified, then we print out error rate information as well. + */ +static int +status_callback(zpool_handle_t *zhp, void *data) +{ + status_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + const char *msgid; + zpool_status_t reason; + zpool_errata_t errata; + const char *health; + uint_t c; + vdev_stat_t *vs; + + /* If dedup stats were requested, also fetch dedupcached. 
*/ + if (cbp->cb_dedup_stats > 1) + zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME); + + config = zpool_get_config(zhp, NULL); + reason = zpool_get_status(zhp, &msgid, &errata); + + cbp->cb_count++; + + /* + * If we were given 'zpool status -x', only report those pools with + * problems. + */ + if (cbp->cb_explain && + (reason == ZPOOL_STATUS_OK || + reason == ZPOOL_STATUS_VERSION_OLDER || + reason == ZPOOL_STATUS_FEAT_DISABLED || + reason == ZPOOL_STATUS_COMPATIBILITY_ERR || + reason == ZPOOL_STATUS_INCOMPATIBLE_FEAT)) { + if (!cbp->cb_allpools) { + (void) printf(gettext("pool '%s' is healthy\n"), + zpool_get_name(zhp)); + if (cbp->cb_first) + cbp->cb_first = B_FALSE; + } + return (0); + } + + if (cbp->cb_first) + cbp->cb_first = B_FALSE; + else + (void) printf("\n"); + + nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); + + health = zpool_get_state_str(zhp); + + printf(" "); + printf_color(ANSI_BOLD, gettext("pool:")); + printf(" %s\n", zpool_get_name(zhp)); + fputc(' ', stdout); + printf_color(ANSI_BOLD, gettext("state: ")); + + printf_color(health_str_to_color(health), "%s", health); + + fputc('\n', stdout); + print_status_reason(zhp, cbp, reason, errata, NULL); + if (msgid != NULL) { printf(" "); printf_color(ANSI_BOLD, gettext("see:")); @@ -9338,7 +10890,7 @@ status_callback(zpool_handle_t *zhp, void *data) } if (cbp->cb_dedup_stats) - print_dedup_stats(config); + print_dedup_stats(zhp, config, cbp->cb_literal); } else { (void) printf(gettext("config: The configuration cannot be " "determined.\n")); @@ -9364,7 +10916,11 @@ status_callback(zpool_handle_t *zhp, void *data) * -T Display a timestamp in date(1) or Unix format * -v Display complete error logs * -x Display only pools with potential problems + * -j Display output in JSON format * --power Display vdev enclosure slot power status + * --json-int Display numbers in integer format instead of string + * --json-flat-vdevs Display vdevs in flat hierarchy + * --json-pool-key-guid Use pool GUID as key for pool objects * * Describes the health status of all pools or some subset. 
*/ @@ -9376,15 +10932,21 @@ zpool_do_status(int argc, char **argv) float interval = 0; unsigned long count = 0; status_cbdata_t cb = { 0 }; + nvlist_t *data; char *cmd = NULL; struct option long_options[] = { {"power", no_argument, NULL, ZPOOL_OPTION_POWER}, + {"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT}, + {"json-flat-vdevs", no_argument, NULL, + ZPOOL_OPTION_JSON_FLAT_VDEVS}, + {"json-pool-key-guid", no_argument, NULL, + ZPOOL_OPTION_POOL_KEY_GUID}, {0, 0, 0, 0} }; /* check options */ - while ((c = getopt_long(argc, argv, "c:DegiLpPstT:vx", long_options, + while ((c = getopt_long(argc, argv, "c:jDegiLpPstT:vx", long_options, NULL)) != -1) { switch (c) { case 'c': @@ -9412,7 +10974,8 @@ zpool_do_status(int argc, char **argv) cmd = optarg; break; case 'D': - cb.cb_dedup_stats = B_TRUE; + if (++cb.cb_dedup_stats > 2) + cb.cb_dedup_stats = 2; break; case 'e': cb.cb_print_unhealthy = B_TRUE; @@ -9444,12 +11007,25 @@ zpool_do_status(int argc, char **argv) case 'v': cb.cb_verbose = B_TRUE; break; + case 'j': + cb.cb_json = B_TRUE; + break; case 'x': cb.cb_explain = B_TRUE; break; case ZPOOL_OPTION_POWER: cb.cb_print_power = B_TRUE; break; + case ZPOOL_OPTION_JSON_FLAT_VDEVS: + cb.cb_flat_vdevs = B_TRUE; + break; + case ZPOOL_OPTION_JSON_NUMS_AS_INT: + cb.cb_json_as_int = B_TRUE; + cb.cb_literal = B_TRUE; + break; + case ZPOOL_OPTION_POOL_KEY_GUID: + cb.cb_json_pool_key_guid = B_TRUE; + break; case '?': if (optopt == 'c') { print_zpool_script_list("status"); @@ -9473,23 +11049,79 @@ zpool_do_status(int argc, char **argv) cb.cb_first = B_TRUE; cb.cb_print_status = B_TRUE; + if (cb.cb_flat_vdevs && !cb.cb_json) { + fprintf(stderr, gettext("'--json-flat-vdevs' only works with" + " '-j' option\n")); + usage(B_FALSE); + } + + if (cb.cb_json_as_int && !cb.cb_json) { + (void) fprintf(stderr, gettext("'--json-int' only works with" + " '-j' option\n")); + usage(B_FALSE); + } + + if (!cb.cb_json && cb.cb_json_pool_key_guid) { + (void) fprintf(stderr, gettext("'--json-pool-key-guid' only" + " works with '-j' option\n")); + usage(B_FALSE); + } + for (;;) { - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); + if (cb.cb_json) { + cb.cb_jsobj = zpool_json_schema(0, 1); + data = fnvlist_alloc(); + fnvlist_add_nvlist(cb.cb_jsobj, "pools", data); + fnvlist_free(data); + } + + if (timestamp_fmt != NODATE) { + if (cb.cb_json) { + if (cb.cb_json_as_int) { + fnvlist_add_uint64(cb.cb_jsobj, "time", + time(NULL)); + } else { + char ts[128]; + get_timestamp(timestamp_fmt, ts, 128); + fnvlist_add_string(cb.cb_jsobj, "time", + ts); + } + } else + print_timestamp(timestamp_fmt); + } if (cmd != NULL) cb.vcdl = all_pools_for_each_vdev_run(argc, argv, cmd, NULL, NULL, 0, 0); - ret = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, - cb.cb_literal, status_callback, &cb); + if (cb.cb_json) { + ret = for_each_pool(argc, argv, B_TRUE, NULL, + ZFS_TYPE_POOL, cb.cb_literal, + status_callback_json, &cb); + } else { + ret = for_each_pool(argc, argv, B_TRUE, NULL, + ZFS_TYPE_POOL, cb.cb_literal, + status_callback, &cb); + } if (cb.vcdl != NULL) free_vdev_cmd_data_list(cb.vcdl); - if (argc == 0 && cb.cb_count == 0) - (void) fprintf(stderr, gettext("no pools available\n")); - else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) - (void) printf(gettext("all pools are healthy\n")); + + if (cb.cb_json) { + if (ret == 0) + zcmd_print_json(cb.cb_jsobj); + else + nvlist_free(cb.cb_jsobj); + } else { + if (argc == 0 && cb.cb_count == 0) { + (void) fprintf(stderr, "%s", + gettext("no pools available\n")); 
+ } else if (cb.cb_explain && cb.cb_first && + cb.cb_allpools) { + (void) printf("%s", + gettext("all pools are healthy\n")); + } + } if (ret != 0) return (ret); @@ -10642,6 +12274,17 @@ get_callback_vdev(zpool_handle_t *zhp, char *vdevname, void *data) zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; char value[ZFS_MAXPROPLEN]; zprop_source_t srctype; + nvlist_t *props, *item, *d; + props = item = d = NULL; + + if (cbp->cb_json) { + d = fnvlist_lookup_nvlist(cbp->cb_jsobj, "vdevs"); + if (d == NULL) { + fprintf(stderr, "vdevs obj not found.\n"); + exit(1); + } + props = fnvlist_alloc(); + } for (zprop_list_t *pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { @@ -10663,9 +12306,22 @@ get_callback_vdev(zpool_handle_t *zhp, char *vdevname, void *data) if (zpool_get_vdev_prop(zhp, vdevname, pl->pl_prop, prop_name, value, sizeof (value), &srctype, cbp->cb_literal) == 0) { - zprop_print_one_property(vdevname, cbp, prop_name, - value, srctype, NULL, NULL); + zprop_collect_property(vdevname, cbp, prop_name, + value, srctype, NULL, NULL, props); + } + } + + if (cbp->cb_json) { + if (!nvlist_empty(props)) { + item = fnvlist_alloc(); + fill_vdev_info(item, zhp, vdevname, B_TRUE, + cbp->cb_json_as_int); + fnvlist_add_nvlist(item, "properties", props); + fnvlist_add_nvlist(d, vdevname, item); + fnvlist_add_nvlist(cbp->cb_jsobj, "vdevs", d); + fnvlist_free(item); } + fnvlist_free(props); } return (0); @@ -10709,8 +12365,18 @@ get_callback(zpool_handle_t *zhp, void *data) zprop_source_t srctype; zprop_list_t *pl; int vid; + int err = 0; + nvlist_t *props, *item, *d; + props = item = d = NULL; if (cbp->cb_type == ZFS_TYPE_VDEV) { + if (cbp->cb_json) { + nvlist_t *pool = fnvlist_alloc(); + fill_pool_info(pool, zhp, B_FALSE, cbp->cb_json_as_int); + fnvlist_add_nvlist(cbp->cb_jsobj, "pool", pool); + fnvlist_free(pool); + } + if (strcmp(cbp->cb_vdevs.cb_names[0], "all-vdevs") == 0) { for_each_vdev(zhp, get_callback_vdev_cb, data); } else { @@ -10730,6 +12396,14 @@ get_callback(zpool_handle_t *zhp, void *data) } } else { assert(cbp->cb_type == ZFS_TYPE_POOL); + if (cbp->cb_json) { + d = fnvlist_lookup_nvlist(cbp->cb_jsobj, "pools"); + if (d == NULL) { + fprintf(stderr, "pools obj not found.\n"); + exit(1); + } + props = fnvlist_alloc(); + } for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* * Skip the special fake placeholder. 
This will also @@ -10747,9 +12421,9 @@ get_callback(zpool_handle_t *zhp, void *data) value, sizeof (value), &srctype) != 0) continue; - zprop_print_one_property(zpool_get_name(zhp), - cbp, pl->pl_user_prop, value, srctype, - NULL, NULL); + err = zprop_collect_property( + zpool_get_name(zhp), cbp, pl->pl_user_prop, + value, srctype, NULL, NULL, props); } else if (pl->pl_prop == ZPROP_INVAL && (zpool_prop_feature(pl->pl_user_prop) || zpool_prop_unsupported(pl->pl_user_prop))) { @@ -10758,10 +12432,10 @@ get_callback(zpool_handle_t *zhp, void *data) if (zpool_prop_get_feature(zhp, pl->pl_user_prop, value, sizeof (value)) == 0) { - zprop_print_one_property( + err = zprop_collect_property( zpool_get_name(zhp), cbp, pl->pl_user_prop, value, srctype, - NULL, NULL); + NULL, NULL, props); } } else { if (zpool_get_prop(zhp, pl->pl_prop, value, @@ -10769,10 +12443,37 @@ get_callback(zpool_handle_t *zhp, void *data) cbp->cb_literal) != 0) continue; - zprop_print_one_property(zpool_get_name(zhp), - cbp, zpool_prop_to_name(pl->pl_prop), - value, srctype, NULL, NULL); + err = zprop_collect_property( + zpool_get_name(zhp), cbp, + zpool_prop_to_name(pl->pl_prop), + value, srctype, NULL, NULL, props); + } + if (err != 0) + return (err); + } + + if (cbp->cb_json) { + if (!nvlist_empty(props)) { + item = fnvlist_alloc(); + fill_pool_info(item, zhp, B_TRUE, + cbp->cb_json_as_int); + fnvlist_add_nvlist(item, "properties", props); + if (cbp->cb_json_pool_key_guid) { + char buf[256]; + uint64_t guid = fnvlist_lookup_uint64( + zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_POOL_GUID); + snprintf(buf, 256, "%llu", + (u_longlong_t)guid); + fnvlist_add_nvlist(d, buf, item); + } else { + const char *name = zpool_get_name(zhp); + fnvlist_add_nvlist(d, name, item); + } + fnvlist_add_nvlist(cbp->cb_jsobj, "pools", d); + fnvlist_free(item); } + fnvlist_free(props); } } @@ -10787,6 +12488,9 @@ get_callback(zpool_handle_t *zhp, void *data) * -o List of columns to display. Defaults to * "name,property,value,source". * -p Display values in parsable (exact) format. + * -j Display output in JSON format. + * --json-int Display numbers as integers instead of strings. + * --json-pool-key-guid Set pool GUID as key for pool objects. * * Get properties of pools in the system. Output space statistics * for each one as well as other attributes. 
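The JSON mode threads a single nvlist through the whole command: zpool_do_get allocates the schema envelope and an empty "pools" (or "vdevs") container up front, get_callback fills a per-pool "properties" nvlist through zprop_collect_property(), and zcmd_print_json() renders the tree once pool iteration finishes. Below is a minimal sketch of that flow using the helpers introduced in this patch; the "tank"/"size" keys and the value string are illustrative placeholders, and the exact envelope emitted by zpool_json_schema() is assumed here rather than verified:

	#include <libzfs.h>

	static void
	json_get_sketch(void)
	{
		nvlist_t *jsobj, *pools, *item, *props;

		jsobj = zpool_json_schema(0, 1);	/* versioned envelope */
		pools = fnvlist_alloc();
		item = fnvlist_alloc();
		props = fnvlist_alloc();

		/*
		 * The real code fills 'props' one property at a time via
		 * zprop_collect_property(); a flat string stands in here.
		 */
		fnvlist_add_string(props, "size", "1.2T");

		fnvlist_add_nvlist(item, "properties", props);
		/* key is the pool name, or its GUID with --json-pool-key-guid */
		fnvlist_add_nvlist(pools, "tank", item);
		fnvlist_add_nvlist(jsobj, "pools", pools);

		/* fnvlist_add_nvlist() copies, so the locals are freed here */
		fnvlist_free(props);
		fnvlist_free(item);
		fnvlist_free(pools);

		zcmd_print_json(jsobj);	/* patch callers do not free jsobj after */
	}

The same copy-then-free pattern appears throughout the patch (for example in zpool_do_status's per-iteration "pools" container), which is why each builder can release its locals immediately after attaching them.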
@@ -10800,6 +12504,7 @@ zpool_do_get(int argc, char **argv) int c, i; char *propstr = NULL; char *vdev = NULL; + nvlist_t *data = NULL; cb.cb_first = B_TRUE; @@ -10815,8 +12520,16 @@ zpool_do_get(int argc, char **argv) cb.cb_vdevs.cb_name_flags |= VDEV_NAME_TYPE_ID; current_prop_type = cb.cb_type; + struct option long_options[] = { + {"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT}, + {"json-pool-key-guid", no_argument, NULL, + ZPOOL_OPTION_POOL_KEY_GUID}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, ":Hpo:")) != -1) { + while ((c = getopt_long(argc, argv, ":jHpo:", long_options, + NULL)) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; @@ -10824,6 +12537,18 @@ zpool_do_get(int argc, char **argv) case 'H': cb.cb_scripted = B_TRUE; break; + case 'j': + cb.cb_json = B_TRUE; + cb.cb_jsobj = zpool_json_schema(0, 1); + data = fnvlist_alloc(); + break; + case ZPOOL_OPTION_POOL_KEY_GUID: + cb.cb_json_pool_key_guid = B_TRUE; + break; + case ZPOOL_OPTION_JSON_NUMS_AS_INT: + cb.cb_json_as_int = B_TRUE; + cb.cb_literal = B_TRUE; + break; case 'o': memset(&cb.cb_columns, 0, sizeof (cb.cb_columns)); i = 0; @@ -10878,6 +12603,18 @@ zpool_do_get(int argc, char **argv) argc -= optind; argv += optind; + if (!cb.cb_json && cb.cb_json_as_int) { + (void) fprintf(stderr, gettext("'--json-int' only works with" + " '-j' option\n")); + usage(B_FALSE); + } + + if (!cb.cb_json && cb.cb_json_pool_key_guid) { + (void) fprintf(stderr, gettext("'--json-pool-key-guid' only" + " works with '-j' option\n")); + usage(B_FALSE); + } + if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); @@ -10912,6 +12649,10 @@ zpool_do_get(int argc, char **argv) cb.cb_type = ZFS_TYPE_VDEV; argc = 1; /* One pool to process */ } else { + if (cb.cb_json) { + nvlist_free(cb.cb_jsobj); + nvlist_free(data); + } fprintf(stderr, gettext("Expected a list of vdevs in" " \"%s\", but got:\n"), argv[0]); error_list_unresolved_vdevs(argc - 1, argv + 1, @@ -10921,6 +12662,10 @@ zpool_do_get(int argc, char **argv) return (1); } } else { + if (cb.cb_json) { + nvlist_free(cb.cb_jsobj); + nvlist_free(data); + } /* * The first arg isn't the name of a valid pool. 
*/ @@ -10943,9 +12688,22 @@ zpool_do_get(int argc, char **argv) cb.cb_proplist = &fake_name; } + if (cb.cb_json) { + if (cb.cb_type == ZFS_TYPE_VDEV) + fnvlist_add_nvlist(cb.cb_jsobj, "vdevs", data); + else + fnvlist_add_nvlist(cb.cb_jsobj, "pools", data); + fnvlist_free(data); + } + ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, cb.cb_type, cb.cb_literal, get_callback, &cb); + if (ret == 0 && cb.cb_json) + zcmd_print_json(cb.cb_jsobj); + else if (ret != 0 && cb.cb_json) + nvlist_free(cb.cb_jsobj); + if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else @@ -11592,8 +13350,39 @@ find_command_idx(const char *command, int *idx) static int zpool_do_version(int argc, char **argv) { - (void) argc, (void) argv; - return (zfs_version_print() != 0); + int c; + nvlist_t *jsobj = NULL, *zfs_ver = NULL; + boolean_t json = B_FALSE; + while ((c = getopt(argc, argv, "j")) != -1) { + switch (c) { + case 'j': + json = B_TRUE; + jsobj = zpool_json_schema(0, 1); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + if (argc != 0) { + (void) fprintf(stderr, "too many arguments\n"); + usage(B_FALSE); + } + + if (json) { + zfs_ver = zfs_version_nvlist(); + if (zfs_ver) { + fnvlist_add_nvlist(jsobj, "zfs_version", zfs_ver); + zcmd_print_json(jsobj); + fnvlist_free(zfs_ver); + return (0); + } else + return (-1); + } else + return (zfs_version_print() != 0); } /* Display documentation */ diff --git a/sys/contrib/subrepo-openzfs/cmd/zstream/zstream_redup.c b/sys/contrib/subrepo-openzfs/cmd/zstream/zstream_redup.c index 6866639fe465..dccd325d4cfa 100644 --- a/sys/contrib/subrepo-openzfs/cmd/zstream/zstream_redup.c +++ b/sys/contrib/subrepo-openzfs/cmd/zstream/zstream_redup.c @@ -56,15 +56,6 @@ typedef struct redup_table { int numhashbits; } redup_table_t; -int -highbit64(uint64_t i) -{ - if (i == 0) - return (0); - - return (NBBY * sizeof (uint64_t) - __builtin_clzll(i)); -} - void * safe_calloc(size_t n) { diff --git a/sys/contrib/subrepo-openzfs/cmd/ztest.c b/sys/contrib/subrepo-openzfs/cmd/ztest.c index f77a37c21545..6a9264ddcc4c 100644 --- a/sys/contrib/subrepo-openzfs/cmd/ztest.c +++ b/sys/contrib/subrepo-openzfs/cmd/ztest.c @@ -26,6 +26,7 @@ * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2023, Klara, Inc. 
*/ /* @@ -444,6 +445,7 @@ ztest_func_t ztest_blake3; ztest_func_t ztest_fletcher; ztest_func_t ztest_fletcher_incr; ztest_func_t ztest_verify_dnode_bt; +ztest_func_t ztest_pool_prefetch_ddt; static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ @@ -499,6 +501,7 @@ static ztest_info_t ztest_info[] = { ZTI_INIT(ztest_fletcher, 1, &zopt_rarely), ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely), ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes), + ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely), }; #define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) @@ -6993,6 +6996,21 @@ ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id) } } +void +ztest_pool_prefetch_ddt(ztest_ds_t *zd, uint64_t id) +{ + (void) zd, (void) id; + spa_t *spa; + + (void) pthread_rwlock_rdlock(&ztest_name_lock); + VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); + + ddt_prefetch_all(spa); + + spa_close(spa, FTAG); + (void) pthread_rwlock_unlock(&ztest_name_lock); +} + static int ztest_set_global_vars(void) { @@ -8495,17 +8513,24 @@ print_time(hrtime_t t, char *timebuf) } static nvlist_t * -make_random_props(void) +make_random_pool_props(void) { nvlist_t *props; props = fnvlist_alloc(); - if (ztest_random(2) == 0) - return (props); + /* Twenty percent of the time enable ZPOOL_PROP_DEDUP_TABLE_QUOTA */ + if (ztest_random(5) == 0) { + fnvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_DEDUP_TABLE_QUOTA), + 2 * 1024 * 1024); + } - fnvlist_add_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1); + /* Fifty percent of the time enable ZPOOL_PROP_AUTOREPLACE */ + if (ztest_random(2) == 0) { + fnvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1); + } return (props); } @@ -8537,7 +8562,7 @@ ztest_init(ztest_shared_t *zs) zs->zs_mirrors = ztest_opts.zo_mirrors; nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1); - props = make_random_props(); + props = make_random_pool_props(); /* * We don't expect the pool to suspend unless maxfaults == 0, diff --git a/sys/contrib/subrepo-openzfs/config/Rules.am b/sys/contrib/subrepo-openzfs/config/Rules.am index 30c5f353cd23..00ac890e2303 100644 --- a/sys/contrib/subrepo-openzfs/config/Rules.am +++ b/sys/contrib/subrepo-openzfs/config/Rules.am @@ -57,7 +57,7 @@ AM_LDFLAGS += $(ASAN_LDFLAGS) AM_LDFLAGS += $(UBSAN_LDFLAGS) if BUILD_FREEBSD -AM_LDFLAGS += -fstack-protector-strong -shared +AM_LDFLAGS += -fstack-protector-strong AM_LDFLAGS += -Wl,-x -Wl,--fatal-warnings -Wl,--warn-shared-textrel AM_LDFLAGS += -lm endif diff --git a/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/.gitignore b/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/.gitignore index 0fd9cc63af2a..217893a6bd89 100644 --- a/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/.gitignore +++ b/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/.gitignore @@ -1 +1,2 @@ /zfs +/zpool diff --git a/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/Makefile.am b/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/Makefile.am index 1ec05ed73d2d..95d4ffa76e22 100644 --- a/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/Makefile.am +++ b/sys/contrib/subrepo-openzfs/contrib/bash_completion.d/Makefile.am @@ -1,5 +1,10 @@ -nodist_bashcompletion_DATA = %D%/zfs -SUBSTFILES += $(nodist_bashcompletion_DATA) +nodist_bashcompletion_DATA = %D%/zfs %D%/zpool +COMPLETION_FILES = %D%/zfs +SUBSTFILES += 
$(COMPLETION_FILES) +CLEANFILES += %D%/zpool -SHELLCHECKSCRIPTS += $(nodist_bashcompletion_DATA) -$(call SHELLCHECK_OPTS,$(nodist_bashcompletion_DATA)): SHELLCHECK_SHELL = bash +SHELLCHECKSCRIPTS += $(COMPLETION_FILES) +$(call SHELLCHECK_OPTS,$(COMPLETION_FILES)): SHELLCHECK_SHELL = bash + +%D%/zpool: %D%/zfs + $(LN_S) -f zfs $@ diff --git a/sys/contrib/subrepo-openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/subrepo-openzfs/contrib/debian/openzfs-zfsutils.install index 741014398ade..10083351abb5 100644 --- a/sys/contrib/subrepo-openzfs/contrib/debian/openzfs-zfsutils.install +++ b/sys/contrib/subrepo-openzfs/contrib/debian/openzfs-zfsutils.install @@ -111,6 +111,7 @@ usr/share/man/man8/zpool-labelclear.8 usr/share/man/man8/zpool-list.8 usr/share/man/man8/zpool-offline.8 usr/share/man/man8/zpool-online.8 +usr/share/man/man8/zpool-prefetch.8 usr/share/man/man8/zpool-reguid.8 usr/share/man/man8/zpool-remove.8 usr/share/man/man8/zpool-reopen.8 diff --git a/sys/contrib/subrepo-openzfs/include/libzfs.h b/sys/contrib/subrepo-openzfs/include/libzfs.h index 7836c2325f4e..bf5579f38fda 100644 --- a/sys/contrib/subrepo-openzfs/include/libzfs.h +++ b/sys/contrib/subrepo-openzfs/include/libzfs.h @@ -51,8 +51,8 @@ extern "C" { /* * Miscellaneous ZFS constants */ -#define ZFS_MAXPROPLEN MAXPATHLEN -#define ZPOOL_MAXPROPLEN MAXPATHLEN +#define ZFS_MAXPROPLEN ZAP_MAXVALUELEN +#define ZPOOL_MAXPROPLEN ZAP_MAXVALUELEN /* * libzfs errors @@ -327,6 +327,8 @@ _LIBZFS_H int zpool_vdev_clear(zpool_handle_t *, uint64_t); _LIBZFS_H nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); +_LIBZFS_H nvlist_t *zpool_find_parent_vdev(zpool_handle_t *, const char *, + boolean_t *, boolean_t *, boolean_t *); _LIBZFS_H nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); _LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, @@ -458,6 +460,7 @@ _LIBZFS_H nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); _LIBZFS_H nvlist_t *zpool_get_features(zpool_handle_t *); _LIBZFS_H int zpool_refresh_stats(zpool_handle_t *, boolean_t *); _LIBZFS_H int zpool_get_errlog(zpool_handle_t *, nvlist_t **); +_LIBZFS_H void zpool_add_propname(zpool_handle_t *, const char *); /* * Import and export functions @@ -468,7 +471,8 @@ _LIBZFS_H int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); _LIBZFS_H int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); -_LIBZFS_H void zpool_print_unsup_feat(nvlist_t *config); +_LIBZFS_H void zpool_collect_unsup_feat(nvlist_t *config, char *buf, + size_t size); /* * Miscellaneous pool functions @@ -499,11 +503,13 @@ _LIBZFS_H void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t); _LIBZFS_H int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); _LIBZFS_H void zpool_explain_recover(libzfs_handle_t *, const char *, int, - nvlist_t *); + nvlist_t *, char *, size_t); _LIBZFS_H int zpool_checkpoint(zpool_handle_t *); _LIBZFS_H int zpool_discard_checkpoint(zpool_handle_t *); _LIBZFS_H boolean_t zpool_is_draid_spare(const char *); +_LIBZFS_H int zpool_prefetch(zpool_handle_t *, zpool_prefetch_type_t); + /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. 
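These prototype changes carry the status refactor into libzfs: zpool_explain_recover() and zpool_collect_unsup_feat() now append into a caller-owned buffer instead of printing directly, so one code path can feed either the terminal or a JSON nvlist. A short sketch of the new calling convention follows; the buffer size and the choice of reason code are arbitrary for illustration, and error handling is elided:

	#include <libzfs.h>
	#include <stdio.h>

	static void
	explain_sketch(zpool_handle_t *zhp, zpool_status_t reason,
	    boolean_t as_json, nvlist_t *item)
	{
		char action[2048];

		action[0] = '\0';
		/* collects recovery advice into 'action' instead of printing */
		zpool_explain_recover(zpool_get_handle(zhp),
		    zpool_get_name(zhp), reason,
		    zpool_get_config(zhp, NULL), action, sizeof (action));

		if (action[0] == '\0')
			return;
		if (as_json)
			fnvlist_add_string(item, "action", action);
		else
			(void) printf("action: %s", action);
	}

This mirrors how print_status_reason() in the zpool_main.c hunks above routes the same status/action buffers to either printf_color() or fnvlist_add_string() depending on cb_json.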
@@ -628,6 +634,8 @@ _LIBZFS_H int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); _LIBZFS_H void zprop_free_list(zprop_list_t *); +_LIBZFS_H void zcmd_print_json(nvlist_t *); + #define ZFS_GET_NCOLS 5 typedef enum { @@ -655,9 +663,13 @@ typedef struct zprop_get_cbdata { boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; + boolean_t cb_json; zprop_list_t *cb_proplist; zfs_type_t cb_type; vdev_cbdata_t cb_vdevs; + nvlist_t *cb_jsobj; + boolean_t cb_json_as_int; + boolean_t cb_json_pool_key_guid; } zprop_get_cbdata_t; #define ZFS_SET_NOMOUNT 1 @@ -671,6 +683,13 @@ _LIBZFS_H void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); +_LIBZFS_H int zprop_nvlist_one_property(const char *, const char *, + zprop_source_t, const char *, const char *, nvlist_t *, boolean_t); + +_LIBZFS_H int zprop_collect_property(const char *, zprop_get_cbdata_t *, + const char *, const char *, zprop_source_t, const char *, + const char *, nvlist_t *); + /* * Iterator functions. */ @@ -976,6 +995,7 @@ _LIBZFS_H boolean_t libzfs_envvar_is_set(const char *); _LIBZFS_H const char *zfs_version_userland(void); _LIBZFS_H char *zfs_version_kernel(void); _LIBZFS_H int zfs_version_print(void); +_LIBZFS_H nvlist_t *zfs_version_nvlist(void); /* * Given a device or file, determine if it is part of a pool. diff --git a/sys/contrib/subrepo-openzfs/include/libzfs_core.h b/sys/contrib/subrepo-openzfs/include/libzfs_core.h index b2fd97372cd4..206e5e5c2bf6 100644 --- a/sys/contrib/subrepo-openzfs/include/libzfs_core.h +++ b/sys/contrib/subrepo-openzfs/include/libzfs_core.h @@ -148,6 +148,9 @@ _LIBZFS_CORE_H int lzc_pool_checkpoint_discard(const char *); _LIBZFS_CORE_H int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *); _LIBZFS_CORE_H int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *); + +_LIBZFS_CORE_H int lzc_pool_prefetch(const char *, zpool_prefetch_type_t); + _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *); _LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *); diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/ccompat.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/ccompat.h index e34bab7e896d..48749fb8eea2 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/ccompat.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/ccompat.h @@ -28,48 +28,6 @@ #ifndef _SYS_CCOMPAT_H #define _SYS_CCOMPAT_H -#if __FreeBSD_version < 1300051 -#define vm_page_valid(m) (m)->valid = VM_PAGE_BITS_ALL -#define vm_page_do_sunbusy(m) -#define vm_page_none_valid(m) ((m)->valid == 0) -#else -#define vm_page_do_sunbusy(m) vm_page_sunbusy(m) -#endif - -#if __FreeBSD_version < 1300074 -#define VOP_UNLOCK1(x) VOP_UNLOCK(x, 0) -#else -#define VOP_UNLOCK1(x) VOP_UNLOCK(x) -#endif - -#if __FreeBSD_version < 1300064 -#define VN_IS_DOOMED(vp) ((vp)->v_iflag & VI_DOOMED) -#endif - -#if __FreeBSD_version < 1300068 -#define VFS_VOP_VECTOR_REGISTER(x) -#endif - -#if __FreeBSD_version >= 1300076 -#define getnewvnode_reserve_() getnewvnode_reserve() -#else -#define getnewvnode_reserve_() getnewvnode_reserve(1) -#endif - -#if __FreeBSD_version < 1300102 -#define ASSERT_VOP_IN_SEQC(zp) -#define MNTK_FPLOOKUP 0 -#define vn_seqc_write_begin(vp) -#define vn_seqc_write_end(vp) - -#ifndef VFS_SMR_DECLARE -#define VFS_SMR_DECLARE -#endif -#ifndef VFS_SMR_ZONE_SET -#define VFS_SMR_ZONE_SET(zone) -#endif -#endif - struct 
hlist_node { struct hlist_node *next, **pprev; }; diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/kmem.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/kmem.h index c633799318d5..ae786f0e20ca 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/kmem.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/kmem.h @@ -49,6 +49,7 @@ MALLOC_DECLARE(M_SOLARIS); #define KM_NOSLEEP M_NOWAIT #define KM_NORMALPRI 0 #define KMC_NODEBUG UMA_ZONE_NODUMP +#define KMC_RECLAIMABLE 0x0 typedef struct vmem vmem_t; diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/policy.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/policy.h index 909ae3886e9c..32c10bdca90e 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/policy.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/policy.h @@ -70,10 +70,4 @@ int secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype); int secpolicy_smb(cred_t *cr); - -#if __FreeBSD_version >= 1300005 -#define spl_priv_check_cred(a, b) priv_check_cred((a), (b)) -#else -#define spl_priv_check_cred(a, b) priv_check_cred((a), (b), 0) -#endif #endif /* _OPENSOLARIS_SYS_POLICY_H_ */ diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/proc.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/proc.h index 8583df509b55..96ba10e5679e 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/proc.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/proc.h @@ -88,9 +88,6 @@ do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg, thread_lock(td); sched_prio(td, pri); sched_add(td, SRQ_BORING); -#if __FreeBSD_version < 1300068 - thread_unlock(td); -#endif } return (td); } diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/random.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/random.h index 7583166e727b..146fa9e5d133 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/random.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/random.h @@ -30,9 +30,7 @@ #define _OPENSOLARIS_SYS_RANDOM_H_ #include_next -#if __FreeBSD_version >= 1300108 #include -#endif static inline int random_get_bytes(uint8_t *p, size_t s) @@ -51,7 +49,7 @@ random_get_pseudo_bytes(uint8_t *p, size_t s) static inline uint32_t random_in_range(uint32_t range) { -#if defined(_KERNEL) && __FreeBSD_version >= 1300108 +#if defined(_KERNEL) return (prng32_bounded(range)); #else uint32_t r; diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vm.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vm.h index 7b3830be8a57..07ee6bc191a7 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vm.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vm.h @@ -41,22 +41,6 @@ void zfs_vmobject_assert_wlocked(vm_object_t object); void zfs_vmobject_wlock(vm_object_t object); void zfs_vmobject_wunlock(vm_object_t object); -#if __FreeBSD_version >= 1300081 -#define zfs_vmobject_assert_wlocked_12(x) -#define zfs_vmobject_wlock_12(x) -#define zfs_vmobject_wunlock_12(x) -#else -#define zfs_vmobject_assert_wlocked_12(x) \ - zfs_vmobject_assert_wlocked((x)) -#define zfs_vmobject_wlock_12(x) \ - zfs_vmobject_wlock(x) -#define zfs_vmobject_wunlock_12(x) \ - zfs_vmobject_wunlock(x) -#define vm_page_grab_unlocked(obj, idx, flags) \ - vm_page_grab((obj), (idx), (flags)) -#define vm_page_grab_valid_unlocked(m, obj, idx, flags) \ - vm_page_grab_valid((m), (obj), (idx), 
(flags)) -#endif static inline caddr_t zfs_map_page(vm_page_t pp, struct sf_buf **sfp) { diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vnode.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vnode.h index 7a3b38736a58..4e73d63321af 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vnode.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/vnode.h @@ -96,17 +96,13 @@ vn_is_readonly(vnode_t *vp) static __inline void vn_flush_cached_data(vnode_t *vp, boolean_t sync) { -#if __FreeBSD_version > 1300054 if (vm_object_mightbedirty(vp->v_object)) { -#else - if (vp->v_object->flags & OBJ_MIGHTBEDIRTY) { -#endif vn_lock(vp, LK_SHARED | LK_RETRY); if (sync) vnode_pager_clean_sync(vp); else vnode_pager_clean_async(vp); - VOP_UNLOCK1(vp); + VOP_UNLOCK(vp); } } #endif diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h index 7f0f24325d59..3e54f3e846f7 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h @@ -27,40 +27,21 @@ #ifndef _SYS_FS_ZFS_VFSOPS_H #define _SYS_FS_ZFS_VFSOPS_H -#if __FreeBSD_version >= 1300125 -#define TEARDOWN_RMS -#endif - -#if __FreeBSD_version >= 1300109 -#define TEARDOWN_INACTIVE_RMS -#endif - #include #include #include #include #include #include -#ifdef TEARDOWN_INACTIVE_RMS #include -#endif #include #ifdef __cplusplus extern "C" { #endif -#ifdef TEARDOWN_RMS typedef struct rmslock zfs_teardown_lock_t; -#else -#define zfs_teardown_lock_t rrmlock_t -#endif - -#ifdef TEARDOWN_INACTIVE_RMS typedef struct rmslock zfs_teardown_inactive_lock_t; -#else -#define zfs_teardown_inactive_lock_t krwlock_t -#endif typedef struct zfsvfs zfsvfs_t; struct znode; @@ -120,7 +101,6 @@ struct zfsvfs { struct task z_unlinked_drain_task; }; -#ifdef TEARDOWN_RMS #define ZFS_TEARDOWN_INIT(zfsvfs) \ rms_init(&(zfsvfs)->z_teardown_lock, "zfs teardown") @@ -150,39 +130,7 @@ struct zfsvfs { #define ZFS_TEARDOWN_HELD(zfsvfs) \ rms_owned_any(&(zfsvfs)->z_teardown_lock) -#else -#define ZFS_TEARDOWN_INIT(zfsvfs) \ - rrm_init(&(zfsvfs)->z_teardown_lock, B_FALSE) - -#define ZFS_TEARDOWN_DESTROY(zfsvfs) \ - rrm_destroy(&(zfsvfs)->z_teardown_lock) - -#define ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag) \ - rrm_enter_read(&(zfsvfs)->z_teardown_lock, tag); - -#define ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag) \ - rrm_exit(&(zfsvfs)->z_teardown_lock, tag) - -#define ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, tag) \ - rrm_enter(&(zfsvfs)->z_teardown_lock, RW_WRITER, tag) - -#define ZFS_TEARDOWN_EXIT_WRITE(zfsvfs) \ - rrm_exit(&(zfsvfs)->z_teardown_lock, tag) - -#define ZFS_TEARDOWN_EXIT(zfsvfs, tag) \ - rrm_exit(&(zfsvfs)->z_teardown_lock, tag) - -#define ZFS_TEARDOWN_READ_HELD(zfsvfs) \ - RRM_READ_HELD(&(zfsvfs)->z_teardown_lock) - -#define ZFS_TEARDOWN_WRITE_HELD(zfsvfs) \ - RRM_WRITE_HELD(&(zfsvfs)->z_teardown_lock) - -#define ZFS_TEARDOWN_HELD(zfsvfs) \ - RRM_LOCK_HELD(&(zfsvfs)->z_teardown_lock) -#endif -#ifdef TEARDOWN_INACTIVE_RMS #define ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs) \ rms_init(&(zfsvfs)->z_teardown_inactive_lock, "zfs teardown inactive") @@ -206,31 +154,6 @@ struct zfsvfs { #define ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs) \ rms_wowned(&(zfsvfs)->z_teardown_inactive_lock) -#else -#define ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs) \ - rw_init(&(zfsvfs)->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL) - -#define ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs) \ - 
rw_destroy(&(zfsvfs)->z_teardown_inactive_lock) - -#define ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs) \ - rw_tryenter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER) - -#define ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs) \ - rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER) - -#define ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs) \ - rw_exit(&(zfsvfs)->z_teardown_inactive_lock) - -#define ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs) \ - rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_WRITER) - -#define ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs) \ - rw_exit(&(zfsvfs)->z_teardown_inactive_lock) - -#define ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs) \ - RW_WRITE_HELD(&(zfsvfs)->z_teardown_inactive_lock) -#endif #define ZSB_XATTR 0x0001 /* Enable user xattrs */ /* diff --git a/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/kmem_cache.h b/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/kmem_cache.h index 905ff57a1434..2b4f120e6427 100644 --- a/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/kmem_cache.h +++ b/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/kmem_cache.h @@ -45,6 +45,7 @@ typedef enum kmc_bit { KMC_BIT_TOTAL = 18, /* Proc handler helper bit */ KMC_BIT_ALLOC = 19, /* Proc handler helper bit */ KMC_BIT_MAX = 20, /* Proc handler helper bit */ + KMC_BIT_RECLAIMABLE = 21, /* Can be freed by shrinker */ } kmc_bit_t; /* kmem move callback return values */ @@ -66,9 +67,7 @@ typedef enum kmem_cbrc { #define KMC_TOTAL (1 << KMC_BIT_TOTAL) #define KMC_ALLOC (1 << KMC_BIT_ALLOC) #define KMC_MAX (1 << KMC_BIT_MAX) - -#define KMC_REAP_CHUNK INT_MAX -#define KMC_DEFAULT_SEEKS 1 +#define KMC_RECLAIMABLE (1 << KMC_BIT_RECLAIMABLE) extern struct list_head spl_kmem_cache_list; extern struct rw_semaphore spl_kmem_cache_sem; diff --git a/sys/contrib/subrepo-openzfs/include/sys/abd.h b/sys/contrib/subrepo-openzfs/include/sys/abd.h index 19fe96292d5f..7b7d84b528cd 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/abd.h +++ b/sys/contrib/subrepo-openzfs/include/sys/abd.h @@ -137,6 +137,7 @@ void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t); void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t); int abd_cmp(abd_t *, abd_t *); int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t); +int abd_cmp_zero_off(abd_t *, size_t, size_t); void abd_zero_off(abd_t *, size_t, size_t); void abd_verify(abd_t *); @@ -183,6 +184,12 @@ abd_zero(abd_t *abd, size_t size) abd_zero_off(abd, 0, size); } +static inline int +abd_cmp_zero(abd_t *abd, size_t size) +{ + return (abd_cmp_zero_off(abd, 0, size)); +} + /* * ABD type check functions */ diff --git a/sys/contrib/subrepo-openzfs/include/sys/arc.h b/sys/contrib/subrepo-openzfs/include/sys/arc.h index 05307aab99e3..c92b3eee618c 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/arc.h +++ b/sys/contrib/subrepo-openzfs/include/sys/arc.h @@ -250,6 +250,16 @@ typedef struct arc_buf_info { enum zio_compress abi_l2arc_compress; } arc_buf_info_t; +/* + * Flags returned by arc_cached; describes which part of the arc + * the block is cached in. 
+ */ +#define ARC_CACHED_EMBEDDED (1U << 0) +#define ARC_CACHED_IN_L1 (1U << 1) +#define ARC_CACHED_IN_MRU (1U << 2) +#define ARC_CACHED_IN_MFU (1U << 3) +#define ARC_CACHED_IN_L2 (1U << 4) + void arc_space_consume(uint64_t space, arc_space_type_t type); void arc_space_return(uint64_t space, arc_space_type_t type); boolean_t arc_is_metadata(arc_buf_t *buf); @@ -310,6 +320,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv); void arc_remove_prune_callback(arc_prune_t *p); void arc_freed(spa_t *spa, const blkptr_t *bp); +int arc_cached(spa_t *spa, const blkptr_t *bp); void arc_flush(spa_t *spa, boolean_t retry); void arc_tempreserve_clear(uint64_t reserve); diff --git a/sys/contrib/subrepo-openzfs/include/sys/arc_impl.h b/sys/contrib/subrepo-openzfs/include/sys/arc_impl.h index defebe3b2fbb..01693d72dda8 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/arc_impl.h +++ b/sys/contrib/subrepo-openzfs/include/sys/arc_impl.h @@ -1058,10 +1058,10 @@ extern uint_t arc_lotsfree_percent; extern uint64_t zfs_arc_min; extern uint64_t zfs_arc_max; -extern void arc_reduce_target_size(int64_t to_free); +extern uint64_t arc_reduce_target_size(uint64_t to_free); extern boolean_t arc_reclaim_needed(void); extern void arc_kmem_reap_soon(void); -extern void arc_wait_for_eviction(uint64_t, boolean_t); +extern void arc_wait_for_eviction(uint64_t, boolean_t, boolean_t); extern void arc_lowmem_init(void); extern void arc_lowmem_fini(void); diff --git a/sys/contrib/subrepo-openzfs/include/sys/dbuf.h b/sys/contrib/subrepo-openzfs/include/sys/dbuf.h index 3808a04cba80..8b03b1f895f8 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/dbuf.h +++ b/sys/contrib/subrepo-openzfs/include/sys/dbuf.h @@ -214,9 +214,15 @@ typedef struct dmu_buf_impl { struct objset *db_objset; /* - * handle to safely access the dnode we belong to (NULL when evicted) + * Handle to safely access the dnode we belong to (NULL when evicted) + * if dnode_move() is used on the platform, or just dnode otherwise. 
*/ +#if !defined(__linux__) && !defined(__FreeBSD__) +#define USE_DNODE_HANDLE 1 struct dnode_handle *db_dnode_handle; +#else + struct dnode *db_dnode; +#endif /* * our parent buffer; if the dnode points to us directly, @@ -417,11 +423,19 @@ void dbuf_stats_destroy(void); int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid, blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift); +#ifdef USE_DNODE_HANDLE #define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode) #define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock) #define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db))) #define DB_DNODE_EXIT(_db) (zrl_remove(&DB_DNODE_LOCK(_db))) #define DB_DNODE_HELD(_db) (!zrl_is_zero(&DB_DNODE_LOCK(_db))) +#else +#define DB_DNODE(_db) ((_db)->db_dnode) +#define DB_DNODE_LOCK(_db) +#define DB_DNODE_ENTER(_db) +#define DB_DNODE_EXIT(_db) +#define DB_DNODE_HELD(_db) (B_TRUE) +#endif void dbuf_init(void); void dbuf_fini(void); diff --git a/sys/contrib/subrepo-openzfs/include/sys/ddt.h b/sys/contrib/subrepo-openzfs/include/sys/ddt.h index 726f1a3902eb..66d59cebacde 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/ddt.h +++ b/sys/contrib/subrepo-openzfs/include/sys/ddt.h @@ -151,7 +151,8 @@ enum ddt_phys_type { */ /* State flags for dde_flags */ -#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */ +#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */ +#define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */ typedef struct { /* key must be first for ddt_key_compare */ @@ -170,6 +171,7 @@ typedef struct { uint8_t dde_flags; /* load state flags */ kcondvar_t dde_cv; /* signaled when load completes */ + uint64_t dde_waiters; /* count of waiters on dde_cv */ avl_node_t dde_node; /* ddt_tree node */ } ddt_entry_t; @@ -228,11 +230,13 @@ extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh); extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo); +extern uint64_t ddt_get_ddt_dsize(spa_t *spa); extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh); extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total); extern uint64_t ddt_get_dedup_dspace(spa_t *spa); extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa); +extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize); extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); extern void ddt_enter(ddt_t *ddt); @@ -240,8 +244,9 @@ extern void ddt_exit(ddt_t *ddt); extern void ddt_init(void); extern void ddt_fini(void); extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add); -extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp); extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde); +extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp); +extern void ddt_prefetch_all(spa_t *spa); extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp); diff --git a/sys/contrib/subrepo-openzfs/include/sys/ddt_impl.h b/sys/contrib/subrepo-openzfs/include/sys/ddt_impl.h index 52b927b7519d..4aaab10c8737 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/ddt_impl.h +++ b/sys/contrib/subrepo-openzfs/include/sys/ddt_impl.h @@ -47,6 +47,7 @@ typedef struct { const ddt_key_t *ddk); void (*ddt_op_prefetch)(objset_t *os, uint64_t object, const ddt_key_t *ddk); + void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object); int (*ddt_op_update)(objset_t *os, uint64_t 
object, const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx); diff --git a/sys/contrib/subrepo-openzfs/include/sys/dmu.h b/sys/contrib/subrepo-openzfs/include/sys/dmu.h index b5fed64da4ad..1376cbef763c 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/dmu.h +++ b/sys/contrib/subrepo-openzfs/include/sys/dmu.h @@ -505,6 +505,12 @@ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, dmu_tx_t *tx); +/* + * Get an estimated cache size for an object. Caller must expect races. + */ +int dmu_object_cached_size(objset_t *os, uint64_t object, + uint64_t *l1sz, uint64_t *l2sz); + void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, void *data, uint8_t etype, uint8_t comp, int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx); @@ -903,6 +909,8 @@ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset, uint64_t len, enum zio_priority pri); void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri); +int dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size); typedef struct dmu_object_info { /* All sizes are in bytes unless otherwise indicated. */ diff --git a/sys/contrib/subrepo-openzfs/include/sys/dnode.h b/sys/contrib/subrepo-openzfs/include/sys/dnode.h index dbe7350d4da7..5d0f0fb26d02 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/dnode.h +++ b/sys/contrib/subrepo-openzfs/include/sys/dnode.h @@ -380,6 +380,9 @@ struct dnode { /* holds prefetch structure */ struct zfetch dn_zfetch; + + /* Not in dn_phys, but should be. set it after taking a hold */ + dmu_object_type_t dn_storage_type; /* type for storage class */ }; /* @@ -462,6 +465,8 @@ void dnode_evict_dbufs(dnode_t *dn); void dnode_evict_bonus(dnode_t *dn); void dnode_free_interior_slots(dnode_t *dn); +void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type); + #define DNODE_IS_DIRTY(_dn) \ ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa)) diff --git a/sys/contrib/subrepo-openzfs/include/sys/fs/zfs.h b/sys/contrib/subrepo-openzfs/include/sys/fs/zfs.h index e191420f2d2d..c7e48d1edc0e 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/fs/zfs.h +++ b/sys/contrib/subrepo-openzfs/include/sys/fs/zfs.h @@ -258,6 +258,9 @@ typedef enum { ZPOOL_PROP_BCLONEUSED, ZPOOL_PROP_BCLONESAVED, ZPOOL_PROP_BCLONERATIO, + ZPOOL_PROP_DEDUP_TABLE_SIZE, + ZPOOL_PROP_DEDUP_TABLE_QUOTA, + ZPOOL_PROP_DEDUPCACHED, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -368,6 +371,9 @@ typedef enum { VDEV_PROP_RAIDZ_EXPANDING, VDEV_PROP_SLOW_IO_N, VDEV_PROP_SLOW_IO_T, + VDEV_PROP_TRIM_SUPPORT, + VDEV_PROP_TRIM_ERRORS, + VDEV_PROP_SLOW_IOS, VDEV_NUM_PROPS } vdev_prop_t; @@ -1512,6 +1518,7 @@ typedef enum zfs_ioc { ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */ ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */ ZFS_IOC_POOL_SCRUB, /* 0x5a57 */ + ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */ /* * Per-platform (Optional) - 8/128 numbers reserved. @@ -1643,6 +1650,11 @@ typedef enum { ZFS_WAIT_NUM_ACTIVITIES } zfs_wait_activity_t; +typedef enum { + ZPOOL_PREFETCH_NONE = 0, + ZPOOL_PREFETCH_DDT +} zpool_prefetch_type_t; + /* * Bookmark name values. */ @@ -1681,6 +1693,17 @@ typedef enum { */ #define ZPOOL_HIDDEN_ARGS "hidden_args" +/* + * The following is used when invoking ZFS_IOC_POOL_GET_PROPS. 
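+ *
+ * Illustrative sketch (hypothetical caller, mirroring the libzfs_pool.c
+ * hunk later in this patch): the requested names are passed as a string
+ * array under this key in the input nvlist, e.g.
+ *
+ *	const char *names[] = { ZPOOL_DEDUPCACHED_PROP_NAME };
+ *	fnvlist_add_string_array(innvl, ZPOOL_GET_PROPS_NAMES, names, 1);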
+ */ +#define ZPOOL_GET_PROPS_NAMES "get_props_names" + +/* + * Opt-in property names used with ZPOOL_GET_PROPS_NAMES. + * For example, properties that are hidden or expensive to compute. + */ +#define ZPOOL_DEDUPCACHED_PROP_NAME "dedupcached" + /* * The following are names used when invoking ZFS_IOC_POOL_INITIALIZE. */ @@ -1720,6 +1743,11 @@ typedef enum { #define ZFS_WAIT_ACTIVITY "wait_activity" #define ZFS_WAIT_WAITED "wait_waited" +/* + * The following are names used when invoking ZFS_IOC_POOL_PREFETCH. + */ +#define ZPOOL_PREFETCH_TYPE "prefetch_type" + /* * Flags for ZFS_IOC_VDEV_SET_STATE */ diff --git a/sys/contrib/subrepo-openzfs/include/sys/sa_impl.h b/sys/contrib/subrepo-openzfs/include/sys/sa_impl.h index 744c8dcb7dfb..6eb0c96188fa 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/sa_impl.h +++ b/sys/contrib/subrepo-openzfs/include/sys/sa_impl.h @@ -272,7 +272,6 @@ int sa_add_impl(sa_handle_t *, sa_attr_type_t, uint32_t, sa_data_locator_t, void *, dmu_tx_t *); void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *); -int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *); void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *); int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t, diff --git a/sys/contrib/subrepo-openzfs/include/sys/spa.h b/sys/contrib/subrepo-openzfs/include/sys/spa.h index e8b4745b91f5..9e667b38dbb1 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/spa.h +++ b/sys/contrib/subrepo-openzfs/include/sys/spa.h @@ -1049,8 +1049,8 @@ extern metaslab_class_t *spa_log_class(spa_t *spa); extern metaslab_class_t *spa_embedded_log_class(spa_t *spa); extern metaslab_class_t *spa_special_class(spa_t *spa); extern metaslab_class_t *spa_dedup_class(spa_t *spa); -extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size, - dmu_object_type_t objtype, uint_t level, uint_t special_smallblk); +extern metaslab_class_t *spa_preferred_class(spa_t *spa, const zio_t *zio); +extern boolean_t spa_special_has_ddt(spa_t *spa); extern void spa_evicting_os_register(spa_t *, objset_t *os); extern void spa_evicting_os_deregister(spa_t *, objset_t *os); @@ -1197,6 +1197,8 @@ extern void spa_boot_init(void); /* properties */ extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); extern int spa_prop_get(spa_t *spa, nvlist_t **nvp); +extern int spa_prop_get_nvlist(spa_t *spa, char **props, + unsigned int n_props, nvlist_t **outnvl); extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx); extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t); diff --git a/sys/contrib/subrepo-openzfs/include/sys/spa_impl.h b/sys/contrib/subrepo-openzfs/include/sys/spa_impl.h index 5605a35b8641..4fc6f22fcb50 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/spa_impl.h +++ b/sys/contrib/subrepo-openzfs/include/sys/spa_impl.h @@ -146,6 +146,7 @@ struct spa_aux_vdev { vdev_t **sav_vdevs; /* devices */ int sav_count; /* number devices */ boolean_t sav_sync; /* sync the device list */ + boolean_t sav_label_sync; /* sync aux labels */ nvlist_t **sav_pending; /* pending device additions */ uint_t sav_npending; /* # pending devices */ }; @@ -465,6 +466,9 @@ struct spa { boolean_t spa_waiters_cancel; /* waiters should return */ char *spa_compatibility; /* compatibility file(s) */ + uint64_t spa_dedup_table_quota; /* property DDT maximum size */ + uint64_t spa_dedup_dsize; /* cached on-disk size of DDT */ + uint64_t spa_dedup_class_full_txg; /* txg dedup class was full */ /* * spa_refcount & spa_config_lock must be the last elements diff 
--git a/sys/contrib/subrepo-openzfs/include/sys/zap.h b/sys/contrib/subrepo-openzfs/include/sys/zap.h index 96ddcc324b65..0027f7c5103e 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/zap.h +++ b/sys/contrib/subrepo-openzfs/include/sys/zap.h @@ -225,6 +225,7 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf); int zap_contains(objset_t *ds, uint64_t zapobj, const char *name); int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name); +int zap_prefetch_object(objset_t *os, uint64_t zapobj); int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints); diff --git a/sys/contrib/subrepo-openzfs/include/sys/zfs_context.h b/sys/contrib/subrepo-openzfs/include/sys/zfs_context.h index e4711ce4194a..998eaa5dd813 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/zfs_context.h +++ b/sys/contrib/subrepo-openzfs/include/sys/zfs_context.h @@ -413,6 +413,7 @@ void procfs_list_add(procfs_list_t *procfs_list, void *p); #define KM_NORMALPRI 0 /* not needed with UMEM_DEFAULT */ #define KMC_NODEBUG UMC_NODEBUG #define KMC_KVMEM 0x0 +#define KMC_RECLAIMABLE 0x0 #define kmem_alloc(_s, _f) umem_alloc(_s, _f) #define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) #define kmem_free(_b, _s) umem_free(_b, _s) diff --git a/sys/contrib/subrepo-openzfs/include/sys/zil.h b/sys/contrib/subrepo-openzfs/include/sys/zil.h index 4747ecc067a9..384678b223d5 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/zil.h +++ b/sys/contrib/subrepo-openzfs/include/sys/zil.h @@ -467,6 +467,21 @@ typedef struct zil_stats { */ kstat_named_t zil_commit_writer_count; + /* + * Number of times a ZIL commit failed and the ZIL was forced to fall + * back to txg_wait_synced(). The separate counts are for different + * reasons: + * - error: ZIL IO (write/flush) returned an error + * (see zil_commit_impl()) + * - stall: LWB block allocation failed, ZIL chain abandoned + * (see zil_commit_writer_stall()) + * - suspend: ZIL suspended + * (see zil_commit(), zil_get_commit_list()) + */ + kstat_named_t zil_commit_error_count; + kstat_named_t zil_commit_stall_count; + kstat_named_t zil_commit_suspend_count; + /* * Number of transactions (reads, writes, renames, etc.) * that have been committed. @@ -510,6 +525,9 @@ typedef struct zil_stats { typedef struct zil_sums { wmsum_t zil_commit_count; wmsum_t zil_commit_writer_count; + wmsum_t zil_commit_error_count; + wmsum_t zil_commit_stall_count; + wmsum_t zil_commit_suspend_count; wmsum_t zil_itx_count; wmsum_t zil_itx_indirect_count; wmsum_t zil_itx_indirect_bytes; diff --git a/sys/contrib/subrepo-openzfs/include/sys/zio.h b/sys/contrib/subrepo-openzfs/include/sys/zio.h index 77c70b9b481c..446b64ccd8ab 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/zio.h +++ b/sys/contrib/subrepo-openzfs/include/sys/zio.h @@ -356,6 +356,7 @@ typedef struct zio_prop { uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; uint32_t zp_zpl_smallblk; + dmu_object_type_t zp_storage_type; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; diff --git a/sys/contrib/subrepo-openzfs/include/sys/zvol_impl.h b/sys/contrib/subrepo-openzfs/include/sys/zvol_impl.h index 6c15c84b6bf4..3cd0d78c353d 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/zvol_impl.h +++ b/sys/contrib/subrepo-openzfs/include/sys/zvol_impl.h @@ -18,22 +18,19 @@ * * CDDL HEADER END */ +/* + * Copyright (c) 2024, Klara, Inc. 
+ */ #ifndef _SYS_ZVOL_IMPL_H #define _SYS_ZVOL_IMPL_H #include -#define ZVOL_RDONLY 0x1 -/* - * Whether the zvol has been written to (as opposed to ZVOL_RDONLY, which - * specifies whether or not the zvol _can_ be written to) - */ -#define ZVOL_WRITTEN_TO 0x2 - -#define ZVOL_DUMPIFIED 0x4 - -#define ZVOL_EXCL 0x8 +#define ZVOL_RDONLY (1<<0) /* zvol is readonly (writes rejected) */ +#define ZVOL_WRITTEN_TO (1<<1) /* zvol has been written to (needs flush) */ +#define ZVOL_EXCL (1<<2) /* zvol has O_EXCL client right now */ +#define ZVOL_REMOVING (1<<3) /* zvol waiting to remove minor */ /* * The in-core state of each volume. @@ -57,6 +54,7 @@ typedef struct zvol_state { kmutex_t zv_state_lock; /* protects zvol_state_t */ atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ + kcondvar_t zv_removing_cv; /* ready to remove minor */ struct zvol_state_os *zv_zso; /* private platform state */ boolean_t zv_threading; /* volthreading property */ } zvol_state_t; diff --git a/sys/contrib/subrepo-openzfs/lib/libshare/os/freebsd/nfs.c b/sys/contrib/subrepo-openzfs/lib/libshare/os/freebsd/nfs.c index d4cdb07a4947..b5ce221bb1cb 100644 --- a/sys/contrib/subrepo-openzfs/lib/libshare/os/freebsd/nfs.c +++ b/sys/contrib/subrepo-openzfs/lib/libshare/os/freebsd/nfs.c @@ -44,8 +44,6 @@ #include "nfs.h" #define _PATH_MOUNTDPID "/var/run/mountd.pid" -#define OPTSSIZE 1024 -#define MAXLINESIZE (PATH_MAX + OPTSSIZE) #define ZFS_EXPORTS_FILE "/etc/zfs/exports" #define ZFS_EXPORTS_LOCK ZFS_EXPORTS_FILE".lock" @@ -69,17 +67,30 @@ * index, quiet */ static int -translate_opts(const char *shareopts, FILE *out) +translate_opts(char *oldopts, FILE *out) { static const char *const known_opts[] = { "ro", "maproot", "mapall", "mask", "network", "sec", "alldirs", "public", "webnfs", "index", "quiet" }; - char oldopts[OPTSSIZE], newopts[OPTSSIZE]; - char *o, *s = NULL; + char *newopts, *o, *s = NULL; unsigned int i; - size_t len; - - strlcpy(oldopts, shareopts, sizeof (oldopts)); + size_t len, newopts_len; + int ret; + + /* + * Calculate the length needed for the worst case of a single + * character option: + * - Add one to strlen(oldopts) so that the trailing nul is counted + * as a separator. + * - Multiply by 3/2 since the single character option plus separator + * is expanded to 3 characters. + * - Add one for the trailing nul. Needed for a single repetition of + * the single character option and certain other cases. 
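+ *
+ * Worked check of the formula below (hypothetical one-character option
+ * "x" that matches known_opts): it expands to "-x " plus the trailing
+ * nul, i.e. 4 bytes, and (1 + 1) * 3 / 2 + 1 = 4.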
+ */ + newopts_len = (strlen(oldopts) + 1) * 3 / 2 + 1; + newopts = malloc(newopts_len); + if (newopts == NULL) + return (EOF); newopts[0] = '\0'; s = oldopts; while ((o = strsep(&s, "-, ")) != NULL) { @@ -89,14 +100,16 @@ translate_opts(const char *shareopts, FILE *out) len = strlen(known_opts[i]); if (strncmp(known_opts[i], o, len) == 0 && (o[len] == '\0' || o[len] == '=')) { - strlcat(newopts, "-", sizeof (newopts)); + strlcat(newopts, "-", newopts_len); break; } } - strlcat(newopts, o, sizeof (newopts)); - strlcat(newopts, " ", sizeof (newopts)); + strlcat(newopts, o, newopts_len); + strlcat(newopts, " ", newopts_len); } - return (fputs(newopts, out)); + ret = fputs(newopts, out); + free(newopts); + return (ret); } static int @@ -106,20 +119,38 @@ nfs_enable_share_impl(sa_share_impl_t impl_share, FILE *tmpfile) if (strcmp(shareopts, "on") == 0) shareopts = ""; - boolean_t need_free; - char *mp; + boolean_t need_free, fnd_semi; + char *mp, *lineopts, *exportopts, *s; + size_t whitelen; int rc = nfs_escape_mountpoint(impl_share->sa_mountpoint, &mp, &need_free); if (rc != SA_OK) return (rc); - if (fputs(mp, tmpfile) == EOF || - fputc('\t', tmpfile) == EOF || - translate_opts(shareopts, tmpfile) == EOF || - fputc('\n', tmpfile) == EOF) { - fprintf(stderr, "failed to write to temporary file\n"); - rc = SA_SYSTEM_ERR; + lineopts = strdup(shareopts); + if (lineopts == NULL) + return (SA_SYSTEM_ERR); + s = lineopts; + fnd_semi = B_FALSE; + while ((exportopts = strsep(&s, ";")) != NULL) { + if (s != NULL) + fnd_semi = B_TRUE; + /* Ignore only whitespace between ';' separated option sets. */ + if (fnd_semi) { + whitelen = strspn(exportopts, "\t "); + if (exportopts[whitelen] == '\0') + continue; + } + if (fputs(mp, tmpfile) == EOF || + fputc('\t', tmpfile) == EOF || + translate_opts(exportopts, tmpfile) == EOF || + fputc('\n', tmpfile) == EOF) { + fprintf(stderr, "failed to write to temporary file\n"); + rc = SA_SYSTEM_ERR; + break; + } } + free(lineopts); if (need_free) free(mp); diff --git a/sys/contrib/subrepo-openzfs/lib/libspl/include/statcommon.h b/sys/contrib/subrepo-openzfs/lib/libspl/include/statcommon.h index 971997a447a5..6b7cd0c105e1 100644 --- a/sys/contrib/subrepo-openzfs/lib/libspl/include/statcommon.h +++ b/sys/contrib/subrepo-openzfs/lib/libspl/include/statcommon.h @@ -37,5 +37,9 @@ /* Print a timestamp in either Unix or standard format. */ void print_timestamp(uint_t); +/* Return timestamp in either Unix or standard format in provided buffer */ +void get_timestamp(uint_t, char *, int); +/* Convert time_t to standard format */ +void format_timestamp(time_t, char *, int); #endif /* _STATCOMMON_H */ diff --git a/sys/contrib/subrepo-openzfs/lib/libspl/timestamp.c b/sys/contrib/subrepo-openzfs/lib/libspl/timestamp.c index 9b435221f5fb..efe21fc1c0e5 100644 --- a/sys/contrib/subrepo-openzfs/lib/libspl/timestamp.c +++ b/sys/contrib/subrepo-openzfs/lib/libspl/timestamp.c @@ -62,3 +62,45 @@ print_timestamp(uint_t timestamp_fmt) (void) printf("%s\n", dstr); } } + +/* + * Return timestamp as decimal representation (in string) of time_t + * value (-T u was specified) or in date(1) format (-T d was specified).
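+ *
+ * Usage sketch (hypothetical, not part of the upstream change):
+ *
+ *	char buf[64];
+ *	get_timestamp(DDATE, buf, sizeof (buf));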
+ */ +void +get_timestamp(uint_t timestamp_fmt, char *buf, int len) +{ + time_t t = time(NULL); + static const char *fmt = NULL; + + /* We only need to retrieve this once per invocation */ + if (fmt == NULL) + fmt = nl_langinfo(_DATE_FMT); + + if (timestamp_fmt == UDATE) { + (void) snprintf(buf, len, "%lld", (longlong_t)t); + } else if (timestamp_fmt == DDATE) { + struct tm tm; + strftime(buf, len, fmt, localtime_r(&t, &tm)); + } +} + +/* + * Format the provided time stamp to human readable format + */ +void +format_timestamp(time_t t, char *buf, int len) +{ + struct tm tm; + static const char *fmt = NULL; + + if (t == 0) { + snprintf(buf, len, "-"); + return; + } + + /* We only need to retrieve this once per invocation */ + if (fmt == NULL) + fmt = nl_langinfo(_DATE_FMT); + strftime(buf, len, fmt, localtime_r(&t, &tm)); +} diff --git a/sys/contrib/subrepo-openzfs/lib/libuutil/libuutil.abi b/sys/contrib/subrepo-openzfs/lib/libuutil/libuutil.abi index 2ed2fb2e41e6..1ad837b0edf8 100644 --- a/sys/contrib/subrepo-openzfs/lib/libuutil/libuutil.abi +++ b/sys/contrib/subrepo-openzfs/lib/libuutil/libuutil.abi [XML ABI hunks omitted: element markup not recoverable] diff --git a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs.abi b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs.abi index 80f4b7439a55..51c8dc9647ee 100644 --- a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs.abi [XML ABI hunks omitted: element markup not recoverable] diff --git a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_impl.h b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_impl.h index ef0359f45ea0..e98ede51e4ba 100644 --- a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_impl.h +++ b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_impl.h @@ -94,12 +94,15 @@ struct zfs_handle { * snapshots of volumes.
*/ #define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME) +#define ZHP_MAX_PROPNAMES 4 struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; char zpool_name[ZFS_MAX_DATASET_NAME_LEN]; int zpool_state; + unsigned int zpool_n_propnames; + const char *zpool_propnames[ZHP_MAX_PROPNAMES]; size_t zpool_config_size; nvlist_t *zpool_config; nvlist_t *zpool_old_config; diff --git a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_pool.c index 9ee0d936857f..1ec7a8ca30cc 100644 --- a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_pool.c @@ -79,6 +79,13 @@ zpool_get_all_props(zpool_handle_t *zhp) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if (zhp->zpool_n_propnames > 0) { + nvlist_t *innvl = fnvlist_alloc(); + fnvlist_add_string_array(innvl, ZPOOL_GET_PROPS_NAMES, + zhp->zpool_propnames, zhp->zpool_n_propnames); + zcmd_write_src_nvlist(hdl, &zc, innvl); + } + zcmd_alloc_dst_nvlist(hdl, &zc, 0); while (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) { @@ -318,6 +325,15 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, return (0); } + /* + * ZPOOL_PROP_DEDUPCACHED can only be fetched by name, using the + * ZPOOL_GET_PROPS_NAMES mechanism. + */ + if (prop == ZPOOL_PROP_DEDUPCACHED) { + zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME); + (void) zpool_get_all_props(zhp); + } + if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) && prop != ZPOOL_PROP_NAME) return (-1); @@ -332,6 +348,24 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, intval = zpool_get_prop_int(zhp, prop, &src); switch (prop) { + case ZPOOL_PROP_DEDUP_TABLE_QUOTA: + /* + * If dedup quota is 0, we translate this into 'none' + * (unless literal is set). If it is UINT64_MAX, we + * translate that as 'automatic' (limit to the size of + * the dedicated dedup VDEV). Otherwise, fall through + * to the regular number formatting. + */ + if (intval == 0) { + (void) strlcpy(buf, literal ?
"0" : "none", + len); + break; + } else if (intval == UINT64_MAX) { + (void) strlcpy(buf, "auto", len); + break; + } + zfs_fallthrough; + case ZPOOL_PROP_SIZE: case ZPOOL_PROP_ALLOCATED: case ZPOOL_PROP_FREE: @@ -342,6 +376,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, case ZPOOL_PROP_MAXDNODESIZE: case ZPOOL_PROP_BCLONESAVED: case ZPOOL_PROP_BCLONEUSED: + case ZPOOL_PROP_DEDUP_TABLE_SIZE: + case ZPOOL_PROP_DEDUPCACHED: if (literal) (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); @@ -493,9 +529,10 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, zpool_prop_t prop; const char *strval; uint64_t intval; - const char *slash, *check; + const char *check; struct stat64 statbuf; zpool_handle_t *zhp; + char *parent, *slash; char report[1024]; if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) { @@ -749,30 +786,36 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, goto error; } - slash = strrchr(strval, '/'); + parent = strdup(strval); + if (parent == NULL) { + (void) zfs_error(hdl, EZFS_NOMEM, errbuf); + goto error; + } + slash = strrchr(parent, '/'); if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is not a valid file"), strval); + "'%s' is not a valid file"), parent); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); + free(parent); goto error; } - *(char *)slash = '\0'; + *slash = '\0'; - if (strval[0] != '\0' && - (stat64(strval, &statbuf) != 0 || + if (parent[0] != '\0' && + (stat64(parent, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))) { - *(char *)slash = '/'; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is not a valid directory"), - strval); + parent); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); + free(parent); goto error; } + free(parent); - *(char *)slash = '/'; break; case ZPOOL_PROP_COMPATIBILITY: @@ -1719,6 +1762,28 @@ zpool_discard_checkpoint(zpool_handle_t *zhp) return (0); } +/* + * Load data type for the given pool. + */ +int +zpool_prefetch(zpool_handle_t *zhp, zpool_prefetch_type_t type) +{ + libzfs_handle_t *hdl = zhp->zpool_hdl; + char msg[1024]; + int error; + + error = lzc_pool_prefetch(zhp->zpool_name, type); + if (error != 0) { + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot prefetch %s in '%s'"), + type == ZPOOL_PREFETCH_DDT ? "ddt" : "", zhp->zpool_name); + (void) zpool_standard_error(hdl, error, msg); + return (-1); + } + + return (0); +} + /* * Add the given vdevs to the pool. The caller must have already performed the * necessary verification to ensure that the vdev specification is well-formed. 
@@ -1932,23 +1997,18 @@ zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun, void zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, - nvlist_t *config) + nvlist_t *config, char *buf, size_t size) { nvlist_t *nv = NULL; int64_t loss = -1; uint64_t edata = UINT64_MAX; uint64_t rewindto; struct tm t; - char timestr[128]; + char timestr[128], temp[1024]; if (!hdl->libzfs_printerr) return; - if (reason >= 0) - (void) printf(dgettext(TEXT_DOMAIN, "action: ")); - else - (void) printf(dgettext(TEXT_DOMAIN, "\t")); - /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 || @@ -1959,56 +2019,61 @@ zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS, &edata); - (void) printf(dgettext(TEXT_DOMAIN, + (void) snprintf(buf, size, dgettext(TEXT_DOMAIN, "Recovery is possible, but will result in some data loss.\n")); if (localtime_r((time_t *)&rewindto, &t) != NULL && ctime_r((time_t *)&rewindto, timestr) != NULL) { timestr[24] = 0; - (void) printf(dgettext(TEXT_DOMAIN, + (void) snprintf(temp, 1024, dgettext(TEXT_DOMAIN, "\tReturning the pool to its state as of %s\n" - "\tshould correct the problem. "), - timestr); + "\tshould correct the problem. "), timestr); + (void) strlcat(buf, temp, size); } else { - (void) printf(dgettext(TEXT_DOMAIN, + (void) strlcat(buf, dgettext(TEXT_DOMAIN, "\tReverting the pool to an earlier state " - "should correct the problem.\n\t")); + "should correct the problem.\n\t"), size); } if (loss > 120) { - (void) printf(dgettext(TEXT_DOMAIN, + (void) snprintf(temp, 1024, dgettext(TEXT_DOMAIN, "Approximately %lld minutes of data\n" "\tmust be discarded, irreversibly. "), ((longlong_t)loss + 30) / 60); + (void) strlcat(buf, temp, size); } else if (loss > 0) { - (void) printf(dgettext(TEXT_DOMAIN, + (void) snprintf(temp, 1024, dgettext(TEXT_DOMAIN, "Approximately %lld seconds of data\n" "\tmust be discarded, irreversibly. "), (longlong_t)loss); + (void) strlcat(buf, temp, size); } if (edata != 0 && edata != UINT64_MAX) { if (edata == 1) { - (void) printf(dgettext(TEXT_DOMAIN, + (void) strlcat(buf, dgettext(TEXT_DOMAIN, "After rewind, at least\n" - "\tone persistent user-data error will remain. ")); + "\tone persistent user-data error will remain. "), + size); } else { - (void) printf(dgettext(TEXT_DOMAIN, + (void) strlcat(buf, dgettext(TEXT_DOMAIN, "After rewind, several\n" - "\tpersistent user-data errors will remain. ")); + "\tpersistent user-data errors will remain. "), + size); } } - (void) printf(dgettext(TEXT_DOMAIN, + (void) snprintf(temp, 1024, dgettext(TEXT_DOMAIN, "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "), reason >= 0 ? 
"clear" : "import", name); + (void) strlcat(buf, temp, size); - (void) printf(dgettext(TEXT_DOMAIN, + (void) strlcat(buf, dgettext(TEXT_DOMAIN, "A scrub of the pool\n" - "\tis strongly recommended after recovery.\n")); + "\tis strongly recommended after recovery.\n"), size); return; no_info: - (void) printf(dgettext(TEXT_DOMAIN, - "Destroy and re-create the pool from\n\ta backup source.\n")); + (void) strlcat(buf, dgettext(TEXT_DOMAIN, + "Destroy and re-create the pool from\n\ta backup source.\n"), size); } /* @@ -2077,9 +2142,10 @@ print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv, } void -zpool_print_unsup_feat(nvlist_t *config) +zpool_collect_unsup_feat(nvlist_t *config, char *buf, size_t size) { nvlist_t *nvinfo, *unsup_feat; + char temp[512]; nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); unsup_feat = fnvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT); @@ -2087,10 +2153,14 @@ zpool_print_unsup_feat(nvlist_t *config) for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL; nvp = nvlist_next_nvpair(unsup_feat, nvp)) { const char *desc = fnvpair_value_string(nvp); - if (strlen(desc) > 0) - (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc); - else - (void) printf("\t%s\n", nvpair_name(nvp)); + if (strlen(desc) > 0) { + (void) snprintf(temp, 512, "\t%s (%s)\n", + nvpair_name(nvp), desc); + (void) strlcat(buf, temp, size); + } else { + (void) snprintf(temp, 512, "\t%s\n", nvpair_name(nvp)); + (void) strlcat(buf, temp, size); + } } } @@ -2113,6 +2183,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, const char *origname; int ret; int error = 0; + char buf[2048]; char errbuf[ERRBUFLEN]; origname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); @@ -2195,7 +2266,9 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, (void) printf(dgettext(TEXT_DOMAIN, "This " "pool uses the following feature(s) not " "supported by this system:\n")); - zpool_print_unsup_feat(nv); + memset(buf, 0, 2048); + zpool_collect_unsup_feat(nv, buf, 2048); + (void) printf("%s", buf); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) { (void) printf(dgettext(TEXT_DOMAIN, @@ -2294,8 +2367,11 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, break; default: (void) zpool_standard_error(hdl, error, desc); + memset(buf, 0, 2048); zpool_explain_recover(hdl, - newname ? origname : thename, -error, nv); + newname ? origname : thename, -error, nv, + buf, 2048); + (void) printf("\t%s", buf); break; } @@ -2794,10 +2870,13 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) * the nvpair name to determine how we should look for the device. * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL * spare; but FALSE if its an INUSE spare. + * + * If 'return_parent' is set, then return the *parent* of the vdev you're + * searching for rather than the vdev itself. 
*/ static nvlist_t * vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, - boolean_t *l2cache, boolean_t *log) + boolean_t *l2cache, boolean_t *log, boolean_t return_parent) { uint_t c, children; nvlist_t **child; @@ -2805,6 +2884,8 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, uint64_t is_log; const char *srchkey; nvpair_t *pair = nvlist_next_nvpair(search, NULL); + const char *tmp = NULL; + boolean_t is_root; /* Nothing to look for */ if (search == NULL || pair == NULL) @@ -2813,6 +2894,12 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* Obtain the key we will use to search */ srchkey = nvpair_name(pair); + nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &tmp); + if (strcmp(tmp, "root") == 0) + is_root = B_TRUE; + else + is_root = B_FALSE; + switch (nvpair_type(pair)) { case DATA_TYPE_UINT64: if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { @@ -2943,7 +3030,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, - avail_spare, l2cache, NULL)) != NULL) { + avail_spare, l2cache, NULL, return_parent)) != NULL) { /* * The 'is_log' value is only set for the toplevel * vdev, not the leaf vdevs. So we always lookup the @@ -2956,7 +3043,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, is_log) { *log = B_TRUE; } - return (ret); + return (ret && return_parent && !is_root ? nv : ret); } } @@ -2964,9 +3051,11 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, &child, &children) == 0) { for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, - avail_spare, l2cache, NULL)) != NULL) { + avail_spare, l2cache, NULL, return_parent)) + != NULL) { *avail_spare = B_TRUE; - return (ret); + return (ret && return_parent && + !is_root ? nv : ret); } } } @@ -2975,9 +3064,11 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, &child, &children) == 0) { for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, - avail_spare, l2cache, NULL)) != NULL) { + avail_spare, l2cache, NULL, return_parent)) + != NULL) { *l2cache = B_TRUE; - return (ret); + return (ret && return_parent && + !is_root ? nv : ret); } } } @@ -3012,7 +3103,8 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; - ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log, + B_FALSE); fnvlist_free(search); return (ret); @@ -3040,11 +3132,12 @@ zpool_vdev_is_interior(const char *name) } /* - * Lookup the nvlist for a given vdev. + * Lookup the nvlist for a given vdev or vdev's parent (depending on + * if 'return_parent' is set). 
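 *
 * Illustrative sketch (hypothetical caller of the wrappers defined below;
 * "da1" is an example device name):
 *
 *	boolean_t spare, l2c, log;
 *	nvlist_t *parent = zpool_find_parent_vdev(zhp, "da1",
 *	    &spare, &l2c, &log);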
*/ -nvlist_t * -zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, - boolean_t *l2cache, boolean_t *log) +static nvlist_t * +__zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, + boolean_t *l2cache, boolean_t *log, boolean_t return_parent) { char *end; nvlist_t *nvroot, *search, *ret; @@ -3081,12 +3174,30 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; - ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log, + return_parent); fnvlist_free(search); return (ret); } +nvlist_t * +zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, + boolean_t *l2cache, boolean_t *log) +{ + return (__zpool_find_vdev(zhp, path, avail_spare, l2cache, log, + B_FALSE)); +} + +/* Given a vdev path, return its parent's nvlist */ +nvlist_t * +zpool_find_parent_vdev(zpool_handle_t *zhp, const char *path, + boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) +{ + return (__zpool_find_vdev(zhp, path, avail_spare, l2cache, log, + B_TRUE)); +} + /* * Convert a vdev path to a GUID. Returns GUID or 0 on error. * @@ -4382,6 +4493,14 @@ zbookmark_mem_compare(const void *a, const void *b) return (memcmp(a, b, sizeof (zbookmark_phys_t))); } +void +zpool_add_propname(zpool_handle_t *zhp, const char *propname) +{ + assert(zhp->zpool_n_propnames < ZHP_MAX_PROPNAMES); + zhp->zpool_propnames[zhp->zpool_n_propnames] = propname; + zhp->zpool_n_propnames++; +} + /* * Retrieve the persistent error log, uniquify the members, and return to the * caller. @@ -5225,6 +5344,8 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, case VDEV_PROP_WRITE_ERRORS: case VDEV_PROP_CHECKSUM_ERRORS: case VDEV_PROP_INITIALIZE_ERRORS: + case VDEV_PROP_TRIM_ERRORS: + case VDEV_PROP_SLOW_IOS: case VDEV_PROP_OPS_NULL: case VDEV_PROP_OPS_READ: case VDEV_PROP_OPS_WRITE: @@ -5304,6 +5425,11 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, src = fnvlist_lookup_uint64(nv, ZPROP_SOURCE); intval = fnvlist_lookup_uint64(nv, ZPROP_VALUE); } else { + /* 'trim_support' only valid for leaf vdevs */ + if (prop == VDEV_PROP_TRIM_SUPPORT) { + (void) strlcpy(buf, "-", len); + break; + } src = ZPROP_SRC_DEFAULT; intval = vdev_prop_default_numeric(prop); /* Only use if provided by the RAIDZ VDEV above */ diff --git a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_util.c index 5db94ce6503d..8b6073564b48 100644 --- a/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_util.c +++ b/sys/contrib/subrepo-openzfs/lib/libzfs/libzfs_util.c @@ -68,6 +68,7 @@ * as necessary. */ #define URI_REGEX "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):" +#define STR_NUMS "0123456789" int libzfs_errno(libzfs_handle_t *hdl) @@ -1267,6 +1268,14 @@ zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) * ================================================================ */ +void +zcmd_print_json(nvlist_t *nvl) +{ + nvlist_print_json(stdout, nvl); + (void) putchar('\n'); + nvlist_free(nvl); +} + static void zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) { @@ -1393,6 +1402,103 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) (void) printf("\n"); } +/* + * Add property value and source to provided nvlist, according to + * settings in cb structure. Later to be printed in JSON format. 
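+ *
+ * Abridged illustration: for a property such as "mountpoint" this builds
+ * an entry of the shape
+ *
+ *	"mountpoint": { "value": "/pool",
+ *	    "source": { "type": "DEFAULT", "data": "-" } }
+ *
+ * matching the JSON examples added to the man pages later in this patch.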
+ */ +int +zprop_nvlist_one_property(const char *propname, + const char *value, zprop_source_t sourcetype, const char *source, + const char *recvd_value, nvlist_t *nvl, boolean_t as_int) +{ + int ret = 0; + nvlist_t *src_nv, *prop; + boolean_t all_numeric = strspn(value, STR_NUMS) == strlen(value); + src_nv = prop = NULL; + + if ((nvlist_alloc(&prop, NV_UNIQUE_NAME, 0) != 0) || + (nvlist_alloc(&src_nv, NV_UNIQUE_NAME, 0) != 0)) { + ret = -1; + goto err; + } + + if (as_int && all_numeric) { + uint64_t val; + sscanf(value, "%lld", (u_longlong_t *)&val); + if (nvlist_add_uint64(prop, "value", val) != 0) { + ret = -1; + goto err; + } + } else { + if (nvlist_add_string(prop, "value", value) != 0) { + ret = -1; + goto err; + } + } + + switch (sourcetype) { + case ZPROP_SRC_NONE: + if (nvlist_add_string(src_nv, "type", "NONE") != 0 || + (nvlist_add_string(src_nv, "data", "-") != 0)) { + ret = -1; + goto err; + } + break; + case ZPROP_SRC_DEFAULT: + if (nvlist_add_string(src_nv, "type", "DEFAULT") != 0 || + (nvlist_add_string(src_nv, "data", "-") != 0)) { + ret = -1; + goto err; + } + break; + case ZPROP_SRC_LOCAL: + if (nvlist_add_string(src_nv, "type", "LOCAL") != 0 || + (nvlist_add_string(src_nv, "data", "-") != 0)) { + ret = -1; + goto err; + } + break; + case ZPROP_SRC_TEMPORARY: + if (nvlist_add_string(src_nv, "type", "TEMPORARY") != 0 || + (nvlist_add_string(src_nv, "data", "-") != 0)) { + ret = -1; + goto err; + } + break; + case ZPROP_SRC_INHERITED: + if (nvlist_add_string(src_nv, "type", "INHERITED") != 0 || + (nvlist_add_string(src_nv, "data", source) != 0)) { + ret = -1; + goto err; + } + break; + case ZPROP_SRC_RECEIVED: + if (nvlist_add_string(src_nv, "type", "RECEIVED") != 0 || + (nvlist_add_string(src_nv, "data", + (recvd_value == NULL ? "-" : recvd_value)) != 0)) { + ret = -1; + goto err; + } + break; + default: + assert(!"unhandled zprop_source_t"); + if (nvlist_add_string(src_nv, "type", + "unhandled zprop_source_t") != 0) { + ret = -1; + goto err; + } + } + if ((nvlist_add_nvlist(prop, "source", src_nv) != 0) || + (nvlist_add_nvlist(nvl, propname, prop)) != 0) { + ret = -1; + goto err; + } +err: + nvlist_free(src_nv); + nvlist_free(prop); + return (ret); +} + /* * Display a single line of output, according to the settings in the callback * structure. @@ -1484,6 +1590,26 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, (void) printf("\n"); } +int +zprop_collect_property(const char *name, zprop_get_cbdata_t *cbp, + const char *propname, const char *value, zprop_source_t sourcetype, + const char *source, const char *recvd_value, nvlist_t *nvl) +{ + if (cbp->cb_json) { + if ((sourcetype & cbp->cb_sources) == 0) + return (0); + else { + return (zprop_nvlist_one_property(propname, value, + sourcetype, source, recvd_value, nvl, + cbp->cb_json_as_int)); + } + } else { + zprop_print_one_property(name, cbp, + propname, value, sourcetype, source, recvd_value); + return (0); + } +} + /* * Given a numeric suffix, convert the value into a number of bits that the * resulting value must be shifted. @@ -1691,6 +1817,16 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, "use 'none' to disable quota/refquota")); goto error; } + /* + * Pool dedup table quota; force use of 'none' instead of 0 + */ + if ((type & ZFS_TYPE_POOL) && *ivalp == 0 && + (!isnone && !isauto) && + prop == ZPOOL_PROP_DEDUP_TABLE_QUOTA) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "use 'none' to disable ddt table quota")); + goto error; + } /* * Special handling for "*_limit=none". 
In this case it's not @@ -1732,6 +1868,10 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, } *ivalp = UINT64_MAX; break; + case ZPOOL_PROP_DEDUP_TABLE_QUOTA: + ASSERT(type & ZFS_TYPE_POOL); + *ivalp = UINT64_MAX; + break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'auto' is invalid value for '%s'"), @@ -1985,6 +2125,34 @@ zfs_version_print(void) return (0); } +/* + * Returns an nvlist with both zfs userland and kernel versions. + * Returns NULL on error. + */ +nvlist_t * +zfs_version_nvlist(void) +{ + nvlist_t *nvl; + char kmod_ver[64]; + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + return (NULL); + if (nvlist_add_string(nvl, "userland", ZFS_META_ALIAS) != 0) + goto err; + char *kver = zfs_version_kernel(); + if (kver == NULL) { + fprintf(stderr, "zfs_version_kernel() failed: %s\n", + zfs_strerror(errno)); + goto err; + } + (void) snprintf(kmod_ver, 64, "zfs-kmod-%s", kver); + if (nvlist_add_string(nvl, "kernel", kmod_ver) != 0) + goto err; + return (nvl); +err: + nvlist_free(nvl); + return (NULL); +} + /* * Return 1 if the user requested ANSI color output, and our terminal supports * it. Return 0 for no color. diff --git a/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.abi b/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.abi index c20698580ee7..1062a6b52dff 100644 --- a/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.abi +++ b/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.abi [XML ABI hunks omitted: element markup not recoverable] diff --git a/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.c b/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.c index ad8fe45b9ce5..8d3b268825cb 100644 --- a/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.c +++ b/sys/contrib/subrepo-openzfs/lib/libzfs_core/libzfs_core.c @@ -1628,6 +1628,26 @@ lzc_pool_checkpoint_discard(const char *pool) return (error); } +/* + * Load the requested data type for the specified pool. + */ +int +lzc_pool_prefetch(const char *pool, zpool_prefetch_type_t type) +{ + int error; + nvlist_t *result = NULL; + nvlist_t *args = fnvlist_alloc(); + + fnvlist_add_int32(args, ZPOOL_PREFETCH_TYPE, type); + + error = lzc_ioctl(ZFS_IOC_POOL_PREFETCH, pool, args, &result); + + fnvlist_free(args); + fnvlist_free(result); + + return (error); +} + /* * Executes a read-only channel program. * diff --git a/sys/contrib/subrepo-openzfs/man/Makefile.am b/sys/contrib/subrepo-openzfs/man/Makefile.am index 43bb014ddd32..194bb4721619 100644 --- a/sys/contrib/subrepo-openzfs/man/Makefile.am +++ b/sys/contrib/subrepo-openzfs/man/Makefile.am @@ -83,6 +83,7 @@ dist_man_MANS = \ %D%/man8/zpool-list.8 \ %D%/man8/zpool-offline.8 \ %D%/man8/zpool-online.8 \ + %D%/man8/zpool-prefetch.8 \ %D%/man8/zpool-reguid.8 \ %D%/man8/zpool-remove.8 \ %D%/man8/zpool-reopen.8 \ diff --git a/sys/contrib/subrepo-openzfs/man/man4/zfs.4 b/sys/contrib/subrepo-openzfs/man/man4/zfs.4 index 3f7485fa78ca..45b6c338aa9e 100644 --- a/sys/contrib/subrepo-openzfs/man/man4/zfs.4 +++ b/sys/contrib/subrepo-openzfs/man/man4/zfs.4 @@ -831,6 +831,13 @@ even with a small average compressed block size of ~8 KiB. The parameter can be set to 0 (zero) to disable the limit, and only applies on Linux. . +.It Sy zfs_arc_shrinker_seeks Ns = Ns Sy 2 Pq int +Relative cost of ARC eviction on Linux, AKA number of seeks needed to +restore an evicted page.
+Bigger values make ARC more precious and evictions smaller, compared to +other kernel subsystems. +A value of 4 means parity with the page cache. +. .It Sy zfs_arc_sys_free Ns = Ns Sy 0 Ns B Pq u64 The target number of bytes the ARC should leave as free memory on the system. If zero, equivalent to the bigger of diff --git a/sys/contrib/subrepo-openzfs/man/man7/vdevprops.7 b/sys/contrib/subrepo-openzfs/man/man7/vdevprops.7 index 5ec37df179de..34d4026b1009 100644 --- a/sys/contrib/subrepo-openzfs/man/man7/vdevprops.7 +++ b/sys/contrib/subrepo-openzfs/man/man7/vdevprops.7 @@ -102,8 +102,14 @@ Parent of this vdev Comma separated list of children of this vdev .It Sy numchildren The number of children belonging to this vdev -.It Sy read_errors , write_errors , checksum_errors , initialize_errors +.It Sy read_errors , write_errors , checksum_errors , initialize_errors , trim_errors The number of errors of each type encountered by this vdev +.It Sy slow_ios +The number of slow I/Os encountered by this vdev. +These represent I/O operations that did not complete in +.Sy zio_slow_io_ms +milliseconds +.Pq Sy 30000 No by default . .It Sy null_ops , read_ops , write_ops , free_ops , claim_ops , trim_ops The number of I/O operations of each type performed by this vdev .It Xo @@ -113,6 +119,8 @@ The number of I/O operations of each type performed by this vdev The cumulative size of all operations of each type performed by this vdev .It Sy removing If this device is currently being removed from the pool +.It Sy trim_support +Indicates whether a leaf device supports trim operations. .El .Pp The following native properties can be used to change the behavior of a vdev. diff --git a/sys/contrib/subrepo-openzfs/man/man7/zfsprops.7 b/sys/contrib/subrepo-openzfs/man/man7/zfsprops.7 index 429369bd2e9e..f7026119b730 100644 --- a/sys/contrib/subrepo-openzfs/man/man7/zfsprops.7 +++ b/sys/contrib/subrepo-openzfs/man/man7/zfsprops.7 @@ -38,7 +38,7 @@ .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP. .\" -.Dd August 8, 2023 +.Dd June 29, 2024 .Dt ZFSPROPS 7 .Os . @@ -1727,6 +1727,18 @@ Please note that the options are comma-separated, unlike those found in This is done to negate the need for quoting, as well as to make parsing with scripts easier. .Pp +For +.Fx , +there may be multiple sets of options separated by semicolons. +Each set of options must apply to different hosts or networks, and each +set of options will create a separate line for +.Xr exports 5 . +Any semicolon-separated option set that consists entirely of whitespace +will be ignored. +This use of semicolons is only for +.Fx +at this time. +.Pp See .Xr exports 5 for the meaning of the default options. diff --git a/sys/contrib/subrepo-openzfs/man/man7/zpoolprops.7 b/sys/contrib/subrepo-openzfs/man/man7/zpoolprops.7 index 5428ab8d3076..f4fcc620e4d9 100644 --- a/sys/contrib/subrepo-openzfs/man/man7/zpoolprops.7 +++ b/sys/contrib/subrepo-openzfs/man/man7/zpoolprops.7 @@ -28,7 +28,7 @@ .\" Copyright (c) 2021, Colm Buckley .\" Copyright (c) 2023, Klara Inc. .\" -.Dd January 2, 2024 +.Dd July 29, 2024 .Dt ZPOOLPROPS 7 .Os . @@ -73,6 +73,12 @@ The amount of storage used by cloned blocks. Percentage of pool space used. This property can also be referred to by its shortened column name, .Sy cap . +.It Sy dedupcached +Total size of the deduplication table currently loaded into the ARC. +See +.Xr zpool-prefetch 8 . +.It Sy dedup_table_size +Total on-disk size of the deduplication table.
.It Sy expandsize Amount of uninitialized space within the pool or device that can be used to increase the total capacity of the pool. @@ -348,6 +354,27 @@ See and .Xr zpool-upgrade 8 for more information on the operation of compatibility feature sets. +.It Sy dedup_table_quota Ns = Ns Ar number Ns | Ns Sy none Ns | Ns Sy auto +This property sets a limit on the on-disk size of the pool's dedup table. +Entries will not be added to the dedup table once this size is reached; +if a dedup table already exists and is larger than this size, entries +already present will not be removed as part of setting this property. +Existing entries will still have their reference counts updated. +.Pp +The actual size limit of the table may be above or below the quota, +depending on the actual on-disk size of the entries (which may be +approximated for purposes of calculating the quota). +That is, setting a quota size of 1M may result in the maximum size being +slightly below, or slightly above, that value. +Set to +.Sy none +to disable. +In automatic mode, which is the default, the size of a dedicated dedup vdev +is used as the quota limit. +.Pp +The +.Sy dedup_table_quota +property works for both legacy and fast dedup tables. .It Sy dedupditto Ns = Ns Ar number This property is deprecated and no longer has any effect. .It Sy delegation Ns = Ns Sy on Ns | Ns Sy off @@ -461,7 +488,7 @@ The expected convention is that the property name is divided into two portions such as .Ar module : Ns Ar property , but this namespace is not enforced by ZFS. -User property names can be at most 256 characters, and cannot begin with a dash +User property names can be at most 255 characters, and cannot begin with a dash .Pq Qq Sy - . .Pp When making programmatic use of user properties, it is strongly suggested to use diff --git a/sys/contrib/subrepo-openzfs/man/man8/zfs-list.8 b/sys/contrib/subrepo-openzfs/man/man8/zfs-list.8 index 85bd3fbafced..b49def08b72b 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zfs-list.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zfs-list.8 @@ -41,6 +41,7 @@ .Cm list .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp +.Op Fl j Op Ar --json-int .Oo Fl o Ar property Ns Oo , Ns Ar property Oc Ns … Oc .Oo Fl s Ar property Oc Ns … .Oo Fl S Ar property Oc Ns … @@ -70,6 +71,11 @@ The following fields are displayed: Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space. +.It Fl j Op Ar --json-int +Print the output in JSON format. +Specify +.Sy --json-int +to print the numbers in integer format instead of strings in JSON output. .It Fl d Ar depth Recursively display any children of the dataset, limiting the recursion to .Ar depth .
@@ -186,6 +192,161 @@ pool/home 315K 457G 21K /export/home pool/home/anne 18K 457G 18K /export/home/anne pool/home/bob 276K 457G 276K /export/home/bob .Ed +.Ss Example 2 : No Listing ZFS filesystems and snapshots in JSON format +.Bd -literal -compact -offset Ds +.No # Nm zfs Cm list Fl j Fl t Ar filesystem,snapshot | Cm jq +{ + "output_version": { + "command": "zfs list", + "vers_major": 0, + "vers_minor": 1 + }, + "datasets": { + "pool": { + "name": "pool", + "type": "FILESYSTEM", + "pool": "pool", + "properties": { + "used": { + "value": "290K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "available": { + "value": "30.5G", + "source": { + "type": "NONE", + "data": "-" + } + }, + "referenced": { + "value": "24K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "mountpoint": { + "value": "/pool", + "source": { + "type": "DEFAULT", + "data": "-" + } + } + } + }, + "pool/home": { + "name": "pool/home", + "type": "FILESYSTEM", + "pool": "pool", + "properties": { + "used": { + "value": "48K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "available": { + "value": "30.5G", + "source": { + "type": "NONE", + "data": "-" + } + }, + "referenced": { + "value": "24K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "mountpoint": { + "value": "/mnt/home", + "source": { + "type": "LOCAL", + "data": "-" + } + } + } + }, + "pool/home/bob": { + "name": "pool/home/bob", + "type": "FILESYSTEM", + "pool": "pool", + "properties": { + "used": { + "value": "24K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "available": { + "value": "30.5G", + "source": { + "type": "NONE", + "data": "-" + } + }, + "referenced": { + "value": "24K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "mountpoint": { + "value": "/mnt/home/bob", + "source": { + "type": "INHERITED", + "data": "pool/home" + } + } + } + }, + "pool/home/bob@v1": { + "name": "pool/home/bob@v1", + "type": "SNAPSHOT", + "pool": "pool", + "dataset": "pool/home/bob", + "snapshot_name": "v1", + "properties": { + "used": { + "value": "0B", + "source": { + "type": "NONE", + "data": "-" + } + }, + "available": { + "value": "-", + "source": { + "type": "NONE", + "data": "-" + } + }, + "referenced": { + "value": "24K", + "source": { + "type": "NONE", + "data": "-" + } + }, + "mountpoint": { + "value": "-", + "source": { + "type": "NONE", + "data": "-" + } + } + } + } + } +} +.Ed . .Sh SEE ALSO .Xr zfsprops 7 , diff --git a/sys/contrib/subrepo-openzfs/man/man8/zfs-mount.8 b/sys/contrib/subrepo-openzfs/man/man8/zfs-mount.8 index 20dbe4d0e648..6116fbaab77f 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zfs-mount.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zfs-mount.8 @@ -39,6 +39,7 @@ .Sh SYNOPSIS .Nm zfs .Cm mount +.Op Fl j .Nm zfs .Cm mount .Op Fl Oflv @@ -54,8 +55,13 @@ .It Xo .Nm zfs .Cm mount +.Op Fl j .Xc Displays all ZFS file systems currently mounted. +.Bl -tag -width "-j" +.It Fl j +Displays all mounted file systems in JSON format. +.El .It Xo .Nm zfs .Cm mount diff --git a/sys/contrib/subrepo-openzfs/man/man8/zfs-set.8 b/sys/contrib/subrepo-openzfs/man/man8/zfs-set.8 index 8cc19caf3f00..204450d72ec9 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zfs-set.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zfs-set.8 @@ -46,6 +46,7 @@ .Cm get .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp +.Op Fl j Op Ar --json-int .Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc .Oo Fl s Ar source Ns Oo , Ns Ar source Oc Ns … Oc .Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc @@ -91,6 +92,7 @@ dataset. 
.Cm get .Op Fl r Ns | Ns Fl d Ar depth .Op Fl Hp +.Op Fl j Op Ar --json-int .Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc .Oo Fl s Ar source Ns Oo , Ns Ar source Oc Ns … Oc .Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc @@ -128,6 +130,11 @@ The value can be used to display all properties that apply to the given dataset's type .Pq Sy filesystem , volume , snapshot , No or Sy bookmark . .Bl -tag -width "-s source" +.It Fl j Op Ar --json-int +Display the output in JSON format. +Specify +.Sy --json-int +to display numbers in integer format instead of strings for JSON output. .It Fl H Display output in a form more easily parsed by scripts. Any headers are omitted, and fields are explicitly separated by a single tab @@ -283,6 +290,50 @@ The following command gets a single property value: on .Ed .Pp +The following command gets a single property value recursively in JSON format: +.Bd -literal -compact -offset Ds +.No # Nm zfs Cm get Fl j Fl r Sy mountpoint Ar pool/home | Nm jq +{ + "output_version": { + "command": "zfs get", + "vers_major": 0, + "vers_minor": 1 + }, + "datasets": { + "pool/home": { + "name": "pool/home", + "type": "FILESYSTEM", + "pool": "pool", + "createtxg": "10", + "properties": { + "mountpoint": { + "value": "/pool/home", + "source": { + "type": "DEFAULT", + "data": "-" + } + } + } + }, + "pool/home/bob": { + "name": "pool/home/bob", + "type": "FILESYSTEM", + "pool": "pool", + "createtxg": "1176", + "properties": { + "mountpoint": { + "value": "/pool/home/bob", + "source": { + "type": "DEFAULT", + "data": "-" + } + } + } + } + } +} +.Ed +.Pp The following command lists all properties with local settings for .Ar pool/home/bob : .Bd -literal -compact -offset Ds diff --git a/sys/contrib/subrepo-openzfs/man/man8/zfs.8 b/sys/contrib/subrepo-openzfs/man/man8/zfs.8 index dd578cb74aac..2ee15ab21806 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zfs.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zfs.8 @@ -48,6 +48,7 @@ .Fl ?V .Nm .Cm version +.Op Fl j .Nm .Cm subcommand .Op Ar arguments @@ -153,10 +154,14 @@ Displays a help message. .It Xo .Nm .Cm version +.Op Fl j .Xc Displays the software version of the .Nm userland utility and the zfs kernel module. +Use +.Fl j +option to output in JSON format. .El . .Ss Dataset Management diff --git a/sys/contrib/subrepo-openzfs/man/man8/zpool-get.8 b/sys/contrib/subrepo-openzfs/man/man8/zpool-get.8 index 78a39b07d749..5384906f17f2 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zpool-get.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zpool-get.8 @@ -37,6 +37,7 @@ .Nm zpool .Cm get .Op Fl Hp +.Op Fl j Op Ar --json-int, --json-pool-key-guid .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … .Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … .Oo Ar pool Oc Ns … @@ -44,6 +45,7 @@ .Nm zpool .Cm get .Op Fl Hp +.Op Fl j Op Ar --json-int .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … .Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … .Ar pool @@ -67,6 +69,7 @@ .Nm zpool .Cm get .Op Fl Hp +.Op Fl j Op Ar --json-int, --json-pool-key-guid .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … .Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … .Oo Ar pool Oc Ns … @@ -95,6 +98,14 @@ See the .Xr zpoolprops 7 manual page for more information on the available pool properties. .Bl -tag -compact -offset Ds -width "-o field" +.It Fl j Op Ar --json-int, --json-pool-key-guid +Display the list of properties in JSON format. +Specify +.Sy --json-int +to display the numbers in integer format instead of strings in JSON output. 
+Specify +.Sy --json-pool-key-guid +to set pool GUID as key for pool objects instead of pool names. .It Fl H Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary @@ -108,6 +119,7 @@ Display numbers in parsable (exact) values. .It Xo .Nm zpool .Cm get +.Op Fl j Op Ar --json-int .Op Fl Hp .Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns … .Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns … @@ -145,6 +157,11 @@ See the .Xr vdevprops 7 manual page for more information on the available pool properties. .Bl -tag -compact -offset Ds -width "-o field" +.It Fl j Op Ar --json-int +Display the list of properties in JSON format. +Specify +.Sy --json-int +to display the numbers in integer format instead of strings in JSON output. .It Fl H Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary diff --git a/sys/contrib/subrepo-openzfs/man/man8/zpool-list.8 b/sys/contrib/subrepo-openzfs/man/man8/zpool-list.8 index c60c47f5eb3d..b0ee659701d4 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zpool-list.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zpool-list.8 @@ -37,6 +37,7 @@ .Nm zpool .Cm list .Op Fl HgLpPv +.Op Fl j Op Ar --json-int, --json-pool-key-guid .Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns … .Op Fl T Sy u Ns | Ns Sy d .Oo Ar pool Oc Ns … @@ -58,6 +59,14 @@ is specified, the command exits after .Ar count reports are printed. .Bl -tag -width Ds +.It Fl j Op Ar --json-int, --json-pool-key-guid +Display the list of pools in JSON format. +Specify +.Sy --json-int +to display the numbers in integer format instead of strings. +Specify +.Sy --json-pool-key-guid +to set pool GUID as key for pool objects instead of pool names. .It Fl g Display vdev GUIDs instead of the normal device names. These GUIDs can be used in place of device names for the zpool @@ -139,6 +148,104 @@ data 23.9G 14.6G 9.30G - 48% 61% 1.00x ONLINE - sda - - - - - sdb - - - 10G - sdc - - - - - +.Ed +. +.Ss Example 3 : No Listing available pools in JSON format +The following command lists all available pools on the system in JSON +format. +.Bd -literal -compact -offset Ds +.No # Nm zpool Cm list Fl j | Nm jq +{ + "output_version": { + "command": "zpool list", + "vers_major": 0, + "vers_minor": 1 + }, + "pools": { + "tank": { + "name": "tank", + "type": "POOL", + "state": "ONLINE", + "guid": "15220353080205405147", + "txg": "2671", + "spa_version": "5000", + "zpl_version": "5", + "properties": { + "size": { + "value": "111G", + "source": { + "type": "NONE", + "data": "-" + } + }, + "allocated": { + "value": "30.8G", + "source": { + "type": "NONE", + "data": "-" + } + }, + "free": { + "value": "80.2G", + "source": { + "type": "NONE", + "data": "-" + } + }, + "checkpoint": { + "value": "-", + "source": { + "type": "NONE", + "data": "-" + } + }, + "expandsize": { + "value": "-", + "source": { + "type": "NONE", + "data": "-" + } + }, + "fragmentation": { + "value": "0%", + "source": { + "type": "NONE", + "data": "-" + } + }, + "capacity": { + "value": "27%", + "source": { + "type": "NONE", + "data": "-" + } + }, + "dedupratio": { + "value": "1.00x", + "source": { + "type": "NONE", + "data": "-" + } + }, + "health": { + "value": "ONLINE", + "source": { + "type": "NONE", + "data": "-" + } + }, + "altroot": { + "value": "-", + "source": { + "type": "DEFAULT", + "data": "-" + } + } + } + } + } +} + .Ed .
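Because this JSON output is intended for scripting, a consumer can capture it directly. A minimal C sketch (illustrative only, not part of the patch; it assumes zpool(8) is on the PATH, and a real consumer would hand the text to a JSON parser instead of echoing it):

#include <stdio.h>

int
main(void)
{
	/* Capture `zpool list -j` and echo it verbatim. */
	FILE *p = popen("zpool list -j", "r");
	char line[1024];

	if (p == NULL)
		return (1);
	while (fgets(line, sizeof (line), p) != NULL)
		(void) fputs(line, stdout);
	return (pclose(p) == 0 ? 0 : 1);
}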
 .Sh SEE ALSO
diff --git a/sys/contrib/subrepo-openzfs/man/man8/zpool-prefetch.8 b/sys/contrib/subrepo-openzfs/man/man8/zpool-prefetch.8
new file mode 100644
index 000000000000..57445bd4a655
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/man/man8/zpool-prefetch.8
@@ -0,0 +1,46 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\"
+.\" Copyright (c) 2023, Klara Inc.
+.\"
+.Dd February 14, 2024
+.Dt ZPOOL-PREFETCH 8
+.Os
+.
+.Sh NAME
+.Nm zpool-prefetch
+.Nd load specific types of data for the given pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm prefetch
+.Fl t Ar type
+.Ar pool
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm zpool
+.Cm prefetch
+.Fl t Li ddt
+.Ar pool
+.Xc
+Prefetch data of a specific type for the given pool; specifically the DDT,
+which will improve write I/O performance when the DDT is resident in the ARC.
+.El
diff --git a/sys/contrib/subrepo-openzfs/man/man8/zpool-status.8 b/sys/contrib/subrepo-openzfs/man/man8/zpool-status.8
index bbe7a45aa0c6..b40faeb9977f 100644
--- a/sys/contrib/subrepo-openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/subrepo-openzfs/man/man8/zpool-status.8
@@ -26,7 +26,7 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd March 16, 2022
+.Dd February 14, 2024
 .Dt ZPOOL-STATUS 8
 .Os
 .
@@ -41,6 +41,7 @@
 .Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns …
+.Op Fl j Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid
 .Oo Ar pool Oc Ns …
 .Op Ar interval Op Ar count
 .
 .Sh DESCRIPTION
 Displays the detailed health status for the given pools.
@@ -69,12 +70,25 @@ See the
 option of
 .Nm zpool Cm iostat
 for complete details.
+.It Fl j Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid
+Display the status for ZFS pools in JSON format.
+Specify
+.Sy --json-int
+to display numbers in integer format instead of strings.
+Specify
+.Sy --json-flat-vdevs
+to display vdevs in flat hierarchy instead of nested vdev objects.
+Specify
+.Sy --json-pool-key-guid
+to set pool GUID as key for pool objects instead of pool names.
 .It Fl D
 Display a histogram of deduplication statistics, showing the allocated
 .Pq physically present on disk
 and referenced
 .Pq logically referenced in the pool
 block counts and sizes by reference count.
+If repeated (-DD), also shows statistics on how much of the DDT is resident
+in the ARC.
 .It Fl e
 Only show unhealthy vdevs (not-ONLINE or with errors).
 .It Fl g
@@ -159,6 +173,175 @@ rpool 14.6G 54.9G 4 55 250K 2.69M
 ---------- ----- ----- ----- ----- ----- ----- ----
 .Ed
 .
+.Ss Example 2 : No Display the status output in JSON format
+.Nm zpool Cm status No can output in JSON format if
+.Fl j
+is specified.
+.Fl c
+can be used to run a script on each VDEV.
+.Bd -literal -compact -offset Ds +.No # Nm zpool Cm status Fl j Fl c Pa vendor , Ns Pa model , Ns Pa size | Nm jq +{ + "output_version": { + "command": "zpool status", + "vers_major": 0, + "vers_minor": 1 + }, + "pools": { + "tank": { + "name": "tank", + "state": "ONLINE", + "guid": "3920273586464696295", + "txg": "16597", + "spa_version": "5000", + "zpl_version": "5", + "status": "OK", + "vdevs": { + "tank": { + "name": "tank", + "alloc_space": "62.6G", + "total_space": "15.0T", + "def_space": "11.3T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vdevs": { + "raidz1-0": { + "name": "raidz1-0", + "vdev_type": "raidz", + "guid": "763132626387621737", + "state": "HEALTHY", + "alloc_space": "62.5G", + "total_space": "10.9T", + "def_space": "7.26T", + "rep_dev_size": "10.9T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vdevs": { + "ca1eb824-c371-491d-ac13-37637e35c683": { + "name": "ca1eb824-c371-491d-ac13-37637e35c683", + "vdev_type": "disk", + "guid": "12841765308123764671", + "path": "/dev/disk/by-partuuid/ca1eb824-c371-491d-ac13-37637e35c683", + "state": "HEALTHY", + "rep_dev_size": "3.64T", + "phys_space": "3.64T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vendor": "ATA", + "model": "WDC WD40EFZX-68AWUN0", + "size": "3.6T" + }, + "97cd98fb-8fb8-4ac4-bc84-bd8950a7ace7": { + "name": "97cd98fb-8fb8-4ac4-bc84-bd8950a7ace7", + "vdev_type": "disk", + "guid": "1527839927278881561", + "path": "/dev/disk/by-partuuid/97cd98fb-8fb8-4ac4-bc84-bd8950a7ace7", + "state": "HEALTHY", + "rep_dev_size": "3.64T", + "phys_space": "3.64T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vendor": "ATA", + "model": "WDC WD40EFZX-68AWUN0", + "size": "3.6T" + }, + "e9ddba5f-f948-4734-a472-cb8aa5f0ff65": { + "name": "e9ddba5f-f948-4734-a472-cb8aa5f0ff65", + "vdev_type": "disk", + "guid": "6982750226085199860", + "path": "/dev/disk/by-partuuid/e9ddba5f-f948-4734-a472-cb8aa5f0ff65", + "state": "HEALTHY", + "rep_dev_size": "3.64T", + "phys_space": "3.64T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vendor": "ATA", + "model": "WDC WD40EFZX-68AWUN0", + "size": "3.6T" + } + } + } + } + } + }, + "dedup": { + "mirror-2": { + "name": "mirror-2", + "vdev_type": "mirror", + "guid": "2227766268377771003", + "state": "HEALTHY", + "alloc_space": "89.1M", + "total_space": "3.62T", + "def_space": "3.62T", + "rep_dev_size": "3.62T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vdevs": { + "db017360-d8e9-4163-961b-144ca75293a3": { + "name": "db017360-d8e9-4163-961b-144ca75293a3", + "vdev_type": "disk", + "guid": "17880913061695450307", + "path": "/dev/disk/by-partuuid/db017360-d8e9-4163-961b-144ca75293a3", + "state": "HEALTHY", + "rep_dev_size": "3.63T", + "phys_space": "3.64T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vendor": "ATA", + "model": "WDC WD40EFZX-68AWUN0", + "size": "3.6T" + }, + "952c3baf-b08a-4a8c-b7fa-33a07af5fe6f": { + "name": "952c3baf-b08a-4a8c-b7fa-33a07af5fe6f", + "vdev_type": "disk", + "guid": "10276374011610020557", + "path": "/dev/disk/by-partuuid/952c3baf-b08a-4a8c-b7fa-33a07af5fe6f", + "state": "HEALTHY", + "rep_dev_size": "3.63T", + "phys_space": "3.64T", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vendor": "ATA", + "model": "WDC WD40EFZX-68AWUN0", + "size": "3.6T" + } + } + } + }, + "special": { + "25d418f8-92bd-4327-b59f-7ef5d5f50d81": { + "name": 
"25d418f8-92bd-4327-b59f-7ef5d5f50d81", + "vdev_type": "disk", + "guid": "3935742873387713123", + "path": "/dev/disk/by-partuuid/25d418f8-92bd-4327-b59f-7ef5d5f50d81", + "state": "HEALTHY", + "alloc_space": "37.4M", + "total_space": "444G", + "def_space": "444G", + "rep_dev_size": "444G", + "phys_space": "447G", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0", + "vendor": "ATA", + "model": "Micron_5300_MTFDDAK480TDS", + "size": "447.1G" + } + }, + "error_count": "0" + } + } +} +.Ed +. .Sh SEE ALSO .Xr zpool-events 8 , .Xr zpool-history 8 , diff --git a/sys/contrib/subrepo-openzfs/man/man8/zpool.8 b/sys/contrib/subrepo-openzfs/man/man8/zpool.8 index fe44e15cabe1..c55644d9ecea 100644 --- a/sys/contrib/subrepo-openzfs/man/man8/zpool.8 +++ b/sys/contrib/subrepo-openzfs/man/man8/zpool.8 @@ -26,7 +26,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd March 16, 2022 +.Dd February 14, 2024 .Dt ZPOOL 8 .Os . @@ -38,6 +38,7 @@ .Fl ?V .Nm .Cm version +.Op Fl j .Nm .Cm subcommand .Op Ar arguments @@ -79,10 +80,14 @@ Displays a help message. .It Xo .Nm .Cm version +.Op Fl j .Xc Displays the software version of the .Nm userland utility and the ZFS kernel module. +Use +.Fl j +option to output in JSON format. .El . .Ss Creation @@ -168,6 +173,8 @@ specified. . .Ss Maintenance .Bl -tag -width Ds +.It Xr zpool-prefetch 8 +Prefetches specific types of pool data. .It Xr zpool-scrub 8 Begins a scrub or resumes a paused scrub. .It Xr zpool-checkpoint 8 @@ -598,6 +605,7 @@ don't wait. .Xr zpool-list 8 , .Xr zpool-offline 8 , .Xr zpool-online 8 , +.Xr zpool-prefetch 8 , .Xr zpool-reguid 8 , .Xr zpool-remove 8 , .Xr zpool-reopen 8 , diff --git a/sys/contrib/subrepo-openzfs/module/Kbuild.in b/sys/contrib/subrepo-openzfs/module/Kbuild.in index 4707452711f7..4f48cb9da0c1 100644 --- a/sys/contrib/subrepo-openzfs/module/Kbuild.in +++ b/sys/contrib/subrepo-openzfs/module/Kbuild.in @@ -491,6 +491,7 @@ UBSAN_SANITIZE_zap_leaf.o := n UBSAN_SANITIZE_zap_micro.o := n UBSAN_SANITIZE_sa.o := n UBSAN_SANITIZE_zfs/zap_micro.o := n +UBSAN_SANITIZE_zfs/sa.o := n # Suppress incorrect warnings from versions of objtool which are not # aware of x86 EVEX prefix instructions used for AVX512. 
diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_kmem.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_kmem.c index 95af6200cd01..ceaa2b40beae 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_kmem.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_kmem.c @@ -240,22 +240,14 @@ void kmem_cache_reap_soon(kmem_cache_t *cache) { #ifndef KMEM_DEBUG -#if __FreeBSD_version >= 1300043 uma_zone_reclaim(cache->kc_zone, UMA_RECLAIM_DRAIN); -#else - zone_drain(cache->kc_zone); -#endif #endif } void kmem_reap(void) { -#if __FreeBSD_version >= 1300043 uma_reclaim(UMA_RECLAIM_TRIM); -#else - uma_reclaim(); -#endif } #else void diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_policy.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_policy.c index f2dd7c8e7f8a..42a693b073d1 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_policy.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_policy.c @@ -41,42 +41,42 @@ int secpolicy_nfs(cred_t *cr) { - return (spl_priv_check_cred(cr, PRIV_NFS_DAEMON)); + return (priv_check_cred(cr, PRIV_NFS_DAEMON)); } int secpolicy_zfs(cred_t *cr) { - return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT)); + return (priv_check_cred(cr, PRIV_VFS_MOUNT)); } int secpolicy_zfs_proc(cred_t *cr, proc_t *proc) { - return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT)); + return (priv_check_cred(cr, PRIV_VFS_MOUNT)); } int secpolicy_sys_config(cred_t *cr, int checkonly __unused) { - return (spl_priv_check_cred(cr, PRIV_ZFS_POOL_CONFIG)); + return (priv_check_cred(cr, PRIV_ZFS_POOL_CONFIG)); } int secpolicy_zinject(cred_t *cr) { - return (spl_priv_check_cred(cr, PRIV_ZFS_INJECT)); + return (priv_check_cred(cr, PRIV_ZFS_INJECT)); } int secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp __unused) { - return (spl_priv_check_cred(cr, PRIV_VFS_UNMOUNT)); + return (priv_check_cred(cr, PRIV_VFS_UNMOUNT)); } int @@ -104,7 +104,7 @@ secpolicy_basic_link(vnode_t *vp, cred_t *cr) return (0); if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - return (spl_priv_check_cred(cr, PRIV_VFS_LINK)); + return (priv_check_cred(cr, PRIV_VFS_LINK)); } int @@ -120,7 +120,7 @@ secpolicy_vnode_remove(vnode_t *vp, cred_t *cr) if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN)); + return (priv_check_cred(cr, PRIV_VFS_ADMIN)); } int @@ -130,18 +130,18 @@ secpolicy_vnode_access(cred_t *cr, vnode_t *vp, uid_t owner, accmode_t accmode) if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - if ((accmode & VREAD) && spl_priv_check_cred(cr, PRIV_VFS_READ) != 0) + if ((accmode & VREAD) && priv_check_cred(cr, PRIV_VFS_READ) != 0) return (EACCES); if ((accmode & VWRITE) && - spl_priv_check_cred(cr, PRIV_VFS_WRITE) != 0) { + priv_check_cred(cr, PRIV_VFS_WRITE) != 0) { return (EACCES); } if (accmode & VEXEC) { if (vp->v_type == VDIR) { - if (spl_priv_check_cred(cr, PRIV_VFS_LOOKUP) != 0) + if (priv_check_cred(cr, PRIV_VFS_LOOKUP) != 0) return (EACCES); } else { - if (spl_priv_check_cred(cr, PRIV_VFS_EXEC) != 0) + if (priv_check_cred(cr, PRIV_VFS_EXEC) != 0) return (EACCES); } } @@ -198,7 +198,7 @@ secpolicy_vnode_any_access(cred_t *cr, vnode_t *vp, uid_t owner) continue; break; } - if (spl_priv_check_cred(cr, priv) == 0) + if (priv_check_cred(cr, priv) == 0) return (0); } return (EPERM); @@ -212,7 +212,7 @@ secpolicy_vnode_setdac(vnode_t *vp, cred_t *cr, uid_t owner) return (0); if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - return 
(spl_priv_check_cred(cr, PRIV_VFS_ADMIN)); + return (priv_check_cred(cr, PRIV_VFS_ADMIN)); } int @@ -262,7 +262,7 @@ secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap, ((mask & AT_GID) && vap->va_gid != ovap->va_gid && !groupmember(vap->va_gid, cr))) { if (secpolicy_fs_owner(vp->v_mount, cr) != 0) { - error = spl_priv_check_cred(cr, PRIV_VFS_CHOWN); + error = priv_check_cred(cr, PRIV_VFS_CHOWN); if (error) return (error); } @@ -306,7 +306,7 @@ secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid) return (0); if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - return (spl_priv_check_cred(cr, PRIV_VFS_SETGID)); + return (priv_check_cred(cr, PRIV_VFS_SETGID)); } int @@ -316,7 +316,7 @@ secpolicy_vnode_setid_retain(znode_t *zp, cred_t *cr, if (secpolicy_fs_owner(ZTOV(zp)->v_mount, cr) == 0) return (0); - return (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)); + return (priv_check_cred(cr, PRIV_VFS_RETAINSUGID)); } void @@ -327,7 +327,7 @@ secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr) return; if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0) { - if (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)) { + if (priv_check_cred(cr, PRIV_VFS_RETAINSUGID)) { vap->va_mask |= AT_MODE; vap->va_mode &= ~(S_ISUID|S_ISGID); } @@ -349,7 +349,7 @@ secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap, * is not a member of. Both of these are allowed in jail(8). */ if (vp->v_type != VDIR && (vap->va_mode & S_ISTXT)) { - if (spl_priv_check_cred(cr, PRIV_VFS_STICKYFILE)) + if (priv_check_cred(cr, PRIV_VFS_STICKYFILE)) return (EFTYPE); } /* @@ -365,7 +365,7 @@ secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap, * Deny setting setuid if we are not the file owner. */ if ((vap->va_mode & S_ISUID) && ovap->va_uid != cr->cr_uid) { - error = spl_priv_check_cred(cr, PRIV_VFS_ADMIN); + error = priv_check_cred(cr, PRIV_VFS_ADMIN); if (error) return (error); } @@ -376,7 +376,7 @@ int secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp) { - return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT)); + return (priv_check_cred(cr, PRIV_VFS_MOUNT)); } int @@ -389,7 +389,7 @@ secpolicy_vnode_owner(vnode_t *vp, cred_t *cr, uid_t owner) return (0); /* XXX: vfs_suser()? 
*/ - return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_OWNER)); + return (priv_check_cred(cr, PRIV_VFS_MOUNT_OWNER)); } int @@ -398,14 +398,14 @@ secpolicy_vnode_chown(vnode_t *vp, cred_t *cr, uid_t owner) if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - return (spl_priv_check_cred(cr, PRIV_VFS_CHOWN)); + return (priv_check_cred(cr, PRIV_VFS_CHOWN)); } void secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp) { - if (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER) != 0) { + if (priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER) != 0) { MNT_ILOCK(vfsp); vfsp->vfs_flag |= VFS_NOSETUID | MNT_USER; vfs_clearmntopt(vfsp, MNTOPT_SETUID); @@ -424,12 +424,12 @@ secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr, if (secpolicy_fs_owner(vp->v_mount, cr) == 0) return (0); - return (spl_priv_check_cred(cr, PRIV_VFS_SYSFLAGS)); + return (priv_check_cred(cr, PRIV_VFS_SYSFLAGS)); } int secpolicy_smb(cred_t *cr) { - return (spl_priv_check_cred(cr, PRIV_NETSMB)); + return (priv_check_cred(cr, PRIV_NETSMB)); } diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_taskq.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_taskq.c index 81d49b7c8725..ae618e830ecd 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_taskq.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_taskq.c @@ -42,11 +42,6 @@ #include -#if __FreeBSD_version < 1201522 -#define taskqueue_start_threads_in_proc(tqp, count, pri, proc, name, ...) \ - taskqueue_start_threads(tqp, count, pri, name, __VA_ARGS__) -#endif - static uint_t taskq_tsd; static uma_zone_t taskq_zone; diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_vfs.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_vfs.c index 724709ddee9d..6af3da138f8d 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_vfs.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_vfs.c @@ -158,7 +158,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, return (error); } vn_seqc_write_begin(vp); - VOP_UNLOCK1(vp); + VOP_UNLOCK(vp); /* * Allocate and initialize the filesystem. @@ -249,10 +249,8 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); vn_seqc_write_end(vp); - VOP_UNLOCK1(vp); -#if __FreeBSD_version >= 1300048 + VOP_UNLOCK(vp); vfs_op_exit(mp); -#endif vfs_unbusy(mp); *vpp = mvp; return (0); @@ -272,12 +270,8 @@ void vn_rele_async(vnode_t *vp, taskq_t *taskq) { VERIFY3U(vp->v_usecount, >, 0); - if (refcount_release_if_not_last(&vp->v_usecount)) { -#if __FreeBSD_version < 1300045 - vdrop(vp); -#endif + if (refcount_release_if_not_last(&vp->v_usecount)) return; - } VERIFY3U(taskq_dispatch((taskq_t *)taskq, (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0); } diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_zone.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_zone.c index 7f2b5c712c42..e3896517bd61 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_zone.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/spl/spl_zone.c @@ -63,7 +63,7 @@ zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid) struct prison *pr; int dofree, error; - if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0) + if ((error = priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0) return (error); /* Allocate memory before we grab prison's mutex. 
*/ @@ -116,7 +116,7 @@ zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid) struct prison *pr; int error; - if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0) + if ((error = priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0) return (error); sx_slock(&allprison_lock); diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/abd_os.c index 3b812271f98b..fb5c46ecf7c2 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/abd_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/abd_os.c @@ -300,7 +300,7 @@ void abd_init(void) { abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0, - NULL, NULL, NULL, NULL, 0, KMC_NODEBUG); + NULL, NULL, NULL, NULL, 0, KMC_NODEBUG | KMC_RECLAIMABLE); wmsum_init(&abd_sums.abdstat_struct_size, 0); wmsum_init(&abd_sums.abdstat_scatter_cnt, 0); diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/arc_os.c index e271d3bf98a0..dd3da2183a18 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/arc_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/arc_os.c @@ -149,26 +149,29 @@ static eventhandler_tag arc_event_lowmem = NULL; static void arc_lowmem(void *arg __unused, int howto __unused) { - int64_t free_memory, to_free; + int64_t can_free, free_memory, to_free; arc_no_grow = B_TRUE; arc_warm = B_TRUE; arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry); + free_memory = arc_available_memory(); - int64_t can_free = arc_c - arc_c_min; - if (can_free <= 0) - return; - to_free = (can_free >> arc_shrink_shift) - MIN(free_memory, 0); + can_free = arc_c - arc_c_min; + to_free = (MAX(can_free, 0) >> arc_shrink_shift) - MIN(free_memory, 0); DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free); - arc_reduce_target_size(to_free); + to_free = arc_reduce_target_size(to_free); /* * It is unsafe to block here in arbitrary threads, because we can come * here from ARC itself and may hold ARC locks and thus risk a deadlock * with ARC reclaim thread. */ - if (curproc == pageproc) - arc_wait_for_eviction(to_free, B_FALSE); + if (curproc == pageproc) { + arc_wait_for_eviction(to_free, B_FALSE, B_FALSE); + ARCSTAT_BUMP(arcstat_memory_indirect_count); + } else { + ARCSTAT_BUMP(arcstat_memory_direct_count); + } } void diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/crypto_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/crypto_os.c index ed8d2407613e..4d8493743ef1 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/crypto_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/crypto_os.c @@ -197,13 +197,6 @@ zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp) crp->crp_etype = 0; crp->crp_flags &= ~CRYPTO_F_DONE; session->fs_done = false; -#if __FreeBSD_version < 1300087 - /* - * Session ID changed, so we should record that, - * and try again - */ - session->fs_sid = crp->crp_session; -#endif } return (error); } @@ -250,7 +243,6 @@ freebsd_crypt_uio_debug_log(boolean_t encrypt, * happen every time the key changes (including when * it's first loaded). 
*/ -#if __FreeBSD_version >= 1300087 int freebsd_crypt_newsession(freebsd_crypt_session_t *sessp, const struct zio_crypt_info *c_info, crypto_key_t *key) @@ -389,244 +381,3 @@ freebsd_crypt_uio(boolean_t encrypt, } return (error); } - -#else -int -freebsd_crypt_newsession(freebsd_crypt_session_t *sessp, - const struct zio_crypt_info *c_info, crypto_key_t *key) -{ - struct cryptoini cria = {0}, crie = {0}, *crip; - struct enc_xform *xform; - struct auth_hash *xauth; - int error = 0; - crypto_session_t sid; - -#ifdef FCRYPTO_DEBUG - printf("%s(%p, { %s, %d, %d, %s }, { %p, %u })\n", - __FUNCTION__, sessp, - c_info->ci_algname, c_info->ci_crypt_type, - (unsigned int)c_info->ci_keylen, c_info->ci_name, - key->ck_data, (unsigned int)key->ck_length); - printf("\tkey = { "); - for (int i = 0; i < key->ck_length / 8; i++) { - uint8_t *b = (uint8_t *)key->ck_data; - printf("%02x ", b[i]); - } - printf("}\n"); -#endif - switch (c_info->ci_crypt_type) { - case ZC_TYPE_GCM: - xform = &enc_xform_aes_nist_gcm; - switch (key->ck_length/8) { - case AES_128_GMAC_KEY_LEN: - xauth = &auth_hash_nist_gmac_aes_128; - break; - case AES_192_GMAC_KEY_LEN: - xauth = &auth_hash_nist_gmac_aes_192; - break; - case AES_256_GMAC_KEY_LEN: - xauth = &auth_hash_nist_gmac_aes_256; - break; - default: - error = EINVAL; - goto bad; - } - break; - case ZC_TYPE_CCM: - xform = &enc_xform_ccm; - switch (key->ck_length/8) { - case AES_128_CBC_MAC_KEY_LEN: - xauth = &auth_hash_ccm_cbc_mac_128; - break; - case AES_192_CBC_MAC_KEY_LEN: - xauth = &auth_hash_ccm_cbc_mac_192; - break; - case AES_256_CBC_MAC_KEY_LEN: - xauth = &auth_hash_ccm_cbc_mac_256; - break; - default: - error = EINVAL; - goto bad; - break; - } - break; - default: - error = ENOTSUP; - goto bad; - } -#ifdef FCRYPTO_DEBUG - printf("%s(%d): Using crypt %s (key length %u [%u bytes]), " - "auth %s (key length %d)\n", - __FUNCTION__, __LINE__, - xform->name, (unsigned int)key->ck_length, - (unsigned int)key->ck_length/8, - xauth->name, xauth->keysize); -#endif - - crie.cri_alg = xform->type; - crie.cri_key = key->ck_data; - crie.cri_klen = key->ck_length; - - cria.cri_alg = xauth->type; - cria.cri_key = key->ck_data; - cria.cri_klen = key->ck_length; - - cria.cri_next = &crie; - crie.cri_next = NULL; - crip = &cria; - // Everything else is zero-initialised - - error = crypto_newsession(&sid, crip, - CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE); - if (error != 0) { - printf("%s(%d): crypto_newsession failed with %d\n", - __FUNCTION__, __LINE__, error); - goto bad; - } - sessp->fs_sid = sid; - mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock", - NULL, MTX_DEF); - crypt_sessions++; -bad: - return (error); -} - -/* - * The meat of encryption/decryption. - * If sessp is NULL, then it will create a - * temporary cryptographic session, and release - * it when done. 
- */ -int -freebsd_crypt_uio(boolean_t encrypt, - freebsd_crypt_session_t *input_sessionp, - const struct zio_crypt_info *c_info, - zfs_uio_t *data_uio, - crypto_key_t *key, - uint8_t *ivbuf, - size_t datalen, - size_t auth_len) -{ - struct cryptop *crp; - struct cryptodesc *enc_desc, *auth_desc; - struct enc_xform *xform; - struct auth_hash *xauth; - freebsd_crypt_session_t *session = NULL; - int error; - - freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio, - key, ivbuf, datalen, auth_len); - switch (c_info->ci_crypt_type) { - case ZC_TYPE_GCM: - xform = &enc_xform_aes_nist_gcm; - switch (key->ck_length/8) { - case AES_128_GMAC_KEY_LEN: - xauth = &auth_hash_nist_gmac_aes_128; - break; - case AES_192_GMAC_KEY_LEN: - xauth = &auth_hash_nist_gmac_aes_192; - break; - case AES_256_GMAC_KEY_LEN: - xauth = &auth_hash_nist_gmac_aes_256; - break; - default: - error = EINVAL; - goto bad; - } - break; - case ZC_TYPE_CCM: - xform = &enc_xform_ccm; - switch (key->ck_length/8) { - case AES_128_CBC_MAC_KEY_LEN: - xauth = &auth_hash_ccm_cbc_mac_128; - break; - case AES_192_CBC_MAC_KEY_LEN: - xauth = &auth_hash_ccm_cbc_mac_192; - break; - case AES_256_CBC_MAC_KEY_LEN: - xauth = &auth_hash_ccm_cbc_mac_256; - break; - default: - error = EINVAL; - goto bad; - break; - } - break; - default: - error = ENOTSUP; - goto bad; - } - -#ifdef FCRYPTO_DEBUG - printf("%s(%d): Using crypt %s (key length %u [%u bytes]), " - "auth %s (key length %d)\n", - __FUNCTION__, __LINE__, - xform->name, (unsigned int)key->ck_length, - (unsigned int)key->ck_length/8, - xauth->name, xauth->keysize); -#endif - - if (input_sessionp == NULL) { - session = kmem_zalloc(sizeof (*session), KM_SLEEP); - error = freebsd_crypt_newsession(session, c_info, key); - if (error) - goto out; - } else - session = input_sessionp; - - crp = crypto_getreq(2); - if (crp == NULL) { - error = ENOMEM; - goto bad; - } - - auth_desc = crp->crp_desc; - enc_desc = auth_desc->crd_next; - - crp->crp_session = session->fs_sid; - crp->crp_ilen = auth_len + datalen; - crp->crp_buf = (void*)GET_UIO_STRUCT(data_uio); - crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC; - - auth_desc->crd_skip = 0; - auth_desc->crd_len = auth_len; - auth_desc->crd_inject = auth_len + datalen; - auth_desc->crd_alg = xauth->type; -#ifdef FCRYPTO_DEBUG - printf("%s: auth: skip = %u, len = %u, inject = %u\n", - __FUNCTION__, auth_desc->crd_skip, auth_desc->crd_len, - auth_desc->crd_inject); -#endif - - enc_desc->crd_skip = auth_len; - enc_desc->crd_len = datalen; - enc_desc->crd_inject = auth_len; - enc_desc->crd_alg = xform->type; - enc_desc->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; - memcpy(enc_desc->crd_iv, ivbuf, ZIO_DATA_IV_LEN); - enc_desc->crd_next = NULL; - -#ifdef FCRYPTO_DEBUG - printf("%s: enc: skip = %u, len = %u, inject = %u\n", - __FUNCTION__, enc_desc->crd_skip, enc_desc->crd_len, - enc_desc->crd_inject); -#endif - - if (encrypt) - enc_desc->crd_flags |= CRD_F_ENCRYPT; - - error = zfs_crypto_dispatch(session, crp); - crypto_freereq(crp); -out: - if (input_sessionp == NULL) { - freebsd_crypt_freesession(session); - kmem_free(session, sizeof (*session)); - } -bad: -#ifdef FCRYPTO_DEBUG - if (error) - printf("%s: returning error %d\n", __FUNCTION__, error); -#endif - return (error); -} -#endif diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c index 48ea37cbad59..0a0af102ea82 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c +++ 
b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c @@ -60,20 +60,7 @@ #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) #endif -#if __FreeBSD_version < 1300051 -#define VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY -#else #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY -#endif - - -#if __FreeBSD_version < 1300072 -#define dmu_page_lock(m) vm_page_lock(m) -#define dmu_page_unlock(m) vm_page_unlock(m) -#else -#define dmu_page_lock(m) -#define dmu_page_unlock(m) -#endif int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, @@ -167,7 +154,6 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, #endif vmobj = ma[0]->object; - zfs_vmobject_wlock_12(vmobj); db = dbp[0]; for (i = 0; i < *rbehind; i++) { @@ -177,7 +163,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, break; if (!vm_page_none_valid(m)) { ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); - vm_page_do_sunbusy(m); + vm_page_sunbusy(m); break; } ASSERT3U(m->dirty, ==, 0); @@ -189,13 +175,11 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, memcpy(va, (char *)db->db_data + bufoff, PAGESIZE); zfs_unmap_page(sf); vm_page_valid(m); - dmu_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - dmu_page_unlock(m); - vm_page_do_sunbusy(m); + vm_page_sunbusy(m); } *rbehind = i; @@ -296,7 +280,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, break; if (!vm_page_none_valid(m)) { ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); - vm_page_do_sunbusy(m); + vm_page_sunbusy(m); break; } ASSERT3U(m->dirty, ==, 0); @@ -314,16 +298,13 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, } zfs_unmap_page(sf); vm_page_valid(m); - dmu_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - dmu_page_unlock(m); - vm_page_do_sunbusy(m); + vm_page_sunbusy(m); } *rahead = i; - zfs_vmobject_wunlock_12(vmobj); dmu_buf_rele_array(dbp, numbufs, FTAG); return (0); diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/event_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/event_os.c index e774fbaaf867..ace0fa734013 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/event_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/event_os.c @@ -46,7 +46,6 @@ knlist_sx_xunlock(void *arg) sx_xunlock((struct sx *)arg); } -#if __FreeBSD_version >= 1300128 static void knlist_sx_assert_lock(void *arg, int what) { @@ -56,28 +55,10 @@ knlist_sx_assert_lock(void *arg, int what) else sx_assert((struct sx *)arg, SX_UNLOCKED); } -#else -static void -knlist_sx_assert_locked(void *arg) -{ - sx_assert((struct sx *)arg, SX_LOCKED); -} -static void -knlist_sx_assert_unlocked(void *arg) -{ - sx_assert((struct sx *)arg, SX_UNLOCKED); -} -#endif void knlist_init_sx(struct knlist *knl, struct sx *lock) { - -#if __FreeBSD_version >= 1300128 knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock, knlist_sx_assert_lock); -#else - knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock, - knlist_sx_assert_locked, knlist_sx_assert_unlocked); -#endif } diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/kmod_core.c index 58445a3e32f6..cf066831c444 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/kmod_core.c @@ -345,11 +345,7 @@ FEATURE(zfs, 
"OpenZFS support"); DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY); MODULE_VERSION(zfsctrl, 1); -#if __FreeBSD_version > 1300092 MODULE_DEPEND(zfsctrl, xdr, 1, 1, 1); -#else -MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1); -#endif MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1); MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1); MODULE_DEPEND(zfsctrl, zlib, 1, 1, 1); diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/vdev_geom.c index 38c1d8e9e464..b7ff1063b089 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/vdev_geom.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/vdev_geom.c @@ -379,11 +379,7 @@ vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets, int i, n_bios, j; size_t bios_size; -#if __FreeBSD_version > 1300130 maxio = maxphys - (maxphys % cp->provider->sectorsize); -#else - maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize); -#endif n_bios = 0; /* How many bios are required for all commands ? */ diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ctldir.c index a753e91da4fe..4d539461886b 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -733,7 +733,7 @@ zfsctl_root_vptocnp(struct vop_vptocnp_args *ap) if (error != 0) return (SET_ERROR(error)); - VOP_UNLOCK1(dvp); + VOP_UNLOCK(dvp); *ap->a_vpp = dvp; *ap->a_buflen -= sizeof (dotzfs_name); memcpy(ap->a_buf + *ap->a_buflen, dotzfs_name, sizeof (dotzfs_name)); @@ -814,12 +814,8 @@ zfsctl_common_getacl(struct vop_getacl_args *ap) static struct vop_vector zfsctl_ops_root = { .vop_default = &default_vnodeops, -#if __FreeBSD_version >= 1300121 .vop_fplookup_vexec = VOP_EAGAIN, -#endif -#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = VOP_EAGAIN, -#endif .vop_open = zfsctl_common_open, .vop_close = zfsctl_common_close, .vop_ioctl = VOP_EINVAL, @@ -1146,12 +1142,8 @@ zfsctl_snapdir_getattr(struct vop_getattr_args *ap) static struct vop_vector zfsctl_ops_snapdir = { .vop_default = &default_vnodeops, -#if __FreeBSD_version >= 1300121 .vop_fplookup_vexec = VOP_EAGAIN, -#endif -#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = VOP_EAGAIN, -#endif .vop_open = zfsctl_common_open, .vop_close = zfsctl_common_close, .vop_getattr = zfsctl_snapdir_getattr, @@ -1226,27 +1218,19 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) * before we can lock the vnode again. */ locked = VOP_ISLOCKED(vp); -#if __FreeBSD_version >= 1300045 enum vgetstate vs = vget_prep(vp); -#else - vhold(vp); -#endif vput(vp); /* Look up .zfs/snapshot, our parent. 
*/ error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp); if (error == 0) { - VOP_UNLOCK1(dvp); + VOP_UNLOCK(dvp); *ap->a_vpp = dvp; *ap->a_buflen -= len; memcpy(ap->a_buf + *ap->a_buflen, node->sn_name, len); } vfs_unbusy(mp); -#if __FreeBSD_version >= 1300045 vget_finish(vp, locked | LK_RETRY, vs); -#else - vget(vp, locked | LK_VNHELD | LK_RETRY, curthread); -#endif return (error); } @@ -1256,18 +1240,12 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) */ static struct vop_vector zfsctl_ops_snapshot = { .vop_default = NULL, /* ensure very restricted access */ -#if __FreeBSD_version >= 1300121 .vop_fplookup_vexec = VOP_EAGAIN, -#endif -#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = VOP_EAGAIN, -#endif .vop_open = zfsctl_common_open, .vop_close = zfsctl_common_close, .vop_inactive = zfsctl_snapshot_inactive, -#if __FreeBSD_version >= 1300045 - .vop_need_inactive = vop_stdneed_inactive, -#endif + .vop_need_inactive = vop_stdneed_inactive, .vop_reclaim = zfsctl_snapshot_reclaim, .vop_vptocnp = zfsctl_snapshot_vptocnp, .vop_lock1 = vop_stdlock, diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_dir.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_dir.c index 3cdb94d6cd53..00d499c8c63e 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_dir.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_dir.c @@ -824,7 +824,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr) return (SET_ERROR(EDQUOT)); } - getnewvnode_reserve_(); + getnewvnode_reserve(); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + @@ -926,7 +926,7 @@ zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags) goto top; } if (error == 0) - VOP_UNLOCK1(ZTOV(*xzpp)); + VOP_UNLOCK(ZTOV(*xzpp)); return (error); } diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_file_os.c index e40193566fe2..6fdcc827d64b 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_file_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_file_os.c @@ -268,7 +268,7 @@ zfs_vop_fsync(vnode_t *vp) goto drop; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(vp, MNT_WAIT, curthread); - VOP_UNLOCK1(vp); + VOP_UNLOCK(vp); vn_finished_write(mp); drop: return (SET_ERROR(error)); @@ -328,15 +328,7 @@ zfs_file_unlink(const char *fnamep) zfs_uio_seg_t seg = UIO_SYSSPACE; int rc; -#if __FreeBSD_version >= 1300018 rc = kern_funlinkat(curthread, AT_FDCWD, PTR2CAP(fnamep), FD_NONE, seg, 0, 0); -#elif __FreeBSD_version >= 1202504 || defined(AT_BENEATH) - rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep), - seg, 0, 0); -#else - rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep), - seg, 0); -#endif return (SET_ERROR(rc)); } diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c index b8f5fa4e7543..928cf25e940f 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c @@ -36,10 +36,6 @@ #include -#if __FreeBSD_version < 1201517 -#define vm_page_max_user_wired vm_page_max_wired -#endif - int zfs_vfs_ref(zfsvfs_t **zfvp) { diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 9334d28f6976..8188659141f9 100644 --- 
a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -126,25 +126,16 @@ static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); static int zfs_sync(vfs_t *vfsp, int waitfor); -#if __FreeBSD_version >= 1300098 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, struct ucred **credanonp, int *numsecflavors, int *secflavors); -#else -static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, - struct ucred **credanonp, int *numsecflavors, int **secflavors); -#endif static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); static void zfs_freevfs(vfs_t *vfsp); struct vfsops zfs_vfsops = { .vfs_mount = zfs_mount, .vfs_unmount = zfs_umount, -#if __FreeBSD_version >= 1300049 .vfs_root = vfs_cache_root, - .vfs_cachedroot = zfs_root, -#else - .vfs_root = zfs_root, -#endif + .vfs_cachedroot = zfs_root, .vfs_statfs = zfs_statfs, .vfs_vget = zfs_vget, .vfs_sync = zfs_sync, @@ -1357,16 +1348,16 @@ zfs_mount(vfs_t *vfsp) vn_lock(mvp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(mvp, &vattr, cr)) { - VOP_UNLOCK1(mvp); + VOP_UNLOCK(mvp); goto out; } if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { - VOP_UNLOCK1(mvp); + VOP_UNLOCK(mvp); goto out; } - VOP_UNLOCK1(mvp); + VOP_UNLOCK(mvp); } secpolicy_fs_mount_clearopts(cr, vfsp); @@ -1578,11 +1569,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * 'z_parent' is self referential for non-snapshots. */ #ifdef FREEBSD_NAMECACHE -#if __FreeBSD_version >= 1300117 cache_purgevfs(zfsvfs->z_parent->z_vfs); -#else - cache_purgevfs(zfsvfs->z_parent->z_vfs, true); -#endif #endif } @@ -1649,9 +1636,18 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) zfs_unregister_callbacks(zfsvfs); /* - * Evict cached data + * Evict cached data. We must write out any dirty data before + * disowning the dataset. 
*/ - if (!zfs_is_readonly(zfsvfs)) + objset_t *os = zfsvfs->z_os; + boolean_t os_dirty = B_FALSE; + for (int t = 0; t < TXG_SIZE; t++) { + if (dmu_objset_is_dirty(os, t)) { + os_dirty = B_TRUE; + break; + } + } + if (!zfs_is_readonly(zfsvfs) && os_dirty) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); dmu_objset_evict_dbufs(zfsvfs->z_os); dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; @@ -1775,13 +1771,8 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) } static int -#if __FreeBSD_version >= 1300098 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, struct ucred **credanonp, int *numsecflavors, int *secflavors) -#else -zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, - struct ucred **credanonp, int *numsecflavors, int **secflavors) -#endif { zfsvfs_t *zfsvfs = vfsp->vfs_data; @@ -2070,10 +2061,8 @@ zfs_vnodes_adjust_back(void) #endif } -#if __FreeBSD_version >= 1300139 static struct sx zfs_vnlru_lock; static struct vnode *zfs_vnlru_marker; -#endif static arc_prune_t *zfs_prune; static void @@ -2081,13 +2070,9 @@ zfs_prune_task(uint64_t nr_to_scan, void *arg __unused) { if (nr_to_scan > INT_MAX) nr_to_scan = INT_MAX; -#if __FreeBSD_version >= 1300139 sx_xlock(&zfs_vnlru_lock); vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker); sx_xunlock(&zfs_vnlru_lock); -#else - vnlru_free(nr_to_scan, &zfs_vfsops); -#endif } void @@ -2117,10 +2102,8 @@ zfs_init(void) zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); -#if __FreeBSD_version >= 1300139 zfs_vnlru_marker = vnlru_alloc_marker(); sx_init(&zfs_vnlru_lock, "zfs vnlru lock"); -#endif zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL); } @@ -2128,10 +2111,8 @@ void zfs_fini(void) { arc_remove_prune_callback(zfs_prune); -#if __FreeBSD_version >= 1300139 vnlru_free_marker(zfs_vnlru_marker); sx_destroy(&zfs_vnlru_lock); -#endif taskq_destroy(zfsvfs_taskq); zfsctl_fini(); diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 9602c9ecbe0e..ceea0b1a3eb9 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -39,9 +39,7 @@ #include #include #include -#if __FreeBSD_version >= 1300102 #include -#endif #include #include #include @@ -100,18 +98,6 @@ VFS_SMR_DECLARE; -#if __FreeBSD_version < 1300103 -#define NDFREE_PNBUF(ndp) NDFREE((ndp), NDF_ONLY_PNBUF) -#endif - -#if __FreeBSD_version >= 1300047 -#define vm_page_wire_lock(pp) -#define vm_page_wire_unlock(pp) -#else -#define vm_page_wire_lock(pp) vm_page_lock(pp) -#define vm_page_wire_unlock(pp) vm_page_unlock(pp) -#endif - #ifdef DEBUG_VFS_LOCKS #define VNCHECKREF(vp) \ VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \ @@ -338,39 +324,6 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) nbytes = end - off; obj = vp->v_object; - zfs_vmobject_assert_wlocked_12(obj); -#if __FreeBSD_version < 1300050 - for (;;) { - if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && - pp->valid) { - if (vm_page_xbusied(pp)) { - /* - * Reference the page before unlocking and - * sleeping so that the page daemon is less - * likely to reclaim it. 
- */ - vm_page_reference(pp); - vm_page_lock(pp); - zfs_vmobject_wunlock(obj); - vm_page_busy_sleep(pp, "zfsmwb", true); - zfs_vmobject_wlock(obj); - continue; - } - vm_page_sbusy(pp); - } else if (pp != NULL) { - ASSERT(!pp->valid); - pp = NULL; - } - if (pp != NULL) { - ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_object_pip_add(obj, 1); - pmap_remove_write(pp); - if (nbytes != 0) - vm_page_clear_dirty(pp, off, nbytes); - } - break; - } -#else vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); @@ -381,7 +334,6 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) if (nbytes != 0) vm_page_clear_dirty(pp, off, nbytes); } -#endif return (pp); } @@ -390,14 +342,9 @@ page_unbusy(vm_page_t pp) { vm_page_sunbusy(pp); -#if __FreeBSD_version >= 1300041 vm_object_pip_wakeup(pp->object); -#else - vm_object_pip_subtract(pp->object, 1); -#endif } -#if __FreeBSD_version > 1300051 static vm_page_t page_hold(vnode_t *vp, int64_t start) { @@ -410,57 +357,11 @@ page_hold(vnode_t *vp, int64_t start) VM_ALLOC_NOBUSY); return (m); } -#else -static vm_page_t -page_hold(vnode_t *vp, int64_t start) -{ - vm_object_t obj; - vm_page_t pp; - - obj = vp->v_object; - zfs_vmobject_assert_wlocked(obj); - - for (;;) { - if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && - pp->valid) { - if (vm_page_xbusied(pp)) { - /* - * Reference the page before unlocking and - * sleeping so that the page daemon is less - * likely to reclaim it. - */ - vm_page_reference(pp); - vm_page_lock(pp); - zfs_vmobject_wunlock(obj); - vm_page_busy_sleep(pp, "zfsmwb", true); - zfs_vmobject_wlock(obj); - continue; - } - - ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_wire_lock(pp); - vm_page_hold(pp); - vm_page_wire_unlock(pp); - - } else - pp = NULL; - break; - } - return (pp); -} -#endif static void page_unhold(vm_page_t pp) { - - vm_page_wire_lock(pp); -#if __FreeBSD_version >= 1300035 vm_page_unwire(pp, PQ_ACTIVE); -#else - vm_page_unhold(pp); -#endif - vm_page_wire_unlock(pp); } /* @@ -484,34 +385,22 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) ASSERT3P(obj, !=, NULL); off = start & PAGEOFFSET; - zfs_vmobject_wlock_12(obj); -#if __FreeBSD_version >= 1300041 vm_object_pip_add(obj, 1); -#endif for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; int nbytes = imin(PAGESIZE - off, len); if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { - zfs_vmobject_wunlock_12(obj); - va = zfs_map_page(pp, &sf); (void) dmu_read(os, zp->z_id, start + off, nbytes, va + off, DMU_READ_PREFETCH); zfs_unmap_page(sf); - - zfs_vmobject_wlock_12(obj); page_unbusy(pp); } len -= nbytes; off = 0; } -#if __FreeBSD_version >= 1300041 vm_object_pip_wakeup(obj); -#else - vm_object_pip_wakeupn(obj, 0); -#endif - zfs_vmobject_wunlock_12(obj); } /* @@ -542,26 +431,22 @@ mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio) ASSERT3P(obj, !=, NULL); ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET); - zfs_vmobject_wlock_12(obj); for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) { int bytes = MIN(PAGESIZE, len); pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); if (vm_page_none_valid(pp)) { - zfs_vmobject_wunlock_12(obj); va = zfs_map_page(pp, &sf); error = dmu_read(os, zp->z_id, start, bytes, va, DMU_READ_PREFETCH); if (bytes != PAGESIZE && error == 0) memset(va + bytes, 0, PAGESIZE - bytes); zfs_unmap_page(sf); - zfs_vmobject_wlock_12(obj); -#if __FreeBSD_version 
>= 1300081 if (error == 0) { vm_page_valid(pp); vm_page_activate(pp); - vm_page_do_sunbusy(pp); + vm_page_sunbusy(pp); } else { zfs_vmobject_wlock(obj); if (!vm_page_wired(pp) && pp->valid == 0 && @@ -571,29 +456,15 @@ mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio) vm_page_sunbusy(pp); zfs_vmobject_wunlock(obj); } -#else - vm_page_do_sunbusy(pp); - vm_page_lock(pp); - if (error) { - if (pp->wire_count == 0 && pp->valid == 0 && - !vm_page_busied(pp)) - vm_page_free(pp); - } else { - pp->valid = VM_PAGE_BITS_ALL; - vm_page_activate(pp); - } - vm_page_unlock(pp); -#endif } else { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_do_sunbusy(pp); + vm_page_sunbusy(pp); } if (error) break; zfs_uio_advance(uio, bytes); len -= bytes; } - zfs_vmobject_wunlock_12(obj); return (error); } @@ -623,7 +494,6 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) start = zfs_uio_offset(uio); off = start & PAGEOFFSET; - zfs_vmobject_wlock_12(obj); for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; uint64_t bytes = MIN(PAGESIZE - off, len); @@ -632,25 +502,20 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) struct sf_buf *sf; caddr_t va; - zfs_vmobject_wunlock_12(obj); va = zfs_map_page(pp, &sf); error = vn_io_fault_uiomove(va + off, bytes, GET_UIO_STRUCT(uio)); zfs_unmap_page(sf); - zfs_vmobject_wlock_12(obj); page_unhold(pp); } else { - zfs_vmobject_wunlock_12(obj); error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, bytes); - zfs_vmobject_wlock_12(obj); } len -= bytes; off = 0; if (error) break; } - zfs_vmobject_wunlock_12(obj); return (error); } @@ -786,9 +651,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, znode_t *zdp = VTOZ(dvp); znode_t *zp; zfsvfs_t *zfsvfs = zdp->z_zfsvfs; -#if __FreeBSD_version > 1300124 seqc_t dvp_seqc; -#endif int error = 0; /* @@ -814,9 +677,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0) return (error); -#if __FreeBSD_version > 1300124 dvp_seqc = vn_seqc_read_notmodify(dvp); -#endif *vpp = NULL; @@ -895,7 +756,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, zfs_exit(zfsvfs, FTAG); ltype = VOP_ISLOCKED(dvp); - VOP_UNLOCK1(dvp); + VOP_UNLOCK(dvp); error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, &zfsctl_vp); if (error == 0) { @@ -996,7 +857,6 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, } } -#if __FreeBSD_version > 1300124 if ((cnp->cn_flags & ISDOTDOT) != 0) { /* * FIXME: zfs_lookup_lock relocks vnodes and does nothing to @@ -1014,7 +874,6 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cnp->cn_flags &= ~MAKEENTRY; } } -#endif /* Insert name into cache (as non-existent) if appropriate. */ if (zfsvfs->z_use_namecache && !zfsvfs->z_replay && @@ -1149,7 +1008,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, goto out; } - getnewvnode_reserve_(); + getnewvnode_reserve(); tx = dmu_tx_create(os); @@ -1183,7 +1042,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, * delete the newly created dnode. */ zfs_znode_delete(zp, tx); - VOP_UNLOCK1(ZTOV(zp)); + VOP_UNLOCK(ZTOV(zp)); zrele(zp); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); @@ -1512,7 +1371,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, /* * Add a new entry to the directory. 
*/ - getnewvnode_reserve_(); + getnewvnode_reserve(); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); @@ -1547,7 +1406,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, error = zfs_link_create(dzp, dirname, zp, tx, ZNEW); if (error != 0) { zfs_znode_delete(zp, tx); - VOP_UNLOCK1(ZTOV(zp)); + VOP_UNLOCK(ZTOV(zp)); zrele(zp); goto out; } @@ -1575,16 +1434,6 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, return (error); } -#if __FreeBSD_version < 1300124 -static void -cache_vop_rmdir(struct vnode *dvp, struct vnode *vp) -{ - - cache_purge(dvp); - cache_purge(vp); -} -#endif - /* * Remove a directory subdir entry. If the current working * directory is the same as the subdir to be removed, the @@ -2984,9 +2833,9 @@ zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, znode_t *sdzp, *tdzp, *szp, *tzp; int error; - VOP_UNLOCK1(tdvp); + VOP_UNLOCK(tdvp); if (*tvpp != NULL && *tvpp != tdvp) - VOP_UNLOCK1(*tvpp); + VOP_UNLOCK(*tvpp); relock: error = vn_lock(sdvp, LK_EXCLUSIVE); @@ -2994,13 +2843,13 @@ zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, goto out; error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); if (error != 0) { - VOP_UNLOCK1(sdvp); + VOP_UNLOCK(sdvp); if (error != EBUSY) goto out; error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto out; - VOP_UNLOCK1(tdvp); + VOP_UNLOCK(tdvp); goto relock; } tdzp = VTOZ(tdvp); @@ -3008,8 +2857,8 @@ zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp); if (error != 0) { - VOP_UNLOCK1(sdvp); - VOP_UNLOCK1(tdvp); + VOP_UNLOCK(sdvp); + VOP_UNLOCK(tdvp); goto out; } svp = ZTOV(szp); @@ -3021,8 +2870,8 @@ zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, nvp = svp; error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); if (error != 0) { - VOP_UNLOCK1(sdvp); - VOP_UNLOCK1(tdvp); + VOP_UNLOCK(sdvp); + VOP_UNLOCK(tdvp); if (tvp != NULL) vrele(tvp); if (error != EBUSY) { @@ -3034,7 +2883,7 @@ zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, vrele(nvp); goto out; } - VOP_UNLOCK1(nvp); + VOP_UNLOCK(nvp); /* * Concurrent rename race. * XXX ? 
@@ -3058,9 +2907,9 @@ zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, nvp = tvp; error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); if (error != 0) { - VOP_UNLOCK1(sdvp); - VOP_UNLOCK1(tdvp); - VOP_UNLOCK1(*svpp); + VOP_UNLOCK(sdvp); + VOP_UNLOCK(tdvp); + VOP_UNLOCK(*svpp); if (error != EBUSY) { vrele(nvp); goto out; @@ -3137,19 +2986,6 @@ zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) return (error); } -#if __FreeBSD_version < 1300124 -static void -cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, - struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp) -{ - - cache_purge(fvp); - if (tvp != NULL) - cache_purge(tvp); - cache_purge_negative(tdvp); -} -#endif - static int zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, @@ -3205,13 +3041,13 @@ zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, } error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr); - VOP_UNLOCK1(sdvp); - VOP_UNLOCK1(*svpp); + VOP_UNLOCK(sdvp); + VOP_UNLOCK(*svpp); out: if (*tvpp != NULL) - VOP_UNLOCK1(*tvpp); + VOP_UNLOCK(*tvpp); if (tdvp != *tvpp) - VOP_UNLOCK1(tdvp); + VOP_UNLOCK(tdvp); return (error); } @@ -3463,17 +3299,17 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, tdvp = ZTOV(tdzp); error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE); if (sdzp->z_zfsvfs->z_replay == B_FALSE) - VOP_UNLOCK1(sdvp); + VOP_UNLOCK(sdvp); if (error != 0) goto fail; - VOP_UNLOCK1(svp); + VOP_UNLOCK(svp); vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME); if (error == EJUSTRETURN) tvp = NULL; else if (error != 0) { - VOP_UNLOCK1(tdvp); + VOP_UNLOCK(tdvp); goto fail; } @@ -3564,7 +3400,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, return (SET_ERROR(EDQUOT)); } - getnewvnode_reserve_(); + getnewvnode_reserve(); tx = dmu_tx_create(zfsvfs->z_os); fuid_dirtied = zfsvfs->z_fuid_dirty; dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); @@ -3611,7 +3447,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, error = zfs_link_create(dzp, name, zp, tx, ZNEW); if (error != 0) { zfs_znode_delete(zp, tx); - VOP_UNLOCK1(ZTOV(zp)); + VOP_UNLOCK(ZTOV(zp)); zrele(zp); } else { zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); @@ -4472,7 +4308,6 @@ zfs_freebsd_write(struct vop_write_args *ap) ap->a_cred)); } -#if __FreeBSD_version >= 1300102 /* * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see * the comment above cache_fplookup for details. 
@@ -4497,9 +4332,7 @@ zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v) return (EAGAIN); return (0); } -#endif -#if __FreeBSD_version >= 1300139 static int zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) { @@ -4519,7 +4352,6 @@ zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) } return (cache_symlink_resolve(v->a_fpl, target, strlen(target))); } -#endif #ifndef _SYS_SYSPROTO_H_ struct vop_access_args { @@ -4557,13 +4389,8 @@ zfs_freebsd_access(struct vop_access_args *ap) if (error == 0) { accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); if (accmode != 0) { -#if __FreeBSD_version >= 1300105 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, zp->z_gid, accmode, ap->a_cred); -#else - error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, - zp->z_gid, accmode, ap->a_cred, NULL); -#endif } } @@ -4898,7 +4725,7 @@ zfs_freebsd_setattr(struct vop_setattr_args *ap) * otherwise, they behave like unprivileged processes. */ if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || - spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) { + priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) { if (zflags & (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { error = securelevel_gt(cred, 0); @@ -5017,10 +4844,8 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap) struct componentname *cnp = ap->a_cnp; vattr_t *vap = ap->a_vap; znode_t *zp = NULL; -#if __FreeBSD_version >= 1300139 char *symlink; size_t symlink_len; -#endif int rc; #if __FreeBSD_version < 1400068 @@ -5036,7 +4861,6 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap) if (rc == 0) { *ap->a_vpp = ZTOV(zp); ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); -#if __FreeBSD_version >= 1300139 MPASS(zp->z_cached_symlink == NULL); symlink_len = strlen(ap->a_target); symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK); @@ -5046,7 +4870,6 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap) atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink, (uintptr_t)symlink); } -#endif } return (rc); } @@ -5064,15 +4887,12 @@ zfs_freebsd_readlink(struct vop_readlink_args *ap) { zfs_uio_t uio; int error; -#if __FreeBSD_version >= 1300139 znode_t *zp = VTOZ(ap->a_vp); char *symlink, *base; size_t symlink_len; bool trycache; -#endif zfs_uio_init(&uio, ap->a_uio); -#if __FreeBSD_version >= 1300139 trycache = false; if (zfs_uio_segflg(&uio) == UIO_SYSSPACE && zfs_uio_iovcnt(&uio) == 1) { @@ -5080,9 +4900,7 @@ zfs_freebsd_readlink(struct vop_readlink_args *ap) symlink_len = zfs_uio_iovlen(&uio, 0); trycache = true; } -#endif error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL); -#if __FreeBSD_version >= 1300139 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL || error != 0 || !trycache) { return (error); @@ -5097,7 +4915,6 @@ zfs_freebsd_readlink(struct vop_readlink_args *ap) cache_symlink_free(symlink, symlink_len + 1); } } -#endif return (error); } @@ -5139,15 +4956,10 @@ zfs_freebsd_inactive(struct vop_inactive_args *ap) { vnode_t *vp = ap->a_vp; -#if __FreeBSD_version >= 1300123 zfs_inactive(vp, curthread->td_ucred, NULL); -#else - zfs_inactive(vp, ap->a_td->td_ucred, NULL); -#endif return (0); } -#if __FreeBSD_version >= 1300042 #ifndef _SYS_SYSPROTO_H_ struct vop_need_inactive_args { struct vnode *a_vp; @@ -5173,7 +4985,6 @@ zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap) return (need); } -#endif #ifndef _SYS_SYSPROTO_H_ struct vop_reclaim_args { @@ -5191,10 +5002,6 @@ zfs_freebsd_reclaim(struct vop_reclaim_args *ap) ASSERT3P(zp, !=, NULL); -#if __FreeBSD_version < 1300042 - /* Destroy the vm object and flush 
associated pages. */ - vnode_destroy_vobject(vp); -#endif /* * z_teardown_inactive_lock protects from a race with * zfs_znode_dmu_fini in zfsvfs_teardown during @@ -5406,7 +5213,7 @@ zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname) } else if (ap->a_uio != NULL) error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); - VOP_UNLOCK1(vp); + VOP_UNLOCK(vp); vn_close(vp, flags, ap->a_cred, td); return (error); } @@ -5693,7 +5500,7 @@ zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) if (error == 0) VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); - VOP_UNLOCK1(vp); + VOP_UNLOCK(vp); vn_close(vp, flags, ap->a_cred, td); return (error); } @@ -6175,26 +5982,13 @@ zfs_vptocnp(struct vop_vptocnp_args *ap) zfs_exit(zfsvfs, FTAG); covered_vp = vp->v_mount->mnt_vnodecovered; -#if __FreeBSD_version >= 1300045 enum vgetstate vs = vget_prep(covered_vp); -#else - vhold(covered_vp); -#endif ltype = VOP_ISLOCKED(vp); - VOP_UNLOCK1(vp); -#if __FreeBSD_version >= 1300045 + VOP_UNLOCK(vp); error = vget_finish(covered_vp, LK_SHARED, vs); -#else - error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); -#endif if (error == 0) { -#if __FreeBSD_version >= 1300123 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf, ap->a_buflen); -#else - error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, - ap->a_buf, ap->a_buflen); -#endif vput(covered_vp); } vn_lock(vp, ltype | LK_RETRY); @@ -6252,7 +6046,6 @@ zfs_deallocate(struct vop_deallocate_args *ap) } #endif -#if __FreeBSD_version >= 1300039 #ifndef _SYS_SYSPROTO_H_ struct vop_copy_file_range_args { struct vnode *a_invp; @@ -6279,7 +6072,6 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) struct vnode *invp = ap->a_invp; struct vnode *outvp = ap->a_outvp; struct mount *mp; - struct uio io; int error; uint64_t len = *ap->a_lenp; @@ -6327,12 +6119,6 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) goto out_locked; #endif - io.uio_offset = *ap->a_outoffp; - io.uio_resid = *ap->a_lenp; - error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); - if (error != 0) - goto out_locked; - error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp), ap->a_outoffp, &len, ap->a_outcred); if (error == EXDEV || error == EAGAIN || error == EINVAL || @@ -6357,7 +6143,6 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) error = ENOSYS; return (error); } -#endif struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; @@ -6366,16 +6151,10 @@ struct vop_vector zfs_shareops; struct vop_vector zfs_vnodeops = { .vop_default = &default_vnodeops, .vop_inactive = zfs_freebsd_inactive, -#if __FreeBSD_version >= 1300042 .vop_need_inactive = zfs_freebsd_need_inactive, -#endif .vop_reclaim = zfs_freebsd_reclaim, -#if __FreeBSD_version >= 1300102 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, -#endif -#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, -#endif .vop_access = zfs_freebsd_access, .vop_allocate = VOP_EINVAL, #if __FreeBSD_version >= 1400032 @@ -6414,29 +6193,21 @@ struct vop_vector zfs_vnodeops = { .vop_getpages = zfs_freebsd_getpages, .vop_putpages = zfs_freebsd_putpages, .vop_vptocnp = zfs_vptocnp, -#if __FreeBSD_version >= 1300064 .vop_lock1 = vop_lock, .vop_unlock = vop_unlock, .vop_islocked = vop_islocked, -#endif #if __FreeBSD_version >= 1400043 .vop_add_writecount = vop_stdadd_writecount_nomsync, #endif -#if __FreeBSD_version >= 1300039 .vop_copy_file_range = zfs_freebsd_copy_file_range, -#endif }; VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); 
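The zfs_vnodeops table above is a vop_vector: a table of operation pointers consulted by the VFS, where slots the filesystem leaves unset are resolved through vop_default. A hedged sketch of that resolution with invented type and function names; FreeBSD's real VOP dispatch machinery is considerably more involved.

#include <stddef.h>
#include <stdio.h>

/* A table of operations; unset slots fall back to vop_default. */
struct vop_table {
	const struct vop_table *vop_default;
	int (*vop_access)(void);
	int (*vop_readlink)(void);
};

static int
generic_access(void)
{
	return (0);
}

static const struct vop_table default_ops = {
	.vop_default = NULL,
	.vop_access = generic_access,
};

static const struct vop_table fs_ops = {
	.vop_default = &default_ops,
	/* .vop_access left unset: resolved through the default table */
};

static int
(*resolve_access(const struct vop_table *v))(void)
{
	for (; v != NULL; v = v->vop_default)
		if (v->vop_access != NULL)
			return (v->vop_access);
	return (NULL);
}

int
main(void)
{
	printf("access -> %d\n", resolve_access(&fs_ops)());
	return (0);
}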
struct vop_vector zfs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = zfs_freebsd_fsync, -#if __FreeBSD_version >= 1300102 - .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, -#endif -#if __FreeBSD_version >= 1300139 + .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, -#endif .vop_access = zfs_freebsd_access, .vop_getattr = zfs_freebsd_getattr, .vop_inactive = zfs_freebsd_inactive, @@ -6460,12 +6231,8 @@ VFS_VOP_VECTOR_REGISTER(zfs_fifoops); */ struct vop_vector zfs_shareops = { .vop_default = &default_vnodeops, -#if __FreeBSD_version >= 1300121 .vop_fplookup_vexec = VOP_EAGAIN, -#endif -#if __FreeBSD_version >= 1300139 .vop_fplookup_symlink = VOP_EAGAIN, -#endif .vop_access = zfs_freebsd_access, .vop_inactive = zfs_freebsd_inactive, .vop_reclaim = zfs_freebsd_reclaim, diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_znode.c index 0eea2a849416..e5c50874e1dd 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_znode.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -92,7 +92,7 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL -#if !defined(KMEM_DEBUG) && __FreeBSD_version >= 1300102 +#if !defined(KMEM_DEBUG) #define _ZFS_USE_SMR static uma_zone_t znode_uma_zone; #else @@ -236,7 +236,7 @@ zfs_znode_init(void) ASSERT3P(znode_cache, ==, NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, - zfs_znode_cache_destructor, NULL, NULL, NULL, 0); + zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE); } static znode_t * @@ -434,13 +434,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ("%s: fast path lookup enabled without smr", __func__)); #endif -#if __FreeBSD_version >= 1300076 KASSERT(curthread->td_vp_reserved != NULL, ("zfs_znode_alloc: getnewvnode without any vnodes reserved")); -#else - KASSERT(curthread->td_vp_reserv > 0, - ("zfs_znode_alloc: getnewvnode without any vnodes reserved")); -#endif error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp); if (error != 0) { zfs_znode_free_kmem(zp); @@ -468,9 +463,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_sync_cnt = 0; zp->z_sync_writes_cnt = 0; zp->z_async_writes_cnt = 0; -#if __FreeBSD_version >= 1300139 atomic_store_ptr(&zp->z_cached_symlink, NULL); -#endif zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); @@ -942,7 +935,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) int locked; int err; - getnewvnode_reserve_(); + getnewvnode_reserve(); again: *zpp = NULL; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); @@ -1055,7 +1048,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) err = insmntque(vp, zfsvfs->z_vfs); if (err == 0) { vp->v_hash = obj_num; - VOP_UNLOCK1(vp); + VOP_UNLOCK(vp); } else { zp->z_vnode = NULL; zfs_znode_dmu_fini(zp); @@ -1275,9 +1268,7 @@ void zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; -#if __FreeBSD_version >= 1300139 char *symlink; -#endif ASSERT3P(zp->z_sa_hdl, ==, NULL); zp->z_vnode = NULL; @@ -1286,14 +1277,12 @@ zfs_znode_free(znode_t *zp) list_remove(&zfsvfs->z_all_znodes, zp); mutex_exit(&zfsvfs->z_znodes_lock); -#if __FreeBSD_version >= 1300139 symlink = atomic_load_ptr(&zp->z_cached_symlink); if (symlink != NULL) { atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink, (uintptr_t)NULL); 
cache_symlink_free(symlink, strlen(symlink) + 1); } -#endif if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zvol_os.c index 38e9debbe877..ddb20b031448 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/zvol_os.c @@ -30,6 +30,7 @@ * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2024, Klara, Inc. */ /* Portions Copyright 2011 Martin Matuska */ @@ -250,7 +251,7 @@ zvol_geom_open(struct g_provider *pp, int flag, int count) } mutex_enter(&zv->zv_state_lock); - if (zv->zv_zso->zso_dying) { + if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { rw_exit(&zvol_state_lock); err = SET_ERROR(ENXIO); goto out_zv_locked; @@ -683,6 +684,11 @@ zvol_geom_bio_strategy(struct bio *bp) rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); + if (zv->zv_flags & ZVOL_REMOVING) { + error = SET_ERROR(ENXIO); + goto resume; + } + switch (bp->bio_cmd) { case BIO_READ: doread = B_TRUE; @@ -1312,11 +1318,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname) args.mda_si_drv2 = zv; if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname) == 0) { -#if __FreeBSD_version > 1300130 dev->si_iosize_max = maxphys; -#else - dev->si_iosize_max = MAXPHYS; -#endif zsd->zsd_cdev = dev; } } @@ -1362,6 +1364,7 @@ zvol_os_free(zvol_state_t *zv) } mutex_destroy(&zv->zv_state_lock); + cv_destroy(&zv->zv_removing_cv); dataset_kstats_destroy(&zv->zv_kstat); kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); @@ -1419,6 +1422,7 @@ zvol_os_create_minor(const char *name) zv = kmem_zalloc(sizeof (*zv), KM_SLEEP); zv->zv_hash = hash; mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP); zv->zv_volmode = volmode; if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { @@ -1456,11 +1460,7 @@ zvol_os_create_minor(const char *name) args.mda_si_drv2 = zv; if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name) == 0) { -#if __FreeBSD_version > 1300130 dev->si_iosize_max = maxphys; -#else - dev->si_iosize_max = MAXPHYS; -#endif zsd->zsd_cdev = dev; knlist_init_sx(&zsd->zsd_selinfo.si_note, &zv->zv_state_lock); diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-kmem-cache.c index 737c2e063f71..16412bc9e6cf 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-kmem-cache.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-kmem-cache.c @@ -144,6 +144,8 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; + if (skc->skc_flags & KMC_RECLAIMABLE) + lflags |= __GFP_RECLAIMABLE; ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM); /* Resulting allocated memory will be page aligned */ @@ -424,6 +426,8 @@ spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj) if (!empty) return (-EEXIST); + if (skc->skc_flags & KMC_RECLAIMABLE) + lflags |= __GFP_RECLAIMABLE; ske = kmalloc(sizeof (*ske), lflags); if (ske == NULL) return (-ENOMEM); @@ -663,6 +667,7 @@ spl_magazine_destroy(spl_kmem_cache_t *skc) * KMC_KVMEM Force kvmem backed SPL cache * KMC_SLAB Force Linux slab backed cache * KMC_NODEBUG Disable 
debugging (unsupported) + * KMC_RECLAIMABLE Memory can be freed under pressure */ spl_kmem_cache_t * spl_kmem_cache_create(const char *name, size_t size, size_t align, @@ -780,6 +785,9 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, if (size > spl_kmem_cache_slab_limit) goto out; + if (skc->skc_flags & KMC_RECLAIMABLE) + slabflags |= SLAB_RECLAIM_ACCOUNT; + #if defined(SLAB_USERCOPY) /* * Required for PAX-enabled kernels if the slab is to be diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-zlib.c b/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-zlib.c index 8c6282ee5d16..a7b6c14ee150 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-zlib.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/spl/spl-zlib.c @@ -202,7 +202,7 @@ spl_zlib_init(void) zlib_workspace_cache = kmem_cache_create( "spl_zlib_workspace_cache", size, 0, NULL, NULL, NULL, NULL, NULL, - KMC_KVMEM); + KMC_KVMEM | KMC_RECLAIMABLE); if (!zlib_workspace_cache) return (-ENOMEM); diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/abd_os.c index 4bf9eaf771b5..f7af20c619a4 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/abd_os.c @@ -281,7 +281,7 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct sg_table table; struct scatterlist *sg; struct page *page, *tmp_page = NULL; - gfp_t gfp = __GFP_NOWARN | GFP_NOIO; + gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO; gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM; unsigned int max_order = MIN(zfs_abd_scatter_max_order, ABD_MAX_ORDER - 1); @@ -403,7 +403,7 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct scatterlist *sg = NULL; struct sg_table table; struct page *page; - gfp_t gfp = __GFP_NOWARN | GFP_NOIO; + gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO; int nr_pages = abd_chunkcnt_for_bytes(size); int i = 0; @@ -762,7 +762,7 @@ abd_init(void) int i; abd_cache = kmem_cache_create("abd_t", sizeof (abd_t), - 0, NULL, NULL, NULL, NULL, NULL, 0); + 0, NULL, NULL, NULL, NULL, NULL, KMC_RECLAIMABLE); wmsum_init(&abd_sums.abdstat_struct_size, 0); wmsum_init(&abd_sums.abdstat_linear_cnt, 0); diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/arc_os.c index 02dd80c06062..75a9ea53225e 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/arc_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/arc_os.c @@ -49,6 +49,7 @@ #include #include #include +#include #endif #include #include @@ -58,6 +59,7 @@ #include #include +#ifdef _KERNEL /* * This is a limit on how many pages the ARC shrinker makes available for * eviction in response to one page allocation attempt. Note that in @@ -72,11 +74,20 @@ * See also the comment in arc_shrinker_count(). * Set to 0 to disable limit. */ -int zfs_arc_shrinker_limit = 10000; +static int zfs_arc_shrinker_limit = 10000; + +/* + * Relative cost of ARC eviction, AKA number of seeks needed to restore evicted + * page. Bigger values make ARC more precious and evictions smaller comparing + * to other kernel subsystems. Value of 4 means parity with page cache, + * according to my reading of kernel's do_shrink_slab() and other code. 
+ */ +static int zfs_arc_shrinker_seeks = DEFAULT_SEEKS; #ifdef CONFIG_MEMORY_HOTPLUG static struct notifier_block arc_hotplug_callback_mem_nb; #endif +#endif /* * Return a default max arc size based on the amount of physical memory. @@ -170,22 +181,7 @@ static unsigned long arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { /* - * __GFP_FS won't be set if we are called from ZFS code (see - * kmem_flags_convert(), which removes it). To avoid a deadlock, we - * don't allow evicting in this case. We return 0 rather than - * SHRINK_STOP so that the shrinker logic doesn't accumulate a - * deficit against us. - */ - if (!(sc->gfp_mask & __GFP_FS)) { - return (0); - } - - /* - * This code is reached in the "direct reclaim" case, where the - * kernel (outside ZFS) is trying to allocate a page, and the system - * is low on memory. - * - * The kernel's shrinker code doesn't understand how many pages the + * The kernel's shrinker code may not understand how many pages the * ARC's callback actually frees, so it may ask the ARC to shrink a * lot for one page allocation. This is problematic because it may * take a long time, thus delaying the page allocation, and because @@ -204,40 +200,44 @@ arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) * * See also the comment above zfs_arc_shrinker_limit. */ + int64_t can_free = btop(arc_evictable_memory()); int64_t limit = zfs_arc_shrinker_limit != 0 ? zfs_arc_shrinker_limit : INT64_MAX; - return (MIN(limit, btop((int64_t)arc_evictable_memory()))); + return (MIN(can_free, limit)); } static unsigned long arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { - ASSERT((sc->gfp_mask & __GFP_FS) != 0); - /* The arc is considered warm once reclaim has occurred */ if (unlikely(arc_warm == B_FALSE)) arc_warm = B_TRUE; + /* + * We are experiencing memory pressure which the arc_evict_zthr was + * unable to keep up with. Set arc_no_grow to briefly pause ARC + * growth to avoid compounding the memory pressure. + */ + arc_no_grow = B_TRUE; + /* * Evict the requested number of pages by reducing arc_c and waiting - * for the requested amount of data to be evicted. + * for the requested amount of data to be evicted. To avoid deadlock + * do not wait for eviction if we may be called from ZFS itself (see + * kmem_flags_convert() removing __GFP_FS). It may cause excessive + * eviction later if many evictions are accumulated, but just skipping + * the eviction is not good either if most of memory is used by ARC. */ - arc_reduce_target_size(ptob(sc->nr_to_scan)); - arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE); + uint64_t to_free = arc_reduce_target_size(ptob(sc->nr_to_scan)); + if (sc->gfp_mask & __GFP_FS) + arc_wait_for_eviction(to_free, B_FALSE, B_FALSE); if (current->reclaim_state != NULL) #ifdef HAVE_RECLAIM_STATE_RECLAIMED - current->reclaim_state->reclaimed += sc->nr_to_scan; + current->reclaim_state->reclaimed += btop(to_free); #else - current->reclaim_state->reclaimed_slab += sc->nr_to_scan; + current->reclaim_state->reclaimed_slab += btop(to_free); #endif - /* - * We are experiencing memory pressure which the arc_evict_zthr was - * unable to keep up with. Set arc_no_grow to briefly pause arc - * growth to avoid compounding the memory pressure. - */ - arc_no_grow = B_TRUE; - /* * When direct reclaim is observed it usually indicates a rapid * increase in memory pressure. 
This occurs because the kswapd @@ -250,7 +250,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) ARCSTAT_BUMP(arcstat_memory_direct_count); } - return (sc->nr_to_scan); + return (btop(to_free)); } static struct shrinker *arc_shrinker = NULL; @@ -304,9 +304,7 @@ arc_set_sys_free(uint64_t allmem) * arc_wait_for_eviction() will wait until at least the * high_wmark_pages() are free (see arc_evict_state_impl()). * - * Note: Even when the system is very low on memory, the kernel's - * shrinker code may only ask for one "batch" of pages (512KB) to be - * evicted. If concurrent allocations consume these pages, there may + * Note: If concurrent allocations consume these pages, there may * still be insufficient free pages, and the OOM killer takes action. * * By setting arc_sys_free large enough, and having @@ -318,20 +316,26 @@ arc_set_sys_free(uint64_t allmem) * It's hard to iterate the zones from a linux kernel module, which * makes it difficult to determine the watermark dynamically. Instead * we compute the maximum high watermark for this system, based - * on the amount of memory, assuming default parameters on Linux kernel - * 5.3. + * on the amount of memory, using the same method as the kernel uses + * to calculate its internal `min_free_kbytes` variable. See + * torvalds/linux@ee8eb9a5fe86 for the change in the upper clamp value + * from 64M to 256M. */ /* * Base wmark_low is 4 * the square root of Kbytes of RAM. */ - long wmark = 4 * int_sqrt(allmem/1024) * 1024; + long wmark = int_sqrt(allmem / 1024 * 16) * 1024; /* - * Clamp to between 128K and 64MB. + * Clamp to between 128K and 256/64MB. */ wmark = MAX(wmark, 128 * 1024); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0) + wmark = MIN(wmark, 256 * 1024 * 1024); +#else wmark = MIN(wmark, 64 * 1024 * 1024); +#endif /* * watermark_boost can increase the wmark by up to 150%. @@ -357,7 +361,7 @@ arc_lowmem_init(void) * swapping out pages when it is preferable to shrink the arc. 
*/ arc_shrinker = spl_register_shrinker("zfs-arc-shrinker", - arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS); + arc_shrinker_count, arc_shrinker_scan, zfs_arc_shrinker_seeks); VERIFY(arc_shrinker); arc_set_sys_free(allmem); @@ -500,3 +504,5 @@ arc_unregister_hotplug(void) ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, "Limit on number of pages that ARC shrinker can reclaim at once"); +ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_seeks, INT, ZMOD_RD, + "Relative cost of ARC eviction vs other kernel subsystems"); diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_vfsops.c index 2015c20d7340..a52f08868d96 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -1264,14 +1264,22 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) if (shrinker->flags & SHRINKER_NUMA_AWARE) { + long tc = 1; + for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + tc += c; + } *objects = 0; for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + if (c > tc) + tc = c; + sc.nr_to_scan = mult_frac(nr_to_scan, c, tc) + 1; *objects += (*shrinker->scan_objects)(shrinker, &sc); - /* - * reset sc.nr_to_scan, modified by - * scan_objects == super_cache_scan - */ - sc.nr_to_scan = nr_to_scan; } } else { *objects = (*shrinker->scan_objects)(shrinker, &sc); diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_znode.c index b99df188c64b..265153e011e7 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zfs_znode.c @@ -194,7 +194,8 @@ zfs_znode_init(void) ASSERT(znode_cache == NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, - zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB); + zfs_znode_cache_destructor, NULL, NULL, NULL, + KMC_SLAB | KMC_RECLAIMABLE); ASSERT(znode_hold_cache == NULL); znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", diff --git a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zvol_os.c index c01caa6da8b4..83f80f62aee7 100644 --- a/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/linux/zfs/zvol_os.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. + * Copyright (c) 2024, Klara, Inc. 
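The reworked zfs_prune() above distributes nr_to_scan across NUMA nodes in proportion to each node's object count: one pass totals the counts, a second hands each node its share via mult_frac(). A standalone sketch of that split with made-up per-node counts; mult_frac() here is a local stand-in for the kernel macro of the same name.

#include <stdio.h>

/* x * num / den without intermediate overflow. */
static unsigned long
mult_frac(unsigned long x, unsigned long num, unsigned long den)
{
	unsigned long q = x / den;
	unsigned long r = x % den;

	return (q * num + r * num / den);
}

int
main(void)
{
	unsigned long counts[] = { 4000, 1000, 0, 3000 };	/* per node */
	unsigned long nr_to_scan = 128;
	unsigned long tc = 1;	/* start at 1 so we never divide by zero */
	int nid;

	for (nid = 0; nid < 4; nid++)	/* pass 1: total object count */
		tc += counts[nid];
	for (nid = 0; nid < 4; nid++) {	/* pass 2: proportional shares */
		if (counts[nid] == 0)
			continue;
		printf("node %d scans %lu\n", nid,
		    mult_frac(nr_to_scan, counts[nid], tc) + 1);
	}
	return (0);
}

The in-kernel loop above additionally re-queries count_objects() on the second pass and raises tc if a node's count grew in between, keeping each node's fraction at or below one.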
*/ #include @@ -526,6 +527,11 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw = io_data_dir(bio, rq); + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + END_IO(zv, bio, rq, -SET_ERROR(ENXIO)); + goto out; + } + if (zvol_request_sync || zv->zv_threading == B_FALSE) force_sync = 1; @@ -730,10 +736,17 @@ zvol_open(struct block_device *bdev, fmode_t flag) #endif if (zv == NULL) { rw_exit(&zvol_state_lock); - return (SET_ERROR(-ENXIO)); + return (-SET_ERROR(ENXIO)); } mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zvol_state_lock); + return (-SET_ERROR(ENXIO)); + } + /* * Make sure zvol is not suspended during first open * (hold zv_suspend_lock) and respect proper lock acquisition @@ -795,10 +808,10 @@ zvol_open(struct block_device *bdev, fmode_t flag) #ifdef HAVE_BLKDEV_GET_ERESTARTSYS schedule(); - return (SET_ERROR(-ERESTARTSYS)); + return (-SET_ERROR(ERESTARTSYS)); #else if ((gethrtime() - start) > timeout) - return (SET_ERROR(-ERESTARTSYS)); + return (-SET_ERROR(ERESTARTSYS)); schedule_timeout_interruptible( MSEC_TO_TICK(10)); @@ -821,7 +834,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) if (zv->zv_open_count == 0) zvol_last_close(zv); - error = SET_ERROR(-EROFS); + error = -SET_ERROR(EROFS); } else { zv->zv_open_count++; } @@ -1313,6 +1326,7 @@ zvol_alloc(dev_t dev, const char *name) list_link_init(&zv->zv_next); mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); #ifdef HAVE_BLK_MQ zv->zv_zso->use_blk_mq = zvol_use_blk_mq; @@ -1438,6 +1452,7 @@ zvol_os_free(zvol_state_t *zv) ida_simple_remove(&zvol_ida, MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); + cv_destroy(&zv->zv_removing_cv); mutex_destroy(&zv->zv_state_lock); dataset_kstats_destroy(&zv->zv_kstat); diff --git a/sys/contrib/subrepo-openzfs/module/zcommon/zpool_prop.c b/sys/contrib/subrepo-openzfs/module/zcommon/zpool_prop.c index e2e3bf5be69e..afdbb6f15e97 100644 --- a/sys/contrib/subrepo-openzfs/module/zcommon/zpool_prop.c +++ b/sys/contrib/subrepo-openzfs/module/zcommon/zpool_prop.c @@ -23,7 +23,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2021, Colm Buckley - * Copyright (c) 2021, Klara Inc. + * Copyright (c) 2021, 2023, Klara Inc. 
*/ #include @@ -125,6 +125,9 @@ zpool_prop_init(void) zprop_register_number(ZPOOL_PROP_BCLONERATIO, "bcloneratio", 0, PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if cloned>", "BCLONE_RATIO", B_FALSE, sfeatures); + zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_SIZE, "dedup_table_size", + 0, PROP_READONLY, ZFS_TYPE_POOL, "", "DDTSIZE", B_FALSE, + sfeatures); /* default number properties */ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, @@ -133,6 +136,9 @@ zpool_prop_init(void) zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT, ZFS_TYPE_POOL, "", "ASHIFT", B_FALSE, sfeatures); + zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_QUOTA, "dedup_table_quota", + UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_POOL, "", "DDTQUOTA", + B_FALSE, sfeatures); /* default index (boolean) properties */ zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, @@ -177,6 +183,9 @@ zpool_prop_init(void) zprop_register_hidden(ZPOOL_PROP_DEDUPDITTO, "dedupditto", PROP_TYPE_NUMBER, PROP_DEFAULT, ZFS_TYPE_POOL, "DEDUPDITTO", B_FALSE, sfeatures); + zprop_register_hidden(ZPOOL_PROP_DEDUPCACHED, + ZPOOL_DEDUPCACHED_PROP_NAME, PROP_TYPE_NUMBER, PROP_READONLY, + ZFS_TYPE_POOL, "DEDUPCACHED", B_FALSE, sfeatures); zfs_mod_list_supported_free(sfeatures); } @@ -381,6 +390,12 @@ vdev_prop_init(void) zprop_register_number(VDEV_PROP_INITIALIZE_ERRORS, "initialize_errors", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", "INITERR", B_FALSE, sfeatures); + zprop_register_number(VDEV_PROP_TRIM_ERRORS, "trim_errors", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "TRIMERR", B_FALSE, + sfeatures); + zprop_register_number(VDEV_PROP_SLOW_IOS, "slow_ios", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "", "SLOW", B_FALSE, + sfeatures); zprop_register_number(VDEV_PROP_OPS_NULL, "null_ops", 0, PROP_READONLY, ZFS_TYPE_VDEV, "", "NULLOP", B_FALSE, sfeatures); @@ -448,6 +463,9 @@ vdev_prop_init(void) zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0, PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0, + PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP", + boolean_table, sfeatures); /* default index properties */ zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, diff --git a/sys/contrib/subrepo-openzfs/module/zfs/abd.c b/sys/contrib/subrepo-openzfs/module/zfs/abd.c index 2c0cda25dbc6..94f492522f0d 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/abd.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/abd.c @@ -1050,6 +1050,31 @@ abd_cmp(abd_t *dabd, abd_t *sabd) abd_cmp_cb, NULL)); } +/* + * Check if ABD content is all-zeroes. + */ +static int +abd_cmp_zero_off_cb(void *data, size_t len, void *private) +{ + (void) private; + + /* This function can only check whole uint64s. Enforce that. */ + ASSERT0(P2PHASE(len, 8)); + + uint64_t *end = (uint64_t *)((char *)data + len); + for (uint64_t *word = (uint64_t *)data; word < end; word++) + if (*word != 0) + return (1); + + return (0); +} + +int +abd_cmp_zero_off(abd_t *abd, size_t off, size_t size) +{ + return (abd_iterate_func(abd, off, size, abd_cmp_zero_off_cb, NULL)); +} + /* * Iterate over code ABDs and a data ABD and call @func_raidz_gen. * diff --git a/sys/contrib/subrepo-openzfs/module/zfs/arc.c b/sys/contrib/subrepo-openzfs/module/zfs/arc.c index 30d30b98a6c6..78c2cf8ec5c3 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/arc.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/arc.c @@ -26,7 +26,7 @@ * Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved. 
* Copyright (c) 2019, loli10K . All rights reserved. * Copyright (c) 2020, George Amanakis. All rights reserved. - * Copyright (c) 2019, Klara Inc. + * Copyright (c) 2019, 2023, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2020, The FreeBSD Foundation [1] * @@ -1258,7 +1258,7 @@ buf_init(void) } hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, - 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0); + 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, KMC_RECLAIMABLE); hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL, NULL, NULL, 0); @@ -4235,6 +4235,18 @@ arc_evict_adj(uint64_t frac, uint64_t total, uint64_t up, uint64_t down, return (frac + up - down); } +/* + * Calculate (x * multiplier / divisor) without unnecesary overflows. + */ +static uint64_t +arc_mf(uint64_t x, uint64_t multiplier, uint64_t divisor) +{ + uint64_t q = (x / divisor); + uint64_t r = (x % divisor); + + return ((q * multiplier) + ((r * multiplier) / divisor)); +} + /* * Evict buffers from the cache, such that arcstat_size is capped by arc_c. */ @@ -4287,17 +4299,20 @@ arc_evict(void) */ int64_t prune = 0; int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size); + int64_t nem = zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA]) + + zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA]) + - zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + - zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]); w = wt * (int64_t)(arc_meta >> 16) >> 16; - if (zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA]) + - zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA]) - - zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) - - zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]) > - w * 3 / 4) { + if (nem > w * 3 / 4) { prune = dn / sizeof (dnode_t) * zfs_arc_dnode_reduce_percent / 100; - } else if (dn > arc_dnode_limit) { - prune = (dn - arc_dnode_limit) / sizeof (dnode_t) * - zfs_arc_dnode_reduce_percent / 100; + if (nem < w && w > 4) + prune = arc_mf(prune, nem - w * 3 / 4, w / 4); + } + if (dn > arc_dnode_limit) { + prune = MAX(prune, (dn - arc_dnode_limit) / sizeof (dnode_t) * + zfs_arc_dnode_reduce_percent / 100); } if (prune > 0) arc_prune_async(prune); @@ -4398,13 +4413,14 @@ arc_flush(spa_t *spa, boolean_t retry) (void) arc_flush_state(arc_uncached, guid, ARC_BUFC_METADATA, retry); } -void -arc_reduce_target_size(int64_t to_free) +uint64_t +arc_reduce_target_size(uint64_t to_free) { - uint64_t c = arc_c; - - if (c <= arc_c_min) - return; + /* + * Get the actual arc size. Even if we don't need it, this updates + * the aggsum lower bound estimate for arc_is_overflowing(). + */ + uint64_t asize = aggsum_value(&arc_sums.arcstat_size); /* * All callers want the ARC to actually evict (at least) this much @@ -4414,16 +4430,28 @@ arc_reduce_target_size(int64_t to_free) * immediately have arc_c < arc_size and therefore the arc_evict_zthr * will evict. 
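The rewritten arc_reduce_target_size() (continued in the next hunk) clamps the reduction twice: the target is never cut below what the ARC actually holds, and never below arc_c_min, and the clamped amount is returned so callers account only for eviction that can really happen. A sketch of just that arithmetic, with invented sizes.

#include <stdio.h>
#include <stdint.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#define	MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	uint64_t arc_c_min = 1ULL << 30;	/* 1 GiB floor */
	uint64_t arc_c = 3ULL << 30;		/* 3 GiB target */
	uint64_t asize = 2ULL << 30;		/* 2 GiB actually resident */
	uint64_t to_free = 4ULL << 30;		/* caller asks for 4 GiB */

	/* Never cut headroom the ARC does not actually occupy... */
	uint64_t c = MIN(arc_c, MAX(asize, arc_c_min));
	/* ...and never promise a reduction below the floor. */
	to_free = MIN(to_free, c - arc_c_min);
	arc_c = c - to_free;

	printf("new arc_c = %llu, accounted to_free = %llu\n",
	    (unsigned long long)arc_c, (unsigned long long)to_free);
	return (0);
}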
*/ - uint64_t asize = aggsum_value(&arc_sums.arcstat_size); - if (asize < c) - to_free += c - asize; - arc_c = MAX((int64_t)c - to_free, (int64_t)arc_c_min); + uint64_t c = arc_c; + if (c > arc_c_min) { + c = MIN(c, MAX(asize, arc_c_min)); + to_free = MIN(to_free, c - arc_c_min); + arc_c = c - to_free; + } else { + to_free = 0; + } - /* See comment in arc_evict_cb_check() on why lock+flag */ - mutex_enter(&arc_evict_lock); - arc_evict_needed = B_TRUE; - mutex_exit(&arc_evict_lock); - zthr_wakeup(arc_evict_zthr); + /* + * Whether or not we reduced the target size, request eviction if the + * current size is over it now, since caller obviously wants some RAM. + */ + if (asize > arc_c) { + /* See comment in arc_evict_cb_check() on why lock+flag */ + mutex_enter(&arc_evict_lock); + arc_evict_needed = B_TRUE; + mutex_exit(&arc_evict_lock); + zthr_wakeup(arc_evict_zthr); + } + + return (to_free); } /* @@ -4630,9 +4658,9 @@ arc_reap_cb_check(void *arg, zthr_t *zthr) static void arc_reap_cb(void *arg, zthr_t *zthr) { - (void) arg, (void) zthr; + int64_t can_free, free_memory, to_free; - int64_t free_memory; + (void) arg, (void) zthr; fstrans_cookie_t cookie = spl_fstrans_mark(); /* @@ -4660,13 +4688,10 @@ arc_reap_cb(void *arg, zthr_t *zthr) * amount, reduce by what is needed to hit the fractional amount. */ free_memory = arc_available_memory(); - - int64_t can_free = arc_c - arc_c_min; - if (can_free > 0) { - int64_t to_free = (can_free >> arc_shrink_shift) - free_memory; - if (to_free > 0) - arc_reduce_target_size(to_free); - } + can_free = arc_c - arc_c_min; + to_free = (MAX(can_free, 0) >> arc_shrink_shift) - free_memory; + if (to_free > 0) + arc_reduce_target_size(to_free); spl_fstrans_unmark(cookie); } @@ -4754,16 +4779,11 @@ arc_adapt(uint64_t bytes) } /* - * Check if arc_size has grown past our upper threshold, determined by - * zfs_arc_overflow_shift. + * Check if ARC current size has grown past our upper thresholds. */ static arc_ovf_level_t -arc_is_overflowing(boolean_t use_reserve) +arc_is_overflowing(boolean_t lax, boolean_t use_reserve) { - /* Always allow at least one block of overflow */ - int64_t overflow = MAX(SPA_MAXBLOCKSIZE, - arc_c >> zfs_arc_overflow_shift); - /* * We just compare the lower bound here for performance reasons. Our * primary goals are to make sure that the arc never grows without @@ -4773,12 +4793,22 @@ arc_is_overflowing(boolean_t use_reserve) * in the ARC. In practice, that's in the tens of MB, which is low * enough to be safe. */ - int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) - - arc_c - overflow / 2; - if (!use_reserve) - overflow /= 2; - return (over < 0 ? ARC_OVF_NONE : - over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE); + int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c - + zfs_max_recordsize; + + /* Always allow at least one block of overflow. */ + if (over < 0) + return (ARC_OVF_NONE); + + /* If we are under memory pressure, report severe overflow. */ + if (!lax) + return (ARC_OVF_SEVERE); + + /* We are not under pressure, so be more or less relaxed. */ + int64_t overflow = (arc_c >> zfs_arc_overflow_shift) / 2; + if (use_reserve) + overflow *= 3; + return (over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE); } static abd_t * @@ -4810,15 +4840,17 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, const void *tag) /* * Wait for the specified amount of data (in bytes) to be evicted from the - * ARC, and for there to be sufficient free memory in the system. 
Waiting for - * eviction ensures that the memory used by the ARC decreases. Waiting for - * free memory ensures that the system won't run out of free pages, regardless - * of ARC behavior and settings. See arc_lowmem_init(). + * ARC, and for there to be sufficient free memory in the system. + * The lax argument specifies that caller does not have a specific reason + * to wait, not aware of any memory pressure. Low memory handlers though + * should set it to B_FALSE to wait for all required evictions to complete. + * The use_reserve argument allows some callers to wait less than others + * to not block critical code paths, possibly blocking other resources. */ void -arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve) +arc_wait_for_eviction(uint64_t amount, boolean_t lax, boolean_t use_reserve) { - switch (arc_is_overflowing(use_reserve)) { + switch (arc_is_overflowing(lax, use_reserve)) { case ARC_OVF_NONE: return; case ARC_OVF_SOME: @@ -4913,7 +4945,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag, * under arc_c. See the comment above zfs_arc_eviction_pct. */ arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100, - alloc_flags & ARC_HDR_USE_RESERVE); + B_TRUE, alloc_flags & ARC_HDR_USE_RESERVE); arc_buf_contents_t type = arc_buf_type(hdr); if (type == ARC_BUFC_METADATA) { @@ -5454,6 +5486,57 @@ arc_read_done(zio_t *zio) } } +/* + * Lookup the block at the specified DVA (in bp), and return the manner in + * which the block is cached. A zero return indicates not cached. + */ +int +arc_cached(spa_t *spa, const blkptr_t *bp) +{ + arc_buf_hdr_t *hdr = NULL; + kmutex_t *hash_lock = NULL; + uint64_t guid = spa_load_guid(spa); + int flags = 0; + + if (BP_IS_EMBEDDED(bp)) + return (ARC_CACHED_EMBEDDED); + + hdr = buf_hash_find(guid, bp, &hash_lock); + if (hdr == NULL) + return (0); + + if (HDR_HAS_L1HDR(hdr)) { + arc_state_t *state = hdr->b_l1hdr.b_state; + /* + * We switch to ensure that any future arc_state_type_t + * changes are handled. This is just a shift to promote + * more compile-time checking. + */ + switch (state->arcs_state) { + case ARC_STATE_ANON: + break; + case ARC_STATE_MRU: + flags |= ARC_CACHED_IN_MRU | ARC_CACHED_IN_L1; + break; + case ARC_STATE_MFU: + flags |= ARC_CACHED_IN_MFU | ARC_CACHED_IN_L1; + break; + case ARC_STATE_UNCACHED: + /* The header is still in L1, probably not for long */ + flags |= ARC_CACHED_IN_L1; + break; + default: + break; + } + } + if (HDR_HAS_L2HDR(hdr)) + flags |= ARC_CACHED_IN_L2; + + mutex_exit(hash_lock); + + return (flags); +} + /* * "Read" the block at the specified DVA (in bp) via the * cache. If the block is found in the cache, invoke the provided @@ -5508,19 +5591,6 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, */ fstrans_cookie_t cookie = spl_fstrans_mark(); top: - /* - * Verify the block pointer contents are reasonable. This should - * always be the case since the blkptr is protected by a checksum. - * However, if there is damage it's desirable to detect this early - * and treat it as a checksum error. This allows an alternate blkptr - * to be tried when one is available (e.g. ditto blocks). - */ - if (!zfs_blkptr_verify(spa, bp, (zio_flags & ZIO_FLAG_CONFIG_WRITER) ? - BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { - rc = SET_ERROR(ECKSUM); - goto done; - } - if (!embedded_bp) { /* * Embedded BP's have no DVA and require no I/O to "read". 
@@ -5540,6 +5610,18 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) { boolean_t is_data = !HDR_ISTYPE_METADATA(hdr); + /* + * Verify the block pointer contents are reasonable. This + * should always be the case since the blkptr is protected by + * a checksum. + */ + if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP, + BLK_VERIFY_LOG)) { + mutex_exit(hash_lock); + rc = SET_ERROR(ECKSUM); + goto done; + } + if (HDR_IO_IN_PROGRESS(hdr)) { if (*arc_flags & ARC_FLAG_CACHED_ONLY) { mutex_exit(hash_lock); @@ -5693,6 +5775,20 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, goto done; } + /* + * Verify the block pointer contents are reasonable. This + * should always be the case since the blkptr is protected by + * a checksum. + */ + if (!zfs_blkptr_verify(spa, bp, + (zio_flags & ZIO_FLAG_CONFIG_WRITER) ? + BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { + if (hash_lock != NULL) + mutex_exit(hash_lock); + rc = SET_ERROR(ECKSUM); + goto done; + } + if (hdr == NULL) { /* * This block is not in the cache or it has diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c b/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c index 56fe2c4dbe30..099883ba2652 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c @@ -2705,6 +2705,9 @@ void dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + ASSERT0(db->db_level); + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); /* * Block cloning: We are going to clone into this block, so undirty @@ -2716,11 +2719,22 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) VERIFY(!dbuf_undirty(db, tx)); ASSERT0P(dbuf_find_dirty_eq(db, tx->tx_txg)); if (db->db_buf != NULL) { - arc_buf_destroy(db->db_buf, db); + /* + * If there is an associated ARC buffer with this dbuf we can + * only destroy it if the previous dirty record does not + * reference it. 
+ */ + dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records); + if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) + arc_buf_destroy(db->db_buf, db); + db->db_buf = NULL; dbuf_clear_data(db); } + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + db->db_state = DB_NOFILL; DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone"); @@ -3103,7 +3117,11 @@ dbuf_destroy(dmu_buf_impl_t *db) */ mutex_enter(&dn->dn_mtx); dnode_rele_and_unlock(dn, db, B_TRUE); +#ifdef USE_DNODE_HANDLE db->db_dnode_handle = NULL; +#else + db->db_dnode = NULL; +#endif dbuf_hash_remove(db); } else { @@ -3252,7 +3270,11 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db_level = level; db->db_blkid = blkid; db->db_dirtycnt = 0; +#ifdef USE_DNODE_HANDLE db->db_dnode_handle = dn->dn_handle; +#else + db->db_dnode = dn; +#endif db->db_parent = parent; db->db_blkptr = blkptr; db->db_hash = hash; @@ -4390,7 +4412,7 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr) dmu_buf_impl_t *parent_db = dr->dr_parent->dr_dbuf; int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; VERIFY3U(parent_db->db_level, ==, 1); - VERIFY3P(parent_db->db_dnode_handle->dnh_dnode, ==, dn); + VERIFY3P(DB_DNODE(parent_db), ==, dn); VERIFY3U(dr->dt.dll.dr_blkid >> epbs, ==, parent_db->db_blkid); blkptr_t *bp = parent_db->db.db_data; return (&bp[dr->dt.dll.dr_blkid & ((1 << epbs) - 1)]); @@ -4813,14 +4835,13 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb) { (void) zio, (void) buf; dmu_buf_impl_t *db = vdb; - dnode_t *dn; blkptr_t *bp; unsigned int epbs, i; ASSERT3U(db->db_level, >, 0); DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; + epbs = DB_DNODE(db)->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; + DB_DNODE_EXIT(db); ASSERT3U(epbs, <, 31); /* Determine if all our children are holes */ @@ -4843,7 +4864,6 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb) memset(db->db.db_data, 0, db->db.db_size); rw_exit(&db->db_rwlock); } - DB_DNODE_EXIT(db); } static void @@ -5062,8 +5082,7 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx) } } else if (db->db.db_object == DMU_META_DNODE_OBJECT) { dnode_phys_t *dnp = db->db.db_data; - ASSERT3U(db->db_dnode_handle->dnh_dnode->dn_type, ==, - DMU_OT_DNODE); + ASSERT3U(dn->dn_type, ==, DMU_OT_DNODE); for (int i = 0; i < db->db.db_size >> DNODE_SHIFT; i += dnp[i].dn_extra_slots + 1) { for (int j = 0; j < dnp[i].dn_nblkptr; j++) { diff --git a/sys/contrib/subrepo-openzfs/module/zfs/ddt.c b/sys/contrib/subrepo-openzfs/module/zfs/ddt.c index 4c53cb0a2f9b..d70ae1a031d5 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/ddt.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/ddt.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2016 by Delphix. All rights reserved. * Copyright (c) 2022 by Pawel Jakub Dawidek - * Copyright (c) 2023, Klara Inc. + * Copyright (c) 2019, 2023, Klara Inc. */ #include @@ -101,6 +101,22 @@ * object and (if necessary), removed from an old one. ddt_tree is cleared and * the next txg can start. * + * ## Dedup quota + * + * A maximum size for all DDTs on the pool can be set with the + * dedup_table_quota property. This is determined in ddt_over_quota() and + * enforced during ddt_lookup(). If the pool is at or over its quota limit, + * ddt_lookup() will only return entries for existing blocks, as updates are + * still possible. New entries will not be created; instead, ddt_lookup() will + * return NULL. 
In response, the DDT write stage (zio_ddt_write()) will remove + * the D bit on the block and reissue the IO as a regular write. The block will + * not be deduplicated. + * + * Note that this is based on the on-disk size of the dedup store. Reclaiming + * this space after deleting entries relies on the ZAP "shrinking" behaviour, + * without which, no space would be recovered and the DDT would continue to be + * considered "over quota". See zap_shrink_enabled. + * * ## Repair IO * * If a read on a dedup block fails, but there are other copies of the block in @@ -152,6 +168,13 @@ static kmem_cache_t *ddt_entry_cache; */ int zfs_dedup_prefetch = 0; +/* + * If the dedup class cannot satisfy a DDT allocation, treat as over quota + * for this many TXGs. + */ +uint_t dedup_class_wait_txgs = 5; + + static const ddt_ops_t *const ddt_ops[DDT_TYPES] = { &ddt_zap_ops, }; @@ -317,6 +340,16 @@ ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ddt->ddt_object[type][class], ddk); } +static void +ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class) +{ + if (!ddt_object_exists(ddt, type, class)) + return; + + ddt_ops[type]->ddt_op_prefetch_all(ddt->ddt_os, + ddt->ddt_object[type][class]); +} + static int ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ddt_entry_t *dde, dmu_tx_t *tx) @@ -554,8 +587,6 @@ ddt_alloc(const ddt_key_t *ddk) static void ddt_free(ddt_entry_t *dde) { - ASSERT(dde->dde_flags & DDE_FLAG_LOADED); - for (int p = 0; p < DDT_PHYS_TYPES; p++) ASSERT3P(dde->dde_lead_zio[p], ==, NULL); @@ -575,9 +606,88 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde) ddt_free(dde); } +static boolean_t +ddt_special_over_quota(spa_t *spa, metaslab_class_t *mc) +{ + if (mc != NULL && metaslab_class_get_space(mc) > 0) { + /* Over quota if allocating outside of this special class */ + if (spa_syncing_txg(spa) <= spa->spa_dedup_class_full_txg + + dedup_class_wait_txgs) { + /* Waiting for some deferred frees to be processed */ + return (B_TRUE); + } + + /* + * We're considered over quota when we hit 85% full, or for + * larger drives, when there is less than 8GB free. + */ + uint64_t allocated = metaslab_class_get_alloc(mc); + uint64_t capacity = metaslab_class_get_space(mc); + uint64_t limit = MAX(capacity * 85 / 100, + (capacity > (1LL<<33)) ? capacity - (1LL<<33) : 0); + + return (allocated >= limit); + } + return (B_FALSE); +} + +/* + * Check if the DDT is over its quota. This can be due to a few conditions: + * 1. 'dedup_table_quota' property is not 0 (none) and the dedup dsize + * exceeds this limit + * + * 2. 'dedup_table_quota' property is set to automatic and + * a. the dedup or special allocation class could not satisfy a DDT + * allocation in a recent transaction + * b. the dedup or special allocation class has exceeded its 85% limit + */ +static boolean_t +ddt_over_quota(spa_t *spa) +{ + if (spa->spa_dedup_table_quota == 0) + return (B_FALSE); + + if (spa->spa_dedup_table_quota != UINT64_MAX) + return (ddt_get_ddt_dsize(spa) > spa->spa_dedup_table_quota); + + /* + * For automatic quota, table size is limited by dedup or special class + */ + if (ddt_special_over_quota(spa, spa_dedup_class(spa))) + return (B_TRUE); + else if (spa_special_has_ddt(spa) && + ddt_special_over_quota(spa, spa_special_class(spa))) + return (B_TRUE); + + return (B_FALSE); +} + +void +ddt_prefetch_all(spa_t *spa) +{ + /* + * Load all DDT entries for each type/class combination. This is + * indended to perform a prefetch on all such blocks. 
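ddt_special_over_quota() above encodes the automatic quota rule: a dedup or special class counts as full at 85% of capacity or, for larger classes, once less than 8 GiB (1LL << 33 bytes) of headroom remains. A standalone sketch of that limit calculation with invented sizes.

#include <stdio.h>
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

static int
special_over_quota(uint64_t allocated, uint64_t capacity)
{
	/* 85% full, or less than 8 GiB of headroom left */
	uint64_t limit = MAX(capacity * 85 / 100,
	    (capacity > (1ULL << 33)) ? capacity - (1ULL << 33) : 0);

	return (allocated >= limit);
}

int
main(void)
{
	uint64_t cap = 100ULL << 30;	/* 100 GiB class */

	/* Here the headroom rule dominates: the limit is 92 GiB, not 85. */
	printf("90 GiB used: %s\n",
	    special_over_quota(90ULL << 30, cap) ? "over" : "under");
	printf("93 GiB used: %s\n",
	    special_over_quota(93ULL << 30, cap) ? "over" : "under");
	return (0);
}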
For the same + * reason that ddt_prefetch isn't locked, this is also not locked. + */ + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + if (!ddt) + continue; + + for (ddt_type_t type = 0; type < DDT_TYPES; type++) { + for (ddt_class_t class = 0; class < DDT_CLASSES; + class++) { + ddt_object_prefetch_all(ddt, type, class); + } + } + } +} + ddt_entry_t * ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) { + spa_t *spa = ddt->ddt_spa; ddt_key_t search; ddt_entry_t *dde; ddt_type_t type; @@ -592,13 +702,28 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) /* Find an existing live entry */ dde = avl_find(&ddt->ddt_tree, &search, &where); if (dde != NULL) { - /* Found it. If it's already loaded, we can just return it. */ + /* If we went over quota, act like we didn't find it */ + if (dde->dde_flags & DDE_FLAG_OVERQUOTA) + return (NULL); + + /* If it's already loaded, we can just return it. */ if (dde->dde_flags & DDE_FLAG_LOADED) return (dde); /* Someone else is loading it, wait for it. */ + dde->dde_waiters++; while (!(dde->dde_flags & DDE_FLAG_LOADED)) cv_wait(&dde->dde_cv, &ddt->ddt_lock); + dde->dde_waiters--; + + /* Loaded but over quota, forget we were ever here */ + if (dde->dde_flags & DDE_FLAG_OVERQUOTA) { + if (dde->dde_waiters == 0) { + avl_remove(&ddt->ddt_tree, dde); + ddt_free(dde); + } + return (NULL); + } return (dde); } @@ -639,14 +764,27 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) dde->dde_type = type; /* will be DDT_TYPES if no entry found */ dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ - if (error == 0) + if (dde->dde_type == DDT_TYPES && + dde->dde_class == DDT_CLASSES && + ddt_over_quota(spa)) { + /* Over quota. If no one is waiting, clean up right now. */ + if (dde->dde_waiters == 0) { + avl_remove(&ddt->ddt_tree, dde); + ddt_free(dde); + return (NULL); + } + + /* Flag cleanup required */ + dde->dde_flags |= DDE_FLAG_OVERQUOTA; + } else if (error == 0) { ddt_stat_update(ddt, dde, -1ULL); + } /* Entry loaded, everyone can proceed now */ dde->dde_flags |= DDE_FLAG_LOADED; cv_broadcast(&dde->dde_cv); - return (dde); + return (dde->dde_flags & DDE_FLAG_OVERQUOTA ? NULL : dde); } void @@ -775,6 +913,7 @@ ddt_load(spa_t *spa) memcpy(&ddt->ddt_histogram_cache, ddt->ddt_histogram, sizeof (ddt->ddt_histogram)); spa->spa_dedup_dspace = ~0ULL; + spa->spa_dedup_dsize = ~0ULL; } return (0); @@ -1032,6 +1171,7 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) memcpy(&ddt->ddt_histogram_cache, ddt->ddt_histogram, sizeof (ddt->ddt_histogram)); spa->spa_dedup_dspace = ~0ULL; + spa->spa_dedup_dsize = ~0ULL; } void @@ -1123,7 +1263,13 @@ ddt_addref(spa_t *spa, const blkptr_t *bp) ddt_enter(ddt); dde = ddt_lookup(ddt, bp, B_TRUE); - ASSERT3P(dde, !=, NULL); + + /* Can be NULL if the entry for this block was pruned. */ + if (dde == NULL) { + ddt_exit(ddt); + spa_config_exit(spa, SCL_ZIO, FTAG); + return (B_FALSE); + } if (dde->dde_type < DDT_TYPES) { ddt_phys_t *ddp; diff --git a/sys/contrib/subrepo-openzfs/module/zfs/ddt_stats.c b/sys/contrib/subrepo-openzfs/module/zfs/ddt_stats.c index af5365a1d114..82b682019ae9 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/ddt_stats.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/ddt_stats.c @@ -129,7 +129,8 @@ ddt_histogram_empty(const ddt_histogram_t *ddh) void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) { - /* Sum the statistics we cached in ddt_object_sync(). 
*/ + memset(ddo_total, 0, sizeof (*ddo_total)); + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ddt_t *ddt = spa->spa_ddt[c]; if (!ddt) @@ -138,8 +139,32 @@ ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) for (ddt_type_t type = 0; type < DDT_TYPES; type++) { for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { + dmu_object_info_t doi; + uint64_t cnt; + int err; + + /* + * These stats were originally calculated + * during ddt_object_load(). + */ + + err = ddt_object_info(ddt, type, class, &doi); + if (err != 0) + continue; + + err = ddt_object_count(ddt, type, class, &cnt); + if (err != 0) + continue; + ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; + + ddo->ddo_count = cnt; + ddo->ddo_dspace = + doi.doi_physical_blocks_512 << 9; + ddo->ddo_mspace = doi.doi_fill_count * + doi.doi_data_block_size; + ddo_total->ddo_count += ddo->ddo_count; ddo_total->ddo_dspace += ddo->ddo_dspace; ddo_total->ddo_mspace += ddo->ddo_mspace; @@ -147,11 +172,24 @@ ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) } } - /* ... and compute the averages. */ - if (ddo_total->ddo_count != 0) { - ddo_total->ddo_dspace /= ddo_total->ddo_count; - ddo_total->ddo_mspace /= ddo_total->ddo_count; - } + /* + * This returns raw counts (not averages). One of the consumers, + * print_dedup_stats(), historically has expected raw counts. + */ + + spa->spa_dedup_dsize = ddo_total->ddo_dspace; +} + +uint64_t +ddt_get_ddt_dsize(spa_t *spa) +{ + ddt_object_t ddo_total; + + /* recalculate after each txg sync */ + if (spa->spa_dedup_dsize == ~0ULL) + ddt_get_dedup_object_stats(spa, &ddo_total); + + return (spa->spa_dedup_dsize); } void @@ -210,3 +248,32 @@ ddt_get_pool_dedup_ratio(spa_t *spa) return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); } + +int +ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize) +{ + uint64_t l1sz, l1tot, l2sz, l2tot; + int err = 0; + + l1tot = l2tot = 0; + *psize = 0; + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + if (ddt == NULL) + continue; + for (ddt_type_t type = 0; type < DDT_TYPES; type++) { + for (ddt_class_t class = 0; class < DDT_CLASSES; + class++) { + err = dmu_object_cached_size(ddt->ddt_os, + ddt->ddt_object[type][class], &l1sz, &l2sz); + if (err != 0) + return (err); + l1tot += l1sz; + l2tot += l2sz; + } + } + } + + *psize = l1tot + l2tot; + return (err); +} diff --git a/sys/contrib/subrepo-openzfs/module/zfs/ddt_zap.c b/sys/contrib/subrepo-openzfs/module/zfs/ddt_zap.c index 741554de3c60..7ce7461a2b25 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/ddt_zap.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/ddt_zap.c @@ -147,6 +147,12 @@ ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); } +static void +ddt_zap_prefetch_all(objset_t *os, uint64_t object) +{ + (void) zap_prefetch_object(os, object); +} + static int ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx) @@ -231,6 +237,7 @@ const ddt_ops_t ddt_zap_ops = { ddt_zap_lookup, ddt_zap_contains, ddt_zap_prefetch, + ddt_zap_prefetch_all, ddt_zap_update, ddt_zap_remove, ddt_zap_walk, diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dmu.c b/sys/contrib/subrepo-openzfs/module/zfs/dmu.c index 8b440aafba43..3dcf49ceb64e 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dmu.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dmu.c @@ -26,7 +26,7 @@ * Copyright (c) 
2016, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. * Copyright (c) 2019 Datto Inc. - * Copyright (c) 2019, Klara Inc. + * Copyright (c) 2019, 2023, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2022 Hewlett Packard Enterprise Development LP. * Copyright (c) 2021, 2022 by Pawel Jakub Dawidek @@ -276,13 +276,14 @@ dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx) dnode_t *dn; int error; + if (newsize < 0 || newsize > db_fake->db_size) + return (SET_ERROR(EINVAL)); + DB_DNODE_ENTER(db); dn = DB_DNODE(db); if (dn->dn_bonus != db) { error = SET_ERROR(EINVAL); - } else if (newsize < 0 || newsize > db_fake->db_size) { - error = SET_ERROR(EINVAL); } else { dnode_setbonuslen(dn, newsize, tx); error = 0; @@ -299,12 +300,13 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx) dnode_t *dn; int error; + if (!DMU_OT_IS_VALID(type)) + return (SET_ERROR(EINVAL)); + DB_DNODE_ENTER(db); dn = DB_DNODE(db); - if (!DMU_OT_IS_VALID(type)) { - error = SET_ERROR(EINVAL); - } else if (dn->dn_bonus != db) { + if (dn->dn_bonus != db) { error = SET_ERROR(EINVAL); } else { dnode_setbonus_type(dn, type, tx); @@ -319,12 +321,10 @@ dmu_object_type_t dmu_get_bonustype(dmu_buf_t *db_fake) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - dnode_t *dn; dmu_object_type_t type; DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - type = dn->dn_bonustype; + type = DB_DNODE(db)->dn_bonustype; DB_DNODE_EXIT(db); return (type); @@ -486,7 +486,6 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag, dmu_buf_t **dbp) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; - dnode_t *dn; int err; uint32_t db_flags = DB_RF_CANFAIL; @@ -494,8 +493,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag, db_flags |= DB_RF_NO_DECRYPT; DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - err = dmu_spill_hold_by_dnode(dn, db_flags, tag, dbp); + err = dmu_spill_hold_by_dnode(DB_DNODE(db), db_flags, tag, dbp); DB_DNODE_EXIT(db); return (err); @@ -668,13 +666,11 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset, dmu_buf_t ***dbpp) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - dnode_t *dn; int err; DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, - numbufsp, dbpp, DMU_READ_PREFETCH); + err = dmu_buf_hold_array_by_dnode(DB_DNODE(db), offset, length, read, + tag, numbufsp, dbpp, DMU_READ_PREFETCH); DB_DNODE_EXIT(db); return (err); @@ -701,7 +697,7 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, const void *tag) * Issue prefetch I/Os for the given blocks. If level is greater than 0, the * indirect blocks prefetched will be those that point to the blocks containing * the data starting at offset, and continuing to offset + len. If the range - * it too long, prefetch the first dmu_prefetch_max bytes as requested, while + * is too long, prefetch the first dmu_prefetch_max bytes as requested, while * for the rest only a higher level, also fitting within dmu_prefetch_max. It * should primarily help random reads, since for long sequential reads there is * a speculative prefetcher. 
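The dmu_prefetch_wait() machinery added in the next hunk turns asynchronous prefetch into a synchronous call with a counter-plus-condvar barrier: each dbuf_prefetch_impl() completion runs dmu_prefetch_done(), which decrements the pending count and broadcasts when it reaches zero. A minimal userspace analogue of that pattern with POSIX primitives; the struct and function names are invented.

#include <pthread.h>

struct batch {
	pthread_mutex_t lock;
	pthread_cond_t cv;
	unsigned pending;	/* I/Os issued but not yet completed */
};

static void
batch_done(struct batch *b)	/* run from each completion callback */
{
	pthread_mutex_lock(&b->lock);
	if (--b->pending == 0)
		pthread_cond_broadcast(&b->cv);
	pthread_mutex_unlock(&b->lock);
}

static void
batch_wait(struct batch *b)	/* run by the issuer after the last issue */
{
	pthread_mutex_lock(&b->lock);
	while (b->pending > 0)
		pthread_cond_wait(&b->cv, &b->lock);
	pthread_mutex_unlock(&b->lock);
}

int
main(void)
{
	struct batch b = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
	};

	batch_wait(&b);		/* nothing pending: returns immediately */
	(void) batch_done;	/* completions would call this */
	return (0);
}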
@@ -777,6 +773,106 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset, rw_exit(&dn->dn_struct_rwlock); } +typedef struct { + kmutex_t dpa_lock; + kcondvar_t dpa_cv; + uint64_t dpa_pending_io; +} dmu_prefetch_arg_t; + +static void +dmu_prefetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t issued) +{ + (void) level; (void) blkid; (void) issued; + dmu_prefetch_arg_t *dpa = arg; + + ASSERT0(level); + + mutex_enter(&dpa->dpa_lock); + ASSERT3U(dpa->dpa_pending_io, >, 0); + if (--dpa->dpa_pending_io == 0) + cv_broadcast(&dpa->dpa_cv); + mutex_exit(&dpa->dpa_lock); +} + +static void +dmu_prefetch_wait_by_dnode(dnode_t *dn, uint64_t offset, uint64_t len) +{ + dmu_prefetch_arg_t dpa; + + mutex_init(&dpa.dpa_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dpa.dpa_cv, NULL, CV_DEFAULT, NULL); + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + + uint64_t start = dbuf_whichblock(dn, 0, offset); + uint64_t end = dbuf_whichblock(dn, 0, offset + len - 1) + 1; + dpa.dpa_pending_io = end - start; + + for (uint64_t blk = start; blk < end; blk++) { + (void) dbuf_prefetch_impl(dn, 0, blk, ZIO_PRIORITY_ASYNC_READ, + 0, dmu_prefetch_done, &dpa); + } + + rw_exit(&dn->dn_struct_rwlock); + + /* wait for prefetch L0 reads to finish */ + mutex_enter(&dpa.dpa_lock); + while (dpa.dpa_pending_io > 0) { + cv_wait(&dpa.dpa_cv, &dpa.dpa_lock); + } + mutex_exit(&dpa.dpa_lock); + + mutex_destroy(&dpa.dpa_lock); + cv_destroy(&dpa.dpa_cv); +} + +/* + * Issue prefetch I/Os for the given L0 block range and wait for the I/O + * to complete. This does not enforce dmu_prefetch_max and will prefetch + * the entire range. The blocks are read from disk into the ARC but no + * decompression occurs (i.e., the dbuf cache is not required). + */ +int +dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size) +{ + dnode_t *dn; + int err = 0; + + err = dnode_hold(os, object, FTAG, &dn); + if (err != 0) + return (err); + + /* + * Chunk the requests (16 indirects worth) so that we can be interrupted. + */ + uint64_t chunksize; + if (dn->dn_indblkshift) { + uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1); + chunksize = (nbps * 16) << dn->dn_datablkshift; + } else { + chunksize = dn->dn_datablksz; + } + + while (size > 0) { + uint64_t mylen = MIN(size, chunksize); + + dmu_prefetch_wait_by_dnode(dn, offset, mylen); + + offset += mylen; + size -= mylen; + + if (issig()) { + err = SET_ERROR(EINTR); + break; + } + } + + dnode_rele(dn, FTAG); + + return (err); +} + /* * Issue prefetch I/Os for the given object's dnode. */ @@ -815,6 +911,13 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks) ASSERT3U(minimum, <=, *start); + /* dn_nlevels == 1 means we don't have any L1 blocks */ + if (dn->dn_nlevels <= 1) { + *l1blks = 0; + *start = minimum; + return (0); + } + /* * Check if we can free the entire range assuming that all of the * L1 blocks in this range have data.
If we can, we use this @@ -1301,15 +1404,13 @@ int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; - dnode_t *dn; int err; if (size == 0) return (0); DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - err = dmu_read_uio_dnode(dn, uio, size); + err = dmu_read_uio_dnode(DB_DNODE(db), uio, size); DB_DNODE_EXIT(db); return (err); @@ -1403,15 +1504,13 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; - dnode_t *dn; int err; if (size == 0) return (0); DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - err = dmu_write_uio_dnode(dn, uio, size, tx); + err = dmu_write_uio_dnode(DB_DNODE(db), uio, size, tx); DB_DNODE_EXIT(db); return (err); @@ -1444,6 +1543,114 @@ dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size, } #endif /* _KERNEL */ +static void +dmu_cached_bps(spa_t *spa, blkptr_t *bps, uint_t nbps, + uint64_t *l1sz, uint64_t *l2sz) +{ + int cached_flags; + + if (bps == NULL) + return; + + for (size_t blk_off = 0; blk_off < nbps; blk_off++) { + blkptr_t *bp = &bps[blk_off]; + + if (BP_IS_HOLE(bp)) + continue; + + cached_flags = arc_cached(spa, bp); + if (cached_flags == 0) + continue; + + if ((cached_flags & (ARC_CACHED_IN_L1 | ARC_CACHED_IN_L2)) == + ARC_CACHED_IN_L2) + *l2sz += BP_GET_LSIZE(bp); + else + *l1sz += BP_GET_LSIZE(bp); + } +} + +/* + * Estimate DMU object cached size. + */ +int +dmu_object_cached_size(objset_t *os, uint64_t object, + uint64_t *l1sz, uint64_t *l2sz) +{ + dnode_t *dn; + dmu_object_info_t doi; + int err = 0; + + *l1sz = *l2sz = 0; + + if (dnode_hold(os, object, FTAG, &dn) != 0) + return (0); + + if (dn->dn_nlevels < 2) { + dnode_rele(dn, FTAG); + return (0); + } + + dmu_object_info_from_dnode(dn, &doi); + + for (uint64_t off = 0; off < doi.doi_max_offset; + off += dmu_prefetch_max) { + /* dbuf_read doesn't prefetch L1 blocks. */ + dmu_prefetch_by_dnode(dn, 1, off, + dmu_prefetch_max, ZIO_PRIORITY_SYNC_READ); + } + + /* + * Hold all valid L1 blocks, asking ARC the status of each BP + * contained in each such L1 block. + */ + uint_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1); + uint64_t l1blks = 1 + (dn->dn_maxblkid / nbps); + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + for (uint64_t blk = 0; blk < l1blks; blk++) { + dmu_buf_impl_t *db = NULL; + + if (issig()) { + /* + * On interrupt, get out, and bubble up EINTR + */ + err = EINTR; + break; + } + + /* + * If we get an i/o error here, the L1 can't be read, + * and nothing under it could be cached, so we just + * continue. Ignoring the error from dbuf_hold_impl + * or from dbuf_read is then a reasonable choice. + */ + err = dbuf_hold_impl(dn, 1, blk, B_TRUE, B_FALSE, FTAG, &db); + if (err != 0) { + /* + * ignore error and continue + */ + err = 0; + continue; + } + + err = dbuf_read(db, NULL, DB_RF_CANFAIL); + if (err == 0) { + dmu_cached_bps(dmu_objset_spa(os), db->db.db_data, + nbps, l1sz, l2sz); + } + /* + * error may be ignored, and we continue + */ + err = 0; + dbuf_rele(db, FTAG); + } + rw_exit(&dn->dn_struct_rwlock); + + dnode_rele(dn, FTAG); + return (err); +} + /* * Allocate a loaned anonymous arc buffer. 
*/ @@ -1539,11 +1746,11 @@ dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, dmu_tx_t *tx) { int err; - dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; - DB_DNODE_ENTER(dbuf); - err = dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx); - DB_DNODE_EXIT(dbuf); + DB_DNODE_ENTER(db); + err = dmu_assign_arcbuf_by_dnode(DB_DNODE(db), offset, buf, tx); + DB_DNODE_EXIT(db); return (err); } @@ -1782,7 +1989,6 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) dmu_sync_arg_t *dsa; zbookmark_phys_t zb; zio_prop_t zp; - dnode_t *dn; ASSERT(pio != NULL); ASSERT(txg != 0); @@ -1791,8 +1997,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) db->db.db_object, db->db_level, db->db_blkid); DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp); + dmu_write_policy(os, DB_DNODE(db), db->db_level, WP_DMU_SYNC, &zp); DB_DNODE_EXIT(db); /* @@ -1877,11 +2082,14 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) * zio_done(), which VERIFYs that the override BP is identical * to the on-disk BP. */ - DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - if (dr_next != NULL || dnode_block_freed(dn, db->db_blkid)) + if (dr_next != NULL) { zp.zp_nopwrite = B_FALSE; - DB_DNODE_EXIT(db); + } else { + DB_DNODE_ENTER(db); + if (dnode_block_freed(DB_DNODE(db), db->db_blkid)) + zp.zp_nopwrite = B_FALSE; + DB_DNODE_EXIT(db); + } ASSERT(dr->dr_txg == txg); if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC || @@ -2154,6 +2362,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN); zp->zp_zpl_smallblk = DMU_OT_IS_FILE(zp->zp_type) ? os->os_zpl_special_smallblock : 0; + zp->zp_storage_type = dn ? 
dn->dn_storage_type : DMU_OT_NONE; ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT); } @@ -2487,11 +2696,9 @@ void dmu_object_dnsize_from_db(dmu_buf_t *db_fake, int *dnsize) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - dnode_t *dn; DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - *dnsize = dn->dn_num_slots << DNODE_SHIFT; + *dnsize = DB_DNODE(db)->dn_num_slots << DNODE_SHIFT; DB_DNODE_EXIT(db); } diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dmu_tx.c b/sys/contrib/subrepo-openzfs/module/zfs/dmu_tx.c index 8451b5082e86..2c2a6c7642a5 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dmu_tx.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dmu_tx.c @@ -1520,11 +1520,8 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) ASSERT(tx->tx_txg == 0); dmu_tx_hold_spill(tx, object); } else { - dnode_t *dn; - DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - if (dn->dn_have_spill) { + if (DB_DNODE(db)->dn_have_spill) { ASSERT(tx->tx_txg == 0); dmu_tx_hold_spill(tx, object); } diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dnode.c b/sys/contrib/subrepo-openzfs/module/zfs/dnode.c index a703fd414f87..ecc6761f8fa4 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dnode.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dnode.c @@ -306,7 +306,7 @@ dnode_init(void) { ASSERT(dnode_cache == NULL); dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t), - 0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0); + 0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_RECLAIMABLE); kmem_cache_set_move(dnode_cache, dnode_move); wmsum_init(&dnode_sums.dnode_hold_dbuf_hold, 0); @@ -543,6 +543,17 @@ dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx) rw_exit(&dn->dn_struct_rwlock); } +void +dnode_set_storage_type(dnode_t *dn, dmu_object_type_t newtype) +{ + /* + * This is not in the dnode_phys, but it should be, and perhaps one day + * it will. For now we require it be set after taking a hold. + */ + ASSERT3U(zfs_refcount_count(&dn->dn_holds), >=, 1); + dn->dn_storage_type = newtype; +} + void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx) { @@ -604,6 +615,8 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0); dn->dn_id_flags = 0; + dn->dn_storage_type = DMU_OT_NONE; + dmu_zfetch_init(&dn->dn_zfetch, dn); ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); @@ -687,6 +700,8 @@ dnode_destroy(dnode_t *dn) dn->dn_newprojid = ZFS_DEFAULT_PROJID; dn->dn_id_flags = 0; + dn->dn_storage_type = DMU_OT_NONE; + dmu_zfetch_fini(&dn->dn_zfetch); kmem_cache_free(dnode_cache, dn); arc_space_return(sizeof (dnode_t), ARC_SPACE_DNODE); @@ -946,6 +961,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_newgid = odn->dn_newgid; ndn->dn_newprojid = odn->dn_newprojid; ndn->dn_id_flags = odn->dn_id_flags; + ndn->dn_storage_type = odn->dn_storage_type; dmu_zfetch_init(&ndn->dn_zfetch, ndn); /* @@ -1004,6 +1020,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_newgid = 0; odn->dn_newprojid = ZFS_DEFAULT_PROJID; odn->dn_id_flags = 0; + odn->dn_storage_type = DMU_OT_NONE; /* * Mark the dnode. @@ -1020,6 +1037,19 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg) int64_t refcount; uint32_t dbufs; +#ifndef USE_DNODE_HANDLE + /* + * We can't move dnodes if dbufs reference them directly without + * using handles and respective locking. Unless USE_DNODE_HANDLE + * is defined, the code below is only to make sure it still builds, + * but it should never be used, since it is unsafe.
+ */ +#ifdef ZFS_DEBUG + PANIC("dnode_move() called without USE_DNODE_HANDLE"); +#endif + return (KMEM_CBRC_NO); +#endif + /* * The dnode is on the objset's list of known dnodes if the objset * pointer is valid. We set the low bit of the objset pointer when @@ -1757,7 +1787,7 @@ dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting) * handle. */ #ifdef ZFS_DEBUG - ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread); + ASSERT(refs > 0 || zrl_owner(&dnh->dnh_zrlock) != curthread); #endif /* NOTE: the DNODE_DNODE does not have a dn_dbuf */ diff --git a/sys/contrib/subrepo-openzfs/module/zfs/lz4_zfs.c b/sys/contrib/subrepo-openzfs/module/zfs/lz4_zfs.c index 820556effb8b..de90c45f2f07 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/lz4_zfs.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/lz4_zfs.c @@ -886,7 +886,8 @@ void lz4_init(void) { lz4_cache = kmem_cache_create("lz4_cache", - sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0); + sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, + KMC_RECLAIMABLE); } void diff --git a/sys/contrib/subrepo-openzfs/module/zfs/sa.c b/sys/contrib/subrepo-openzfs/module/zfs/sa.c index 0ae4c331dd36..bc4c9dff31e7 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/sa.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/sa.c @@ -236,7 +236,7 @@ sa_cache_init(void) { sa_cache = kmem_cache_create("sa_cache", sizeof (sa_handle_t), 0, sa_cache_constructor, - sa_cache_destructor, NULL, NULL, NULL, 0); + sa_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE); } void @@ -1501,6 +1501,42 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) return (error); } +/* + * Return size of an attribute + */ + +static int +sa_size_locked(sa_handle_t *hdl, sa_attr_type_t attr, int *size) +{ + sa_bulk_attr_t bulk; + int error; + + bulk.sa_data = NULL; + bulk.sa_attr = attr; + bulk.sa_data_func = NULL; + + ASSERT(hdl); + ASSERT(MUTEX_HELD(&hdl->sa_lock)); + if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { + return (error); + } + *size = bulk.sa_size; + + return (0); +} + +int +sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) +{ + int error; + + mutex_enter(&hdl->sa_lock); + error = sa_size_locked(hdl, attr, size); + mutex_exit(&hdl->sa_lock); + + return (error); +} + #ifdef _KERNEL int sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, zfs_uio_t *uio) @@ -1542,6 +1578,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid) uint64_t crtime[2], mtime[2], ctime[2], atime[2]; zfs_acl_phys_t znode_acl = { 0 }; char scanstamp[AV_SCANSTAMP_SZ]; + char *dxattr_obj = NULL; + int dxattr_size = 0; if (zp->z_acl_cached == NULL) { zfs_acl_t *aclp; @@ -1623,6 +1661,17 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid) if (err != 0 && err != ENOENT) goto out; + err = sa_size_locked(hdl, SA_ZPL_DXATTR(zfsvfs), &dxattr_size); + if (err != 0 && err != ENOENT) + goto out; + if (dxattr_size != 0) { + dxattr_obj = vmem_alloc(dxattr_size, KM_SLEEP); + err = sa_lookup_locked(hdl, SA_ZPL_DXATTR(zfsvfs), dxattr_obj, + dxattr_size); + if (err != 0 && err != ENOENT) + goto out; + } + zp->z_projid = projid; zp->z_pflags |= ZFS_PROJID; links = ZTONLNK(zp); @@ -1674,6 +1723,11 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid) zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP; } + if (dxattr_obj) { + SA_ADD_BULK_ATTR(attrs, count, SA_ZPL_DXATTR(zfsvfs), + NULL, dxattr_obj, dxattr_size); + } + VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0); VERIFY(sa_replace_all_by_template_locked(hdl, 
attrs, count, tx) == 0); if (znode_acl.z_acl_extern_obj) { @@ -1688,6 +1742,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid) mutex_exit(&hdl->sa_lock); kmem_free(attrs, sizeof (sa_bulk_attr_t) * ZPL_END); kmem_free(bulk, sizeof (sa_bulk_attr_t) * ZPL_END); + if (dxattr_obj) + vmem_free(dxattr_obj, dxattr_size); return (err); } #endif @@ -1852,7 +1908,6 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, { sa_os_t *sa = hdl->sa_os->os_sa; dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; - dnode_t *dn; sa_bulk_attr_t *attr_desc; void *old_data[2]; int bonus_attr_count = 0; @@ -1872,8 +1927,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, /* First make of copy of the old data */ DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - if (dn->dn_bonuslen != 0) { + if (DB_DNODE(db)->dn_bonuslen != 0) { bonus_data_size = hdl->sa_bonus->db_size; old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); memcpy(old_data[0], hdl->sa_bonus->db_data, @@ -2059,32 +2113,6 @@ sa_update(sa_handle_t *hdl, sa_attr_type_t type, return (error); } -/* - * Return size of an attribute - */ - -int -sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) -{ - sa_bulk_attr_t bulk; - int error; - - bulk.sa_data = NULL; - bulk.sa_attr = attr; - bulk.sa_data_func = NULL; - - ASSERT(hdl); - mutex_enter(&hdl->sa_lock); - if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { - mutex_exit(&hdl->sa_lock); - return (error); - } - *size = bulk.sa_size; - - mutex_exit(&hdl->sa_lock); - return (0); -} - int sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) { diff --git a/sys/contrib/subrepo-openzfs/module/zfs/spa.c b/sys/contrib/subrepo-openzfs/module/zfs/spa.c index 638572996c3a..cafc7196c354 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/spa.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/spa.c @@ -34,7 +34,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2021, Colm Buckley * Copyright (c) 2023 Hewlett Packard Enterprise Development LP. - * Copyright (c) 2024, Klara Inc. + * Copyright (c) 2023, 2024, Klara Inc. */ /* @@ -337,6 +337,55 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, const char *strval, nvlist_free(propval); } +static int +spa_prop_add(spa_t *spa, const char *propname, nvlist_t *outnvl) +{ + zpool_prop_t prop = zpool_name_to_prop(propname); + zprop_source_t src = ZPROP_SRC_NONE; + uint64_t intval; + int err; + + /* + * NB: Not all property lookups via this API require + * the spa props lock, so they must explicitly grab it here. + */ + switch (prop) { + case ZPOOL_PROP_DEDUPCACHED: + err = ddt_get_pool_dedup_cached(spa, &intval); + if (err != 0) + return (SET_ERROR(err)); + break; + default: + return (SET_ERROR(EINVAL)); + } + + spa_prop_add_list(outnvl, prop, NULL, intval, src); + + return (0); +} + +int +spa_prop_get_nvlist(spa_t *spa, char **props, unsigned int n_props, + nvlist_t **outnvl) +{ + int err = 0; + + if (props == NULL) + return (0); + + if (*outnvl == NULL) { + err = nvlist_alloc(outnvl, NV_UNIQUE_NAME, KM_SLEEP); + if (err) + return (err); + } + + for (unsigned int i = 0; i < n_props && err == 0; i++) { + err = spa_prop_add(spa, props[i], *outnvl); + } + + return (err); +} + /* * Add a user property (source=src, propname=propval) to an nvlist.
*/ @@ -406,6 +455,9 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONERATIO, NULL, brt_get_ratio(spa), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL, + ddt_get_ddt_dsize(spa), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); @@ -500,9 +552,11 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) dsl_pool_t *dp; int err; - err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); - if (err) - return (err); + if (*nvp == NULL) { + err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); + if (err) + return (err); + } dp = spa_get_dsl(spa); dsl_pool_config_enter(dp, FTAG); @@ -672,6 +726,10 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) error = SET_ERROR(EINVAL); break; + case ZPOOL_PROP_DEDUP_TABLE_QUOTA: + error = nvpair_value_uint64(elem, &intval); + break; + case ZPOOL_PROP_DELEGATION: case ZPOOL_PROP_AUTOREPLACE: case ZPOOL_PROP_LISTSNAPS: @@ -4732,6 +4790,8 @@ spa_ld_get_props(spa_t *spa) spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_DEDUP_TABLE_QUOTA, + &spa->spa_dedup_table_quota); spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim); spa->spa_autoreplace = (autoreplace != 0); @@ -6588,6 +6648,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); + spa->spa_dedup_table_quota = + zpool_prop_default_numeric(ZPOOL_PROP_DEDUP_TABLE_QUOTA); if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); @@ -6755,6 +6817,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; + spa->spa_spares.sav_label_sync = B_TRUE; } if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { @@ -6770,6 +6833,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; + spa->spa_l2cache.sav_label_sync = B_TRUE; } /* @@ -9631,6 +9695,9 @@ spa_sync_props(void *arg, dmu_tx_t *tx) case ZPOOL_PROP_MULTIHOST: spa->spa_multihost = intval; break; + case ZPOOL_PROP_DEDUP_TABLE_QUOTA: + spa->spa_dedup_table_quota = intval; + break; default: break; } diff --git a/sys/contrib/subrepo-openzfs/module/zfs/spa_misc.c b/sys/contrib/subrepo-openzfs/module/zfs/spa_misc.c index d1d41bbe7214..97191e768549 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/spa_misc.c @@ -1996,13 +1996,31 @@ spa_dedup_class(spa_t *spa) return (spa->spa_dedup_class); } +boolean_t +spa_special_has_ddt(spa_t *spa) +{ + return (zfs_ddt_data_is_special && + spa->spa_special_class->mc_groups != 0); +} + /* * Locate an appropriate allocation class */ metaslab_class_t * -spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype, - uint_t level, uint_t special_smallblk) +spa_preferred_class(spa_t *spa, const zio_t *zio) { + const zio_prop_t *zp = &zio->io_prop; + + /* + * Override object type for the purposes of selecting a storage class. 
+ * Primarily for DMU_OTN_ types where we can't explicitly control their + * storage class; instead, choose a static type that most closely matches + what we want. + */ + dmu_object_type_t objtype = + zp->zp_storage_type == DMU_OT_NONE ? + zp->zp_type : zp->zp_storage_type; + /* * ZIL allocations determine their class in zio_alloc_zil(). */ @@ -2020,14 +2038,15 @@ spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype, } /* Indirect blocks for user data can land in special if allowed */ - if (level > 0 && (DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL)) { + if (zp->zp_level > 0 && + (DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL)) { if (has_special_class && zfs_user_indirect_is_special) return (spa_special_class(spa)); else return (spa_normal_class(spa)); } - if (DMU_OT_IS_METADATA(objtype) || level > 0) { + if (DMU_OT_IS_METADATA(objtype) || zp->zp_level > 0) { if (has_special_class) return (spa_special_class(spa)); else @@ -2040,7 +2059,7 @@ spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype, * zfs_special_class_metadata_reserve_pct exclusively for metadata. */ if (DMU_OT_IS_FILE(objtype) && - has_special_class && size <= special_smallblk) { + has_special_class && zio->io_size <= zp->zp_zpl_smallblk) { metaslab_class_t *special = spa_special_class(spa); uint64_t alloc = metaslab_class_get_alloc(special); uint64_t space = metaslab_class_get_space(special); diff --git a/sys/contrib/subrepo-openzfs/module/zfs/vdev.c b/sys/contrib/subrepo-openzfs/module/zfs/vdev.c index 11cc39ba3527..6ae0a14127bf 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/vdev.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/vdev.c @@ -6222,6 +6222,16 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) vd->vdev_stat.vs_initialize_errors, ZPROP_SRC_NONE); continue; + case VDEV_PROP_TRIM_ERRORS: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_trim_errors, + ZPROP_SRC_NONE); + continue; + case VDEV_PROP_SLOW_IOS: + vdev_prop_add_list(outnvl, propname, NULL, + vd->vdev_stat.vs_slow_ios, + ZPROP_SRC_NONE); + continue; case VDEV_PROP_OPS_NULL: vdev_prop_add_list(outnvl, propname, NULL, vd->vdev_stat.vs_ops[ZIO_TYPE_NULL], @@ -6306,6 +6316,14 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) ZPROP_SRC_NONE); } continue; + case VDEV_PROP_TRIM_SUPPORT: + /* only valid for leaf vdevs */ + if (vd->vdev_ops->vdev_op_leaf) { + vdev_prop_add_list(outnvl, propname, + NULL, vd->vdev_has_trim, + ZPROP_SRC_NONE); + } + continue; /* Numeric Properites */ case VDEV_PROP_ALLOCATING: /* Leaf vdevs cannot have this property */ diff --git a/sys/contrib/subrepo-openzfs/module/zfs/vdev_label.c b/sys/contrib/subrepo-openzfs/module/zfs/vdev_label.c index ed592514fded..47346dd5acff 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/vdev_label.c @@ -1007,6 +1007,47 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, return (state == POOL_STATE_ACTIVE); } +static nvlist_t * +vdev_aux_label_generate(vdev_t *vd, boolean_t reason_spare) +{ + /* + * For inactive hot spares and level 2 ARC devices, we generate + * a special label that identifies it as a mutually shared hot + * spare or l2cache device. We write the label in case of + * addition or removal of hot spare or l2cache vdev (in which + * case we want to revert the labels).
+ */ + nvlist_t *label = fnvlist_alloc(); + fnvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, + spa_version(vd->vdev_spa)); + fnvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, reason_spare ? + POOL_STATE_SPARE : POOL_STATE_L2CACHE); + fnvlist_add_uint64(label, ZPOOL_CONFIG_GUID, vd->vdev_guid); + + /* + * This is merely to facilitate reporting the ashift of the + * cache device through zdb. The actual retrieval of the + * ashift (in vdev_alloc()) uses the nvlist + * spa->spa_l2cache->sav_config (populated in + * spa_ld_open_aux_vdevs()). + */ + if (!reason_spare) + fnvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); + + /* + * Add path information to help find it during pool import + */ + if (vd->vdev_path != NULL) + fnvlist_add_string(label, ZPOOL_CONFIG_PATH, vd->vdev_path); + if (vd->vdev_devid != NULL) + fnvlist_add_string(label, ZPOOL_CONFIG_DEVID, vd->vdev_devid); + if (vd->vdev_physpath != NULL) { + fnvlist_add_string(label, ZPOOL_CONFIG_PHYS_PATH, + vd->vdev_physpath); + } + return (label); +} + /* * Initialize a vdev label. We check to make sure each leaf device is not in * use, and writable. We put down an initial label which we will later @@ -1121,49 +1162,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * be written again with a meaningful txg by spa_sync(). */ if (reason_spare || reason_l2cache) { - /* - * For inactive hot spares and level 2 ARC devices, we generate - * a special label that identifies as a mutually shared hot - * spare or l2cache device. We write the label in case of - * addition or removal of hot spare or l2cache vdev (in which - * case we want to revert the labels). - */ - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, - spa_version(spa)) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, - reason_spare ? POOL_STATE_SPARE : POOL_STATE_L2CACHE) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - - /* - * This is merely to facilitate reporting the ashift of the - * cache device through zdb. The actual retrieval of the - * ashift (in vdev_alloc()) uses the nvlist - * spa->spa_l2cache->sav_config (populated in - * spa_ld_open_aux_vdevs()). - */ - if (reason_l2cache) { - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, - vd->vdev_ashift) == 0); - } - - /* - * Add path information to help find it during pool import - */ - if (vd->vdev_path != NULL) { - VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PATH, - vd->vdev_path) == 0); - } - if (vd->vdev_devid != NULL) { - VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_DEVID, - vd->vdev_devid) == 0); - } - if (vd->vdev_physpath != NULL) { - VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PHYS_PATH, - vd->vdev_physpath) == 0); - } + label = vdev_aux_label_generate(vd, reason_spare); /* * When spare or l2cache (aux) vdev is added during pool @@ -1900,6 +1899,8 @@ vdev_label_sync(zio_t *zio, uint64_t *good_writes, abd_t *vp_abd; char *buf; size_t buflen; + vdev_t *pvd = vd->vdev_parent; + boolean_t spare_in_use = B_FALSE; for (int c = 0; c < vd->vdev_children; c++) { vdev_label_sync(zio, good_writes, @@ -1920,10 +1921,17 @@ vdev_label_sync(zio_t *zio, uint64_t *good_writes, if (vd->vdev_ops == &vdev_draid_spare_ops) return; + if (pvd && pvd->vdev_ops == &vdev_spare_ops) + spare_in_use = B_TRUE; + /* * Generate a label describing the top-level config to which we belong. 
*/ - label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); + if ((vd->vdev_isspare && !spare_in_use) || vd->vdev_isl2cache) { + label = vdev_aux_label_generate(vd, vd->vdev_isspare); + } else { + label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); + } vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); abd_zero(vp_abd, sizeof (vdev_phys_t)); @@ -1973,6 +1981,24 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) zio_nowait(vio); } + /* + * AUX path may have changed during import + */ + spa_aux_vdev_t *sav[2] = {&spa->spa_spares, &spa->spa_l2cache}; + for (int i = 0; i < 2; i++) { + for (int v = 0; v < sav[i]->sav_count; v++) { + uint64_t *good_writes; + if (!sav[i]->sav_label_sync) + continue; + good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); + zio_t *vio = zio_null(zio, spa, NULL, + vdev_label_sync_ignore_done, good_writes, flags); + vdev_label_sync(vio, good_writes, sav[i]->sav_vdevs[v], + l, txg, flags); + zio_nowait(vio); + } + } + error = zio_wait(zio); /* @@ -1983,6 +2009,15 @@ ... for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) zio_flush(zio, vd); + for (int i = 0; i < 2; i++) { + if (!sav[i]->sav_label_sync) + continue; + for (int v = 0; v < sav[i]->sav_count; v++) + zio_flush(zio, sav[i]->sav_vdevs[v]); + if (l == 1) + sav[i]->sav_label_sync = B_FALSE; + } + (void) zio_wait(zio); return (error); diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zap_micro.c b/sys/contrib/subrepo-openzfs/module/zfs/zap_micro.c index 7ebd88418740..026f53e32d64 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zap_micro.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zap_micro.c @@ -1072,6 +1072,21 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name) return (err); } +int +zap_prefetch_object(objset_t *os, uint64_t zapobj) +{ + int error; + dmu_object_info_t doi; + + error = dmu_object_info(os, zapobj, &doi); + if (error == 0 && DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP) + error = SET_ERROR(EINVAL); + if (error == 0) + error = dmu_prefetch_wait(os, zapobj, 0, doi.doi_max_offset); + + return (error); +} + int zap_lookup_by_dnode(dnode_t *dn, const char *name, uint64_t integer_size, uint64_t num_integers, void *buf) @@ -1784,6 +1799,7 @@ EXPORT_SYMBOL(zap_lookup_uint64); EXPORT_SYMBOL(zap_contains); EXPORT_SYMBOL(zap_prefetch); EXPORT_SYMBOL(zap_prefetch_uint64); +EXPORT_SYMBOL(zap_prefetch_object); EXPORT_SYMBOL(zap_add); EXPORT_SYMBOL(zap_add_by_dnode); EXPORT_SYMBOL(zap_add_uint64); diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/subrepo-openzfs/module/zfs/zfs_ioctl.c index 8cad4304f809..8f9d1a360f98 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zfs_ioctl.c @@ -38,7 +38,7 @@ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. - * Copyright (c) 2019, 2021, 2024, Klara Inc. + * Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
* Copyright (c) 2019, Allan Jude * Copyright 2024 Oxide Computer Company */ @@ -3009,34 +3009,51 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc) return (error); } +/* + * innvl: { + * "get_props_names": [ "prop1", "prop2", ..., "propN" ] + * } + */ + +static const zfs_ioc_key_t zfs_keys_get_props[] = { + { ZPOOL_GET_PROPS_NAMES, DATA_TYPE_STRING_ARRAY, ZK_OPTIONAL }, +}; + static int -zfs_ioc_pool_get_props(zfs_cmd_t *zc) +zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) { + nvlist_t *nvp = outnvl; spa_t *spa; + char **props = NULL; + unsigned int n_props = 0; int error; - nvlist_t *nvp = NULL; - if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { + if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES, + &props, &n_props) != 0) { + props = NULL; + } + + if ((error = spa_open(pool, &spa, FTAG)) != 0) { /* * If the pool is faulted, there may be properties we can still * get (such as altroot and cachefile), so attempt to get them * anyway. */ mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(zc->zc_name)) != NULL) + if ((spa = spa_lookup(pool)) != NULL) { error = spa_prop_get(spa, &nvp); + if (error == 0 && props != NULL) + error = spa_prop_get_nvlist(spa, props, n_props, + &nvp); + } mutex_exit(&spa_namespace_lock); } else { error = spa_prop_get(spa, &nvp); + if (error == 0 && props != NULL) + error = spa_prop_get_nvlist(spa, props, n_props, &nvp); spa_close(spa, FTAG); } - if (error == 0 && zc->zc_nvlist_dst != 0) - error = put_nvlist(zc, nvp); - else - error = SET_ERROR(EFAULT); - - nvlist_free(nvp); return (error); } @@ -4031,6 +4048,52 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl, return (spa_checkpoint_discard(poolname)); } +/* + * Loads specific types of data for the given pool + * + * innvl: { + * "prefetch_type" -> int32_t + * } + * + * outnvl: empty + */ +static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = { + {ZPOOL_PREFETCH_TYPE, DATA_TYPE_INT32, 0}, +}; + +static int +zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + (void) outnvl; + + int error; + spa_t *spa; + int32_t type; + + /* + * Currently, only ZPOOL_PREFETCH_DDT is supported + */ + if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 || + type != ZPOOL_PREFETCH_DDT) { + return (EINVAL); + } + + error = spa_open(poolname, &spa, FTAG); + if (error != 0) + return (error); + + hrtime_t start_time = gethrtime(); + + ddt_prefetch_all(spa); + + zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name, + (u_longlong_t)NSEC2MSEC(gethrtime() - start_time)); + + spa_close(spa, FTAG); + + return (error); +} + /* * inputs: * zc_name name of dataset to destroy @@ -7283,6 +7346,12 @@ zfs_ioctl_init(void) zfs_keys_pool_discard_checkpoint, ARRAY_SIZE(zfs_keys_pool_discard_checkpoint)); + zfs_ioctl_register("zpool_prefetch", + ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch, + zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, + zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch)); + zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE, zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, @@ -7328,6 +7397,11 @@ zfs_ioctl_init(void) POOL_CHECK_NONE, B_TRUE, B_TRUE, zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub)); + zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS, + zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, + POOL_CHECK_NONE, B_FALSE, B_FALSE, + zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props)); 
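For reference, the innvl shapes these two registrations expect can be built with the standard fnvlist helpers. A hedged userspace-side sketch follows; the dispatch through the libzfs_core lzc_ioctl() path is elided, and the "dedupcached" property-name string is an assumption, not taken from this patch:

/* ZFS_IOC_POOL_PREFETCH: ask the kernel to load the DDT into the ARC. */
nvlist_t *innvl = fnvlist_alloc();
fnvlist_add_int32(innvl, ZPOOL_PREFETCH_TYPE, ZPOOL_PREFETCH_DDT);
/* ... dispatch innvl to ZFS_IOC_POOL_PREFETCH for the pool ... */
fnvlist_free(innvl);

/* ZFS_IOC_POOL_GET_PROPS: request specific pool properties by name. */
const char *props[] = { "dedupcached" };	/* assumed property name */
nvlist_t *args = fnvlist_alloc();
fnvlist_add_string_array(args, ZPOOL_GET_PROPS_NAMES, props, 1);
/* ... dispatch args; outnvl comes back with one entry per name ... */
fnvlist_free(args);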
+ /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, @@ -7383,8 +7457,6 @@ zfs_ioctl_init(void) zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); - zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, - zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zfs_log.c b/sys/contrib/subrepo-openzfs/module/zfs/zfs_log.c index fa4e7093ca46..399f5a0117bb 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zfs_log.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zfs_log.c @@ -665,13 +665,13 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, DB_DNODE_ENTER(db); err = dmu_read_by_dnode(DB_DNODE(db), off, len, lr + 1, DMU_READ_NO_PREFETCH); + DB_DNODE_EXIT(db); if (err != 0) { zil_itx_destroy(itx); itx = zil_itx_create(txtype, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; wr_state = WR_NEED_COPY; } - DB_DNODE_EXIT(db); } itx->itx_wr_state = wr_state; diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zfs_replay.c b/sys/contrib/subrepo-openzfs/module/zfs/zfs_replay.c index 2e0af60f6db4..810550161f8b 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zfs_replay.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zfs_replay.c @@ -439,7 +439,7 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) bail: if (error == 0 && zp != NULL) { #ifdef __FreeBSD__ - VOP_UNLOCK1(ZTOV(zp)); + VOP_UNLOCK(ZTOV(zp)); #endif zrele(zp); } @@ -595,7 +595,7 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) out: if (error == 0 && zp != NULL) { #ifdef __FreeBSD__ - VOP_UNLOCK1(ZTOV(zp)); + VOP_UNLOCK(ZTOV(zp)); #endif zrele(zp); } diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zil.c b/sys/contrib/subrepo-openzfs/module/zfs/zil.c index ac56272f6396..bd37e6cef1ab 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zil.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zil.c @@ -99,6 +99,9 @@ static uint_t zfs_commit_timeout_pct = 10; static zil_kstat_values_t zil_stats = { { "zil_commit_count", KSTAT_DATA_UINT64 }, { "zil_commit_writer_count", KSTAT_DATA_UINT64 }, + { "zil_commit_error_count", KSTAT_DATA_UINT64 }, + { "zil_commit_stall_count", KSTAT_DATA_UINT64 }, + { "zil_commit_suspend_count", KSTAT_DATA_UINT64 }, { "zil_itx_count", KSTAT_DATA_UINT64 }, { "zil_itx_indirect_count", KSTAT_DATA_UINT64 }, { "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 }, @@ -360,6 +363,9 @@ zil_sums_init(zil_sums_t *zs) { wmsum_init(&zs->zil_commit_count, 0); wmsum_init(&zs->zil_commit_writer_count, 0); + wmsum_init(&zs->zil_commit_error_count, 0); + wmsum_init(&zs->zil_commit_stall_count, 0); + wmsum_init(&zs->zil_commit_suspend_count, 0); wmsum_init(&zs->zil_itx_count, 0); wmsum_init(&zs->zil_itx_indirect_count, 0); wmsum_init(&zs->zil_itx_indirect_bytes, 0); @@ -382,6 +388,9 @@ zil_sums_fini(zil_sums_t *zs) { wmsum_fini(&zs->zil_commit_count); wmsum_fini(&zs->zil_commit_writer_count); + wmsum_fini(&zs->zil_commit_error_count); + wmsum_fini(&zs->zil_commit_stall_count); + wmsum_fini(&zs->zil_commit_suspend_count); wmsum_fini(&zs->zil_itx_count); wmsum_fini(&zs->zil_itx_indirect_count); wmsum_fini(&zs->zil_itx_indirect_bytes); @@ -406,6 +415,12 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums) wmsum_value(&zil_sums->zil_commit_count); zs->zil_commit_writer_count.value.ui64 = 
wmsum_value(&zil_sums->zil_commit_writer_count); + zs->zil_commit_error_count.value.ui64 = + wmsum_value(&zil_sums->zil_commit_error_count); + zs->zil_commit_stall_count.value.ui64 = + wmsum_value(&zil_sums->zil_commit_stall_count); + zs->zil_commit_suspend_count.value.ui64 = + wmsum_value(&zil_sums->zil_commit_suspend_count); zs->zil_itx_count.value.ui64 = wmsum_value(&zil_sums->zil_itx_count); zs->zil_itx_indirect_count.value.ui64 = @@ -512,9 +527,26 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, for (; lrp < end; lrp += reclen) { lr_t *lr = (lr_t *)lrp; + + /* + * Are the remaining bytes large enough to hold a + * log record? + */ + if ((char *)(lr + 1) > end) { + cmn_err(CE_WARN, "zil_parse: lr_t overrun"); + error = SET_ERROR(ECKSUM); + arc_buf_destroy(abuf, &abuf); + goto done; + } reclen = lr->lrc_reclen; - ASSERT3U(reclen, >=, sizeof (lr_t)); - ASSERT3U(reclen, <=, end - lrp); + if (reclen < sizeof (lr_t) || reclen > end - lrp) { + cmn_err(CE_WARN, + "zil_parse: lr_t has an invalid reclen"); + error = SET_ERROR(ECKSUM); + arc_buf_destroy(abuf, &abuf); + goto done; + } + if (lr->lrc_seq > claim_lr_seq) { arc_buf_destroy(abuf, &abuf); goto done; @@ -2824,6 +2856,7 @@ zil_commit_writer_stall(zilog_t *zilog) * (which is achieved via the txg_wait_synced() call). */ ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); + ZIL_STAT_BUMP(zilog, zil_commit_stall_count); txg_wait_synced(zilog->zl_dmu_pool, 0); ASSERT(list_is_empty(&zilog->zl_lwb_list)); } @@ -3593,6 +3626,7 @@ zil_commit(zilog_t *zilog, uint64_t foid) * semantics, and avoid calling those functions altogether. */ if (zilog->zl_suspend > 0) { + ZIL_STAT_BUMP(zilog, zil_commit_suspend_count); txg_wait_synced(zilog->zl_dmu_pool, 0); return; } @@ -3646,10 +3680,12 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) * implications, but the expectation is for this to be * an exceptional case, and shouldn't occur often. */ + ZIL_STAT_BUMP(zilog, zil_commit_error_count); DTRACE_PROBE2(zil__commit__io__error, zilog_t *, zilog, zil_commit_waiter_t *, zcw); txg_wait_synced(zilog->zl_dmu_pool, 0); } else if (wtxg != 0) { + ZIL_STAT_BUMP(zilog, zil_commit_suspend_count); txg_wait_synced(zilog->zl_dmu_pool, wtxg); } diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zio.c b/sys/contrib/subrepo-openzfs/module/zfs/zio.c index d68d5ababe79..6d08d4bd1633 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zio.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zio.c @@ -194,6 +194,10 @@ zio_init(void) cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ? KMC_NODEBUG : 0; data_cflags = KMC_NODEBUG; + if (abd_size_alloc_linear(size)) { + cflags |= KMC_RECLAIMABLE; + data_cflags |= KMC_RECLAIMABLE; + } if (cflags == data_cflags) { /* * Resulting kmem caches would be identical.
@@ -1101,45 +1105,50 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, { int errors = 0; - if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) { + if (unlikely(!DMU_OT_IS_VALID(BP_GET_TYPE(bp)))) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px has invalid TYPE %llu", bp, (longlong_t)BP_GET_TYPE(bp)); } - if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS) { - errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %px has invalid CHECKSUM %llu", - bp, (longlong_t)BP_GET_CHECKSUM(bp)); - } - if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS) { + if (unlikely(BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px has invalid COMPRESS %llu", bp, (longlong_t)BP_GET_COMPRESS(bp)); } - if (BP_GET_LSIZE(bp) > SPA_MAXBLOCKSIZE) { + if (unlikely(BP_GET_LSIZE(bp) > SPA_MAXBLOCKSIZE)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px has invalid LSIZE %llu", bp, (longlong_t)BP_GET_LSIZE(bp)); } - if (BP_GET_PSIZE(bp) > SPA_MAXBLOCKSIZE) { - errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %px has invalid PSIZE %llu", - bp, (longlong_t)BP_GET_PSIZE(bp)); - } - if (BP_IS_EMBEDDED(bp)) { - if (BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES) { + if (unlikely(BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px has invalid ETYPE %llu", bp, (longlong_t)BPE_GET_ETYPE(bp)); } + if (unlikely(BPE_GET_PSIZE(bp) > BPE_PAYLOAD_SIZE)) { + errors += zfs_blkptr_verify_log(spa, bp, blk_verify, + "blkptr at %px has invalid PSIZE %llu", + bp, (longlong_t)BPE_GET_PSIZE(bp)); + } + return (errors == 0); + } + if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) { + errors += zfs_blkptr_verify_log(spa, bp, blk_verify, + "blkptr at %px has invalid CHECKSUM %llu", + bp, (longlong_t)BP_GET_CHECKSUM(bp)); + } + if (unlikely(BP_GET_PSIZE(bp) > SPA_MAXBLOCKSIZE)) { + errors += zfs_blkptr_verify_log(spa, bp, blk_verify, + "blkptr at %px has invalid PSIZE %llu", + bp, (longlong_t)BP_GET_PSIZE(bp)); } /* * Do not verify individual DVAs if the config is not trusted. This * will be done once the zio is executed in vdev_mirror_map_alloc. 
*/ - if (!spa->spa_trust_config) + if (unlikely(!spa->spa_trust_config)) return (errors == 0); switch (blk_config) { @@ -1168,20 +1177,20 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, const dva_t *dva = &bp->blk_dva[i]; uint64_t vdevid = DVA_GET_VDEV(dva); - if (vdevid >= spa->spa_root_vdev->vdev_children) { + if (unlikely(vdevid >= spa->spa_root_vdev->vdev_children)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px DVA %u has invalid VDEV %llu", bp, i, (longlong_t)vdevid); continue; } vdev_t *vd = spa->spa_root_vdev->vdev_child[vdevid]; - if (vd == NULL) { + if (unlikely(vd == NULL)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px DVA %u has invalid VDEV %llu", bp, i, (longlong_t)vdevid); continue; } - if (vd->vdev_ops == &vdev_hole_ops) { + if (unlikely(vd->vdev_ops == &vdev_hole_ops)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px DVA %u has hole VDEV %llu", bp, i, (longlong_t)vdevid); @@ -1199,7 +1208,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, uint64_t asize = DVA_GET_ASIZE(dva); if (DVA_GET_GANG(dva)) asize = vdev_gang_header_asize(vd); - if (offset + asize > vd->vdev_asize) { + if (unlikely(offset + asize > vd->vdev_asize)) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %px DVA %u has invalid OFFSET %llu", bp, i, (longlong_t)offset); @@ -1850,8 +1859,13 @@ zio_write_compress(zio_t *zio) if (compress != ZIO_COMPRESS_OFF && !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { void *cbuf = NULL; - psize = zio_compress_data(compress, zio->io_abd, &cbuf, lsize, - zp->zp_complevel); + if (abd_cmp_zero(zio->io_abd, lsize) == 0) + psize = 0; + else if (compress == ZIO_COMPRESS_EMPTY) + psize = lsize; + else + psize = zio_compress_data(compress, zio->io_abd, &cbuf, + lsize, zp->zp_complevel); if (psize == 0) { compress = ZIO_COMPRESS_OFF; } else if (psize >= lsize) { @@ -1915,10 +1929,12 @@ zio_write_compress(zio_t *zio) * receive, we must check whether the block can be compressed * to a hole. 
*/ - psize = zio_compress_data(ZIO_COMPRESS_EMPTY, - zio->io_abd, NULL, lsize, zp->zp_complevel); - if (psize == 0 || psize >= lsize) + if (abd_cmp_zero(zio->io_abd, lsize) == 0) { + psize = 0; compress = ZIO_COMPRESS_OFF; + } else { + psize = lsize; + } } else if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) { /* @@ -3069,7 +3085,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) zp.zp_checksum = gio->io_prop.zp_checksum; zp.zp_compress = ZIO_COMPRESS_OFF; zp.zp_complevel = gio->io_prop.zp_complevel; - zp.zp_type = DMU_OT_NONE; + zp.zp_type = zp.zp_storage_type = DMU_OT_NONE; zp.zp_level = 0; zp.zp_copies = gio->io_prop.zp_copies; zp.zp_dedup = B_FALSE; @@ -3503,6 +3519,15 @@ zio_ddt_write(zio_t *zio) ddt_enter(ddt); dde = ddt_lookup(ddt, bp, B_TRUE); + if (dde == NULL) { + /* DDT size is over its quota so no new entries */ + zp->zp_dedup = B_FALSE; + BP_SET_DEDUP(bp, B_FALSE); + if (zio->io_bp_override == NULL) + zio->io_pipeline = ZIO_WRITE_PIPELINE; + ddt_exit(ddt); + return (zio); + } ddp = &dde->dde_phys[p]; if (zp->zp_dedup_verify && zio_ddt_collision(zio, ddt, dde)) { @@ -3628,8 +3653,7 @@ zio_dva_throttle(zio_t *zio) metaslab_class_t *mc; /* locate an appropriate allocation class */ - mc = spa_preferred_class(spa, zio->io_size, zio->io_prop.zp_type, - zio->io_prop.zp_level, zio->io_prop.zp_zpl_smallblk); + mc = spa_preferred_class(spa, zio); if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE || !mc->mc_alloc_throttle_enabled || @@ -3701,9 +3725,7 @@ zio_dva_allocate(zio_t *zio) */ mc = zio->io_metaslab_class; if (mc == NULL) { - mc = spa_preferred_class(spa, zio->io_size, - zio->io_prop.zp_type, zio->io_prop.zp_level, - zio->io_prop.zp_zpl_smallblk); + mc = spa_preferred_class(spa, zio); zio->io_metaslab_class = mc; } @@ -3727,6 +3749,26 @@ zio_dva_allocate(zio_t *zio) * Fallback to normal class when an alloc class is full */ if (error == ENOSPC && mc != spa_normal_class(spa)) { + /* + * When the dedup or special class is spilling into the normal + * class, there can still be significant space available due + * to deferred frees that are in-flight. We track the txg when + * this occurred and back off adding new DDT entries for a few + * txgs to allow the free blocks to be processed. + */ + if ((mc == spa_dedup_class(spa) || (spa_special_has_ddt(spa) && + mc == spa_special_class(spa))) && + spa->spa_dedup_class_full_txg != zio->io_txg) { + spa->spa_dedup_class_full_txg = zio->io_txg; + zfs_dbgmsg("%s[%d]: %s class spilling, req size %d, " + "%llu allocated of %llu", + spa_name(spa), (int)zio->io_txg, + mc == spa_dedup_class(spa) ? "dedup" : "special", + (int)zio->io_size, + (u_longlong_t)metaslab_class_get_alloc(mc), + (u_longlong_t)metaslab_class_get_space(mc)); + } + /* * If throttling, transfer reservation over to normal class. 
* The io_allocator slot can remain the same even though we diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zio_compress.c b/sys/contrib/subrepo-openzfs/module/zfs/zio_compress.c index c8a10db7483b..e12d5498ccda 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zio_compress.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zio_compress.c @@ -111,19 +111,6 @@ zio_compress_select(spa_t *spa, enum zio_compress child, return (result); } -static int -zio_compress_zeroed_cb(void *data, size_t len, void *private) -{ - (void) private; - - uint64_t *end = (uint64_t *)((char *)data + len); - for (uint64_t *word = (uint64_t *)data; word < end; word++) - if (*word != 0) - return (1); - - return (0); -} - size_t zio_compress_data(enum zio_compress c, abd_t *src, void **dst, size_t s_len, uint8_t level) @@ -132,18 +119,9 @@ zio_compress_data(enum zio_compress c, abd_t *src, void **dst, size_t s_len, uint8_t complevel; zio_compress_info_t *ci = &zio_compress_table[c]; - ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS); - ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL); - - /* - * If the data is all zeroes, we don't even need to allocate - * a block for it. We indicate this by returning zero size. - */ - if (abd_iterate_func(src, 0, s_len, zio_compress_zeroed_cb, NULL) == 0) - return (0); - - if (c == ZIO_COMPRESS_EMPTY) - return (s_len); + ASSERT3U(c, <, ZIO_COMPRESS_FUNCTIONS); + ASSERT3U(ci->ci_compress, !=, NULL); + ASSERT3U(s_len, >, 0); /* Compress at least 12.5% */ d_len = s_len - (s_len >> 3); diff --git a/sys/contrib/subrepo-openzfs/module/zfs/zvol.c b/sys/contrib/subrepo-openzfs/module/zfs/zvol.c index 5b6a3f5cb410..001f774a6d16 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/zvol.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/zvol.c @@ -37,6 +37,7 @@ * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2012, 2019 by Delphix. All rights reserved. + * Copyright (c) 2024, Klara, Inc. */ /* @@ -894,6 +895,9 @@ zvol_resume(zvol_state_t *zv) */ atomic_dec(&zv->zv_suspend_ref); + if (zv->zv_flags & ZVOL_REMOVING) + cv_broadcast(&zv->zv_removing_cv); + return (SET_ERROR(error)); } @@ -929,6 +933,9 @@ zvol_last_close(zvol_state_t *zv) ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + if (zv->zv_flags & ZVOL_REMOVING) + cv_broadcast(&zv->zv_removing_cv); + zvol_shutdown_zv(zv); dmu_objset_disown(zv->zv_objset, 1, zv); @@ -1221,6 +1228,41 @@ zvol_create_minor(const char *name) * Remove minors for specified dataset including children and snapshots. */ +/* + * Remove the minor for a given zvol. This will do it all: + * - flag the zvol for removal, so new requests are rejected + * - wait until outstanding requests are completed + * - remove it from lists + * - free it + * It's also usable as a taskq task, and smells nice too. 
+ */ +static void +zvol_remove_minor_task(void *arg) +{ + zvol_state_t *zv = (zvol_state_t *)arg; + + ASSERT(!RW_LOCK_HELD(&zvol_state_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + + mutex_enter(&zv->zv_state_lock); + while (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { + zv->zv_flags |= ZVOL_REMOVING; + cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock); + } + mutex_exit(&zv->zv_state_lock); + + rw_enter(&zvol_state_lock, RW_WRITER); + mutex_enter(&zv->zv_state_lock); + + zvol_remove(zv); + zvol_os_clear_private(zv); + + mutex_exit(&zv->zv_state_lock); + rw_exit(&zvol_state_lock); + + zvol_os_free(zv); +} + static void zvol_free_task(void *arg) { @@ -1233,11 +1275,13 @@ zvol_remove_minors_impl(const char *name) zvol_state_t *zv, *zv_next; int namelen = ((name) ? strlen(name) : 0); taskqid_t t; - list_t free_list; + list_t delay_list, free_list; if (zvol_inhibit_dev) return; + list_create(&delay_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_next)); list_create(&free_list, sizeof (zvol_state_t), offsetof(zvol_state_t, zv_next)); @@ -1256,9 +1300,24 @@ zvol_remove_minors_impl(const char *name) * one is currently using this zv */ - /* If in use, leave alone */ + /* + * If in use, try to throw everyone off and try again + * later. + */ if (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { + zv->zv_flags |= ZVOL_REMOVING; + t = taskq_dispatch( + zv->zv_objset->os_spa->spa_zvol_taskq, + zvol_remove_minor_task, zv, TQ_SLEEP); + if (t == TASKQID_INVALID) { + /* + * Couldn't create the task, so we'll + * do it in place once the loop is + * finished. + */ + list_insert_head(&delay_list, zv); + } mutex_exit(&zv->zv_state_lock); continue; } @@ -1285,7 +1344,11 @@ zvol_remove_minors_impl(const char *name) } rw_exit(&zvol_state_lock); - /* Drop zvol_state_lock before calling zvol_free() */ + /* Wait for zvols that we couldn't create a remove task for */ + while ((zv = list_remove_head(&delay_list)) != NULL) + zvol_remove_minor_task(zv); + + /* Free any that we couldn't free in parallel earlier */ while ((zv = list_remove_head(&free_list)) != NULL) zvol_os_free(zv); } @@ -1305,33 +1368,38 @@ zvol_remove_minor_impl(const char *name) zv_next = list_next(&zvol_state_list, zv); mutex_enter(&zv->zv_state_lock); - if (strcmp(zv->zv_name, name) == 0) { - /* - * By holding zv_state_lock here, we guarantee that no - * one is currently using this zv - */ + if (strcmp(zv->zv_name, name) == 0) + /* Found, leave the loop with zv_state_lock held */ + break; + mutex_exit(&zv->zv_state_lock); + } - /* If in use, leave alone */ - if (zv->zv_open_count > 0 || - atomic_read(&zv->zv_suspend_ref)) { - mutex_exit(&zv->zv_state_lock); - continue; - } - zvol_remove(zv); + if (zv == NULL) { + rw_exit(&zvol_state_lock); + return; + } - zvol_os_clear_private(zv); - mutex_exit(&zv->zv_state_lock); - break; - } else { - mutex_exit(&zv->zv_state_lock); - } + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + + if (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { + /* + * In use, so try to throw everyone off, then wait + * until finished.
+ */ + zv->zv_flags |= ZVOL_REMOVING; + mutex_exit(&zv->zv_state_lock); + rw_exit(&zvol_state_lock); + zvol_remove_minor_task(zv); + return; } - /* Drop zvol_state_lock before calling zvol_free() */ + zvol_remove(zv); + zvol_os_clear_private(zv); + + mutex_exit(&zv->zv_state_lock); rw_exit(&zvol_state_lock); - if (zv != NULL) - zvol_os_free(zv); + zvol_os_free(zv); } /* diff --git a/sys/contrib/subrepo-openzfs/rpm/generic/zfs.spec.in b/sys/contrib/subrepo-openzfs/rpm/generic/zfs.spec.in index 2e89abd0edfd..c7a00c61f6bb 100644 --- a/sys/contrib/subrepo-openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/subrepo-openzfs/rpm/generic/zfs.spec.in @@ -532,6 +532,7 @@ systemctl --system daemon-reload >/dev/null || true %attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/* %config(noreplace) %{_bashcompletiondir}/zfs +%config(noreplace) %{_bashcompletiondir}/zpool %files -n libzpool5 %{_libdir}/libzpool.so.* diff --git a/sys/contrib/subrepo-openzfs/tests/runfiles/common.run b/sys/contrib/subrepo-openzfs/tests/runfiles/common.run index a90187555c74..26dfd1b5bd5b 100644 --- a/sys/contrib/subrepo-openzfs/tests/runfiles/common.run +++ b/sys/contrib/subrepo-openzfs/tests/runfiles/common.run @@ -81,7 +81,8 @@ tests = ['block_cloning_clone_mmap_cached', 'block_cloning_cross_enc_dataset', 'block_cloning_copyfilerange_fallback_same_txg', 'block_cloning_replay', 'block_cloning_replay_encrypted', - 'block_cloning_lwb_buffer_overflow', 'block_cloning_clone_mmap_write'] + 'block_cloning_lwb_buffer_overflow', 'block_cloning_clone_mmap_write', + 'block_cloning_rlimit_fsize'] tags = ['functional', 'block_cloning'] [tests/functional/bootfs] @@ -153,6 +154,10 @@ tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos', 'clean_mirror_003_pos', 'clean_mirror_004_pos'] tags = ['functional', 'clean_mirror'] +[tests/functional/cli_root/json] +tests = ['json_sanity'] +tags = ['functional', 'cli_root', 'json'] + [tests/functional/cli_root/zinject] tests = ['zinject_args'] pre = @@ -208,6 +213,10 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_verbose'] tags = ['functional', 'cli_root', 'zfs_create'] +[tests/functional/cli_root/zpool_prefetch] +tests = ['zpool_prefetch_001_pos'] +tags = ['functional', 'cli_root', 'zpool_prefetch'] + [tests/functional/cli_root/zfs_destroy] tests = ['zfs_clone_livelist_condense_and_disable', 'zfs_clone_livelist_condense_races', 'zfs_clone_livelist_dedup', @@ -662,6 +671,12 @@ pre = post = tags = ['functional', 'deadman'] +[tests/functional/dedup] +tests = ['dedup_quota'] +pre = +post = +tags = ['functional', 'dedup'] + [tests/functional/delegate] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', diff --git a/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run b/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run index bd6cc56f3589..5817e649003c 100644 --- a/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run +++ b/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run @@ -201,7 +201,7 @@ tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos', tags = ['functional', 'tmpfile'] [tests/functional/upgrade:Linux] -tests = ['upgrade_projectquota_001_pos'] +tests = ['upgrade_projectquota_001_pos', 'upgrade_projectquota_002_pos'] tags = ['functional', 'upgrade'] [tests/functional/user_namespace:Linux] diff --git a/sys/contrib/subrepo-openzfs/tests/test-runner/bin/test-runner.py.in b/sys/contrib/subrepo-openzfs/tests/test-runner/bin/test-runner.py.in 
index 65247f4f06fc..6488fa8318ff 100755
--- a/sys/contrib/subrepo-openzfs/tests/test-runner/bin/test-runner.py.in
+++ b/sys/contrib/subrepo-openzfs/tests/test-runner/bin/test-runner.py.in
@@ -111,7 +111,7 @@ class Result(object):
 class Output(object):
     """
     This class is a slightly modified version of the 'Stream' class found
-    here: http://goo.gl/aSGfv
+    here: https://stackoverflow.com/q/4984549/
     """
     def __init__(self, stream, debug=False):
         self.stream = stream
diff --git a/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in
index de06c7c6e2c1..1177e80e1a75 100755
--- a/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in
@@ -254,6 +254,7 @@ maybe = {
     'tmpfile/setup': ['SKIP', tmpfile_reason],
     'trim/setup': ['SKIP', trim_reason],
     'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason],
+    'upgrade/upgrade_projectquota_002_pos': ['SKIP', project_id_reason],
     'user_namespace/setup': ['SKIP', user_ns_reason],
     'userquota/setup': ['SKIP', exec_reason],
     'vdev_zaps/vdev_zaps_004_pos': ['FAIL', known_reason],
@@ -330,6 +331,8 @@ elif sys.platform.startswith('linux'):
             ['SKIP', cfr_reason],
         'block_cloning/block_cloning_replay_encrypted':
             ['SKIP', cfr_reason],
+        'block_cloning/block_cloning_rlimit_fsize':
+            ['SKIP', cfr_reason],
         'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
         'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
         'cp_files/cp_files_002_pos': ['SKIP', cfr_reason],
@@ -350,10 +353,6 @@ elif sys.platform.startswith('linux'):
         'mmp/mmp_active_import': ['FAIL', known_reason],
         'mmp/mmp_exported_import': ['FAIL', known_reason],
         'mmp/mmp_inactive_import': ['FAIL', known_reason],
-        'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872],
-        'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
-        'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872],
-        'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
     })
 
 # Not all Github actions runners have scsi_debug module, so we may skip
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/cmd/mmap_seek.c b/sys/contrib/subrepo-openzfs/tests/zfs-tests/cmd/mmap_seek.c
index 7be92d109565..2d250554a13f 100644
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/cmd/mmap_seek.c
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/cmd/mmap_seek.c
@@ -35,6 +35,16 @@
 #include
 #endif
 
+/* some older uClibc versions lack the defines, so we define them manually */
+#ifdef __UCLIBC__
+#ifndef SEEK_DATA
+#define SEEK_DATA 3
+#endif
+#ifndef SEEK_HOLE
+#define SEEK_HOLE 4
+#endif
+#endif
+
 static void
 seek_data(int fd, off_t offset, off_t expected)
 {
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/commands.cfg
index 78d5a78912a9..77371d306e12 100644
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/commands.cfg
@@ -46,6 +46,7 @@ export SYSTEM_FILES_COMMON='awk
 	hostname
 	id
 	iostat
+	jq
 	kill
 	ksh
 	ldd
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/tunables.cfg
index 721cf27f48ca..3de316a12504 100644
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/include/tunables.cfg
@@ -28,6 +28,9 @@ CONDENSE_INDIRECT_COMMIT_ENTRY_DELAY_MS condense.indirect_commit_entry_delay_ms
CONDENSE_INDIRECT_OBSOLETE_PCT condense.indirect_obsolete_pct zfs_condense_indirect_obsolete_pct CONDENSE_MIN_MAPPING_BYTES condense.min_mapping_bytes zfs_condense_min_mapping_bytes DBUF_CACHE_SHIFT dbuf.cache_shift dbuf_cache_shift +DDT_ZAP_DEFAULT_BS dedup.ddt_zap_default_bs ddt_zap_default_bs +DDT_ZAP_DEFAULT_IBS dedup.ddt_zap_default_ibs ddt_zap_default_ibs +DDT_DATA_IS_SPECIAL ddt_data_is_special zfs_ddt_data_is_special DEADMAN_CHECKTIME_MS deadman.checktime_ms zfs_deadman_checktime_ms DEADMAN_EVENTS_PER_SECOND deadman_events_per_second zfs_deadman_events_per_second DEADMAN_FAILMODE deadman.failmode zfs_deadman_failmode @@ -94,8 +97,8 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode VOL_RECURSIVE vol.recursive UNSUPPORTED VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq -BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled -BCLONE_WAIT_DIRTY zfs_bclone_wait_dirty zfs_bclone_wait_dirty +BCLONE_ENABLED bclone_enabled zfs_bclone_enabled +BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty XATTR_COMPAT xattr_compat zfs_xattr_compat ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am index a0040215fe22..24af98146c19 100644 --- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am @@ -550,6 +550,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_replay.ksh \ functional/block_cloning/block_cloning_replay_encrypted.ksh \ functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \ + functional/block_cloning/block_cloning_rlimit_fsize.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ @@ -678,6 +679,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/clean_mirror/clean_mirror_004_pos.ksh \ functional/clean_mirror/cleanup.ksh \ functional/clean_mirror/setup.ksh \ + functional/cli_root/json/cleanup.ksh \ + functional/cli_root/json/setup.ksh \ + functional/cli_root/json/json_sanity.ksh \ functional/cli_root/zinject/zinject_args.ksh \ functional/cli_root/zdb/zdb_002_pos.ksh \ functional/cli_root/zdb/zdb_003_pos.ksh \ @@ -1248,6 +1252,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_online/setup.ksh \ functional/cli_root/zpool_online/zpool_online_001_pos.ksh \ functional/cli_root/zpool_online/zpool_online_002_neg.ksh \ + functional/cli_root/zpool_prefetch/cleanup.ksh \ + functional/cli_root/zpool_prefetch/setup.ksh \ + functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh \ functional/cli_root/zpool_remove/cleanup.ksh \ functional/cli_root/zpool_remove/setup.ksh \ functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh \ @@ -1487,6 +1494,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/deadman/deadman_ratelimit.ksh \ functional/deadman/deadman_sync.ksh \ functional/deadman/deadman_zio.ksh \ + functional/dedup/cleanup.ksh \ + functional/dedup/setup.ksh \ + functional/dedup/dedup_quota.ksh \ functional/delegate/cleanup.ksh \ functional/delegate/setup.ksh \ functional/delegate/zfs_allow_001_pos.ksh \ @@ -2092,6 +2102,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/upgrade/cleanup.ksh \ functional/upgrade/setup.ksh \ functional/upgrade/upgrade_projectquota_001_pos.ksh \ + 
functional/upgrade/upgrade_projectquota_002_pos.ksh \
 	functional/upgrade/upgrade_readonly_pool.ksh \
 	functional/upgrade/upgrade_userobj_001_pos.ksh \
 	functional/user_namespace/cleanup.ksh \
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh
new file mode 100755
index 000000000000..a8a64e52491a
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh
@@ -0,0 +1,64 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

+#
+# DESCRIPTION:
+# When block cloning is used to implement copy_file_range(2), the
+# RLIMIT_FSIZE limit must be respected.
+#
+# STRATEGY:
+# 1. Create a pool and populate a small file.
+# 2. Verify clonefile obeys RLIMIT_FSIZE set via ulimit -f (512-byte blocks).
+#
+
+verify_runnable "global"
+
+VDIR=$TEST_BASE_DIR/disk-bclone
+VDEV="$VDIR/a"
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -rf $VDIR
+}
+
+log_onexit cleanup
+
+log_assert "Test for RLIMIT_FSIZE handling with block cloning enabled"
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must truncate -s 1G $VDEV
+
+log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV
+
+log_must dd if=/dev/random of=/$TESTPOOL/file1 bs=1 count=1000
+
+ulimit -f 2
+log_must clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file2 0 0 all
+ulimit -f 1
+log_mustnot clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file3 0 0 all
+
+log_pass "copy_file_range(2) respects RLIMIT_FSIZE"
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/cleanup.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/cleanup.ksh
new file mode 100755
index 000000000000..f82a90962292
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/cleanup.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+zpool destroy testpool1
+zpool destroy testpool2
+
+rm $TESTDIR/file{1..28}
+rmdir $TESTDIR
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh
new file mode 100755
index 000000000000..e598dd57181e
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh
@@ -0,0 +1,57 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END

+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.

+. $STF_SUITE/include/libtest.shlib

+#
+# DESCRIPTION:
+# Basic sanity check for valid JSON from zfs & zpool commands.
+#
+# STRATEGY:
+# 1. Run different zfs/zpool -j commands and check for valid JSON

+list=(
+	"zpool status -j -g --json-int --json-flat-vdevs --json-pool-key-guid"
+	"zpool status -p -j -g --json-int --json-flat-vdevs --json-pool-key-guid"
+	"zpool status -j -c upath"
+	"zpool status -j"
+	"zpool status -j testpool1"
+	"zpool list -j"
+	"zpool list -j -g"
+	"zpool list -j -o fragmentation"
+	"zpool get -j size"
+	"zpool get -j all"
+	"zpool version -j"
+	"zfs list -j"
+	"zfs list -j testpool1"
+	"zfs get -j all"
+	"zfs get -j available"
+	"zfs mount -j"
+	"zfs version -j"
+)

+for cmd in "${list[@]}" ; do
+	log_must eval "$cmd | jq > /dev/null"
+done

+log_pass "zpool and zfs commands output valid JSON"
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/setup.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/setup.ksh
new file mode 100755
index 000000000000..f94dc5697423
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/json/setup.ksh
@@ -0,0 +1,50 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+# Sanity check that neither 'testpool1' nor 'testpool2' exists
+log_mustnot eval "zpool status -j | \
+    jq -e '.pools | has(\"testpool1\") or has(\"testpool2\")' &> /dev/null"
+
+mkdir -p $TESTDIR
+truncate -s 80M $TESTDIR/file{1..28}
+
+DISK=${DISKS%% *}
+
+# Create complex pool configs to exercise JSON
+zpool create -f testpool1 draid $TESTDIR/file{1..10} \
+	special $DISK \
+	dedup $TESTDIR/file11 \
+	special $TESTDIR/file12 \
+	cache $TESTDIR/file13 \
+	log $TESTDIR/file14
+
+zpool create -f testpool2 mirror $TESTDIR/file{15,16} \
+	raidz1 $TESTDIR/file{17,18,19} \
+	cache $TESTDIR/file20 \
+	log $TESTDIR/file21 \
+	special mirror $TESTDIR/file{22,23} \
+	dedup mirror $TESTDIR/file{24,25} \
+	spare $TESTDIR/file{26,27,28}
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh
index 643bf1cf28e7..226a665f669d 100755
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh
@@ -210,19 +210,15 @@ log_must local_cleanup
 log_note "verify clone list truncated correctly"
 fs=$TESTPOOL/$TESTFS1
 xs=""; for i in {1..200}; do xs+="x"; done
-if is_linux; then
-	ZFS_MAXPROPLEN=4096
-else
-	ZFS_MAXPROPLEN=1024
-fi
+maxproplen=8192
 log_must zfs create $fs
 log_must zfs snapshot $fs@snap
-for (( i = 1; i <= (ZFS_MAXPROPLEN / 200 + 1); i++ )); do
+for (( i = 1; i <= (maxproplen / 200 + 1); i++ )); do
 	log_must zfs clone ${fs}@snap ${fs}/${TESTCLONE}${xs}.${i}
 done
 clone_list=$(zfs list -o clones $fs@snap)
 char_count=$(echo "$clone_list" | tail -1 | wc -c)
-[[ $char_count -eq $ZFS_MAXPROPLEN ]] || \
+[[ $char_count -eq $maxproplen ]] || \
 	log_fail "Clone list not truncated correctly.
Unexpected character count" \ "$char_count" diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib index 5617bd01ba42..a7a93a3046d2 100644 --- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib @@ -84,7 +84,8 @@ function do_vol_test vol=$TESTPOOL/$TESTVOL1 vol_b_path=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL1 - log_must zfs create -V $VOLSIZE -o copies=$copies $vol + log_must zfs create -V $VOLSIZE -o compression=off -o copies=$copies \ + $vol log_must zfs set refreservation=none $vol block_device_wait $vol_b_path @@ -116,31 +117,30 @@ function do_vol_test else log_must zpool create $TESTPOOL1 $vol_b_path fi - log_must zfs create $TESTPOOL1/$TESTFS1 + log_must zfs create -o compression=off $TESTPOOL1/$TESTFS1 + sync_pool $TESTPOOL1 ;; *) log_unsupported "$type test not implemented" ;; esac - ((nfilesize = copies * ${FILESIZE%m})) + sync_pool $TESTPOOL pre_used=$(get_prop used $vol) - ((target_size = pre_used + nfilesize)) if [[ $type == "zfs" ]]; then log_must mkfile $FILESIZE /$TESTPOOL1/$TESTFS1/$FILE + sync_pool $TESTPOOL1 else log_must mkfile $FILESIZE $mntp/$FILE + log_must sync fi + sync_pool $TESTPOOL post_used=$(get_prop used $vol) - ((retries = 0)) - while ((post_used < target_size && retries++ < 42)); do - sleep 1 - post_used=$(get_prop used $vol) - done ((used = post_used - pre_used)) + ((nfilesize = copies * ${FILESIZE%m})) if ((used < nfilesize)); then log_fail "The space is not charged correctly while setting" \ "copies as $copies ($used < $nfilesize)" \ @@ -153,5 +153,7 @@ function do_vol_test log_must umount $mntp fi + # Ubuntu 20.04 wants a sync here + log_must sync log_must zfs destroy $vol } diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg index c3b9efd6464a..6cfa7eaf7514 100644 --- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg @@ -72,4 +72,7 @@ typeset -a properties=( io_t slow_io_n slow_io_t + trim_support + trim_errors + slow_ios ) diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 6ebce9459190..e8a94ce209bc 100644 --- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -47,6 +47,8 @@ typeset -a properties=( "listsnapshots" "autoexpand" "dedupratio" + "dedup_table_quota" + "dedup_table_size" "free" "allocated" "readonly" diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/cleanup.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/cleanup.ksh new file mode 100755 index 000000000000..79cd6e9f908e --- /dev/null +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this 
file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/setup.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/setup.ksh new file mode 100755 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh new file mode 100755 index 000000000000..a96a38ff178a --- /dev/null +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh @@ -0,0 +1,128 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, 2023 by Klara Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# 'zpool prefetch -t ddt <pool>' can successfully load a pool's DDT on demand.
+#
+# STRATEGY:
+# 1. Build up storage pool with deduplicated dataset.
+# 2. Export the pool.
+# 3. Import the pool, and use zpool prefetch -t ddt to load its table.
+# 4. Verify the DDT was loaded successfully using ddt cache stats
+#
+
+verify_runnable "both"
+
+log_assert "'zpool prefetch -t ddt <pool>' can successfully load the DDT for a pool."
+
+function getddtstats
+{
+	typeset -n gds=$1
+	typeset pool=$2
+
+	out=$(zpool status -DDp $pool | awk '/^ dedup: / {print $6 " " $9 " " $12}')
+	log_note "status -DDp output: ${out}"
+
+	gds.ondisk=$(echo $out | cut -d" " -f1)
+	gds.incore=$(echo $out | cut -d" " -f2)
+	gds.cached=$(echo $out | cut -d" " -f3)
+
+	# In case of missing data, reset to 0. This should normally be due
+	# to a pool without any DDT.
+	[ -z "${gds.ondisk}" ] && gds.ondisk="0"
+	[ -z "${gds.incore}" ] && gds.incore="0"
+	[ -z "${gds.cached}" ] && gds.cached="0"
+
+	return 0
+}
+
+# Confirm that nothing happens on a standard pool config.
+typeset -A before
+log_must getddtstats before $TESTPOOL
+log_note "before stats: ${before}"
+log_must test "${before.ondisk}" -eq "0"
+log_must test "${before.incore}" -eq "0"
+log_must test "${before.cached}" -eq "0"
+log_must zpool prefetch -t ddt $TESTPOOL
+
+# Build up the deduplicated dataset. This consists of creating enough files
+# to generate a reasonably sized DDT for testing purposes.
+
+DATASET=$TESTPOOL/ddt
+log_must zfs create -o dedup=on $DATASET
+MNTPOINT=$(get_prop mountpoint $TESTPOOL/ddt)
+
+log_note "Generating dataset ..."
+typeset -i i=0
+while (( i < 16384 )); do
+	echo -n $i > $MNTPOINT/f.$i
+
+	# Create some copies of the original mainly for the purpose of
+	# having duplicate entries. About half will have no copies, while
+	# the remainder will have an equal distribution of 1-4 copies,
+	# depending on the number put into the original.
+	typeset -i j
+	((j = i % 8))
+	while (( j < 4 )); do
+		cp $MNTPOINT/f.$i $MNTPOINT/f.$i.$j
+		((j += 1))
+	done
+	((i += 1))
+done
+log_note "Dataset generation completed."
+
+typeset -A generated
+log_must getddtstats generated $TESTPOOL
+log_note "generated stats: ${generated}"
+log_must test "${generated.ondisk}" -ge "1048576"
+log_must test "${generated.incore}" -ge "1048576"
+log_must test "${generated.cached}" -ge "1048576"
+log_must zpool prefetch -t ddt $TESTPOOL
+
+# Do an export/import series to flush the DDT dataset cache.
+typeset -A reimport
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+log_must getddtstats reimport $TESTPOOL
+log_note "reimport stats: ${reimport}"
+log_must test "${reimport.ondisk}" -ge "1048576"
+log_must test "${reimport.incore}" -ge "1048576"
+# On reimport, only the first block or two should be cached.
+log_must test "${reimport.cached}" -le "65536"
+
+# Finally, reload it and check again.
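+# After an explicit prefetch the whole table should be resident, so the
+# cached size is expected to match the in-core size exactly.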
+typeset -A reloaded
+log_must zpool prefetch -t ddt $TESTPOOL
+log_must getddtstats reloaded $TESTPOOL
+log_note "reloaded stats: ${reloaded}"
+log_must test "${reloaded.ondisk}" -ge "1048576"
+log_must test "${reloaded.incore}" -ge "1048576"
+log_must test "${reloaded.cached}" -eq "${reloaded.incore}"
+
+log_pass "'zpool prefetch -t ddt <pool>' success."
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_001_pos.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_001_pos.ksh
index 4b9097933f37..55518ae9debe 100755
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_001_pos.ksh
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_001_pos.ksh
@@ -56,16 +56,10 @@ typeset -a values=()
 # for the null byte)
 names+=("$(awk 'BEGIN { printf "x:"; while (c++ < (256 - 2 - 1)) printf "a" }')")
 values+=("long-property-name")
-# Longest property value (the limits are 1024 on FreeBSD and 4096 on Linux, so
-# pick the right one; the longest value can use limit minus 1 bytes for the
-# null byte)
-if is_linux; then
-	typeset ZFS_MAXPROPLEN=4096
-else
-	typeset ZFS_MAXPROPLEN=1024
-fi
+# Longest property value (8191 bytes, which is the 8192-byte limit minus 1 byte
+# for the null byte).
 names+=("long:property:value")
-values+=("$(awk -v max="$ZFS_MAXPROPLEN" 'BEGIN { while (c++ < (max - 1)) printf "A" }')")
+values+=("$(awk 'BEGIN { while (c++ < (8192 - 1)) printf "A" }')")
 # Valid property names
 for i in {1..10}; do
 	typeset -i len
@@ -80,8 +74,8 @@ while ((i < ${#names[@]})); do
 	typeset name="${names[$i]}"
 	typeset value="${values[$i]}"
 
-	log_must eval "zpool set $name='$value' $TESTPOOL"
-	log_must eval "check_user_prop $TESTPOOL $name '$value'"
+	log_must zpool set "$name=$value" "$TESTPOOL"
+	log_must check_user_prop "$TESTPOOL" "$name" "$value"
 
 	((i += 1))
 done
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_002_neg.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_002_neg.ksh
index 7c8fcba6e471..5783eb3a0d32 100755
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_002_neg.ksh
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/user_property_002_neg.ksh
@@ -51,20 +51,24 @@ log_onexit cleanup_user_prop $TESTPOOL
 typeset -a names=()
 typeset -a values=()
 
-# Too long property name (256 bytes, which is the 256-byte limit minus 1 byte
-# for the null byte plus 1 byte to reach back over the limit)
-names+=("$(awk 'BEGIN { printf "x:"; while (c++ < (256 - 2 - 1 + 1)) printf "a" }')")
+# A property name that is too long consists of 256 or more bytes (which is (1)
+# the 256-byte limit (2) minus 1 byte for the null byte (3) plus 1 byte to
+# reach back over the limit).
+names+=("$(awk '
+	BEGIN {
+		# Print a 2-byte prefix of the name.
+		printf "x:";
+		# Print the remaining 254 bytes.
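+		# Together with the prefix this yields a 256-byte name, one
+		# byte past the longest valid name (255 bytes plus null byte).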
+		while (c++ < (256 - 2 - 1 + 1))
+			printf "a"
+	}'
+)")
 values+=("too-long-property-name")
-# Too long property value (the limits are 1024 on FreeBSD and 4096 on Linux, so
-# pick the right one; the too long value is, e.g., the limit minus 1 bytes for the
-# null byte plus 1 byte to reach back over the limit)
-if is_linux; then
-	typeset ZFS_MAXPROPLEN=4096
-else
-	typeset ZFS_MAXPROPLEN=1024
-fi
+# A property value that is too long consists of at least 8192 bytes.
+# The smallest too-long value is (1) the limit (2) minus 1 byte for the null
+# byte (3) plus 1 byte to reach back over the limit.
 names+=("too:long:property:value")
-values+=("$(awk -v max="$ZFS_MAXPROPLEN" 'BEGIN { while (c++ < (max - 1 + 1)) printf "A" }')")
+values+=("$(awk 'BEGIN { while (c++ < (8192 - 1 + 1)) printf "A" }')")
 # Invalid property names
 for i in {1..10}; do
 	typeset -i len
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh
index 43bb8ab572d2..d0880c9270f6 100755
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh
@@ -57,13 +57,8 @@ MYPWD="$PWD"
 cd /$TESTPOOL/cp_stress
 CPUS=$(get_num_cpus)
 
-if is_freebsd ; then
-	# 'seekflood' takes longer on FreeBSD and can timeout the test
-	RUNS=3
-else
-	RUNS=10
-fi
-
+# Should run in ~2 minutes on Linux and FreeBSD.
+RUNS=3
 for i in $(seq 1 $RUNS) ; do
 	# Each run takes around 12 seconds.
 	log_must $STF_SUITE/tests/functional/cp_files/seekflood 2000 $CPUS
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c
index 02c2c8e6eca5..f832db85970d 100644
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c
@@ -36,6 +36,13 @@
 #include
 #include
 
+/* some older uClibc versions lack the defines, so we define them manually */
+#ifdef __UCLIBC__
+#ifndef SEEK_DATA
+#define SEEK_DATA 3
+#endif
+#endif
+
 #define	DATASIZE	(4096)
 char data[DATASIZE];
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/cleanup.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/cleanup.ksh
new file mode 100755
index 000000000000..b3c4c04d7761
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/cleanup.ksh
@@ -0,0 +1,29 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
+#
+
+. 
$STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh
new file mode 100755
index 000000000000..5b83a1ca396f
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh
@@ -0,0 +1,223 @@
+#!/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023, Klara Inc.
+#
+
+# DESCRIPTION:
+#	Verify that new entries are not added to the DDT when dedup_table_quota has
+#	been exceeded.
+#
+# STRATEGY:
+#	1. Create a pool with dedup=on
+#	2. Set threshold for on-disk DDT via dedup_table_quota
+#	3. Verify the threshold is exceeded after zpool sync
+#	4. Verify no new entries are added after subsequent syncs
+#	5. Remove all but one entry from DDT
+#	6. Verify new entries are added to DDT
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/events/events_common.kshlib
+
+verify_runnable "both"
+
+log_assert "DDT quota is enforced"
+
+MOUNTDIR="$TEST_BASE_DIR/dedup_mount"
+FILEPATH="$MOUNTDIR/dedup_file"
+VDEV_GENERAL="$TEST_BASE_DIR/vdevfile.general.$$"
+VDEV_DEDUP="$TEST_BASE_DIR/vdevfile.dedup.$$"
+POOL="dedup_pool"
+
+save_tunable TXG_TIMEOUT
+
+function cleanup
+{
+	if poolexists $POOL ; then
+		destroy_pool $POOL
+	fi
+	log_must rm -fd $VDEV_GENERAL $VDEV_DEDUP $MOUNTDIR
+	log_must restore_tunable TXG_TIMEOUT
+}
+
+
+function do_clean
+{
+	log_must destroy_pool $POOL
+	log_must rm -fd $VDEV_GENERAL $VDEV_DEDUP $MOUNTDIR
+}
+
+function do_setup
+{
+	log_must truncate -s 5G $VDEV_GENERAL
+	# Use 'xattr=sa' to prevent SELinux xattrs influencing our accounting
+	log_must zpool create -o ashift=12 -f -O xattr=sa -m $MOUNTDIR $POOL $VDEV_GENERAL
+	log_must zfs set dedup=on $POOL
+	log_must set_tunable32 TXG_TIMEOUT 600
+}
+
+function dedup_table_size
+{
+	get_pool_prop dedup_table_size $POOL
+}
+
+function dedup_table_quota
+{
+	get_pool_prop dedup_table_quota $POOL
+}
+
+function ddt_entries
+{
+	typeset -i entries=$(zpool status -D $POOL | \
+	    grep "dedup: DDT entries" | awk '{print $4}')
+
+	echo ${entries}
+}
+
+function ddt_add_entry
+{
+	count=$1
+	offset=$2
+	expand=$3
+
+	if [ -z "$offset" ]; then
+		offset=1
+	fi
+
+	for i in {$offset..$count}; do
+		echo "$i" > $MOUNTDIR/dedup-$i.txt
+	done
+	log_must sync_pool $POOL
+
+	log_note range $offset - $(( count + offset - 1 ))
+	log_note ddt_add_entry got $(ddt_entries)
+}
+
+# Create 6000 entries over four syncs
+function ddt_nolimit
+{
+	do_setup
+
+	log_note base ddt entries is $(ddt_entries)
+
+	ddt_add_entry 1
+	ddt_add_entry 100
+	ddt_add_entry 101 5000
+	ddt_add_entry 5001 6000
+
+	log_must test $(ddt_entries) -eq 6000
+
+	
do_clean
+}
+
+function ddt_limit
+{
+	do_setup
+
+	log_note base ddt entries is $(ddt_entries)
+
+	log_must zpool set dedup_table_quota=32768 $POOL
+	ddt_add_entry 100
+
+	# It's possible to exceed dedup_table_quota within a single
+	# transaction, so ensure that the threshold has been exceeded
+	cursize=$(dedup_table_size)
+	log_must test $cursize -gt $(dedup_table_quota)
+
+	# count the entries we have
+	log_must test $(ddt_entries) -ge 100
+
+	# attempt to add new entries
+	ddt_add_entry 101 200
+	log_must test $(ddt_entries) -eq 100
+	log_must test $cursize -eq $(dedup_table_size)
+
+	# remove all but one entry
+	for i in {2..100}; do
+		rm $MOUNTDIR/dedup-$i.txt
+	done
+	log_must sync_pool $POOL
+
+	log_must test $(ddt_entries) -eq 1
+	log_must test $cursize -gt $(dedup_table_size)
+	cursize=$(dedup_table_size)
+
+	log_must zpool set dedup_table_quota=none $POOL
+
+	# create more entries
+	zpool status -D $POOL
+	ddt_add_entry 101 200
+	log_must sync_pool $POOL
+
+	log_must test $(ddt_entries) -eq 101
+	log_must test $cursize -lt $(dedup_table_size)
+
+	do_clean
+}
+
+function ddt_dedup_vdev_limit
+{
+	do_setup
+
+	# add a dedicated dedup/special VDEV and enable an automatic quota
+	if (( RANDOM % 2 == 0 )) ; then
+		class="special"
+	else
+		class="dedup"
+	fi
+	log_must truncate -s 200M $VDEV_DEDUP
+	log_must zpool add $POOL $class $VDEV_DEDUP
+	log_must zpool set dedup_table_quota=auto $POOL
+
+	log_must zfs set recordsize=1K $POOL
+	log_must zfs set compression=zstd $POOL
+
+	# Generate a working set to fill up the dedup/special allocation class
+	log_must fio --directory=$MOUNTDIR --name=dedup-filler-1 \
+	    --rw=read --bs=1m --numjobs=2 --iodepth=8 \
+	    --size=512M --end_fsync=1 --ioengine=posixaio --runtime=1 \
+	    --group_reporting --fallocate=none --output-format=terse \
+	    --dedupe_percentage=0
+	log_must sync_pool $POOL
+
+	zpool status -D $POOL
+	zpool list -v $POOL
+	echo DDT size $(dedup_table_size), with $(ddt_entries) entries
+
+	#
+	# With no DDT quota in place, the above workload will produce over
+	# 800,000 entries by using space in the normal class. With a quota,
+	# it will be well below 500,000 entries.
+	#
+	log_must test $(ddt_entries) -le 500000
+
+	do_clean
+}
+
+log_onexit cleanup
+
+ddt_limit
+ddt_nolimit
+ddt_dedup_vdev_limit
+
+log_pass "DDT quota is enforced"
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/setup.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/setup.ksh
new file mode 100755
index 000000000000..3c0830401f81
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/dedup/setup.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+
+default_setup $DISK
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/history/history.cfg b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/history/history.cfg
index a508a7935684..6020443bcdb0 100644
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/history/history.cfg
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/history/history.cfg
@@ -37,11 +37,7 @@ export TMP_HISTORY=$TEST_BASE_DIR/tmp_history.$$
 export NEW_HISTORY=$TEST_BASE_DIR/new_history.$$
 
 export MIGRATEDPOOLNAME=${MIGRATEDPOOLNAME:-history_pool}
-if is_freebsd; then
-	export TIMEZONE=${TIMEZONE:-America/Denver}
-else
-	export TIMEZONE=${TIMEZONE:-US/Mountain}
-fi
+export TIMEZONE=${TIMEZONE:-America/Denver}
 
 export HIST_USER="huser"
 export HIST_GROUP="hgroup"
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
index 2fa146556358..f14b9f450826 100755
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
@@ -41,13 +41,13 @@
 verify_runnable "global"
 
 if ! $(grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r)); then
-	log_unsupported "Requires io_uring support"
+	log_unsupported "Requires io_uring support in the kernel"
 fi
 
 if [ -e /etc/os-release ] ; then
 	source /etc/os-release
-	if [ -n "$REDHAT_SUPPORT_PRODUCT_VERSION" ] && ((floor($REDHAT_SUPPORT_PRODUCT_VERSION) == 9)) ; then
-		log_unsupported "Disabled on CentOS 9, fails with 'Operation not permitted'"
+	if [ "$PLATFORM_ID" = "platform:el9" ]; then
+		log_unsupported "Disabled on RHEL 9 variants: fails with 'Operation not permitted'"
 	fi
 fi
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_002_pos.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_002_pos.ksh
new file mode 100755
index 000000000000..fe837435190c
--- /dev/null
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_002_pos.ksh
@@ -0,0 +1,80 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024 by Nutanix. All rights reserved.
+#
+
+. 
$STF_SUITE/tests/functional/upgrade/upgrade_common.kshlib
+
+#
+# DESCRIPTION:
+#
+# Check that DXATTR stays intact across the SA re-layout triggered by setting projid on an old file/dir after upgrade
+#
+# STRATEGY:
+# 1. Create a pool with all features disabled
+# 2. Create a dataset for testing
+# 3. Set DXATTR on file and directory
+# 4. Upgrade the zpool to support all features
+# 5. Set a project ID on the file and directory to trigger SA re-layout for projid
+# 6. Verify the DXATTRs on the file and directory are intact
+#
+
+TESTFS=$TESTPOOL/testfs
+TESTFSDIR=$TESTDIR/testfs
+
+verify_runnable "global"
+
+log_assert "Check DXATTR stays intact across SA re-layout triggered by setting projid on old file/dir after upgrade"
+log_onexit cleanup_upgrade
+
+log_must zpool create -d -m $TESTDIR $TESTPOOL $TMPDEV
+
+log_must zfs create -o xattr=sa $TESTFS
+log_must mkdir $TESTFSDIR/dir
+log_must touch $TESTFSDIR/file
+log_must set_xattr test test $TESTFSDIR/dir
+log_must set_xattr test test $TESTFSDIR/file
+
+dirino=$(stat -c '%i' $TESTFSDIR/dir)
+fileino=$(stat -c '%i' $TESTFSDIR/file)
+log_must zpool sync $TESTPOOL
+log_must zdb -ddddd $TESTFS $dirino
+log_must zdb -ddddd $TESTFS $fileino
+
+log_mustnot chattr -p 100 $TESTFSDIR/dir
+log_mustnot chattr -p 100 $TESTFSDIR/file
+
+log_must zpool upgrade $TESTPOOL
+
+log_must chattr -p 100 $TESTFSDIR/dir
+log_must chattr -p 100 $TESTFSDIR/file
+log_must zpool sync $TESTPOOL
+log_must zfs umount $TESTFS
+log_must zfs mount $TESTFS
+log_must zdb -ddddd $TESTFS $dirino
+log_must zdb -ddddd $TESTFS $fileino
+log_must get_xattr test $TESTFSDIR/dir
+log_must get_xattr test $TESTFSDIR/file
+
+log_pass "Check DXATTR stays intact across SA re-layout triggered by setting projid on old file/dir after upgrade"
diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
index 619d8d0e8f07..9ebd5b149118 100755
--- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
+++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
@@ -45,15 +45,6 @@ fi
 
 if ! 
is_linux ; then log_unsupported "Only linux supports dd with oflag=dsync for FUA writes" -else - if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi - - # Disabled for the CentOS 9 kernel - if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi fi typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)" diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh index c0b191aafd45..47cc42b9be7d 100755 --- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -44,15 +44,6 @@ verify_runnable "global" if is_linux ; then - if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi - - # Disabled for the CentOS 9 kernel - if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then - log_unsupported "Disabled while issue #14872 is being worked" - fi - # We need '--force' here since the prior tests may leave a filesystem # on the zvol, and blkdiscard will see that filesystem and print a # warning unless you force it.