From bc14d0b23fd44342cbeb70edbc0f102a68babf4a Mon Sep 17 00:00:00 2001 From: lorenz Date: Wed, 8 Jan 2020 02:03:00 +0100 Subject: [PATCH 01/32] Avoid here-documents in systemd mount generator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some systems - openSUSE, for example - there is not yet a writeable temporary file system available, so bash bails out with an error, 'cannot create temp file for here-document: Read-only file system', on the here documents in zfs-mount-generator. The simple fix is to change these into a multi-line echo statement. Reviewed-by: Brian Behlendorf Reviewed-By: Richard Laager Reviewed-by: George Melikov Signed-off-by: Lorenz Hüdepohl Closes #9802 --- .../system-generators/zfs-mount-generator.in | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 850396fb6c2e..066896009e94 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -104,8 +104,13 @@ process_line() { fi # Generate the key-load .service unit - cat > "${dest_norm}/${keyloadunit}" << EOF -# Automatically generated by zfs-mount-generator + # + # Note: It is tempting to use a `< "${dest_norm}/${keyloadunit}" fi # Update the dependencies for the mount file to require the # key-loading unit. @@ -235,8 +239,11 @@ EOF # Create the .mount unit file. # By ordering before zfs-mount.service, we avoid race conditions. - cat > "${dest_norm}/${mountfile}" << EOF -# Automatically generated by zfs-mount-generator + # + # (Do not use `< "${dest_norm}/${mountfile}" # Finally, create the appropriate dependency ln -s "../${mountfile}" "${req_dir}" From f96aa1cb5bc977d8668b789eb23b79b345500f5f Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Wed, 22 Jan 2020 14:45:25 -0600 Subject: [PATCH 02/32] Order zfs-import-*.service after multipathd If someone is using both multipathd and ZFS, they are probably using them together. Ordering the zpool imports after multipathd is ready fixes import issues for multipath configurations. Tested-by: Mike Pastore Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: Richard Laager Closes #9863 --- etc/systemd/system/zfs-import-cache.service.in | 1 + etc/systemd/system/zfs-import-scan.service.in | 1 + 2 files changed, 2 insertions(+) diff --git a/etc/systemd/system/zfs-import-cache.service.in b/etc/systemd/system/zfs-import-cache.service.in index cacb53651545..82495638fdd9 100644 --- a/etc/systemd/system/zfs-import-cache.service.in +++ b/etc/systemd/system/zfs-import-cache.service.in @@ -5,6 +5,7 @@ DefaultDependencies=no Requires=systemd-udev-settle.service After=systemd-udev-settle.service After=cryptsetup.target +After=multipathd.target After=systemd-remount-fs.service Before=zfs-import.target ConditionPathExists=@sysconfdir@/zfs/zpool.cache diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in index 4aae9f06e504..278f937febe2 100644 --- a/etc/systemd/system/zfs-import-scan.service.in +++ b/etc/systemd/system/zfs-import-scan.service.in @@ -5,6 +5,7 @@ DefaultDependencies=no Requires=systemd-udev-settle.service After=systemd-udev-settle.service After=cryptsetup.target +After=multipathd.target Before=zfs-import.target ConditionPathExists=!@sysconfdir@/zfs/zpool.cache From 799952a0004d3c87a3b7a94d7066d279b846e205 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 13 Feb 2020 11:23:02 -0800 Subject: [PATCH 03/32] Missed wakeup when growing kmem cache When growing the size of a (VMEM or KVMEM) kmem cache, spl_cache_grow() always does taskq_dispatch(spl_cache_grow_work), and then waits for the KMC_BIT_GROWING to be cleared by the taskq thread. The taskq thread (spl_cache_grow_work()) does: 1. allocate new slab and add to list 2. wake_up_all(skc_waitq) 3. clear_bit(KMC_BIT_GROWING) Therefore, the waiting thread can wake up before GROWING has been cleared. It will see that the growing has not yet completed, and go back to sleep until it hits the 100ms timeout. This can have an extreme performance impact on workloads that alloc/free more than fits in the (statically-sized) magazines. These workloads allocate and free slabs with high frequency. The problem can be observed with `funclatency spl_cache_grow`, which on some workloads shows that 99.5% of the time it takes <64us to allocate slabs, but we spend ~70% of our time in outliers, waiting for the 100ms timeout. The fix is to do `clear_bit(KMC_BIT_GROWING)` before `wake_up_all(skc_waitq)`. A future investigation should evaluate if we still actually need to taskq_dispatch() at all, and if so on which kernel versions. Reviewed-by: Paul Dagnelie Reviewed-by: Pavel Zakharov Reviewed-by: Brian Behlendorf Reviewed-by: George Wilson Signed-off-by: Matthew Ahrens Closes #9989 --- module/spl/spl-kmem-cache.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index 7baf56de6f93..d71b4b348bb1 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -1176,7 +1176,6 @@ __spl_cache_grow(spl_kmem_cache_t *skc, int flags) smp_mb__before_atomic(); clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); smp_mb__after_atomic(); - wake_up_all(&skc->skc_waitq); } spin_unlock(&skc->skc_lock); @@ -1189,12 +1188,14 @@ spl_cache_grow_work(void *data) spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data; spl_kmem_cache_t *skc = ska->ska_cache; - (void) __spl_cache_grow(skc, ska->ska_flags); + int error = __spl_cache_grow(skc, ska->ska_flags); atomic_dec(&skc->skc_ref); smp_mb__before_atomic(); clear_bit(KMC_BIT_GROWING, &skc->skc_flags); smp_mb__after_atomic(); + if (error == 0) + wake_up_all(&skc->skc_waitq); kfree(ska); } @@ -1245,8 +1246,10 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) */ if (!(skc->skc_flags & KMC_VMEM)) { rc = __spl_cache_grow(skc, flags | KM_NOSLEEP); - if (rc == 0) + if (rc == 0) { + wake_up_all(&skc->skc_waitq); return (0); + } } /* From 87c5d5d14e09f4ddb83b3f8b08c27474b3e1dbe7 Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Thu, 13 Feb 2020 13:55:59 -0600 Subject: [PATCH 04/32] zfs-mount-generator: Fix escaping for / The correct name for the mount unit for / is "-.mount", not ".mount". Reviewed-by: InsanePrawn Reviewed-by: Brian Behlendorf Co-authored-by: Antonio Russo Signed-off-by: Richard Laager Closes #9970 --- etc/systemd/system-generators/zfs-mount-generator.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 066896009e94..b79ce5482e59 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -156,7 +156,7 @@ ExecStop=@sbindir@/zfs unload-key '${dataset}'" > "${dest_norm}/${keyloadunit} fi # Escape the mountpoint per systemd policy. - mountfile="$(systemd-escape "${p_mountpoint#?}").mount" + mountfile="$(systemd-escape --path --suffix=mount "${p_mountpoint}")" # Parse options # see lib/libzfs/libzfs_mount.c:zfs_add_options From 7fd85ddcef58548a27bf6cfa645d52bf2c143be7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 28 Feb 2020 08:58:39 -0800 Subject: [PATCH 05/32] Linux 5.5 compat: blkg_tryget() Commit https://github.com/torvalds/linux/commit/9e8d42a0f accidentally converted the static inline function blkg_tryget() to GPL-only for kernels built with CONFIG_PREEMPT_RCU=y and CONFIG_BLK_CGROUP=y. Resolve the build issue by providing our own equivalent functionality when needed which uses rcu_read_lock_sched() internally as before. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9745 Closes #10072 --- config/kernel-bio-tryget.m4 | 37 +++++++++++++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/zfs/vdev_disk.c | 32 +++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 config/kernel-bio-tryget.m4 diff --git a/config/kernel-bio-tryget.m4 b/config/kernel-bio-tryget.m4 new file mode 100644 index 000000000000..fb831ca3b3ec --- /dev/null +++ b/config/kernel-bio-tryget.m4 @@ -0,0 +1,37 @@ +dnl # +dnl # Linux 5.5 API, +dnl # +dnl # The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by +dnl # blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched(). +dnl # As a side effect the function was converted to GPL-only. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKG_TRYGET], [ + ZFS_LINUX_TEST_SRC([blkg_tryget], [ + #include + #include + #include + ],[ + struct blkcg_gq blkg __attribute__ ((unused)); + bool rc __attribute__ ((unused)); + rc = blkg_tryget(&blkg); + ], [], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKG_TRYGET], [ + AC_MSG_CHECKING([whether blkg_tryget() is available]) + ZFS_LINUX_TEST_RESULT([blkg_tryget], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKG_TRYGET, 1, [blkg_tryget() is available]) + + AC_MSG_CHECKING([whether blkg_tryget() is GPL-only]) + ZFS_LINUX_TEST_RESULT([blkg_tryget_license], [ + AC_MSG_RESULT(no) + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKG_TRYGET_GPL_ONLY, 1, + [blkg_tryget() GPL-only]) + ]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index dce619729d4c..16cc4568ba20 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG + ZFS_AC_KERNEL_SRC_BLKG_TRYGET ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE ZFS_AC_KERNEL_SRC_GET_DISK_RO ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL @@ -186,6 +187,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_BIO_BI_STATUS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD + ZFS_AC_KERNEL_BLKG_TRYGET ZFS_AC_KERNEL_BLK_QUEUE_BDI ZFS_AC_KERNEL_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 661f0f1b727c..8544bb8ffb6f 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -473,6 +473,36 @@ vdev_submit_bio_impl(struct bio *bio) #ifdef HAVE_BIO_SET_DEV #if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY) +/* + * The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by + * blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched(). + * As a side effect the function was converted to GPL-only. Define our + * own version when needed which uses rcu_read_lock_sched(). + */ +#if defined(HAVE_BLKG_TRYGET_GPL_ONLY) +static inline bool +vdev_blkg_tryget(struct blkcg_gq *blkg) +{ + struct percpu_ref *ref = &blkg->refcnt; + unsigned long __percpu *count; + bool rc; + + rcu_read_lock_sched(); + + if (__ref_is_percpu(ref, &count)) { + this_cpu_inc(*count); + rc = true; + } else { + rc = atomic_long_inc_not_zero(&ref->count); + } + + rcu_read_unlock_sched(); + + return (rc); +} +#elif defined(HAVE_BLKG_TRYGET) +#define vdev_blkg_tryget(bg) blkg_tryget(bg) +#endif /* * The Linux 5.0 kernel updated the bio_set_dev() macro so it calls the * GPL-only bio_associate_blkg() symbol thus inadvertently converting @@ -487,7 +517,7 @@ vdev_bio_associate_blkg(struct bio *bio) ASSERT3P(q, !=, NULL); ASSERT3P(bio->bi_blkg, ==, NULL); - if (q->root_blkg && blkg_tryget(q->root_blkg)) + if (q->root_blkg && vdev_blkg_tryget(q->root_blkg)) bio->bi_blkg = q->root_blkg; } #define bio_associate_blkg vdev_bio_associate_blkg From f562576ae64a66ecff7911a66cab6b704403847d Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 28 Feb 2020 09:23:48 -0800 Subject: [PATCH 06/32] Fix CONFIG_MODULES=no Linux kernel config When configuring as builtin (--enable-linux-builtin) for kernels without loadable module support (CONFIG_MODULES=n) only the object file is created. Never a loadable kmod. Update ZFS_LINUX_TRY_COMPILE to handle this in a manor similar to the ZFS_LINUX_TEST_COMPILE_ALL macro. Reviewed-by: George Melikov Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9887 Closes #10063 --- config/kernel.m4 | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/config/kernel.m4 b/config/kernel.m4 index 16cc4568ba20..7f3e18d597d5 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -811,11 +811,20 @@ dnl # $2 - source dnl # $3 - run on success (valid .ko generated) dnl # $4 - run on failure (unable to compile) dnl # +dnl # When configuring as builtin (--enable-linux-builtin) for kernels +dnl # without loadable module support (CONFIG_MODULES=n) only the object +dnl # file is created. See ZFS_LINUX_TEST_COMPILE_ALL for details. +dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ - ZFS_LINUX_COMPILE_IFELSE( - [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], - [test -f build/conftest/conftest.ko], - [$3], [$4]) + AS_IF([test "x$enable_linux_builtin" = "xyes"], [ + ZFS_LINUX_COMPILE_IFELSE( + [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], + [test -f build/conftest/conftest.o], [$3], [$4]) + ], [ + ZFS_LINUX_COMPILE_IFELSE( + [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], + [test -f build/conftest/conftest.ko], [$3], [$4]) + ]) ]) dnl # From 0f46da89d7d745ae1af41917a6fcbb93df4b6c31 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 11 Jan 2020 19:14:23 +0100 Subject: [PATCH 07/32] Systemd mount generator: Silence shellcheck warnings Silences a warning about an intentionally unquoted variable. Fixes a warning caused by strings split across lines by slightly refactoring keyloadcmd. Reviewed-by: Richard Laager Reviewed-by: Antonio Russo Reviewed-by: Brian Behlendorf Signed-off-by: InsanePrawn Closes #9649 --- .../system-generators/zfs-mount-generator.in | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index b79ce5482e59..411b3f955349 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -60,6 +60,7 @@ process_line() { IFS="$(printf '\t')" # protect against special characters in, e.g., mountpoints set -f + # shellcheck disable=SC2086 set -- $1 dataset="${1}" p_mountpoint="${2}" @@ -87,17 +88,19 @@ process_line() { pathdep="RequiresMountsFor='${p_keyloc#file://}'" keyloadcmd="@sbindir@/zfs load-key '${dataset}'" elif [ "${p_keyloc}" = "prompt" ] ; then - keyloadcmd="/bin/sh -c 'set -eu;"\ -"keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";"\ -"[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;"\ -"count=0;"\ -"while [ \$\$count -lt 3 ];do"\ -" systemd-ask-password --id=\"zfs:${dataset}\""\ -" \"Enter passphrase for ${dataset}:\"|"\ -" @sbindir@/zfs load-key \"${dataset}\" && exit 0;"\ -" count=\$\$((count + 1));"\ -"done;"\ -"exit 1'" + keyloadcmd="\ +/bin/sh -c '\ +set -eu;\ +keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";\ +[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;\ +count=0;\ +while [ \$\$count -lt 3 ];do\ + systemd-ask-password --id=\"zfs:${dataset}\"\ + \"Enter passphrase for ${dataset}:\"|\ + @sbindir@/zfs load-key \"${dataset}\" && exit 0;\ + count=\$\$((count + 1));\ +done;\ +exit 1'" else printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ "${dataset}" >/dev/kmsg From 04919d35891ce27c28e1baf12f784cdfc6836087 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Wed, 12 Feb 2020 18:01:15 +0100 Subject: [PATCH 08/32] Systemd mount generator: Generate noauto units; add control properties This commit refactors the systemd mount generators and makes the following major changes: - The generator now generates units for datasets marked canmount=noauto, too. These units are NOT WantedBy local-fs.target. If there are multiple noauto datasets for a path, no noauto unit will be created. Datasets with canmount=on are prioritized. - Introduces handling of new user properties which are now included in the zfs-list.cache files: - org.openzfs.systemd:requires: List of units to require for this mount unit - org.openzfs.systemd:requires-mounts-for: List of mounts to require by this mount unit - org.openzfs.systemd:before: List of units to order after this mount unit - org.openzfs.systemd:after: List of units to order before this mount unit - org.openzfs.systemd:wanted-by: List of units to add a Wants dependency on this mount unit to - org.openzfs.systemd:required-by: List of units to add a Requires dependency on this mount unit to - org.openzfs.systemd:nofail: Toggles between a wants and a requires dependency. - org.openzfs.systemd:ignore: Do not generate a mount unit for this dataset. Consult the updated man page for detailed documentation. - Restructures and extends the zfs-mount-generator(8) man page with the above properties, information on unit ordering and a license header. Reviewed-by: Richard Laager Reviewed-by: Antonio Russo Reviewed-by: Brian Behlendorf Signed-off-by: InsanePrawn Closes #9649 --- .../zed.d/history_event-zfs-list-cacher.sh.in | 17 +- .../system-generators/zfs-mount-generator.in | 217 +++++++++++++++--- man/man8/Makefile.am | 1 + man/man8/zfs-mount-generator.8.in | 193 ++++++++++++++-- 4 files changed, 384 insertions(+), 44 deletions(-) diff --git a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in index 6d0f44ab3260..053b4414a768 100755 --- a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in +++ b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in @@ -46,8 +46,13 @@ case "${ZEVENT_HISTORY_INTERNAL_NAME}" in set|inherit) # Only act if one of the tracked properties is altered. case "${ZEVENT_HISTORY_INTERNAL_STR%%=*}" in - canmount|mountpoint|atime|relatime|devices|exec| \ - readonly|setuid|nbmand|encroot|keylocation) ;; + canmount|mountpoint|atime|relatime|devices|exec|readonly| \ + setuid|nbmand|encroot|keylocation|org.openzfs.systemd:requires| \ + org.openzfs.systemd:requires-mounts-for| \ + org.openzfs.systemd:before|org.openzfs.systemd:after| \ + org.openzfs.systemd:wanted-by|org.openzfs.systemd:required-by| \ + org.openzfs.systemd:nofail|org.openzfs.systemd:ignore \ + ) ;; *) exit 0 ;; esac ;; @@ -61,8 +66,12 @@ esac zed_lock zfs-list trap abort_alter EXIT -PROPS="name,mountpoint,canmount,atime,relatime,devices,exec,readonly" -PROPS="${PROPS},setuid,nbmand,encroot,keylocation" +PROPS="name,mountpoint,canmount,atime,relatime,devices,exec\ +,readonly,setuid,nbmand,encroot,keylocation\ +,org.openzfs.systemd:requires,org.openzfs.systemd:requires-mounts-for\ +,org.openzfs.systemd:before,org.openzfs.systemd:after\ +,org.openzfs.systemd:wanted-by,org.openzfs.systemd:required-by\ +,org.openzfs.systemd:nofail,org.openzfs.systemd:ignore" "${ZFS}" list -H -t filesystem -o $PROPS -r "${ZEVENT_POOL}" > "${FSLIST_TMP}" diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 411b3f955349..bb735112dad1 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -2,6 +2,7 @@ # zfs-mount-generator - generates systemd mount units for zfs # Copyright (c) 2017 Antonio Russo +# Copyright (c) 2020 InsanePrawn # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -33,6 +34,35 @@ do_fail() { exit 1 } +# test if $1 is in space-separated list $2 +is_known() { + query="$1" + IFS=' ' + # protect against special characters + set -f + for element in $2 ; do + if [ "$query" = "$element" ] ; then + return 0 + fi + done + return 1 +} + +# create dependency on unit file $1 +# of type $2, i.e. "wants" or "requires" +# in the target units from space-separated list $3 +create_dependencies() { + unitfile="$1" + suffix="$2" + # protect against special characters + set -f + for target in $3 ; do + target_dir="${dest_norm}/${target}.${suffix}/" + mkdir -p "${target_dir}" + ln -s "../${unitfile}" "${target_dir}" + done +} + # see systemd.generator if [ $# -eq 0 ] ; then dest_norm="/tmp" @@ -42,11 +72,6 @@ else do_fail "zero or three arguments required" fi -# For ZFSs marked "auto", a dependency is created for local-fs.target. To -# avoid regressions, this dependency is reduced to "wants" rather than -# "requires". **THIS MAY CHANGE** -req_dir="${dest_norm}/local-fs.target.wants/" -mkdir -p "${req_dir}" # All needed information about each ZFS is available from # zfs list -H -t filesystem -o @@ -74,18 +99,58 @@ process_line() { p_nbmand="${10}" p_encroot="${11}" p_keyloc="${12}" + p_systemd_requires="${13}" + p_systemd_requiresmountsfor="${14}" + p_systemd_before="${15}" + p_systemd_after="${16}" + p_systemd_wantedby="${17}" + p_systemd_requiredby="${18}" + p_systemd_nofail="${19}" + p_systemd_ignore="${20}" # Minimal pre-requisites to mount a ZFS dataset + # By ordering before zfs-mount.service, we avoid race conditions. + after="zfs-import.target" + before="zfs-mount.service" wants="zfs-import.target" + requires="" + requiredmounts="" + wantedby="" + requiredby="" + noauto="off" + + if [ -n "${p_systemd_after}" ] && \ + [ "${p_systemd_after}" != "-" ] ; then + after="${p_systemd_after} ${after}" + fi + + if [ -n "${p_systemd_before}" ] && \ + [ "${p_systemd_before}" != "-" ] ; then + before="${p_systemd_before} ${before}" + fi + + if [ -n "${p_systemd_requires}" ] && \ + [ "${p_systemd_requires}" != "-" ] ; then + requires="Requires=${p_systemd_requires}" + fi + + if [ -n "${p_systemd_requiresmountsfor}" ] && \ + [ "${p_systemd_requiresmountsfor}" != "-" ] ; then + requiredmounts="RequiresMountsFor=${p_systemd_requiresmountsfor}" + fi # Handle encryption if [ -n "${p_encroot}" ] && [ "${p_encroot}" != "-" ] ; then keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service" if [ "${p_encroot}" = "${dataset}" ] ; then - pathdep="" + keymountdep="" if [ "${p_keyloc%%://*}" = "file" ] ; then - pathdep="RequiresMountsFor='${p_keyloc#file://}'" + if [ -n "${requiredmounts}" ] ; then + keymountdep="${requiredmounts} '${p_keyloc#file://}'" + else + keymountdep="RequiresMountsFor='${p_keyloc#file://}'" + fi keyloadcmd="@sbindir@/zfs load-key '${dataset}'" elif [ "${p_keyloc}" = "prompt" ] ; then keyloadcmd="\ @@ -121,8 +186,10 @@ SourcePath=${cachefile} Documentation=man:zfs-mount-generator(8) DefaultDependencies=no Wants=${wants} -After=${wants} -${pathdep} +After=${after} +Before=${before} +${requires} +${keymountdep} [Service] Type=oneshot @@ -130,23 +197,35 @@ RemainAfterExit=yes ExecStart=${keyloadcmd} ExecStop=@sbindir@/zfs unload-key '${dataset}'" > "${dest_norm}/${keyloadunit}" fi - # Update the dependencies for the mount file to require the + # Update the dependencies for the mount file to want the # key-loading unit. wants="${wants} ${keyloadunit}" + after="${after} ${keyloadunit}" fi # Prepare the .mount unit + # skip generation of the mount unit if org.openzfs.systemd:ignore is "on" + if [ -n "${p_systemd_ignore}" ] ; then + if [ "${p_systemd_ignore}" = "on" ] ; then + return + elif [ "${p_systemd_ignore}" = "-" ] \ + || [ "${p_systemd_ignore}" = "off" ] ; then + : # This is OK + else + do_fail "invalid org.openzfs.systemd:ignore for ${dataset}" + fi + fi + # Check for canmount=off . if [ "${p_canmount}" = "off" ] ; then return elif [ "${p_canmount}" = "noauto" ] ; then - # Don't let a noauto marked mountpoint block an "auto" marked mountpoint - return + noauto="on" elif [ "${p_canmount}" = "on" ] ; then : # This is OK else - do_fail "invalid canmount" + do_fail "invalid canmount for ${dataset}" fi # Check for legacy and blank mountpoints. @@ -155,7 +234,7 @@ ExecStop=@sbindir@/zfs unload-key '${dataset}'" > "${dest_norm}/${keyloadunit} elif [ "${p_mountpoint}" = "none" ] ; then return elif [ "${p_mountpoint%"${p_mountpoint#?}"}" != "/" ] ; then - do_fail "invalid mountpoint $*" + do_fail "invalid mountpoint for ${dataset}" fi # Escape the mountpoint per systemd policy. @@ -233,15 +312,91 @@ ExecStop=@sbindir@/zfs unload-key '${dataset}'" > "${dest_norm}/${keyloadunit} "${dataset}" >/dev/kmsg fi - # If the mountpoint has already been created, give it precedence. + if [ -n "${p_systemd_wantedby}" ] && \ + [ "${p_systemd_wantedby}" != "-" ] ; then + noauto="on" + if [ "${p_systemd_wantedby}" = "none" ] ; then + wantedby="" + else + wantedby="${p_systemd_wantedby}" + before="${before} ${wantedby}" + fi + fi + + if [ -n "${p_systemd_requiredby}" ] && \ + [ "${p_systemd_requiredby}" != "-" ] ; then + noauto="on" + if [ "${p_systemd_requiredby}" = "none" ] ; then + requiredby="" + else + requiredby="${p_systemd_requiredby}" + before="${before} ${requiredby}" + fi + fi + + # For datasets with canmount=on, a dependency is created for + # local-fs.target by default. To avoid regressions, this dependency + # is reduced to "wants" rather than "requires" when nofail is not "off". + # **THIS MAY CHANGE** + # noauto=on disables this behavior completely. + if [ "${noauto}" != "on" ] ; then + if [ "${p_systemd_nofail}" = "off" ] ; then + requiredby="local-fs.target" + before="${before} local-fs.target" + else + wantedby="local-fs.target" + if [ "${p_systemd_nofail}" != "on" ] ; then + before="${before} local-fs.target" + fi + fi + fi + + # Handle existing files: + # 1. We never overwrite existing files, although we may delete + # files if we're sure they were created by us. (see 5.) + # 2. We handle files differently based on canmount. Units with canmount=on + # always have precedence over noauto. This is enforced by the sort pipe + # in the loop around this function. + # It is important to use $p_canmount and not $noauto here, since we + # sort by canmount while other properties also modify $noauto, e.g. + # org.openzfs.systemd:wanted-by. + # 3. If no unit file exists for a noauto dataset, we create one. + # Additionally, we use $noauto_files to track the unit file names + # (which are the systemd-escaped mountpoints) of all (exclusively) + # noauto datasets that had a file created. + # 4. If the file to be created is found in the tracking variable, + # we do NOT create it. + # 5. If a file exists for a noauto dataset, we check whether the file + # name is in the variable. If it is, we have multiple noauto datasets + # for the same mountpoint. In such cases, we remove the file for safety. + # To avoid further noauto datasets creating a file for this path again, + # we leave the file name in the tracking variable. if [ -e "${dest_norm}/${mountfile}" ] ; then - printf 'zfs-mount-generator: %s already exists\n' "${mountfile}" \ - >/dev/kmsg + if is_known "$mountfile" "$noauto_files" ; then + # if it's in $noauto_files, we must be noauto too. See 2. + printf 'zfs-mount-generator: removing duplicate noauto %s\n' \ + "${mountfile}" >/dev/kmsg + # See 5. + rm "${dest_norm}/${mountfile}" + else + # don't log for canmount=noauto + if [ "${p_canmount}" = "on" ] ; then + printf 'zfs-mount-generator: %s already exists. Skipping.\n' \ + "${mountfile}" >/dev/kmsg + fi + fi + # file exists; Skip current dataset. return + else + if is_known "${mountfile}" "${noauto_files}" ; then + # See 4. + return + elif [ "${p_canmount}" = "noauto" ] ; then + noauto_files="${mountfile} ${noauto_files}" + fi fi # Create the .mount unit file. - # By ordering before zfs-mount.service, we avoid race conditions. # # (Do not use `< "${dest_norm}/${keyloadunit} [Unit] SourcePath=${cachefile} Documentation=man:zfs-mount-generator(8) -Before=local-fs.target zfs-mount.service -After=${wants} + +Before=${before} +After=${after} Wants=${wants} +${requires} +${requiredmounts} [Mount] Where=${p_mountpoint} @@ -261,13 +419,20 @@ What=${dataset} Type=zfs Options=defaults${opts},zfsutil" > "${dest_norm}/${mountfile}" - # Finally, create the appropriate dependency - ln -s "../${mountfile}" "${req_dir}" + # Finally, create the appropriate dependencies + create_dependencies "${mountfile}" "wants" "$wantedby" + create_dependencies "${mountfile}" "requires" "$requiredby" + } -# Feed each line into process_line for cachefile in "${FSLIST}/"* ; do - while read -r fs ; do - process_line "${fs}" - done < "${cachefile}" + # Sort cachefile's lines by canmount, "on" before "noauto" + # and feed each line into process_line + sort -t "$(printf '\t')" -k 3 -r "${cachefile}" | \ + ( # subshell is necessary for `sort|while read` and $noauto_files + noauto_files="" + while read -r fs ; do + process_line "${fs}" + done + ) done diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am index 5401ff06f2bb..d5df66530ac9 100644 --- a/man/man8/Makefile.am +++ b/man/man8/Makefile.am @@ -20,6 +20,7 @@ EXTRA_DIST = \ $(nodist_man_MANS): %: %.in -$(SED) -e 's,@zfsexecdir\@,$(zfsexecdir),g' \ + -e 's,@systemdgeneratordir\@,$(systemdgeneratordir),g' \ -e 's,@runstatedir\@,$(runstatedir),g' \ -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in index a696eb4617d3..41a2999f0f0a 100644 --- a/man/man8/zfs-mount-generator.8.in +++ b/man/man8/zfs-mount-generator.8.in @@ -1,8 +1,33 @@ -.TH "ZFS\-MOUNT\-GENERATOR" "8" "ZFS" "zfs-mount-generator" "\"" +.\" +.\" Copyright 2018 Antonio Russo +.\" Copyright 2019 Kjeld Schouten-Lebbing +.\" Copyright 2020 InsanePrawn +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining +.\" a copy of this software and associated documentation files (the +.\" "Software"), to deal in the Software without restriction, including +.\" without limitation the rights to use, copy, modify, merge, publish, +.\" distribute, sublicense, and/or sell copies of the Software, and to +.\" permit persons to whom the Software is furnished to do so, subject to +.\" the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be +.\" included in all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +.\" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +.\" NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +.\" LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +.\" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +.\" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +.TH "ZFS\-MOUNT\-GENERATOR" "8" "2020-01-19" "ZFS" "zfs-mount-generator" "\"" + .SH "NAME" zfs\-mount\-generator \- generates systemd mount units for ZFS .SH SYNOPSIS -.B /lib/systemd/system-generators/zfs\-mount\-generator +.B @systemdgeneratordir@/zfs\-mount\-generator .sp .SH DESCRIPTION zfs\-mount\-generator implements the \fBGenerators Specification\fP @@ -11,22 +36,50 @@ of and is called during early boot to generate .BR systemd.mount (5) units for automatically mounted datasets. Mount ordering and dependencies -are created for all tracked pools (see below). If a dataset has -.BR canmount=on +are created for all tracked pools (see below). + +.SS ENCRYPTION KEYS +If the dataset is an encryption root, a service that loads the associated key (either from file or through a +.BR systemd\-ask\-password (1) +prompt) will be created. This service +. BR RequiresMountsFor +the path of the key (if file-based) and also copies the mount unit's +.BR After , +.BR Before and -.BR mountpoint -set, the -.BR auto -mount option will be set, and a dependency for +.BR Requires . +All mount units of encrypted datasets add the key\-load service for their encryption root to their +.BR Wants +and +.BR After . +The service will not be +.BR Want ed +or +.BR Require d +by .BR local-fs.target -on the mount will be created. +directly, and so will only be started manually or as a dependency of a started mount unit. + +.SS UNIT ORDERING AND DEPENDENCIES +mount unit's +.BR Before +\-> +key\-load service (if any) +\-> +mount unit +\-> +mount unit's +.BR After + +It is worth nothing that when a mount unit is activated, it activates all available mount units for parent paths to its mountpoint, i.e. activating the mount unit for /tmp/foo/1/2/3 automatically activates all available mount units for /tmp, /tmp/foo, /tmp/foo/1, and /tmp/foo/1/2. This is true for any combination of mount units from any sources, not just ZFS. -Because zfs pools may not be available very early in the boot process, -information on ZFS mountpoints must be stored separately. The output -of the command +.SS CACHE FILE +Because ZFS pools may not be available very early in the boot process, +information on ZFS mountpoints must be stored separately. The output of the command .PP .RS 4 -zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand,encroot,keylocation +zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand,encroot,keylocation,org.openzfs.systemd:requires,org.openzfs.systemd:requires-mounts-for,org.openzfs.systemd:before,org.openzfs.systemd:after,org.openzfs.systemd:wanted-by,org.openzfs.systemd:required-by,org.openzfs.systemd:nofail,org.openzfs.systemd:ignore + .RE .PP for datasets that should be mounted by systemd, should be kept @@ -45,6 +98,98 @@ history_event-zfs-list-cacher.sh . .RE .PP .sp +.SS PROPERTIES +The behavior of the generator script can be influenced by the following dataset properties: +.sp +.TP 4 +.BR canmount = on | off | noauto +If a dataset has +.BR mountpoint +set and +.BR canmount +is not +.BR off , +a mount unit will be generated. +Additionally, if +.BR canmount +is +.BR on , +.BR local-fs.target +will gain a dependency on the mount unit. + +This behavior is equal to the +.BR auto +and +.BR noauto +legacy mount options, see +.BR systemd.mount (5). + +Encryption roots always generate a key-load service, even for +.BR canmount=off . +.TP 4 +.BR org.openzfs.systemd:requires\-mounts\-for = \fIpath\fR... +Space\-separated list of mountpoints to require to be mounted for this mount unit +.TP 4 +.BR org.openzfs.systemd:before = \fIunit\fR... +The mount unit and associated key\-load service will be ordered before this space\-separated list of units. +.TP 4 +.BR org.openzfs.systemd:after = \fIunit\fR... +The mount unit and associated key\-load service will be ordered after this space\-separated list of units. +.TP 4 +.BR org.openzfs.systemd:wanted\-by = \fIunit\fR... +Space-separated list of units that will gain a +.BR Wants +dependency on this mount unit. +Setting this property implies +.BR noauto . +.TP 4 +.BR org.openzfs.systemd:required\-by = \fIunit\fR... +Space-separated list of units that will gain a +.BR Requires +dependency on this mount unit. +Setting this property implies +.BR noauto . +.TP 4 +.BR org.openzfs.systemd:nofail = unset | on | off +Toggles between a +.BR Wants +and +.BR Requires +type of dependency between the mount unit and +.BR local-fs.target , +if +.BR noauto +isn't set or implied. + +.BR on : +Mount will be +.BR WantedBy +local-fs.target + +.BR off : +Mount will be +.BR Before +and +.BR RequiredBy +local-fs.target + +.BR unset : +Mount will be +.BR Before +and +.BR WantedBy +local-fs.target +.TP 4 +.BR org.openzfs.systemd:ignore = on | off +If set to +.BR on , +do not generate a mount unit for this dataset. + +.RE +See also +.BR systemd.mount (5) + +.PP .SH EXAMPLE To begin, enable tracking for the pool: .PP @@ -63,7 +208,9 @@ systemctl enable zfs-zed.service systemctl restart zfs-zed.service .RE .PP -Force the running of the ZEDLET by setting canmount=on for at least one dataset in the pool: +Force the running of the ZEDLET by setting a monitored property, e.g. +.BR canmount , +for at least one dataset in the pool: .PP .RS 4 zfs set canmount=on @@ -71,6 +218,24 @@ zfs set canmount=on .RE .PP This forces an update to the stale cache file. + +To test the generator output, run +.PP +.RS 4 +@systemdgeneratordir@/zfs-mount-generator /tmp/zfs-mount-generator . . +.RE +.PP +This will generate units and dependencies in +.I /tmp/zfs-mount-generator +for you to inspect them. The second and third argument are ignored. + +If you're satisfied with the generated units, instruct systemd to re-run all generators: +.PP +.RS 4 +systemctl daemon-reload +.RE +.PP + .sp .SH SEE ALSO .BR zfs (5) From 44f70309e0d73b926d09f4d04de7e763d5c3333a Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Mon, 9 Mar 2020 19:09:09 +0100 Subject: [PATCH 09/32] Systemd mount generator: don't fail keyload from file if already loaded Previously the generated keyload units for encryption roots with keylocation=file://* didn't contain the code to detect if the key was already loaded and would be marked failed in such situations. Move the code to check whether the key is already loaded from keylocation=prompt handling to general key loading code. Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: InsanePrawn Closes #10103 --- .../system-generators/zfs-mount-generator.in | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index bb735112dad1..147855180537 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -151,13 +151,9 @@ process_line() { else keymountdep="RequiresMountsFor='${p_keyloc#file://}'" fi - keyloadcmd="@sbindir@/zfs load-key '${dataset}'" + keyloadscript="@sbindir@/zfs load-key \"${dataset}\"" elif [ "${p_keyloc}" = "prompt" ] ; then - keyloadcmd="\ -/bin/sh -c '\ -set -eu;\ -keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";\ -[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;\ + keyloadscript="\ count=0;\ while [ \$\$count -lt 3 ];do\ systemd-ask-password --id=\"zfs:${dataset}\"\ @@ -165,11 +161,19 @@ while [ \$\$count -lt 3 ];do\ @sbindir@/zfs load-key \"${dataset}\" && exit 0;\ count=\$\$((count + 1));\ done;\ -exit 1'" +exit 1" else printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ "${dataset}" >/dev/kmsg fi + keyloadcmd="\ +/bin/sh -c '\ +set -eu;\ +keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";\ +[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;\ +${keyloadscript}'" + + # Generate the key-load .service unit # From 89a279a6c2b8b3088d2feacd38684df87b809af7 Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Wed, 5 Feb 2020 18:01:49 -0600 Subject: [PATCH 10/32] Make init scripts depend on Makefile This brings it in line with the example: https://www.gnu.org/software/automake/manual/html_node/Scripts.html This way, if the substitution code is changed, they should update. Signed-off-by: Richard Laager --- etc/init.d/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/init.d/Makefile.am b/etc/init.d/Makefile.am index 93432386a2c4..34d385f2ad93 100644 --- a/etc/init.d/Makefile.am +++ b/etc/init.d/Makefile.am @@ -15,7 +15,7 @@ EXTRA_DIST = \ $(top_srcdir)/etc/init.d/zfs-zed.in \ $(top_srcdir)/etc/init.d/zfs.in -$(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in +$(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in Makefile -(if [ -e /etc/debian_version ]; then \ NFS_SRV=nfs-kernel-server; \ else \ From caa7744b91fcd38f68d54aacade7e5d5bc3e3539 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Mon, 26 Aug 2019 14:48:31 -0400 Subject: [PATCH 11/32] Restore :: in Makefile.am The double-colon looked like a typo, but it's actually an obscure feature. Rules with :: may appear multiple times and are run independently of one another in the order they appear. The use of :: for distclean-local was conventional, not accidental. Add comments to indicate the intentional use of double-colon rules. Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9210 --- contrib/dracut/02zfsexpandknowledge/Makefile.am | 2 ++ contrib/dracut/90zfs/Makefile.am | 1 + contrib/initramfs/hooks/Makefile.am | 2 ++ contrib/initramfs/scripts/Makefile.am | 2 ++ etc/init.d/Makefile.am | 1 + etc/modules-load.d/Makefile.am | 1 + etc/systemd/system-generators/Makefile.am | 1 + etc/systemd/system/Makefile.am | 1 + tests/zfs-tests/include/Makefile.am | 1 + tests/zfs-tests/tests/functional/pyzfs/Makefile.am | 1 + udev/rules.d/Makefile.am | 1 + 11 files changed, 14 insertions(+) diff --git a/contrib/dracut/02zfsexpandknowledge/Makefile.am b/contrib/dracut/02zfsexpandknowledge/Makefile.am index a5c567c161c8..6e553e8d456f 100644 --- a/contrib/dracut/02zfsexpandknowledge/Makefile.am +++ b/contrib/dracut/02zfsexpandknowledge/Makefile.am @@ -15,8 +15,10 @@ $(pkgdracut_SCRIPTS):%:%.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. clean-local:: -$(RM) $(pkgdracut_SCRIPTS) +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(pkgdracut_SCRIPTS) diff --git a/contrib/dracut/90zfs/Makefile.am b/contrib/dracut/90zfs/Makefile.am index 0a557f57f256..1680230fa34e 100644 --- a/contrib/dracut/90zfs/Makefile.am +++ b/contrib/dracut/90zfs/Makefile.am @@ -33,5 +33,6 @@ $(pkgdracut_SCRIPTS) $(pkgdracut_DATA) :%:%.in -e 's,@mounthelperdir\@,$(mounthelperdir),g' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(pkgdracut_SCRIPTS) $(pkgdracut_DATA) diff --git a/contrib/initramfs/hooks/Makefile.am b/contrib/initramfs/hooks/Makefile.am index 1735872c29b7..3d8ef627ed47 100644 --- a/contrib/initramfs/hooks/Makefile.am +++ b/contrib/initramfs/hooks/Makefile.am @@ -14,8 +14,10 @@ $(hooks_SCRIPTS):%:%.in -e 's,@mounthelperdir\@,$(mounthelperdir),g' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. clean-local:: -$(RM) $(hooks_SCRIPTS) +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(hooks_SCRIPTS) diff --git a/contrib/initramfs/scripts/Makefile.am b/contrib/initramfs/scripts/Makefile.am index 12c2641b80cc..3ab18ba2cbce 100644 --- a/contrib/initramfs/scripts/Makefile.am +++ b/contrib/initramfs/scripts/Makefile.am @@ -13,8 +13,10 @@ $(scripts_DATA):%:%.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. clean-local:: -$(RM) $(scripts_SCRIPTS) +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(scripts_SCRIPTS) diff --git a/etc/init.d/Makefile.am b/etc/init.d/Makefile.am index 34d385f2ad93..0cccca6416bd 100644 --- a/etc/init.d/Makefile.am +++ b/etc/init.d/Makefile.am @@ -40,5 +40,6 @@ $(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in Makefile [ '$@' = 'zfs-functions' -o '$@' = 'zfs' ] || \ chmod +x '$@') +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(init_SCRIPTS) $(initcommon_SCRIPTS) $(initconf_SCRIPTS) diff --git a/etc/modules-load.d/Makefile.am b/etc/modules-load.d/Makefile.am index 58c7acd44e7c..47762b7d0657 100644 --- a/etc/modules-load.d/Makefile.am +++ b/etc/modules-load.d/Makefile.am @@ -9,5 +9,6 @@ $(modulesload_DATA):%:%.in -e '' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(modulesload_DATA) diff --git a/etc/systemd/system-generators/Makefile.am b/etc/systemd/system-generators/Makefile.am index c730982a5152..b4df01322211 100644 --- a/etc/systemd/system-generators/Makefile.am +++ b/etc/systemd/system-generators/Makefile.am @@ -11,5 +11,6 @@ $(systemdgenerator_SCRIPTS): %: %.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(systemdgenerator_SCRIPTS) diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index 130c6c757a59..4e14467a044f 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -35,5 +35,6 @@ install-data-hook: $(MKDIR_P) "$(DESTDIR)$(systemdunitdir)" ln -sf /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-import.service" +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(systemdunit_DATA) $(systemdpreset_DATA) diff --git a/tests/zfs-tests/include/Makefile.am b/tests/zfs-tests/include/Makefile.am index 41e105287b48..86c387c677d7 100644 --- a/tests/zfs-tests/include/Makefile.am +++ b/tests/zfs-tests/include/Makefile.am @@ -16,5 +16,6 @@ $(nodist_pkgdata_DATA): %: %.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) default.cfg diff --git a/tests/zfs-tests/tests/functional/pyzfs/Makefile.am b/tests/zfs-tests/tests/functional/pyzfs/Makefile.am index 4d99285e49ca..0c68c252b93b 100644 --- a/tests/zfs-tests/tests/functional/pyzfs/Makefile.am +++ b/tests/zfs-tests/tests/functional/pyzfs/Makefile.am @@ -14,5 +14,6 @@ $(pkgpyzfs_SCRIPTS):%:%.in $< >'$@' -chmod 775 $@ +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(pkgpyzfs_SCRIPTS) diff --git a/udev/rules.d/Makefile.am b/udev/rules.d/Makefile.am index f79ea4b3c3e4..86c33fc697cc 100644 --- a/udev/rules.d/Makefile.am +++ b/udev/rules.d/Makefile.am @@ -16,5 +16,6 @@ $(udevrule_DATA):%:%.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< > '$@' +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: -$(RM) $(udevrule_DATA) From ba3d257b6ce74c28295723d922a5b5ae3d72c08e Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Wed, 5 Feb 2020 18:02:43 -0600 Subject: [PATCH 12/32] Delete built init scripts in make clean Previously, they were being deleted in make distclean. This brings it in line with the example: https://www.gnu.org/software/automake/manual/html_node/Scripts.html Signed-off-by: Richard Laager --- etc/init.d/Makefile.am | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/etc/init.d/Makefile.am b/etc/init.d/Makefile.am index 0cccca6416bd..953c31fd5e4e 100644 --- a/etc/init.d/Makefile.am +++ b/etc/init.d/Makefile.am @@ -40,6 +40,4 @@ $(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in Makefile [ '$@' = 'zfs-functions' -o '$@' = 'zfs' ] || \ chmod +x '$@') -# Double-colon rules are allowed; there are multiple independent definitions. -distclean-local:: - -$(RM) $(init_SCRIPTS) $(initcommon_SCRIPTS) $(initconf_SCRIPTS) +CLEANFILES = $(init_SCRIPTS) $(initcommon_SCRIPTS) $(initconf_SCRIPTS) From 261c4f832bc1b946bb9a257bacb58d02269a247e Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Sat, 22 Feb 2020 18:09:55 -0600 Subject: [PATCH 13/32] initramfs: Eliminate substitutions These are now handled in zfs-functions, so this is all duplicative and unnecessary. Signed-off-by: Richard Laager --- contrib/initramfs/scripts/Makefile.am | 18 +----------------- contrib/initramfs/scripts/{zfs.in => zfs} | 10 +--------- 2 files changed, 2 insertions(+), 26 deletions(-) rename contrib/initramfs/scripts/{zfs.in => zfs} (99%) diff --git a/contrib/initramfs/scripts/Makefile.am b/contrib/initramfs/scripts/Makefile.am index 3ab18ba2cbce..2a142096e449 100644 --- a/contrib/initramfs/scripts/Makefile.am +++ b/contrib/initramfs/scripts/Makefile.am @@ -1,22 +1,6 @@ scriptsdir = /usr/share/initramfs-tools/scripts -scripts_DATA = \ +dist_scripts_DATA = \ zfs SUBDIRS = local-top - -EXTRA_DIST = \ - $(top_srcdir)/contrib/initramfs/scripts/zfs.in - -$(scripts_DATA):%:%.in - -$(SED) -e 's,@sbindir\@,$(sbindir),g' \ - -e 's,@sysconfdir\@,$(sysconfdir),g' \ - $< >'$@' - -# Double-colon rules are allowed; there are multiple independent definitions. -clean-local:: - -$(RM) $(scripts_SCRIPTS) - -# Double-colon rules are allowed; there are multiple independent definitions. -distclean-local:: - -$(RM) $(scripts_SCRIPTS) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs similarity index 99% rename from contrib/initramfs/scripts/zfs.in rename to contrib/initramfs/scripts/zfs index 4bbdf53a77d7..dbc4e253f113 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs @@ -6,17 +6,9 @@ # Enable this by passing boot=zfs on the kernel command line. # -# Source the common init script +# Source the common functions . /etc/zfs/zfs-functions -# Paths to what we need - in the initrd, these paths are hardcoded, -# so override the defines in zfs-functions. -ZFS="@sbindir@/zfs" -ZPOOL="@sbindir@/zpool" -ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache" -export ZFS ZPOOL ZPOOL_CACHE - - # Start interactive shell. # Use debian's panic() if defined, because it allows to prevent shell access # by setting panic in cmdline (e.g. panic=0 or panic=15). From b825a31413f1a8b86476f530952fdc601a5fd66e Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Thu, 6 Feb 2020 09:28:20 -0800 Subject: [PATCH 14/32] Fix zfs-functions packaging bug This fixes a bug where the generated zfs-functions was being included along with original zfs-functions.in in the make dist tarball. This caused an unfortunate series of events during build/packaging that resulted in the RPM-installed /etc/zfs/zfs-functions listing the paths as: ZFS="/usr/local/sbin/zfs" ZED="/usr/local/sbin/zed" ZPOOL="/usr/local/sbin/zpool" When they should have been: ZFS="/sbin/zfs" ZED="/sbin/zed" ZPOOL="/sbin/zpool" This affects init.d (non-systemd) distros like CentOS 6. /etc/default/zfs and /etc/zfs/zfs-functions are also used by the initramfs, so they need to be built even when init.d support is not. They have been moved to the (new) etc/default and (existing) etc/zfs source directories, respectively. Fixes: #9443 Co-authored-by: Tony Hutter Signed-off-by: Richard Laager --- configure.ac | 1 + contrib/initramfs/Makefile.am | 11 ----------- etc/Makefile.am | 4 ++-- etc/default/.gitignore | 1 + etc/default/Makefile.am | 12 ++++++++++++ etc/{init.d => default}/zfs.in | 0 etc/init.d/.gitignore | 1 - etc/init.d/Makefile.am | 16 +++++----------- etc/init.d/README.md | 2 +- etc/zfs/.gitignore | 1 + etc/zfs/Makefile.am | 26 +++++++++++++++++++++++++- etc/{init.d => zfs}/zfs-functions.in | 0 12 files changed, 48 insertions(+), 27 deletions(-) create mode 100644 etc/default/.gitignore create mode 100644 etc/default/Makefile.am rename etc/{init.d => default}/zfs.in (100%) create mode 100644 etc/zfs/.gitignore rename etc/{init.d => zfs}/zfs-functions.in (100%) diff --git a/configure.ac b/configure.ac index 6fcc89044dd8..c68a9a9b7f4d 100644 --- a/configure.ac +++ b/configure.ac @@ -67,6 +67,7 @@ AC_CONFIG_FILES([ udev/Makefile udev/rules.d/Makefile etc/Makefile + etc/default/Makefile etc/init.d/Makefile etc/zfs/Makefile etc/systemd/Makefile diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index 52bdeb2afe54..849b1d83cc5b 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -6,15 +6,10 @@ initrd_SCRIPTS = \ SUBDIRS = hooks scripts EXTRA_DIST = \ - $(top_srcdir)/etc/init.d/zfs \ - $(top_srcdir)/etc/init.d/zfs-functions \ $(top_srcdir)/contrib/initramfs/conf.d/zfs \ $(top_srcdir)/contrib/initramfs/conf-hooks.d/zfs \ $(top_srcdir)/contrib/initramfs/README.initramfs.markdown -$(top_srcdir)/etc/init.d/zfs $(top_srcdir)/etc/init.d/zfs-functions: - $(MAKE) -C $(top_srcdir)/etc/init.d zfs zfs-functions - install-initrdSCRIPTS: $(EXTRA_DIST) for d in conf.d conf-hooks.d scripts/local-top; do \ $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ @@ -26,9 +21,3 @@ install-initrdSCRIPTS: $(EXTRA_DIST) cp $(top_builddir)/contrib/initramfs/$$d/zfs \ $(DESTDIR)$(initrddir)/$$d/; \ done - $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ - cp $(top_builddir)/etc/init.d/zfs \ - $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ - $(MKDIR_P) $(DESTDIR)$(sysconfdir)/zfs; \ - cp $(top_builddir)/etc/init.d/zfs-functions \ - $(DESTDIR)$(sysconfdir)/zfs/ diff --git a/etc/Makefile.am b/etc/Makefile.am index 28b955106e5f..67ef94a2017b 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = zfs sudoers.d $(ZFS_INIT_SYSTEMD) $(ZFS_INIT_SYSV) $(ZFS_MODULE_LOAD) -DIST_SUBDIRS = init.d zfs systemd modules-load.d sudoers.d +SUBDIRS = default zfs sudoers.d $(ZFS_INIT_SYSTEMD) $(ZFS_INIT_SYSV) $(ZFS_MODULE_LOAD) +DIST_SUBDIRS = default init.d zfs systemd modules-load.d sudoers.d diff --git a/etc/default/.gitignore b/etc/default/.gitignore new file mode 100644 index 000000000000..73304bc2cd4a --- /dev/null +++ b/etc/default/.gitignore @@ -0,0 +1 @@ +zfs diff --git a/etc/default/Makefile.am b/etc/default/Makefile.am new file mode 100644 index 000000000000..f35abd8e5100 --- /dev/null +++ b/etc/default/Makefile.am @@ -0,0 +1,12 @@ +initconfdir = $(DEFAULT_INITCONF_DIR) +initconf_SCRIPTS = zfs + +EXTRA_DIST = \ + $(top_srcdir)/etc/default/zfs.in + +$(initconf_SCRIPTS):%:%.in Makefile + $(SED) \ + -e 's,@sysconfdir\@,$(sysconfdir),g' \ + $< >'$@' + +CLEANFILES = $(initconf_SCRIPTS) diff --git a/etc/init.d/zfs.in b/etc/default/zfs.in similarity index 100% rename from etc/init.d/zfs.in rename to etc/default/zfs.in diff --git a/etc/init.d/.gitignore b/etc/init.d/.gitignore index 3f16b08ecc25..43a673d55343 100644 --- a/etc/init.d/.gitignore +++ b/etc/init.d/.gitignore @@ -1,4 +1,3 @@ -zfs-functions zfs-import zfs-mount zfs-share diff --git a/etc/init.d/Makefile.am b/etc/init.d/Makefile.am index 953c31fd5e4e..19fa76a2bafc 100644 --- a/etc/init.d/Makefile.am +++ b/etc/init.d/Makefile.am @@ -1,21 +1,15 @@ initdir = $(DEFAULT_INIT_DIR) init_SCRIPTS = zfs-import zfs-mount zfs-share zfs-zed -initcommondir = $(sysconfdir)/zfs -initcommon_SCRIPTS = zfs-functions - initconfdir = $(DEFAULT_INITCONF_DIR) -initconf_SCRIPTS = zfs EXTRA_DIST = \ - $(top_srcdir)/etc/init.d/zfs-functions.in \ $(top_srcdir)/etc/init.d/zfs-share.in \ $(top_srcdir)/etc/init.d/zfs-import.in \ $(top_srcdir)/etc/init.d/zfs-mount.in \ - $(top_srcdir)/etc/init.d/zfs-zed.in \ - $(top_srcdir)/etc/init.d/zfs.in + $(top_srcdir)/etc/init.d/zfs-zed.in -$(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in Makefile +$(init_SCRIPTS):%:%.in Makefile -(if [ -e /etc/debian_version ]; then \ NFS_SRV=nfs-kernel-server; \ else \ @@ -26,7 +20,8 @@ $(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in Makefile else \ SHELL=/bin/sh; \ fi; \ - $(SED) -e 's,@bindir\@,$(bindir),g' \ + $(SED) \ + -e 's,@bindir\@,$(bindir),g' \ -e 's,@sbindir\@,$(sbindir),g' \ -e 's,@udevdir\@,$(udevdir),g' \ -e 's,@udevruledir\@,$(udevruledir),g' \ @@ -37,7 +32,6 @@ $(init_SCRIPTS) $(initconf_SCRIPTS) $(initcommon_SCRIPTS):%:%.in Makefile -e "s,@SHELL\@,$$SHELL,g" \ -e "s,@NFS_SRV\@,$$NFS_SRV,g" \ $< >'$@'; \ - [ '$@' = 'zfs-functions' -o '$@' = 'zfs' ] || \ chmod +x '$@') -CLEANFILES = $(init_SCRIPTS) $(initcommon_SCRIPTS) $(initconf_SCRIPTS) +CLEANFILES = $(init_SCRIPTS) diff --git a/etc/init.d/README.md b/etc/init.d/README.md index 89edb1da3118..ad7c053aacab 100644 --- a/etc/init.d/README.md +++ b/etc/init.d/README.md @@ -35,7 +35,7 @@ SUPPORT If you're making your own distribution and you want the scripts to work on that, the biggest problem you'll (probably) have is the part - at the beginning of the "zfs-functions.in" file which sets up the + at the beginning of the "zfs-functions" file which sets up the logging output. INSTALLING INIT SCRIPT LINKS diff --git a/etc/zfs/.gitignore b/etc/zfs/.gitignore new file mode 100644 index 000000000000..1b2d752debda --- /dev/null +++ b/etc/zfs/.gitignore @@ -0,0 +1 @@ +zfs-functions diff --git a/etc/zfs/Makefile.am b/etc/zfs/Makefile.am index 52f6634df694..81567a4fa0f5 100644 --- a/etc/zfs/Makefile.am +++ b/etc/zfs/Makefile.am @@ -6,5 +6,29 @@ pkgsysconf_DATA = \ vdev_id.conf.sas_switch.example \ vdev_id.conf.multipath.example \ vdev_id.conf.scsi.example +pkgsysconf_SCRIPTS = \ + zfs-functions -EXTRA_DIST = $(pkgsysconf_DATA) +EXTRA_DIST = $(pkgsysconf_DATA) \ + zfs-functions.in + +$(pkgsysconf_SCRIPTS):%:%.in Makefile + -(if [ -e /etc/debian_version ]; then \ + NFS_SRV=nfs-kernel-server; \ + else \ + NFS_SRV=nfs; \ + fi; \ + if [ -e /sbin/openrc-run ]; then \ + SHELL=/sbin/openrc-run; \ + else \ + SHELL=/bin/sh; \ + fi; \ + $(SED) \ + -e 's,@sbindir\@,$(sbindir),g' \ + -e 's,@sysconfdir\@,$(sysconfdir),g' \ + -e 's,@initconfdir\@,$(initconfdir),g' \ + $< >'$@'; \ + [ '$@' = 'zfs-functions' ] || \ + chmod +x '$@') + +CLEANFILES = $(pkgsysconf_SCRIPTS) diff --git a/etc/init.d/zfs-functions.in b/etc/zfs/zfs-functions.in similarity index 100% rename from etc/init.d/zfs-functions.in rename to etc/zfs/zfs-functions.in From 3c6e698e2eb57348dbd9a1f5213e67544618713e Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 18 Mar 2020 13:31:10 -0700 Subject: [PATCH 15/32] Deprecate deduplicated send streams Dedup send can only deduplicate over the set of blocks in the send command being invoked, and it does not take advantage of the dedup table to do so. This is a very common misconception among not only users, but developers, and makes the feature seem more useful than it is. As a result, many users are using the feature but not getting any benefit from it. Dedup send requires a nontrivial expenditure of memory and CPU to operate, especially if the dataset(s) being sent is (are) not already using a dedup-strength checksum. Dedup send adds developer burden. It expands the test matrix when developing new features, causing bugs in released code, and delaying development efforts by forcing more testing to be done. As a result, we are deprecating the use of `zfs send -D` and receiving of such streams. This change adds a warning to the man page, and also prints the warning whenever dedup send or receive are used. In a future release, we plan to: 1. remove the kernel code for generating deduplicated streams 2. make `zfs send -D` generate regular, non-deduplicated streams 3. remove the kernel code for receiving deduplicated streams 4. make `zfs receive` of deduplicated streams process them in userland to "re-duplicate" them, so that they can still be received. Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Matthew Ahrens Closes #7887 Closes #10117 --- cmd/zfs/zfs_main.c | 10 ++++++++++ include/libzfs_impl.h | 1 + lib/libzfs/libzfs_sendrecv.c | 20 ++++++++++++++++++++ man/man8/zfs.8 | 15 +++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index fa1c6aa30283..b8fbf1c89382 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -4144,6 +4144,16 @@ zfs_do_send(int argc, char **argv) } } + if (flags.dedup) { + (void) fprintf(stderr, + gettext("WARNING: deduplicated send is " + "deprecated, and will be removed in a\n" + "future release. (In the future, the flag will be " + "accepted, but a\n" + "regular, non-deduplicated stream will be " + "generated.)\n\n")); + } + argc -= optind; argv += optind; diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index 9a46b9f12960..d5614987c770 100644 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -71,6 +71,7 @@ struct libzfs_handle { int libzfs_pool_iter; char libzfs_chassis_id[256]; boolean_t libzfs_prop_debug; + boolean_t libzfs_dedup_warning_printed; }; #define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 1875f79e7c35..10241f530f7d 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -3984,6 +3984,26 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) printf("found clone origin %s\n", origin); } + if (!hdl->libzfs_dedup_warning_printed && + (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_DEDUP)) { + (void) fprintf(stderr, + gettext("WARNING: This is a deduplicated send stream. " + "The ability to send and\n" + "receive deduplicated send streams is deprecated. " + "In the future, the\n" + "ability to receive a deduplicated send stream with " + "\"zfs receive\" will be\n" + "removed. However, in the future, a utility will be " + "provided to convert a\n" + "deduplicated send stream to a regular " + "(non-deduplicated) stream. This\n" + "future utility will require that the send stream be " + "located in a\n" + "seek-able file, rather than provided by a pipe.\n\n")); + hdl->libzfs_dedup_warning_printed = B_TRUE; + } + boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & DMU_BACKUP_FEATURE_RESUMING; boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 496363642b9e..029730bd4d64 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -3461,6 +3461,9 @@ By default, a full stream is generated. .Bl -tag -width "-D" .It Fl D, -dedup Generate a deduplicated stream. +\fBDeduplicated send is deprecated and will be removed in a future release.\fR +(In the future, the flag will be accepted but a regular, non-deduplicated +stream will be generated.) Blocks which would have been sent multiple times in the send stream will only be sent once. The receiving system must also support this feature to receive a deduplicated @@ -3835,6 +3838,18 @@ destroyed by using the .Nm zfs Cm destroy Fl d command. .Pp +Deduplicated send streams can be generated by using the +.Nm zfs Cm send Fl D +command. +\fBThe ability to send and receive deduplicated send streams is deprecated.\fR +In the future, the ability to receive a deduplicated send stream with +.Nm zfs Cm receive +will be removed. +However, in the future, a utility will be provided to convert a +deduplicated send stream to a regular (non-deduplicated) stream. +This future utility will require that the send stream be located in a +seek-able file, rather than provided by a pipe. +.Pp If .Fl o Em property Ns = Ns Ar value or From ce0005f3c310cfc448c293e6f8985a8b27e82d5f Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 26 Mar 2020 23:28:22 +0800 Subject: [PATCH 16/32] zfs_get: change time format string from %k to %H Issue #10090 reported that snapshots created between midnight and 1 AM are missing a padded zero in the creation property This change fixes the bug reported in issue #10090 where snapshots created between midnight and 1 AM were missing a padded zero in the creation timestamp output. The leading zero was missing because the time format string used `%k` which formats the hour as a decimal number from 0 to 23 where single digits are preceded by blanks[0] and is fixed by changing it to `%H` which formats the hour as 00-23. The difference in output is as below ``` -Thu Mar 26 0:39 2020 +Thu Mar 26 00:39 2020 ``` Reviewed-by: Brian Behlendorf Reviewed-by: Igor Kozhukhov Reviewed-by: George Melikov Signed-off-by: Alex John Closes #10090 Closes #10153 --- cmd/zfs/zfs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index b8fbf1c89382..3a91795f2ace 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -5964,7 +5964,7 @@ typedef struct holds_cbdata { size_t cb_max_taglen; } holds_cbdata_t; -#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" +#define STRFTIME_FMT_STR "%a %b %e %H:%M %Y" #define DATETIME_BUF_LEN (32) /* * From e3d882d9b88e5020e1e69c975d72a4d1194131ab Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 6 Feb 2020 18:25:29 +0100 Subject: [PATCH 17/32] Fix static data to link with -fno-common -fno-common is the new default in GCC 10, replacing -fcommon in GCC <= 9, so static data must only be allocated once. Reviewed-by: Brian Behlendorf Signed-off-by: Romain Dolbeau Closes #9943 --- cmd/zfs/zfs_util.h | 2 +- cmd/zpool/zpool_main.c | 2 ++ cmd/zpool/zpool_util.h | 2 +- lib/libshare/smb.c | 2 ++ lib/libshare/smb.h | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cmd/zfs/zfs_util.h b/cmd/zfs/zfs_util.h index 3ddff9e22d7d..a56af59adb15 100644 --- a/cmd/zfs/zfs_util.h +++ b/cmd/zfs/zfs_util.h @@ -33,7 +33,7 @@ extern "C" { void * safe_malloc(size_t size); void nomem(void); -libzfs_handle_t *g_zfs; +extern libzfs_handle_t *g_zfs; #ifdef __cplusplus } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 3b7aaa0e73a3..7f100ef1d392 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -73,6 +73,8 @@ #include "statcommon.h" +libzfs_handle_t *g_zfs; + static int zpool_do_create(int, char **); static int zpool_do_destroy(int, char **); diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index 3afc82d54b52..ccc2fac47cff 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -80,7 +80,7 @@ void pool_list_free(zpool_list_t *); int pool_list_count(zpool_list_t *); void pool_list_remove(zpool_list_t *, zpool_handle_t *); -libzfs_handle_t *g_zfs; +extern libzfs_handle_t *g_zfs; typedef struct vdev_cmd_data diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c index a95607ee0324..f567f7c49d76 100644 --- a/lib/libshare/smb.c +++ b/lib/libshare/smb.c @@ -65,6 +65,8 @@ static boolean_t smb_available(void); static sa_fstype_t *smb_fstype; +smb_share_t *smb_shares; + /* * Retrieve the list of SMB shares. */ diff --git a/lib/libshare/smb.h b/lib/libshare/smb.h index 7a0c0fd162d5..8ea44677f9ae 100644 --- a/lib/libshare/smb.h +++ b/lib/libshare/smb.h @@ -44,6 +44,6 @@ typedef struct smb_share_s { struct smb_share_s *next; } smb_share_t; -smb_share_t *smb_shares; +extern smb_share_t *smb_shares; void libshare_smb_init(void); From 97fcfbf21837c31b0d1eb42997c0f74a7f6fad7c Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 26 Feb 2020 13:18:07 -0800 Subject: [PATCH 18/32] Linux 5.6 compat: time_t As part of the Linux kernel's y2038 changes the time_t type has been fully retired. Callers are now required to use the time64_t type. Rather than move to the new type, I've removed the few remaining places where a time_t is used in the kernel code. They've been replaced with a uint64_t which is already how ZFS internally handled these values. Going forward we should work towards updating the remaining user space time_t consumers to the 64-bit interfaces. Reviewed-by: Matthew Macy Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #10052 Closes #10064 --- include/spl/sys/time.h | 2 +- include/sys/vdev_impl.h | 4 ++-- lib/libspl/include/sys/time.h | 2 +- module/zfs/vdev_initialize.c | 2 +- module/zfs/vdev_trim.c | 2 +- module/zfs/zfs_debug.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/spl/sys/time.h b/include/spl/sys/time.h index 312415b7bc83..24c1ec7c7ae9 100644 --- a/include/spl/sys/time.h +++ b/include/spl/sys/time.h @@ -85,7 +85,7 @@ gethrestime(inode_timespec_t *ts) #endif } -static inline time_t +static inline uint64_t gethrestime_sec(void) { #if defined(HAVE_INODE_TIMESPEC64_TIMES) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index f6f7bbb4b284..090ba3fbc87f 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -274,7 +274,7 @@ struct vdev { range_tree_t *vdev_initialize_tree; /* valid while initializing */ uint64_t vdev_initialize_bytes_est; uint64_t vdev_initialize_bytes_done; - time_t vdev_initialize_action_time; /* start and end time */ + uint64_t vdev_initialize_action_time; /* start and end time */ /* TRIM related */ boolean_t vdev_trim_exit_wanted; @@ -295,7 +295,7 @@ struct vdev { uint64_t vdev_trim_rate; /* requested rate (bytes/sec) */ uint64_t vdev_trim_partial; /* requested partial TRIM */ uint64_t vdev_trim_secure; /* requested secure TRIM */ - time_t vdev_trim_action_time; /* start and end time */ + uint64_t vdev_trim_action_time; /* start and end time */ /* for limiting outstanding I/Os (initialize and TRIM) */ kmutex_t vdev_initialize_io_lock; diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h index 291f2190a28c..c9f6165047d2 100644 --- a/lib/libspl/include/sys/time.h +++ b/lib/libspl/include/sys/time.h @@ -88,7 +88,7 @@ gethrestime(inode_timespec_t *ts) ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; } -static inline time_t +static inline uint64_t gethrestime_sec(void) { struct timeval tv; diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c index 9958a2958322..8a3635969e1a 100644 --- a/module/zfs/vdev_initialize.c +++ b/module/zfs/vdev_initialize.c @@ -700,7 +700,7 @@ vdev_initialize_restart(vdev_t *vd) vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, sizeof (timestamp), 1, ×tamp); ASSERT(err == 0 || err == ENOENT); - vd->vdev_initialize_action_time = (time_t)timestamp; + vd->vdev_initialize_action_time = timestamp; if (vd->vdev_initialize_state == VDEV_INITIALIZE_SUSPENDED || vd->vdev_offline) { diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c index 5ad47cccdafe..b7548fc4c18d 100644 --- a/module/zfs/vdev_trim.c +++ b/module/zfs/vdev_trim.c @@ -1046,7 +1046,7 @@ vdev_trim_restart(vdev_t *vd) vd->vdev_leaf_zap, VDEV_LEAF_ZAP_TRIM_ACTION_TIME, sizeof (timestamp), 1, ×tamp); ASSERT(err == 0 || err == ENOENT); - vd->vdev_trim_action_time = (time_t)timestamp; + vd->vdev_trim_action_time = timestamp; if (vd->vdev_trim_state == VDEV_TRIM_SUSPENDED || vd->vdev_offline) { diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c index 538533d27d2b..cf8bbb3ce4f6 100644 --- a/module/zfs/zfs_debug.c +++ b/module/zfs/zfs_debug.c @@ -27,7 +27,7 @@ typedef struct zfs_dbgmsg { procfs_list_node_t zdm_node; - time_t zdm_timestamp; + uint64_t zdm_timestamp; int zdm_size; char zdm_msg[1]; /* variable length allocation */ } zfs_dbgmsg_t; From 78bd9555ade8719015683f82d5b8871a042af231 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 26 Feb 2020 12:42:33 -0800 Subject: [PATCH 19/32] Linux 5.6 compat: ktime_get_raw_ts64() The getrawmonotonic() and getrawmonotonic64() interfaces have been fully retired. Update gethrtime() to use the replacement interface ktime_get_raw_ts64() which was introduced in the 4.18 kernel. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #10052 Closes #10064 --- config/kernel-ktime.m4 | 55 +++++++++++++++++++++ config/kernel-ktime_get_coarse_real_ts64.m4 | 23 --------- config/kernel.m4 | 4 +- include/spl/sys/time.h | 5 ++ 4 files changed, 62 insertions(+), 25 deletions(-) create mode 100644 config/kernel-ktime.m4 delete mode 100644 config/kernel-ktime_get_coarse_real_ts64.m4 diff --git a/config/kernel-ktime.m4 b/config/kernel-ktime.m4 new file mode 100644 index 000000000000..64c3b5f90328 --- /dev/null +++ b/config/kernel-ktime.m4 @@ -0,0 +1,55 @@ +dnl # +dnl # 4.18: ktime_get_coarse_real_ts64() replaces current_kernel_time64(). +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64], [ + ZFS_LINUX_TEST_SRC([ktime_get_coarse_real_ts64], [ + #include + ], [ + struct timespec64 ts; + ktime_get_coarse_real_ts64(&ts); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], [ + AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) + ZFS_LINUX_TEST_RESULT([ktime_get_coarse_real_ts64], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, + [ktime_get_coarse_real_ts64() exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 4.18: ktime_get_raw_ts64() replaces getrawmonotonic64(). +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_RAW_TS64], [ + ZFS_LINUX_TEST_SRC([ktime_get_raw_ts64], [ + #include + ], [ + struct timespec64 ts; + ktime_get_raw_ts64(&ts); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_RAW_TS64], [ + AC_MSG_CHECKING([whether ktime_get_raw_ts64() exists]) + ZFS_LINUX_TEST_RESULT([ktime_get_raw_ts64], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KTIME_GET_RAW_TS64, 1, + [ktime_get_raw_ts64() exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME], [ + ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64 + ZFS_AC_KERNEL_SRC_KTIME_GET_RAW_TS64 +]) + +AC_DEFUN([ZFS_AC_KERNEL_KTIME], [ + ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64 + ZFS_AC_KERNEL_KTIME_GET_RAW_TS64 +]) diff --git a/config/kernel-ktime_get_coarse_real_ts64.m4 b/config/kernel-ktime_get_coarse_real_ts64.m4 deleted file mode 100644 index 28492bf04bcb..000000000000 --- a/config/kernel-ktime_get_coarse_real_ts64.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 4.18: ktime_get_coarse_real_ts64() added. Use it in place of -dnl # current_kernel_time64(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64], [ - ZFS_LINUX_TEST_SRC([ktime_get_coarse_real_ts64], [ - #include - ], [ - struct timespec64 ts; - ktime_get_coarse_real_ts64(&ts); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], [ - AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) - ZFS_LINUX_TEST_RESULT([ktime_get_coarse_real_ts64], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, - [ktime_get_coarse_real_ts64() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 7f3e18d597d5..8cee494c4cfb 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -137,7 +137,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_CURRENT_TIME ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL - ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64 + ZFS_AC_KERNEL_SRC_KTIME ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES ZFS_AC_KERNEL_SRC_KSTRTOUL @@ -254,7 +254,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_CURRENT_TIME ZFS_AC_KERNEL_USERNS_CAPABILITIES ZFS_AC_KERNEL_IN_COMPAT_SYSCALL - ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64 + ZFS_AC_KERNEL_KTIME ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES ZFS_AC_KERNEL_KSTRTOUL diff --git a/include/spl/sys/time.h b/include/spl/sys/time.h index 24c1ec7c7ae9..4309c300b268 100644 --- a/include/spl/sys/time.h +++ b/include/spl/sys/time.h @@ -105,8 +105,13 @@ gethrestime_sec(void) static inline hrtime_t gethrtime(void) { +#if defined(HAVE_KTIME_GET_RAW_TS64) + struct timespec64 ts; + ktime_get_raw_ts64(&ts); +#else struct timespec ts; getrawmonotonic(&ts); +#endif return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec); } From bcd54567f2e8414514c27f540c1f7bb8e6b63681 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 6 Feb 2020 12:37:25 -0800 Subject: [PATCH 20/32] Linux 5.6 compat: timestamp_truncate() The timestamp_truncate() function was added, it replaces the existing timespec64_trunc() function. This change renames our wrapper function to be consistent with the upstream name and updates the compatibility code for older kernels accordingly. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9956 Closes #9961 --- config/kernel-inode-times.m4 | 31 +++++++++++++++++++++++++++---- include/sys/zpl.h | 13 +++++++------ module/zfs/zfs_vnops.c | 8 ++++---- module/zfs/zpl_inode.c | 6 ++---- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4 index 57e7f31fdcbb..8a79c299e1a3 100644 --- a/config/kernel-inode-times.m4 +++ b/config/kernel-inode-times.m4 @@ -1,8 +1,22 @@ -dnl # -dnl # 4.18 API change -dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64. -dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ + + dnl # + dnl # 5.6 API change + dnl # timespec64_trunc() replaced by timestamp_truncate() interface. + dnl # + ZFS_LINUX_TEST_SRC([timestamp_truncate], [ + #include + ],[ + struct timespec64 ts; + struct inode ip; + + ts = timestamp_truncate(ts, &ip); + ]) + + dnl # + dnl # 4.18 API change + dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64. + dnl # ZFS_LINUX_TEST_SRC([inode_times], [ #include ],[ @@ -15,6 +29,15 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ + AC_MSG_CHECKING([whether timestamp_truncate() exists]) + ZFS_LINUX_TEST_RESULT([timestamp_truncate], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_TIMESTAMP_TRUNCATE, 1, + [timestamp_truncate() exists]) + ],[ + AC_MSG_RESULT(no) + ]) + AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) ZFS_LINUX_TEST_RESULT([inode_times], [ AC_MSG_RESULT(no) diff --git a/include/sys/zpl.h b/include/sys/zpl.h index 2766269f31c8..f88ccd5400a8 100644 --- a/include/sys/zpl.h +++ b/include/sys/zpl.h @@ -188,13 +188,14 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx) } #endif /* HAVE_VFS_ITERATE */ -/* - * Linux 4.18, inode times converted from timespec to timespec64. - */ -#if defined(HAVE_INODE_TIMESPEC64_TIMES) -#define zpl_inode_timespec_trunc(ts, gran) timespec64_trunc(ts, gran) +#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE) +#define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip) +#elif defined(HAVE_INODE_TIMESPEC64_TIMES) +#define zpl_inode_timestamp_truncate(ts, ip) \ + timespec64_trunc(ts, (ip)->i_sb->s_time_gran) #else -#define zpl_inode_timespec_trunc(ts, gran) timespec_trunc(ts, gran) +#define zpl_inode_timestamp_truncate(ts, ip) \ + timespec_trunc(ts, (ip)->i_sb->s_time_gran) #endif #endif /* _SYS_ZPL_H */ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 03a8c4a50b04..c322edf6ad99 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -3415,8 +3415,8 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) if (mask & (ATTR_MTIME | ATTR_SIZE)) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); - ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime, - ZTOI(zp)->i_sb->s_time_gran); + ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate( + vap->va_mtime, ZTOI(zp)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, sizeof (mtime)); @@ -3424,8 +3424,8 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) if (mask & (ATTR_CTIME | ATTR_SIZE)) { ZFS_TIME_ENCODE(&vap->va_ctime, ctime); - ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime, - ZTOI(zp)->i_sb->s_time_gran); + ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime, + ZTOI(zp)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, sizeof (ctime)); } diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 5660f8b0e56f..1f228dcf822c 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -390,10 +390,8 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) vap->va_mtime = ia->ia_mtime; vap->va_ctime = ia->ia_ctime; - if (vap->va_mask & ATTR_ATIME) { - ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime, - ip->i_sb->s_time_gran); - } + if (vap->va_mask & ATTR_ATIME) + ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip); cookie = spl_fstrans_mark(); error = -zfs_setattr(ip, vap, 0, cr); From 6287157118cb7f0719dd657e38ccbb0064582199 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 6 Feb 2020 10:30:41 -0800 Subject: [PATCH 21/32] Linux 5.6 compat: struct proc_ops The proc_ops structure was introduced to replace the use of of the file_operations structure when registering proc handlers. This change creates a new kstat_proc_op_t typedef for compatibility which can be used to pass around the correct structure. This change additionally adds the 'const' keyword to all of the existing proc operations structures. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9961 --- config/kernel-proc-operations.m4 | 41 ++++++++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ include/spl/sys/kstat.h | 8 ++++++- module/spl/spl-kstat.c | 14 ++++++++--- module/spl/spl-proc.c | 33 ++++++++++++++++++++----- module/spl/spl-procfs-list.c | 11 +++++++-- 6 files changed, 97 insertions(+), 12 deletions(-) create mode 100644 config/kernel-proc-operations.m4 diff --git a/config/kernel-proc-operations.m4 b/config/kernel-proc-operations.m4 new file mode 100644 index 000000000000..df216222ecc2 --- /dev/null +++ b/config/kernel-proc-operations.m4 @@ -0,0 +1,41 @@ +dnl # +dnl # 5.6 API Change +dnl # The proc_ops structure was introduced to replace the use of +dnl # of the file_operations structure when registering proc handlers. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_PROC_OPERATIONS], [ + ZFS_LINUX_TEST_SRC([proc_ops_struct], [ + #include + + int test_open(struct inode *ip, struct file *fp) { return 0; } + ssize_t test_read(struct file *fp, char __user *ptr, + size_t size, loff_t *offp) { return 0; } + ssize_t test_write(struct file *fp, const char __user *ptr, + size_t size, loff_t *offp) { return 0; } + loff_t test_lseek(struct file *fp, loff_t off, int flag) + { return 0; } + int test_release(struct inode *ip, struct file *fp) + { return 0; } + + const struct proc_ops test_ops __attribute__ ((unused)) = { + .proc_open = test_open, + .proc_read = test_read, + .proc_write = test_write, + .proc_lseek = test_lseek, + .proc_release = test_release, + }; + ], [ + struct proc_dir_entry *entry __attribute__ ((unused)) = + proc_create_data("test", 0444, NULL, &test_ops, NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PROC_OPERATIONS], [ + AC_MSG_CHECKING([whether proc_ops structure exists]) + ZFS_LINUX_TEST_RESULT([proc_ops_struct], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PROC_OPS_STRUCT, 1, [proc_ops structure exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 8cee494c4cfb..b67fcef8c2f7 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -53,6 +53,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TIMER_SETUP ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SRC_SUPER_USER_NS + ZFS_AC_KERNEL_SRC_PROC_OPERATIONS ZFS_AC_KERNEL_SRC_SUBMIT_BIO ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH @@ -170,6 +171,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TIMER_SETUP ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS + ZFS_AC_KERNEL_PROC_OPERATIONS ZFS_AC_KERNEL_SUBMIT_BIO ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH diff --git a/include/spl/sys/kstat.h b/include/spl/sys/kstat.h index 3ce474248873..c93c53171d88 100644 --- a/include/spl/sys/kstat.h +++ b/include/spl/sys/kstat.h @@ -152,6 +152,12 @@ typedef struct kstat_named_s { #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr) #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len) +#ifdef HAVE_PROC_OPS_STRUCT +typedef struct proc_ops kstat_proc_op_t; +#else +typedef struct file_operations kstat_proc_op_t; +#endif + typedef struct kstat_intr { uint_t intrs[KSTAT_NUM_INTRS]; } kstat_intr_t; @@ -197,7 +203,7 @@ extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module, const char *name); extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep); extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, - const struct file_operations *file_ops, void *data); + const kstat_proc_op_t *file_ops, void *data); extern void __kstat_install(kstat_t *ksp); extern void __kstat_delete(kstat_t *ksp); diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c index c97b6d6cbcb7..c54378acec5d 100644 --- a/module/spl/spl-kstat.c +++ b/module/spl/spl-kstat.c @@ -507,12 +507,20 @@ proc_kstat_write(struct file *filp, const char __user *buf, size_t len, return (len); } -static struct file_operations proc_kstat_operations = { +static const kstat_proc_op_t proc_kstat_operations = { +#ifdef HAVE_PROC_OPS_STRUCT + .proc_open = proc_kstat_open, + .proc_write = proc_kstat_write, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +#else .open = proc_kstat_open, .write = proc_kstat_write, .read = seq_read, .llseek = seq_lseek, .release = seq_release, +#endif }; void @@ -656,7 +664,7 @@ kstat_detect_collision(kstat_proc_entry_t *kpep) */ void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, - const struct file_operations *file_ops, void *data) + const kstat_proc_op_t *proc_ops, void *data) { kstat_module_t *module; kstat_proc_entry_t *tmp = NULL; @@ -690,7 +698,7 @@ kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, kpep->kpe_owner = module; kpep->kpe_proc = proc_create_data(kpep->kpe_name, mode, - module->ksm_proc, file_ops, data); + module->ksm_proc, proc_ops, data); if (kpep->kpe_proc == NULL) { list_del_init(&kpep->kpe_list); if (list_empty(&module->ksm_kstat_list)) diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c index c0c13913cdff..40315ede3176 100644 --- a/module/spl/spl-proc.c +++ b/module/spl/spl-proc.c @@ -532,11 +532,18 @@ proc_slab_open(struct inode *inode, struct file *filp) return (seq_open(filp, &slab_seq_ops)); } -static struct file_operations proc_slab_operations = { - .open = proc_slab_open, - .read = seq_read, - .llseek = seq_lseek, +static const kstat_proc_op_t proc_slab_operations = { +#ifdef HAVE_PROC_OPS_STRUCT + .proc_open = proc_slab_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +#else + .open = proc_slab_open, + .read = seq_read, + .llseek = seq_lseek, .release = seq_release, +#endif }; static void @@ -571,18 +578,32 @@ proc_taskq_open(struct inode *inode, struct file *filp) return (seq_open(filp, &taskq_seq_ops)); } -static struct file_operations proc_taskq_all_operations = { +static const kstat_proc_op_t proc_taskq_all_operations = { +#ifdef HAVE_PROC_OPS_STRUCT + .proc_open = proc_taskq_all_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +#else .open = proc_taskq_all_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, +#endif }; -static struct file_operations proc_taskq_operations = { +static const kstat_proc_op_t proc_taskq_operations = { +#ifdef HAVE_PROC_OPS_STRUCT + .proc_open = proc_taskq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +#else .open = proc_taskq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, +#endif }; static struct ctl_table spl_kmem_table[] = { diff --git a/module/spl/spl-procfs-list.c b/module/spl/spl-procfs-list.c index f6a00da5c919..189d6a7c6082 100644 --- a/module/spl/spl-procfs-list.c +++ b/module/spl/spl-procfs-list.c @@ -185,13 +185,20 @@ procfs_list_write(struct file *filp, const char __user *buf, size_t len, return (len); } -static struct file_operations procfs_list_operations = { - .owner = THIS_MODULE, +static const kstat_proc_op_t procfs_list_operations = { +#ifdef HAVE_PROC_OPS_STRUCT + .proc_open = procfs_list_open, + .proc_write = procfs_list_write, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release_private, +#else .open = procfs_list_open, .write = procfs_list_write, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, +#endif }; /* From c3e1f128126e8548f2cd69133c5745f848a99f8f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 2 Jan 2020 18:08:45 -0800 Subject: [PATCH 22/32] Static symbols exported by ICP The crypto_cipher_init_prov and crypto_cipher_init are declared static and should not be exported by the ICP. This resolves the following warnings observed when building with the 5.4 kernel. WARNING: "crypto_cipher_init" [.../icp] is a static EXPORT_SYMBOL WARNING: "crypto_cipher_init_prov" [.../icp] is a static EXPORT_SYMBOL Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9791 --- module/icp/api/kcf_cipher.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/module/icp/api/kcf_cipher.c b/module/icp/api/kcf_cipher.c index 1c9f6873e2c4..d66c1aafb179 100644 --- a/module/icp/api/kcf_cipher.c +++ b/module/icp/api/kcf_cipher.c @@ -916,8 +916,6 @@ crypto_decrypt_single(crypto_context_t context, crypto_data_t *ciphertext, } #if defined(_KERNEL) -EXPORT_SYMBOL(crypto_cipher_init_prov); -EXPORT_SYMBOL(crypto_cipher_init); EXPORT_SYMBOL(crypto_encrypt_prov); EXPORT_SYMBOL(crypto_encrypt); EXPORT_SYMBOL(crypto_encrypt_init_prov); From ced8decd18707e57602a11d149e6e6d44715231e Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 12 Mar 2020 13:52:03 -0400 Subject: [PATCH 23/32] Fix infinite scan on a pool with only special allocations Attempt to run scrub or resilver on a new pool containing only special allocations (special vdev added on creation) caused infinite loop because of dsl_scan_should_clear() limiting memory usage to 5% of pool size, which it calculated accounting only normal allocation class. Addition of special and just in case dedup classes fixes the issue. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. Closes #10106 Closes #8694 --- module/zfs/dsl_scan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index d71113681236..8168a53cae01 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1187,10 +1187,13 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx) static boolean_t dsl_scan_should_clear(dsl_scan_t *scn) { + spa_t *spa = scn->scn_dp->dp_spa; vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; - uint64_t mlim_hard, mlim_soft, mused; - uint64_t alloc = metaslab_class_get_alloc(spa_normal_class( - scn->scn_dp->dp_spa)); + uint64_t alloc, mlim_hard, mlim_soft, mused; + + alloc = metaslab_class_get_alloc(spa_normal_class(spa)); + alloc += metaslab_class_get_alloc(spa_special_class(spa)); + alloc += metaslab_class_get_alloc(spa_dedup_class(spa)); mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE, zfs_scan_mem_lim_min); From c9d993c4c35ee87f2441e27feef0dee724d25e71 Mon Sep 17 00:00:00 2001 From: Mark Roper Date: Thu, 12 Mar 2020 13:24:43 -0400 Subject: [PATCH 24/32] Prevent deadlock in arc_read in Linux memory reclaim callback Using zfs with Lustre, an arc_read can trigger kernel memory allocation that in turn leads to a memory reclaim callback and a deadlock within a single zfs process. This change uses spl_fstrans_mark and spl_trans_unmark to prevent the reclaim attempt and the deadlock (https://zfsonlinux.topicbox.com/groups/zfs-devel/T4db2c705ec1804ba). The stack trace observed is: __schedule at ffffffff81610f2e schedule at ffffffff81611558 schedule_preempt_disabled at ffffffff8161184a __mutex_lock at ffffffff816131e8 arc_buf_destroy at ffffffffa0bf37d7 [zfs] dbuf_destroy at ffffffffa0bfa6fe [zfs] dbuf_evict_one at ffffffffa0bfaa96 [zfs] dbuf_rele_and_unlock at ffffffffa0bfa561 [zfs] dbuf_rele_and_unlock at ffffffffa0bfa32b [zfs] osd_object_delete at ffffffffa0b64ecc [osd_zfs] lu_object_free at ffffffffa06d6a74 [obdclass] lu_site_purge_objects at ffffffffa06d7fc1 [obdclass] lu_cache_shrink_scan at ffffffffa06d81b8 [obdclass] shrink_slab at ffffffff811ca9d8 shrink_node at ffffffff811cfd94 do_try_to_free_pages at ffffffff811cfe63 try_to_free_pages at ffffffff811d01c4 __alloc_pages_slowpath at ffffffff811be7f2 __alloc_pages_nodemask at ffffffff811bf3ed new_slab at ffffffff81226304 ___slab_alloc at ffffffff812272ab __slab_alloc at ffffffff8122740c kmem_cache_alloc at ffffffff81227578 spl_kmem_cache_alloc at ffffffffa048a1fd [spl] arc_buf_alloc_impl at ffffffffa0befba2 [zfs] arc_read at ffffffffa0bf0924 [zfs] dbuf_read at ffffffffa0bf9083 [zfs] dmu_buf_hold_by_dnode at ffffffffa0c04869 [zfs] Reviewed-by: Brian Behlendorf Signed-off-by: Mark Roper Closes #9987 --- module/zfs/arc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index a16689dc6b07..ceb1e7a9dfbb 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -6178,6 +6178,17 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ASSERT(!embedded_bp || BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); + /* + * Normally SPL_FSTRANS will already be set since kernel threads which + * expect to call the DMU interfaces will set it when created. System + * calls are similarly handled by setting/cleaning the bit in the + * registered callback (module/os/.../zfs/zpl_*). + * + * External consumers such as Lustre which call the exported DMU + * interfaces may not have set SPL_FSTRANS. To avoid a deadlock + * on the hash_lock always set and clear the bit. + */ + fstrans_cookie_t cookie = spl_fstrans_mark(); top: if (!embedded_bp) { /* @@ -6636,6 +6647,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, /* embedded bps don't actually go to disk */ if (!embedded_bp) spa_read_history_add(spa, zb, *arc_flags); + spl_fstrans_unmark(cookie); return (rc); } From 0c1ab5b530934d03509b73f75c7242f79b0f624b Mon Sep 17 00:00:00 2001 From: Fabio Scaccabarozzi Date: Wed, 1 Apr 2020 18:48:54 +0200 Subject: [PATCH 25/32] Bugfix/fix uio partial copies In zfs_write(), the loop continues to the next iteration without accounting for partial copies occurring in uiomove_iov when copy_from_user/__copy_from_user_inatomic return a non-zero status. This results in "zfs: accessing past end of object..." in the kernel log, and the write failing. Account for partial copies and update uio struct before returning EFAULT, leave a comment explaining the reason why this is done. Reviewed-by: Brian Behlendorf Reviewed-by: ilbsmart Signed-off-by: Fabio Scaccabarozzi Closes #8673 Closes #10148 --- module/zcommon/zfs_uio.c | 25 +++++++++++++++++-------- module/zfs/zfs_vnops.c | 9 +++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c index c1e31f51be02..d586e0a1220a 100644 --- a/module/zcommon/zfs_uio.c +++ b/module/zcommon/zfs_uio.c @@ -80,22 +80,31 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio) if (copy_to_user(iov->iov_base+skip, p, cnt)) return (EFAULT); } else { + unsigned long b_left = 0; if (uio->uio_fault_disable) { if (!zfs_access_ok(VERIFY_READ, (iov->iov_base + skip), cnt)) { return (EFAULT); } pagefault_disable(); - if (__copy_from_user_inatomic(p, - (iov->iov_base + skip), cnt)) { - pagefault_enable(); - return (EFAULT); - } + b_left = + __copy_from_user_inatomic(p, + (iov->iov_base + skip), cnt); pagefault_enable(); } else { - if (copy_from_user(p, - (iov->iov_base + skip), cnt)) - return (EFAULT); + b_left = + copy_from_user(p, + (iov->iov_base + skip), cnt); + } + if (b_left > 0) { + unsigned long c_bytes = + cnt - b_left; + uio->uio_skip += c_bytes; + ASSERT3U(uio->uio_skip, <, + iov->iov_len); + uio->uio_resid -= c_bytes; + uio->uio_loffset += c_bytes; + return (EFAULT); } } break; diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index c322edf6ad99..d2cc8dd1d614 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -829,6 +829,15 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) uio->uio_fault_disable = B_FALSE; if (error == EFAULT) { dmu_tx_commit(tx); + /* + * Account for partial writes before + * continuing the loop. + * Update needs to occur before the next + * uio_prefaultpages, or prefaultpages may + * error, and we may break the loop early. + */ + if (tx_bytes != uio->uio_resid) + n -= tx_bytes - uio->uio_resid; if (uio_prefaultpages(MIN(n, max_blksz), uio)) { break; } From 97795a288b3892f447c6ef8470d56f74abfc190b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?= Date: Mon, 10 Feb 2020 21:59:50 +0100 Subject: [PATCH 26/32] ICP: Improve AES-GCM performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently SIMD accelerated AES-GCM performance is limited by two factors: a. The need to disable preemption and interrupts and save the FPU state before using it and to do the reverse when done. Due to the way the code is organized (see (b) below) we have to pay this price twice for each 16 byte GCM block processed. b. Most processing is done in C, operating on single GCM blocks. The use of SIMD instructions is limited to the AES encryption of the counter block (AES-NI) and the Galois multiplication (PCLMULQDQ). This leads to the FPU not being fully utilized for crypto operations. To solve (a) we do crypto processing in larger chunks while owning the FPU. An `icp_gcm_avx_chunk_size` module parameter was introduced to make this chunk size tweakable. It defaults to 32 KiB. This step alone roughly doubles performance. (b) is tackled by porting and using the highly optimized openssl AES-GCM assembler routines, which do all the processing (CTR, AES, GMULT) in a single routine. Both steps together result in up to 32x reduction of the time spend in the en/decryption routines, leading up to approximately 12x throughput increase for large (128 KiB) blocks. Lastly, this commit changes the default encryption algorithm from AES-CCM to AES-GCM when setting the `encryption=on` property. Reviewed-By: Brian Behlendorf Reviewed-By: Jason King Reviewed-By: Tom Caputi Reviewed-By: Richard Laager Signed-off-by: Attila Fülöp Closes #9749 --- COPYRIGHT | 4 + config/toolchain-simd.m4 | 21 + include/linux/simd_x86.h | 23 +- include/sys/zio.h | 2 +- lib/libicp/Makefile.am | 2 + man/man8/zfs.8 | 2 +- man/man8/zfsprops.8 | 0 module/icp/Makefile.in | 17 + module/icp/algs/modes/gcm.c | 746 ++++++++++++++- .../modes/THIRDPARTYLICENSE.cryptogams | 36 + .../THIRDPARTYLICENSE.cryptogams.descrip | 1 + .../modes/THIRDPARTYLICENSE.openssl | 177 ++++ .../modes/THIRDPARTYLICENSE.openssl.descrip | 1 + .../icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 892 ++++++++++++++++++ module/icp/asm-x86_64/modes/ghash-x86_64.S | 714 ++++++++++++++ module/icp/include/aes/aes_impl.h | 5 + module/icp/include/modes/modes.h | 29 +- .../zfs_create/zfs_create_crypt_combos.ksh | 2 +- .../zpool_create_crypt_combos.ksh | 2 +- .../functional/rsend/send_encrypted_props.ksh | 12 +- 20 files changed, 2657 insertions(+), 31 deletions(-) create mode 100644 man/man8/zfsprops.8 create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip create mode 100644 module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S create mode 100644 module/icp/asm-x86_64/modes/ghash-x86_64.S diff --git a/COPYRIGHT b/COPYRIGHT index 54fbceade1e4..1eda89510332 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -20,6 +20,10 @@ notable exceptions and their respective licenses include: * AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl * PBKDF2 Implementation: lib/libzfs/THIRDPARTYLICENSE.openssl * SPL Implementation: module/spl/THIRDPARTYLICENSE.gplv2 + * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams + * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl + * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams + * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl This product includes software developed by the OpenSSL Project for use in the OpenSSL Toolkit (http://www.openssl.org/) diff --git a/config/toolchain-simd.m4 b/config/toolchain-simd.m4 index 37627b813bb3..e86eb7f17a0d 100644 --- a/config/toolchain-simd.m4 +++ b/config/toolchain-simd.m4 @@ -23,6 +23,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE ;; esac ]) @@ -401,3 +402,23 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [ AC_MSG_RESULT([no]) ]) ]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [ + AC_MSG_CHECKING([whether host toolchain supports MOVBE]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([ + [ + void main() + { + __asm__ __volatile__("movbe 0(%eax), %eax"); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h index 1bde1d7c9240..bf44f6bf1e81 100644 --- a/include/linux/simd_x86.h +++ b/include/linux/simd_x86.h @@ -382,7 +382,8 @@ typedef enum cpuid_inst_sets { AVX512ER, AVX512VL, AES, - PCLMULQDQ + PCLMULQDQ, + MOVBE } cpuid_inst_sets_t; /* @@ -406,6 +407,7 @@ typedef struct cpuid_feature_desc { #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */ #define _AES_BIT (1U << 25) #define _PCLMULQDQ_BIT (1U << 1) +#define _MOVBE_BIT (1U << 22) /* * Descriptions of supported instruction sets @@ -433,6 +435,7 @@ static const cpuid_feature_desc_t cpuid_features[] = { [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }, [AES] = {1U, 0U, _AES_BIT, ECX }, [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, + [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, }; /* @@ -505,6 +508,7 @@ CPUID_FEATURE_CHECK(avx512er, AVX512ER); CPUID_FEATURE_CHECK(avx512vl, AVX512VL); CPUID_FEATURE_CHECK(aes, AES); CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); +CPUID_FEATURE_CHECK(movbe, MOVBE); #endif /* !defined(_KERNEL) */ @@ -719,6 +723,23 @@ zfs_pclmulqdq_available(void) #endif } +/* + * Check if MOVBE instruction is available + */ +static inline boolean_t +zfs_movbe_available(void) +{ +#if defined(_KERNEL) +#if defined(X86_FEATURE_MOVBE) + return (!!boot_cpu_has(X86_FEATURE_MOVBE)); +#else + return (B_FALSE); +#endif +#elif !defined(_KERNEL) + return (__cpuid_has_movbe()); +#endif +} + /* * AVX-512 family of instruction sets: * diff --git a/include/sys/zio.h b/include/sys/zio.h index aa58fe1fafd6..0046230a7b82 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -118,7 +118,7 @@ enum zio_encrypt { ZIO_CRYPT_FUNCTIONS }; -#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM +#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_GCM #define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF /* macros defining encryption lengths */ diff --git a/lib/libicp/Makefile.am b/lib/libicp/Makefile.am index e9f22cd70738..b92a7074a9b3 100644 --- a/lib/libicp/Makefile.am +++ b/lib/libicp/Makefile.am @@ -20,6 +20,8 @@ ASM_SOURCES_AS = \ asm-x86_64/aes/aes_amd64.S \ asm-x86_64/aes/aes_aesni.S \ asm-x86_64/modes/gcm_pclmulqdq.S \ + asm-x86_64/modes/aesni-gcm-x86_64.S \ + asm-x86_64/modes/ghash-x86_64.S \ asm-x86_64/sha1/sha1-x86_64.S \ asm-x86_64/sha2/sha256_impl.S \ asm-x86_64/sha2/sha512_impl.S diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 029730bd4d64..ec15e36859b9 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -1440,7 +1440,7 @@ Selecting .Sy encryption Ns = Ns Sy on when creating a dataset indicates that the default encryption suite will be selected, which is currently -.Sy aes-256-ccm . +.Sy aes-256-gcm . In order to provide consistent data protection, encryption must be specified at dataset creation time and it cannot be changed afterwards. .Pp diff --git a/man/man8/zfsprops.8 b/man/man8/zfsprops.8 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/module/icp/Makefile.in b/module/icp/Makefile.in index 18e8dc313b2e..c3cb2dede3ca 100644 --- a/module/icp/Makefile.in +++ b/module/icp/Makefile.in @@ -13,6 +13,16 @@ ASM_SOURCES += asm-x86_64/modes/gcm_pclmulqdq.o ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o +ASM_SOURCES += asm-x86_64/aes/aeskey.o +ASM_SOURCES += asm-x86_64/aes/aes_amd64.o +ASM_SOURCES += asm-x86_64/aes/aes_aesni.o +ASM_SOURCES += asm-x86_64/modes/gcm_pclmulqdq.o +ASM_SOURCES += asm-x86_64/modes/aesni-gcm-x86_64.o +ASM_SOURCES += asm-x86_64/modes/ghash-x86_64.o +ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o +ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o +ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o + endif ifeq ($(TARGET_ASM_DIR), asm-i386) @@ -72,6 +82,13 @@ $(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o +# Suppress objtool "can't find jump dest instruction at" warnings. They +# are caused by the constants which are defined in the text section of the +# assembly file using .byte instructions (e.g. bswap_mask). The objtool +# utility tries to interpret them as opcodes and obviously fails doing so. +OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y +OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y + ICP_DIRS = \ api \ core \ diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 014e90ceff8f..6c8dd9862658 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -30,12 +30,46 @@ #include #include #include +#ifdef CAN_USE_GCM_ASM +#include +#endif #define GHASH(c, d, t, o) \ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ (uint64_t *)(void *)(t)); +/* Select GCM implementation */ +#define IMPL_FASTEST (UINT32_MAX) +#define IMPL_CYCLE (UINT32_MAX-1) +#ifdef CAN_USE_GCM_ASM +#define IMPL_AVX (UINT32_MAX-2) +#endif +#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) +static uint32_t icp_gcm_impl = IMPL_FASTEST; +static uint32_t user_sel_impl = IMPL_FASTEST; + +#ifdef CAN_USE_GCM_ASM +/* + * Whether to use the optimized openssl gcm and ghash implementations. + * Set to true if module parameter icp_gcm_impl == "avx". + */ +static boolean_t gcm_use_avx = B_FALSE; +#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) + +static inline boolean_t gcm_avx_will_work(void); +static inline void gcm_set_avx(boolean_t); +static inline boolean_t gcm_toggle_avx(void); + +static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, + crypto_data_t *, size_t); + +static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); +static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); +static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, + size_t, size_t); +#endif /* ifdef CAN_USE_GCM_ASM */ + /* * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode * is done in another function. @@ -47,6 +81,12 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { +#ifdef CAN_USE_GCM_ASM + if (ctx->gcm_use_avx == B_TRUE) + return (gcm_mode_encrypt_contiguous_blocks_avx( + ctx, data, length, out, block_size)); +#endif + const gcm_impl_ops_t *gops; size_t remainder = length; size_t need = 0; @@ -109,6 +149,14 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, ctx->gcm_processed_data_len += block_size; + /* + * The following copies a complete GCM block back to where it + * came from if there was a remainder in the last call and out + * is NULL. That doesn't seem to make sense. So we assert this + * can't happen and leave the code in for reference. + * See https://github.com/zfsonlinux/zfs/issues/9661 + */ + ASSERT(out != NULL); if (out == NULL) { if (ctx->gcm_remainder_len > 0) { bcopy(blockp, ctx->gcm_copy_to, @@ -169,6 +217,11 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { +#ifdef CAN_USE_GCM_ASM + if (ctx->gcm_use_avx == B_TRUE) + return (gcm_encrypt_final_avx(ctx, out, block_size)); +#endif + const gcm_impl_ops_t *gops; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint8_t *ghash, *macp = NULL; @@ -321,6 +374,11 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { +#ifdef CAN_USE_GCM_ASM + if (ctx->gcm_use_avx == B_TRUE) + return (gcm_decrypt_final_avx(ctx, out, block_size)); +#endif + const gcm_impl_ops_t *gops; size_t pt_len; size_t remainder; @@ -526,6 +584,9 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, return (CRYPTO_SUCCESS); } +/* + * Init the GCM context struct. Handle the cycle and avx implementations here. + */ int gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), @@ -556,11 +617,37 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, return (CRYPTO_MECHANISM_PARAM_INVALID); } - if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, - gcm_param->pAAD, gcm_param->ulAADLen, block_size, - encrypt_block, copy_block, xor_block) != 0) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; +#ifdef CAN_USE_GCM_ASM + /* + * Handle the "cycle" implementation by creating avx and non avx + * contexts alternately. + */ + if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { + gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; + } else { + gcm_ctx->gcm_use_avx = gcm_toggle_avx(); + } + /* We don't handle byte swapped key schedules in the avx code path. */ + aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; + if (ks->ops->needs_byteswap == B_TRUE) { + gcm_ctx->gcm_use_avx = B_FALSE; + } + /* Avx and non avx context initialization differs from here on. */ + if (gcm_ctx->gcm_use_avx == B_FALSE) { +#endif /* ifdef CAN_USE_GCM_ASM */ + if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, + gcm_param->pAAD, gcm_param->ulAADLen, block_size, + encrypt_block, copy_block, xor_block) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } +#ifdef CAN_USE_GCM_ASM + } else { + if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, + gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } } +#endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } @@ -590,11 +677,37 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, return (CRYPTO_MECHANISM_PARAM_INVALID); } - if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, - gmac_param->pAAD, gmac_param->ulAADLen, block_size, - encrypt_block, copy_block, xor_block) != 0) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; +#ifdef CAN_USE_GCM_ASM + /* + * Handle the "cycle" implementation by creating avx and non avx + * contexts alternately. + */ + if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { + gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; + } else { + gcm_ctx->gcm_use_avx = gcm_toggle_avx(); } + /* We don't handle byte swapped key schedules in the avx code path. */ + aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; + if (ks->ops->needs_byteswap == B_TRUE) { + gcm_ctx->gcm_use_avx = B_FALSE; + } + /* Avx and non avx context initialization differs from here on. */ + if (gcm_ctx->gcm_use_avx == B_FALSE) { +#endif /* ifdef CAN_USE_GCM_ASM */ + if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, + gmac_param->pAAD, gmac_param->ulAADLen, block_size, + encrypt_block, copy_block, xor_block) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } +#ifdef CAN_USE_GCM_ASM + } else { + if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, + gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } + } +#endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } @@ -645,15 +758,6 @@ const gcm_impl_ops_t *gcm_all_impl[] = { /* Indicate that benchmark has been completed */ static boolean_t gcm_impl_initialized = B_FALSE; -/* Select GCM implementation */ -#define IMPL_FASTEST (UINT32_MAX) -#define IMPL_CYCLE (UINT32_MAX-1) - -#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) - -static uint32_t icp_gcm_impl = IMPL_FASTEST; -static uint32_t user_sel_impl = IMPL_FASTEST; - /* Hold all supported implementations */ static size_t gcm_supp_impl_cnt = 0; static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; @@ -685,6 +789,16 @@ gcm_impl_get_ops() size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; ops = gcm_supp_impl[idx]; break; +#ifdef CAN_USE_GCM_ASM + case IMPL_AVX: + /* + * Make sure that we return a valid implementation while + * switching to the avx implementation since there still + * may be unfinished non-avx contexts around. + */ + ops = &gcm_generic_impl; + break; +#endif default: ASSERT3U(impl, <, gcm_supp_impl_cnt); ASSERT3U(gcm_supp_impl_cnt, >, 0); @@ -733,6 +847,16 @@ gcm_impl_init(void) strcpy(gcm_fastest_impl.name, "fastest"); +#ifdef CAN_USE_GCM_ASM + /* + * Use the avx implementation if it's available and the implementation + * hasn't changed from its default value of fastest on module load. + */ + if (gcm_avx_will_work() && + GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { + gcm_set_avx(B_TRUE); + } +#endif /* Finish initialization */ atomic_swap_32(&icp_gcm_impl, user_sel_impl); gcm_impl_initialized = B_TRUE; @@ -744,6 +868,9 @@ static const struct { } gcm_impl_opts[] = { { "cycle", IMPL_CYCLE }, { "fastest", IMPL_FASTEST }, +#ifdef CAN_USE_GCM_ASM + { "avx", IMPL_AVX }, +#endif }; /* @@ -777,6 +904,12 @@ gcm_impl_set(const char *val) /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { +#ifdef CAN_USE_GCM_ASM + /* Ignore avx implementation if it won't work. */ + if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { + continue; + } +#endif if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { impl = gcm_impl_opts[i].sel; err = 0; @@ -795,6 +928,18 @@ gcm_impl_set(const char *val) } } } +#ifdef CAN_USE_GCM_ASM + /* + * Use the avx implementation if available and the requested one is + * avx or fastest. + */ + if (gcm_avx_will_work() == B_TRUE && + (impl == IMPL_AVX || impl == IMPL_FASTEST)) { + gcm_set_avx(B_TRUE); + } else { + gcm_set_avx(B_FALSE); + } +#endif if (err == 0) { if (gcm_impl_initialized) @@ -826,6 +971,12 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { +#ifdef CAN_USE_GCM_ASM + /* Ignore avx implementation if it won't work. */ + if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { + continue; + } +#endif fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); } @@ -842,4 +993,563 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, NULL, 0644); MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); -#endif +#endif /* defined(__KERNEL) */ + +#ifdef CAN_USE_GCM_ASM +#define GCM_BLOCK_LEN 16 +/* + * The openssl asm routines are 6x aggregated and need that many bytes + * at minimum. + */ +#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) +#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) +/* + * Ensure the chunk size is reasonable since we are allocating a + * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. + */ +#define GCM_AVX_MAX_CHUNK_SIZE \ + (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) + +/* Get the chunk size module parameter. */ +#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size + +/* Clear the FPU registers since they hold sensitive internal state. */ +#define clear_fpu_regs() clear_fpu_regs_avx() +#define GHASH_AVX(ctx, in, len) \ + gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \ + in, len) + +#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) + +/* + * Module parameter: number of bytes to process at once while owning the FPU. + * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is + * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. + */ +static uint32_t gcm_avx_chunk_size = + ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; + +extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); +extern void clear_fpu_regs_avx(void); +extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); +extern void aes_encrypt_intel(const uint32_t rk[], int nr, + const uint32_t pt[4], uint32_t ct[4]); + +extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]); +extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2], + const uint8_t *in, size_t len); + +extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, + const void *, uint64_t *, uint64_t *); + +extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, + const void *, uint64_t *, uint64_t *); + +static inline boolean_t +gcm_avx_will_work(void) +{ + /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */ + return (kfpu_allowed() && + zfs_avx_available() && zfs_movbe_available() && + zfs_aes_available() && zfs_pclmulqdq_available()); +} + +static inline void +gcm_set_avx(boolean_t val) +{ + if (gcm_avx_will_work() == B_TRUE) { + atomic_swap_32(&gcm_use_avx, val); + } +} + +static inline boolean_t +gcm_toggle_avx(void) +{ + if (gcm_avx_will_work() == B_TRUE) { + return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); + } else { + return (B_FALSE); + } +} + +/* + * Clear senssitve data in the context. + * + * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and + * ctx->gcm_Htable contain the hash sub key which protects authentication. + * + * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for + * a known plaintext attack, they consists of the IV and the first and last + * counter respectively. If they should be cleared is debatable. + */ +static inline void +gcm_clear_ctx(gcm_ctx_t *ctx) +{ + bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); + bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); + bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable)); + bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); + bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); +} + +/* Increment the GCM counter block by n. */ +static inline void +gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) +{ + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); + + counter = htonll(counter + n); + counter &= counter_mask; + ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; +} + +/* + * Encrypt multiple blocks of data in GCM mode. + * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines + * if possible. While processing a chunk the FPU is "locked". + */ +static int +gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, + size_t length, crypto_data_t *out, size_t block_size) +{ + size_t bleft = length; + size_t need = 0; + size_t done = 0; + uint8_t *datap = (uint8_t *)data; + size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); + uint64_t *ghash = ctx->gcm_ghash; + uint64_t *cb = ctx->gcm_cb; + uint8_t *ct_buf = NULL; + uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; + int rv = CRYPTO_SUCCESS; + + ASSERT(block_size == GCM_BLOCK_LEN); + /* + * If the last call left an incomplete block, try to fill + * it first. + */ + if (ctx->gcm_remainder_len > 0) { + need = block_size - ctx->gcm_remainder_len; + if (length < need) { + /* Accumulate bytes here and return. */ + bcopy(datap, (uint8_t *)ctx->gcm_remainder + + ctx->gcm_remainder_len, length); + + ctx->gcm_remainder_len += length; + if (ctx->gcm_copy_to == NULL) { + ctx->gcm_copy_to = datap; + } + return (CRYPTO_SUCCESS); + } else { + /* Complete incomplete block. */ + bcopy(datap, (uint8_t *)ctx->gcm_remainder + + ctx->gcm_remainder_len, need); + + ctx->gcm_copy_to = NULL; + } + } + + /* Allocate a buffer to encrypt to if there is enough input. */ + if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { + ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); + if (ct_buf == NULL) { + return (CRYPTO_HOST_MEMORY); + } + } + + /* If we completed an incomplete block, encrypt and write it out. */ + if (ctx->gcm_remainder_len > 0) { + kfpu_begin(); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, + (const uint32_t *)cb, (uint32_t *)tmp); + + gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); + GHASH_AVX(ctx, tmp, block_size); + clear_fpu_regs(); + kfpu_end(); + /* + * We don't follow gcm_mode_encrypt_contiguous_blocks() here + * but assert that out is not null. + * See gcm_mode_encrypt_contiguous_blocks() above and + * https://github.com/zfsonlinux/zfs/issues/9661 + */ + ASSERT(out != NULL); + rv = crypto_put_output_data(tmp, out, block_size); + out->cd_offset += block_size; + gcm_incr_counter_block(ctx); + ctx->gcm_processed_data_len += block_size; + bleft -= need; + datap += need; + ctx->gcm_remainder_len = 0; + } + + /* Do the bulk encryption in chunk_size blocks. */ + for (; bleft >= chunk_size; bleft -= chunk_size) { + kfpu_begin(); + done = aesni_gcm_encrypt( + datap, ct_buf, chunk_size, key, cb, ghash); + + clear_fpu_regs(); + kfpu_end(); + if (done != chunk_size) { + rv = CRYPTO_FAILED; + goto out_nofpu; + } + if (out != NULL) { + rv = crypto_put_output_data(ct_buf, out, chunk_size); + if (rv != CRYPTO_SUCCESS) { + goto out_nofpu; + } + out->cd_offset += chunk_size; + } + datap += chunk_size; + ctx->gcm_processed_data_len += chunk_size; + } + /* Check if we are already done. */ + if (bleft == 0) { + goto out_nofpu; + } + /* Bulk encrypt the remaining data. */ + kfpu_begin(); + if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { + done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); + if (done == 0) { + rv = CRYPTO_FAILED; + goto out; + } + if (out != NULL) { + rv = crypto_put_output_data(ct_buf, out, done); + if (rv != CRYPTO_SUCCESS) { + goto out; + } + out->cd_offset += done; + } + ctx->gcm_processed_data_len += done; + datap += done; + bleft -= done; + + } + /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ + while (bleft > 0) { + if (bleft < block_size) { + bcopy(datap, ctx->gcm_remainder, bleft); + ctx->gcm_remainder_len = bleft; + ctx->gcm_copy_to = datap; + goto out; + } + /* Encrypt, hash and write out. */ + aes_encrypt_intel(key->encr_ks.ks32, key->nr, + (const uint32_t *)cb, (uint32_t *)tmp); + + gcm_xor_avx(datap, tmp); + GHASH_AVX(ctx, tmp, block_size); + if (out != NULL) { + rv = crypto_put_output_data(tmp, out, block_size); + if (rv != CRYPTO_SUCCESS) { + goto out; + } + out->cd_offset += block_size; + } + gcm_incr_counter_block(ctx); + ctx->gcm_processed_data_len += block_size; + datap += block_size; + bleft -= block_size; + } +out: + clear_fpu_regs(); + kfpu_end(); +out_nofpu: + if (ct_buf != NULL) { + vmem_free(ct_buf, chunk_size); + } + return (rv); +} + +/* + * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual + * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. + */ +static int +gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) +{ + uint8_t *ghash = (uint8_t *)ctx->gcm_ghash; + uint32_t *J0 = (uint32_t *)ctx->gcm_J0; + uint8_t *remainder = (uint8_t *)ctx->gcm_remainder; + size_t rem_len = ctx->gcm_remainder_len; + const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; + int aes_rounds = ((aes_key_t *)keysched)->nr; + int rv; + + ASSERT(block_size == GCM_BLOCK_LEN); + + if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { + return (CRYPTO_DATA_LEN_RANGE); + } + + kfpu_begin(); + /* Pad last incomplete block with zeros, encrypt and hash. */ + if (rem_len > 0) { + uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; + const uint32_t *cb = (uint32_t *)ctx->gcm_cb; + + aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); + bzero(remainder + rem_len, block_size - rem_len); + for (int i = 0; i < rem_len; i++) { + remainder[i] ^= tmp[i]; + } + GHASH_AVX(ctx, remainder, block_size); + ctx->gcm_processed_data_len += rem_len; + /* No need to increment counter_block, it's the last block. */ + } + /* Finish tag. */ + ctx->gcm_len_a_len_c[1] = + htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); + GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size); + aes_encrypt_intel(keysched, aes_rounds, J0, J0); + + gcm_xor_avx((uint8_t *)J0, ghash); + clear_fpu_regs(); + kfpu_end(); + + /* Output remainder. */ + if (rem_len > 0) { + rv = crypto_put_output_data(remainder, out, rem_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + } + out->cd_offset += rem_len; + ctx->gcm_remainder_len = 0; + rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + + out->cd_offset += ctx->gcm_tag_len; + /* Clear sensitive data in the context before returning. */ + gcm_clear_ctx(ctx); + return (CRYPTO_SUCCESS); +} + +/* + * Finalize decryption: We just have accumulated crypto text, so now we + * decrypt it here inplace. + */ +static int +gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) +{ + ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); + ASSERT3U(block_size, ==, 16); + + size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; + uint8_t *datap = ctx->gcm_pt_buf; + const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); + uint32_t *cb = (uint32_t *)ctx->gcm_cb; + uint64_t *ghash = ctx->gcm_ghash; + uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; + int rv = CRYPTO_SUCCESS; + size_t bleft, done; + + /* + * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be + * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of + * GCM_AVX_MIN_DECRYPT_BYTES. + */ + for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { + kfpu_begin(); + done = aesni_gcm_decrypt(datap, datap, chunk_size, + (const void *)key, ctx->gcm_cb, ghash); + clear_fpu_regs(); + kfpu_end(); + if (done != chunk_size) { + return (CRYPTO_FAILED); + } + datap += done; + } + /* Decrypt remainder, which is less then chunk size, in one go. */ + kfpu_begin(); + if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { + done = aesni_gcm_decrypt(datap, datap, bleft, + (const void *)key, ctx->gcm_cb, ghash); + if (done == 0) { + clear_fpu_regs(); + kfpu_end(); + return (CRYPTO_FAILED); + } + datap += done; + bleft -= done; + } + ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); + + /* + * Now less then GCM_AVX_MIN_DECRYPT_BYTES bytes remain, + * decrypt them block by block. + */ + while (bleft > 0) { + /* Incomplete last block. */ + if (bleft < block_size) { + uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; + + bzero(lastb, block_size); + bcopy(datap, lastb, bleft); + /* The GCM processing. */ + GHASH_AVX(ctx, lastb, block_size); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); + for (size_t i = 0; i < bleft; i++) { + datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i]; + } + break; + } + /* The GCM processing. */ + GHASH_AVX(ctx, datap, block_size); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); + gcm_xor_avx((uint8_t *)tmp, datap); + gcm_incr_counter_block(ctx); + + datap += block_size; + bleft -= block_size; + } + if (rv != CRYPTO_SUCCESS) { + clear_fpu_regs(); + kfpu_end(); + return (rv); + } + /* Decryption done, finish the tag. */ + ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); + GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0, + (uint32_t *)ctx->gcm_J0); + + gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash); + + /* We are done with the FPU, restore its state. */ + clear_fpu_regs(); + kfpu_end(); + + /* Compare the input authentication tag with what we calculated. */ + if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { + /* They don't match. */ + return (CRYPTO_INVALID_MAC); + } + rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); + if (rv != CRYPTO_SUCCESS) { + return (rv); + } + out->cd_offset += pt_len; + gcm_clear_ctx(ctx); + return (CRYPTO_SUCCESS); +} + +/* + * Initialize the GCM params H, Htabtle and the counter block. Save the + * initial counter block. + */ +static int +gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, + unsigned char *auth_data, size_t auth_data_len, size_t block_size) +{ + uint8_t *cb = (uint8_t *)ctx->gcm_cb; + uint64_t *H = ctx->gcm_H; + const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; + int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; + uint8_t *datap = auth_data; + size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + size_t bleft; + + ASSERT(block_size == GCM_BLOCK_LEN); + + /* Init H (encrypt zero block) and create the initial counter block. */ + bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash)); + bzero(H, sizeof (ctx->gcm_H)); + kfpu_begin(); + aes_encrypt_intel(keysched, aes_rounds, + (const uint32_t *)H, (uint32_t *)H); + + gcm_init_htab_avx(ctx->gcm_Htable, H); + + if (iv_len == 12) { + bcopy(iv, cb, 12); + cb[12] = 0; + cb[13] = 0; + cb[14] = 0; + cb[15] = 1; + /* We need the ICB later. */ + bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0)); + } else { + /* + * Most consumers use 12 byte IVs, so it's OK to use the + * original routines for other IV sizes, just avoid nesting + * kfpu_begin calls. + */ + clear_fpu_regs(); + kfpu_end(); + gcm_format_initial_blocks(iv, iv_len, ctx, block_size, + aes_copy_block, aes_xor_block); + kfpu_begin(); + } + + /* Openssl post increments the counter, adjust for that. */ + gcm_incr_counter_block(ctx); + + /* Ghash AAD in chunk_size blocks. */ + for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { + GHASH_AVX(ctx, datap, chunk_size); + datap += chunk_size; + clear_fpu_regs(); + kfpu_end(); + kfpu_begin(); + } + /* Ghash the remainder and handle possible incomplete GCM block. */ + if (bleft > 0) { + size_t incomp = bleft % block_size; + + bleft -= incomp; + if (bleft > 0) { + GHASH_AVX(ctx, datap, bleft); + datap += bleft; + } + if (incomp > 0) { + /* Zero pad and hash incomplete last block. */ + uint8_t *authp = (uint8_t *)ctx->gcm_tmp; + + bzero(authp, block_size); + bcopy(datap, authp, incomp); + GHASH_AVX(ctx, authp, block_size); + } + } + clear_fpu_regs(); + kfpu_end(); + return (CRYPTO_SUCCESS); +} + +#if defined(_KERNEL) +static int +icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) +{ + unsigned long val; + char val_rounded[16]; + int error = 0; + + error = kstrtoul(buf, 0, &val); + if (error) + return (error); + + val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; + + if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) + return (-EINVAL); + + snprintf(val_rounded, 16, "%u", (uint32_t)val); + error = param_set_uint(val_rounded, kp); + return (error); +} + +module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, + param_get_uint, &gcm_avx_chunk_size, 0644); + +MODULE_PARM_DESC(icp_gcm_avx_chunk_size, + "How many bytes to process while owning the FPU"); + +#endif /* defined(__KERNEL) */ +#endif /* ifdef CAN_USE_GCM_ASM */ diff --git a/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams new file mode 100644 index 000000000000..0de1883dc81b --- /dev/null +++ b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams @@ -0,0 +1,36 @@ +Copyright (c) 2006-2017, CRYPTOGAMS by +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain copyright notices, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the CRYPTOGAMS nor the names of its + copyright holder and contributors may be used to endorse or + promote products derived from this software without specific + prior written permission. + +ALTERNATIVELY, provided that this notice is retained in full, this +product may be distributed under the terms of the GNU General Public +License (GPL), in which case the provisions of the GPL apply INSTEAD OF +those given above. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip new file mode 100644 index 000000000000..6184759c8b74 --- /dev/null +++ b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip @@ -0,0 +1 @@ +PORTIONS OF GCM and GHASH FUNCTIONALITY diff --git a/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl new file mode 100644 index 000000000000..49cc83d2ee29 --- /dev/null +++ b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip new file mode 100644 index 000000000000..6184759c8b74 --- /dev/null +++ b/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip @@ -0,0 +1 @@ +PORTIONS OF GCM and GHASH FUNCTIONALITY diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S new file mode 100644 index 000000000000..bad0b7d23c46 --- /dev/null +++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -0,0 +1,892 @@ +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# +# AES-NI-CTR+GHASH stitch. +# +# February 2013 +# +# OpenSSL GCM implementation is organized in such way that its +# performance is rather close to the sum of its streamed components, +# in the context parallelized AES-NI CTR and modulo-scheduled +# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation +# was observed to perform significantly better than the sum of the +# components on contemporary CPUs, the effort was deemed impossible to +# justify. This module is based on combination of Intel submissions, +# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max +# Locktyukhin of Intel Corp. who verified that it reduces shuffles +# pressure with notable relative improvement, achieving 1.0 cycle per +# byte processed with 128-bit key on Haswell processor, 0.74 - on +# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled +# measurements for favourable packet size, one divisible by 96. +# Applications using the EVP interface will observe a few percent +# worse performance.] +# +# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP). +# +# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest +# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf + +# Generated once from +# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/aesni-gcm-x86_64.pl +# and modified for ICP. Modification are kept at a bare minimum to ease later +# upstream merges. + +#if defined(__x86_64__) && defined(HAVE_AVX) && \ + defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) + +.text + +.type _aesni_ctr32_ghash_6x,@function +.align 32 +_aesni_ctr32_ghash_6x: + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp .Loop6x + +.align 32 +.Loop6x: + addl $100663296,%ebx + jc .Lhandle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +.Lresume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $0x60,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds. + jb .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + cmpl $14,%ebp // ICP does not zero key schedule. + jb .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp .Lenc_tail + +.align 32 +.Lhandle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp .Lresume_ctr32 + +.align 32 +.Lenc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $0x60,%r10 + subq $0x6,%rdx + jc .L6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp .Loop6x + +.L6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + .byte 0xf3,0xc3 +.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +.align 32 +aesni_gcm_decrypt: +.cfi_startproc + xorq %r10,%r10 + cmpq $0x60,%rdx + jb .Lgcm_dec_abort + + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + vmovdqu (%r9),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32+32(%r9),%r9 + movl 504-128(%rcx),%ebp // ICP has a larger offset for rounds. + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Ldec_no_key_aliasing + cmpq $768,%r15 + jnc .Ldec_no_key_aliasing + subq %r15,%rsp +.Ldec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + leaq (%rdi),%r14 + vmovdqu 64(%rdi),%xmm4 + leaq -192(%rdi,%rdx,1),%r15 + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %r10,%r10 + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups %xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lgcm_dec_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +.type _aesni_ctr32_6x,@function +.align 32 +_aesni_ctr32_6x: + vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 32(%r11),%xmm2 + leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds. + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc .Lhandle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 + +.align 16 +.Loop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz .Loop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + .byte 0xf3,0xc3 +.align 32 +.Lhandle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 +.size _aesni_ctr32_6x,.-_aesni_ctr32_6x + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +.align 32 +aesni_gcm_encrypt: +.cfi_startproc + xorq %r10,%r10 + cmpq $288,%rdx + jb .Lgcm_enc_abort + + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 504-128(%rcx),%ebp // ICP has an larger offset for rounds. + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Lenc_no_key_aliasing + cmpq $768,%r15 + jnc .Lenc_no_key_aliasing + subq %r15,%rsp +.Lenc_no_key_aliasing: + + leaq (%rsi),%r14 + leaq -192(%rsi,%rdx,1),%r15 + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + vmovdqu (%r9),%xmm8 + leaq 32+32(%r9),%r9 + subq $12,%rdx + movq $192,%r10 + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lgcm_enc_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt + +/* Some utility routines */ + +/* + * clear all fpu registers + * void clear_fpu_regs_avx(void); + */ +.globl clear_fpu_regs_avx +.type clear_fpu_regs_avx,@function +.align 32 +clear_fpu_regs_avx: + vzeroall + ret +.size clear_fpu_regs_avx,.-clear_fpu_regs_avx + +/* + * void gcm_xor_avx(const uint8_t *src, uint8_t *dst); + * + * XORs one pair of unaligned 128-bit blocks from `src' and `dst' and + * stores the result at `dst'. The XOR is performed using FPU registers, + * so make sure FPU state is saved when running this in the kernel. + */ +.globl gcm_xor_avx +.type gcm_xor_avx,@function +.align 32 +gcm_xor_avx: + movdqu (%rdi), %xmm0 + movdqu (%rsi), %xmm1 + pxor %xmm1, %xmm0 + movdqu %xmm0, (%rsi) + ret +.size gcm_xor_avx,.-gcm_xor_avx + +/* + * Toggle a boolean_t value atomically and return the new value. + * boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); + */ +.globl atomic_toggle_boolean_nv +.type atomic_toggle_boolean_nv,@function +.align 32 +atomic_toggle_boolean_nv: + xorl %eax, %eax + lock + xorl $1, (%rdi) + jz 1f + movl $1, %eax +1: + ret +.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv + +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lpoly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lone_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Ltwo_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lone_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 + +/* Mark the stack non-executable. */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */ diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S new file mode 100644 index 000000000000..90cc36b43a78 --- /dev/null +++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S @@ -0,0 +1,714 @@ +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# March, June 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that +# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH +# function features so called "528B" variant utilizing additional +# 256+16 bytes of per-key storage [+512 bytes shared table]. +# Performance results are for this streamed GHASH subroutine and are +# expressed in cycles per processed byte, less is better: +# +# gcc 3.4.x(*) assembler +# +# P4 28.6 14.0 +100% +# Opteron 19.3 7.7 +150% +# Core2 17.8 8.1(**) +120% +# Atom 31.6 16.8 +88% +# VIA Nano 21.8 10.1 +115% +# +# (*) comparison is not completely fair, because C results are +# for vanilla "256B" implementation, while assembler results +# are for "528B";-) +# (**) it's mystery [to me] why Core2 result is not same as for +# Opteron; + +# May 2010 +# +# Add PCLMULQDQ version performing at 2.02 cycles per processed byte. +# See ghash-x86.pl for background information and details about coding +# techniques. +# +# Special thanks to David Woodhouse for providing access to a +# Westmere-based system on behalf of Intel Open Source Technology Centre. + +# December 2012 +# +# Overhaul: aggregate Karatsuba post-processing, improve ILP in +# reduction_alg9, increase reduction aggregate factor to 4x. As for +# the latter. ghash-x86.pl discusses that it makes lesser sense to +# increase aggregate factor. Then why increase here? Critical path +# consists of 3 independent pclmulqdq instructions, Karatsuba post- +# processing and reduction. "On top" of this we lay down aggregated +# multiplication operations, triplets of independent pclmulqdq's. As +# issue rate for pclmulqdq is limited, it makes lesser sense to +# aggregate more multiplications than it takes to perform remaining +# non-multiplication operations. 2x is near-optimal coefficient for +# contemporary Intel CPUs (therefore modest improvement coefficient), +# but not for Bulldozer. Latter is because logical SIMD operations +# are twice as slow in comparison to Intel, so that critical path is +# longer. A CPU with higher pclmulqdq issue rate would also benefit +# from higher aggregate factor... +# +# Westmere 1.78(+13%) +# Sandy Bridge 1.80(+8%) +# Ivy Bridge 1.80(+7%) +# Haswell 0.55(+93%) (if system doesn't support AVX) +# Broadwell 0.45(+110%)(if system doesn't support AVX) +# Skylake 0.44(+110%)(if system doesn't support AVX) +# Bulldozer 1.49(+27%) +# Silvermont 2.88(+13%) +# Knights L 2.12(-) (if system doesn't support AVX) +# Goldmont 1.08(+24%) + +# March 2013 +# +# ... 8x aggregate factor AVX code path is using reduction algorithm +# suggested by Shay Gueron[1]. Even though contemporary AVX-capable +# CPUs such as Sandy and Ivy Bridge can execute it, the code performs +# sub-optimally in comparison to above mentioned version. But thanks +# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that +# it performs in 0.41 cycles per byte on Haswell processor, in +# 0.29 on Broadwell, and in 0.36 on Skylake. +# +# Knights Landing achieves 1.09 cpb. +# +# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest + +# Generated once from +# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/ghash-x86_64.pl +# and modified for ICP. Modification are kept at a bare minimum to ease later +# upstream merges. + +#if defined(__x86_64__) && defined(HAVE_AVX) && \ + defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) + +.text + +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.cfi_startproc +.L_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_gmult_clmul,.-gcm_gmult_clmul + +.globl gcm_init_htab_avx +.type gcm_init_htab_avx,@function +.align 32 +gcm_init_htab_avx: +.cfi_startproc + vzeroupper + + vmovdqu (%rsi),%xmm2 + // KCF/ICP stores H in network byte order with the hi qword first + // so we need to swap all bytes, not the 2 qwords. + vmovdqu .Lbswap_mask(%rip),%xmm4 + vpshufb %xmm4,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp .Linit_start_avx +.align 32 +.Linit_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +.Linit_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz .Linit_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_init_htab_avx,.-gcm_init_htab_avx + +.globl gcm_gmult_avx +.type gcm_gmult_avx,@function +.align 32 +gcm_gmult_avx: +.cfi_startproc + jmp .L_gmult_clmul +.cfi_endproc +.size gcm_gmult_avx,.-gcm_gmult_avx +.globl gcm_ghash_avx +.type gcm_ghash_avx,@function +.align 32 +gcm_ghash_avx: +.cfi_startproc + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq .L0x1c2_polynomial(%rip),%r10 + leaq 64(%rsi),%rsi + vmovdqu .Lbswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $0x80,%rcx + jb .Lshort_avx + subq $0x80,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $0x80,%rcx + jb .Ltail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $0x80,%rcx + jmp .Loop8x_avx + +.align 32 +.Loop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $0x80,%rcx + jnc .Loop8x_avx + + addq $0x80,%rcx + jmp .Ltail_no_xor_avx + +.align 32 +.Lshort_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $0x10,%rcx + jmp .Ltail_avx + +.align 32 +.Ltail_avx: + vpxor %xmm10,%xmm15,%xmm15 +.Ltail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne .Lshort_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_ghash_avx,.-gcm_ghash_avx +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: +.long 7,0,7,0 +.L7_mask_poly: +.long 7,0,450,0 +.align 64 +.type .Lrem_4bit,@object +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.type .Lrem_8bit,@object +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 + +/* Mark the stack non-executable. */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */ diff --git a/module/icp/include/aes/aes_impl.h b/module/icp/include/aes/aes_impl.h index a0b82ade4559..0484462ca403 100644 --- a/module/icp/include/aes/aes_impl.h +++ b/module/icp/include/aes/aes_impl.h @@ -107,6 +107,11 @@ typedef union { } aes_ks_t; typedef struct aes_impl_ops aes_impl_ops_t; + +/* + * The absolute offset of the encr_ks (0) and the nr (504) fields are hard + * coded in aesni-gcm-x86_64, so please don't change (or adjust accordingly). + */ typedef struct aes_key aes_key_t; struct aes_key { aes_ks_t encr_ks; /* encryption key schedule */ diff --git a/module/icp/include/modes/modes.h b/module/icp/include/modes/modes.h index 7c1f10b16e76..9396eab5c56e 100644 --- a/module/icp/include/modes/modes.h +++ b/module/icp/include/modes/modes.h @@ -34,6 +34,16 @@ extern "C" { #include #include +/* + * Does the build chain support all instructions needed for the GCM assembler + * routines. AVX support should imply AES-NI and PCLMULQDQ, but make sure + * anyhow. + */ +#if defined(__x86_64__) && defined(HAVE_AVX) && \ + defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) +#define CAN_USE_GCM_ASM +#endif + #define ECB_MODE 0x00000002 #define CBC_MODE 0x00000004 #define CTR_MODE 0x00000008 @@ -189,13 +199,17 @@ typedef struct ccm_ctx { * * gcm_H: Subkey. * + * gcm_Htable: Pre-computed and pre-shifted H, H^2, ... H^6 for the + * Karatsuba Algorithm in host byte order. + * * gcm_J0: Pre-counter block generated from the IV. * * gcm_len_a_len_c: 64-bit representations of the bit lengths of * AAD and ciphertext. * - * gcm_kmflag: Current value of kmflag. Used only for allocating - * the plaintext buffer during decryption. + * gcm_kmflag: Current value of kmflag. Used for allocating + * the plaintext buffer during decryption and a + * gcm_avx_chunk_size'd buffer for avx enabled encryption. */ typedef struct gcm_ctx { struct common_ctx gcm_common; @@ -203,12 +217,23 @@ typedef struct gcm_ctx { size_t gcm_processed_data_len; size_t gcm_pt_buf_len; uint32_t gcm_tmp[4]; + /* + * The relative positions of gcm_ghash, gcm_H and pre-computed + * gcm_Htable are hard coded in aesni-gcm-x86_64.S and ghash-x86_64.S, + * so please don't change (or adjust accordingly). + */ uint64_t gcm_ghash[2]; uint64_t gcm_H[2]; +#ifdef CAN_USE_GCM_ASM + uint64_t gcm_Htable[12][2]; +#endif uint64_t gcm_J0[2]; uint64_t gcm_len_a_len_c[2]; uint8_t *gcm_pt_buf; int gcm_kmflag; +#ifdef CAN_USE_GCM_ASM + boolean_t gcm_use_avx; +#endif } gcm_ctx_t; #define gcm_keysched gcm_common.cc_keysched diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh index a46cb55f3662..141b2557d622 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh @@ -53,7 +53,7 @@ set -A ENCRYPTION_ALGS \ "encryption=aes-256-gcm" set -A ENCRYPTION_PROPS \ - "encryption=aes-256-ccm" \ + "encryption=aes-256-gcm" \ "encryption=aes-128-ccm" \ "encryption=aes-192-ccm" \ "encryption=aes-256-ccm" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh index d28d5953c5b1..63391e8adb49 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh @@ -48,7 +48,7 @@ set -A ENCRYPTION_ALGS "encryption=on" \ "encryption=aes-192-gcm" \ "encryption=aes-256-gcm" -set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ +set -A ENCRYPTION_PROPS "encryption=aes-256-gcm" \ "encryption=aes-128-ccm" \ "encryption=aes-192-ccm" \ "encryption=aes-256-ccm" \ diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh index 8e21acd99d28..793904db91ca 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh @@ -124,7 +124,7 @@ ds=$TESTPOOL/recv log_must eval "zfs send $snap > $sendfile" log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ "-o keylocation=file://$keyfile $ds < $sendfile" -log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" @@ -140,7 +140,7 @@ ds=$TESTPOOL/recv log_must eval "zfs send -p $snap > $sendfile" log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ "-o keylocation=file://$keyfile $ds < $sendfile" -log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" @@ -158,7 +158,7 @@ ds=$TESTPOOL/recv log_must eval "zfs send -R $snap > $sendfile" log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ "-o keylocation=file://$keyfile $ds < $sendfile" -log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" @@ -174,7 +174,7 @@ ds=$TESTPOOL/crypt/recv log_must eval "zfs send -p $snap > $sendfile" log_must eval "zfs recv -x encryption $ds < $sendfile" log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" -log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" recv_cksum=$(md5digest /$ds/$TESTFILE0) @@ -188,7 +188,7 @@ ds=$TESTPOOL/crypt/recv log_must eval "zfs send -R $snap > $sendfile" log_must eval "zfs recv -x encryption $ds < $sendfile" log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" -log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" recv_cksum=$(md5digest /$ds/$TESTFILE0) @@ -202,7 +202,7 @@ ds=$TESTPOOL/crypt/recv log_must eval "zfs send -R $snap2 > $sendfile" log_must eval "zfs recv -x encryption $ds < $sendfile" log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" -log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" recv_cksum=$(md5digest /$ds/$TESTFILE0) From bf81cc917e4648443e0c9eb68ca23b769a239a3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?= Date: Tue, 17 Mar 2020 18:24:38 +0100 Subject: [PATCH 27/32] ICP: gcm-avx: Support architectures lacking the MOVBE instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple of x86_64 architectures which support all needed features to make the accelerated GCM implementation work but the MOVBE instruction. Those are mainly Intel Sandy- and Ivy-Bridge and AMD Bulldozer, Piledriver, and Steamroller. By using MOVBE only if available and replacing it with a MOV followed by a BSWAP if not, those architectures now benefit from the new GCM routines and performance is considerably better compared to the original implementation. Reviewed-by: Brian Behlendorf Reviewed-by: Adam D. Moss Signed-off-by: Attila Fülöp Followup #9749 Closes #10029 --- module/icp/algs/modes/gcm.c | 47 ++- .../icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 357 +++++++++++++++++- module/icp/include/modes/modes.h | 3 +- 3 files changed, 389 insertions(+), 18 deletions(-) diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 6c8dd9862658..0b89b0ef457b 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -50,6 +50,8 @@ static uint32_t icp_gcm_impl = IMPL_FASTEST; static uint32_t user_sel_impl = IMPL_FASTEST; #ifdef CAN_USE_GCM_ASM +/* Does the architecture we run on support the MOVBE instruction? */ +boolean_t gcm_avx_can_use_movbe = B_FALSE; /* * Whether to use the optimized openssl gcm and ghash implementations. * Set to true if module parameter icp_gcm_impl == "avx". @@ -60,6 +62,7 @@ static boolean_t gcm_use_avx = B_FALSE; static inline boolean_t gcm_avx_will_work(void); static inline void gcm_set_avx(boolean_t); static inline boolean_t gcm_toggle_avx(void); +extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t); @@ -618,19 +621,28 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, } #ifdef CAN_USE_GCM_ASM - /* - * Handle the "cycle" implementation by creating avx and non avx - * contexts alternately. - */ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; } else { + /* + * Handle the "cycle" implementation by creating avx and + * non-avx contexts alternately. + */ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); - } - /* We don't handle byte swapped key schedules in the avx code path. */ - aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; - if (ks->ops->needs_byteswap == B_TRUE) { - gcm_ctx->gcm_use_avx = B_FALSE; + /* + * We don't handle byte swapped key schedules in the avx + * code path. + */ + aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; + if (ks->ops->needs_byteswap == B_TRUE) { + gcm_ctx->gcm_use_avx = B_FALSE; + } + /* Use the MOVBE and the BSWAP variants alternately. */ + if (gcm_ctx->gcm_use_avx == B_TRUE && + zfs_movbe_available() == B_TRUE) { + (void) atomic_toggle_boolean_nv( + (volatile boolean_t *)&gcm_avx_can_use_movbe); + } } /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { @@ -852,9 +864,15 @@ gcm_impl_init(void) * Use the avx implementation if it's available and the implementation * hasn't changed from its default value of fastest on module load. */ - if (gcm_avx_will_work() && - GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { - gcm_set_avx(B_TRUE); + if (gcm_avx_will_work()) { +#ifdef HAVE_MOVBE + if (zfs_movbe_available() == B_TRUE) { + atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE); + } +#endif + if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { + gcm_set_avx(B_TRUE); + } } #endif /* Finish initialization */ @@ -1029,7 +1047,6 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; -extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); extern void clear_fpu_regs_avx(void); extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); extern void aes_encrypt_intel(const uint32_t rk[], int nr, @@ -1050,8 +1067,8 @@ gcm_avx_will_work(void) { /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */ return (kfpu_allowed() && - zfs_avx_available() && zfs_movbe_available() && - zfs_aes_available() && zfs_pclmulqdq_available()); + zfs_avx_available() && zfs_aes_available() && + zfs_pclmulqdq_available()); } static inline void diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S index bad0b7d23c46..ed9f660fce5b 100644 --- a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -45,10 +45,13 @@ # upstream merges. #if defined(__x86_64__) && defined(HAVE_AVX) && \ - defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) + defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) + +.extern gcm_avx_can_use_movbe .text +#ifdef HAVE_MOVBE .type _aesni_ctr32_ghash_6x,@function .align 32 _aesni_ctr32_ghash_6x: @@ -361,6 +364,333 @@ _aesni_ctr32_ghash_6x: .byte 0xf3,0xc3 .size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +#endif /* ifdef HAVE_MOVBE */ + +.type _aesni_ctr32_ghash_no_movbe_6x,@function +.align 32 +_aesni_ctr32_ghash_no_movbe_6x: + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp .Loop6x_nmb + +.align 32 +.Loop6x_nmb: + addl $100663296,%ebx + jc .Lhandle_ctr32_nmb + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +.Lresume_ctr32_nmb: + vmovdqu %xmm1,(%r8) + vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $0x60,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movq 88(%r14),%r13 + bswapq %r13 + vaesenc %xmm15,%xmm11,%xmm11 + movq 80(%r14),%r12 + bswapq %r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movq 72(%r14),%r13 + bswapq %r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movq 64(%r14),%r12 + bswapq %r12 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movq 56(%r14),%r13 + bswapq %r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movq 48(%r14),%r12 + bswapq %r12 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movq 40(%r14),%r13 + bswapq %r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movq 32(%r14),%r12 + bswapq %r12 + vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movq 24(%r14),%r13 + bswapq %r13 + vaesenc %xmm15,%xmm11,%xmm11 + movq 16(%r14),%r12 + bswapq %r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movq 8(%r14),%r13 + bswapq %r13 + vaesenc %xmm1,%xmm13,%xmm13 + movq 0(%r14),%r12 + bswapq %r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds. + jb .Lenc_tail_nmb + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + cmpl $14,%ebp // ICP does not zero key schedule. + jb .Lenc_tail_nmb + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp .Lenc_tail_nmb + +.align 32 +.Lhandle_ctr32_nmb: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp .Lresume_ctr32_nmb + +.align 32 +.Lenc_tail_nmb: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $0x60,%r10 + subq $0x6,%rdx + jc .L6x_done_nmb + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp .Loop6x_nmb + +.L6x_done_nmb: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + .byte 0xf3,0xc3 +.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x + .globl aesni_gcm_decrypt .type aesni_gcm_decrypt,@function .align 32 @@ -431,8 +761,19 @@ aesni_gcm_decrypt: vmovdqu %xmm2,96(%rsp) vmovdqu %xmm3,112(%rsp) +#ifdef HAVE_MOVBE +#ifdef _KERNEL + testl $1,gcm_avx_can_use_movbe(%rip) +#else + testl $1,gcm_avx_can_use_movbe@GOTPCREL(%rip) +#endif + jz 1f call _aesni_ctr32_ghash_6x - + jmp 2f +1: +#endif + call _aesni_ctr32_ghash_no_movbe_6x +2: vmovups %xmm9,-96(%rsi) vmovups %xmm10,-80(%rsi) vmovups %xmm11,-64(%rsi) @@ -624,7 +965,19 @@ aesni_gcm_encrypt: movq $192,%r10 vpshufb %xmm0,%xmm8,%xmm8 +#ifdef HAVE_MOVBE +#ifdef _KERNEL + testl $1,gcm_avx_can_use_movbe(%rip) +#else + testl $1,gcm_avx_can_use_movbe@GOTPCREL(%rip) +#endif + jz 1f call _aesni_ctr32_ghash_6x + jmp 2f +1: +#endif + call _aesni_ctr32_ghash_no_movbe_6x +2: vmovdqu 32(%rsp),%xmm7 vmovdqu (%r11),%xmm0 vmovdqu 0-32(%r9),%xmm3 diff --git a/module/icp/include/modes/modes.h b/module/icp/include/modes/modes.h index 9396eab5c56e..57a211ccf1bf 100644 --- a/module/icp/include/modes/modes.h +++ b/module/icp/include/modes/modes.h @@ -40,8 +40,9 @@ extern "C" { * anyhow. */ #if defined(__x86_64__) && defined(HAVE_AVX) && \ - defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) + defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) #define CAN_USE_GCM_ASM +extern boolean_t gcm_avx_can_use_movbe; #endif #define ECB_MODE 0x00000002 From 35793aaeb93021833f9e25cc66194bc8add74d58 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Thu, 20 Feb 2020 11:10:47 -0500 Subject: [PATCH 28/32] Fix icp include directories for in-tree build When zfs is built in-tree using --enable-linux-builtin, the compile commands are executed from the kernel build directory. If the build directory is different from the kernel source directory, passing -Ifs/zfs/icp will not find the headers as they are not present in the build directory. Fix this by adding @abs_top_srcdir@ to pull the headers from the zfs source tree instead. Reviewed-by: Brian Behlendorf Signed-off-by: Arvind Sankar Closes #10021 --- module/icp/Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/icp/Makefile.in b/module/icp/Makefile.in index c3cb2dede3ca..8ce60dec4b4f 100644 --- a/module/icp/Makefile.in +++ b/module/icp/Makefile.in @@ -35,9 +35,9 @@ endif obj-$(CONFIG_ZFS) := $(MODULE).o -asflags-y := -I$(src)/include +asflags-y := -I@abs_top_srcdir@/module/icp/include asflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) -ccflags-y := -I$(src)/include +ccflags-y := -I@abs_top_srcdir@/module/icp/include ccflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) $(MODULE)-objs += illumos-crypto.o From 4a336b06ccbfba26f3c7066a59ac82cc389ad050 Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Fri, 1 Nov 2019 10:37:33 -0700 Subject: [PATCH 29/32] Prefix struct rangelock A struct rangelock already exists on FreeBSD. Add a zfs_ prefix as per our convention to prevent any conflict with existing symbols. This change is a follow up to 2cc479d0. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Matt Macy Closes #9534 --- cmd/ztest/ztest.c | 4 +- include/sys/dmu.h | 2 +- include/sys/zfs_rlock.h | 32 +++++++------- include/sys/zfs_znode.h | 2 +- module/zfs/zfs_rlock.c | 96 ++++++++++++++++++++++------------------- module/zfs/zfs_vnops.c | 6 +-- module/zfs/zfs_znode.c | 8 ++-- module/zfs/zvol.c | 4 +- 8 files changed, 80 insertions(+), 74 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 8fe412672ff2..77cab28f22b4 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2225,7 +2225,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zgd->zgd_private = zd; if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd, + zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_read(os, object, offset, size, buf, @@ -2240,7 +2240,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, offset = 0; } - zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd, + zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, diff --git a/include/sys/dmu.h b/include/sys/dmu.h index dd8d12376cc5..56f20f3b1fd1 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -1042,7 +1042,7 @@ typedef struct zgd { struct lwb *zgd_lwb; struct blkptr *zgd_bp; dmu_buf_t *zgd_db; - struct locked_range *zgd_lr; + struct zfs_locked_range *zgd_lr; void *zgd_private; } zgd_t; diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index 5f1e2a364e48..0ac1561f907f 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -39,40 +39,40 @@ typedef enum { RL_READER, RL_WRITER, RL_APPEND -} rangelock_type_t; +} zfs_rangelock_type_t; -struct locked_range; +struct zfs_locked_range; -typedef void (rangelock_cb_t)(struct locked_range *, void *); +typedef void (zfs_rangelock_cb_t)(struct zfs_locked_range *, void *); -typedef struct rangelock { +typedef struct zfs_rangelock { avl_tree_t rl_tree; /* contains locked_range_t */ kmutex_t rl_lock; - rangelock_cb_t *rl_cb; + zfs_rangelock_cb_t *rl_cb; void *rl_arg; -} rangelock_t; +} zfs_rangelock_t; -typedef struct locked_range { - rangelock_t *lr_rangelock; /* rangelock that this lock applies to */ +typedef struct zfs_locked_range { + zfs_rangelock_t *lr_rangelock; /* rangelock that this lock applies to */ avl_node_t lr_node; /* avl node link */ uint64_t lr_offset; /* file range offset */ uint64_t lr_length; /* file range length */ uint_t lr_count; /* range reference count in tree */ - rangelock_type_t lr_type; /* range type */ + zfs_rangelock_type_t lr_type; /* range type */ kcondvar_t lr_write_cv; /* cv for waiting writers */ kcondvar_t lr_read_cv; /* cv for waiting readers */ uint8_t lr_proxy; /* acting for original range */ uint8_t lr_write_wanted; /* writer wants to lock this range */ uint8_t lr_read_wanted; /* reader wants to lock this range */ -} locked_range_t; +} zfs_locked_range_t; -void zfs_rangelock_init(rangelock_t *, rangelock_cb_t *, void *); -void zfs_rangelock_fini(rangelock_t *); +void zfs_rangelock_init(zfs_rangelock_t *, zfs_rangelock_cb_t *, void *); +void zfs_rangelock_fini(zfs_rangelock_t *); -locked_range_t *zfs_rangelock_enter(rangelock_t *, - uint64_t, uint64_t, rangelock_type_t); -void zfs_rangelock_exit(locked_range_t *); -void zfs_rangelock_reduce(locked_range_t *, uint64_t, uint64_t); +zfs_locked_range_t *zfs_rangelock_enter(zfs_rangelock_t *, + uint64_t, uint64_t, zfs_rangelock_type_t); +void zfs_rangelock_exit(zfs_locked_range_t *); +void zfs_rangelock_reduce(zfs_locked_range_t *, uint64_t, uint64_t); #ifdef __cplusplus } diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index ced5a73867ae..146cf4d9f09a 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -191,7 +191,7 @@ typedef struct znode { krwlock_t z_parent_lock; /* parent lock for directories */ krwlock_t z_name_lock; /* "master" lock for dirent locks */ zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ - rangelock_t z_rangelock; /* file range locks */ + zfs_rangelock_t z_rangelock; /* file range locks */ boolean_t z_unlinked; /* file has been unlinked */ boolean_t z_atime_dirty; /* atime needs to be synced */ boolean_t z_zn_prefetch; /* Prefetch znodes? */ diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index 94203a40c582..454a02a770ca 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -106,8 +106,8 @@ static int zfs_rangelock_compare(const void *arg1, const void *arg2) { - const locked_range_t *rl1 = (const locked_range_t *)arg1; - const locked_range_t *rl2 = (const locked_range_t *)arg2; + const zfs_locked_range_t *rl1 = (const zfs_locked_range_t *)arg1; + const zfs_locked_range_t *rl2 = (const zfs_locked_range_t *)arg2; return (AVL_CMP(rl1->lr_offset, rl2->lr_offset)); } @@ -118,17 +118,17 @@ zfs_rangelock_compare(const void *arg1, const void *arg2) * and may increase the range that's locked for RL_WRITER. */ void -zfs_rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg) +zfs_rangelock_init(zfs_rangelock_t *rl, zfs_rangelock_cb_t *cb, void *arg) { mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&rl->rl_tree, zfs_rangelock_compare, - sizeof (locked_range_t), offsetof(locked_range_t, lr_node)); + sizeof (zfs_locked_range_t), offsetof(zfs_locked_range_t, lr_node)); rl->rl_cb = cb; rl->rl_arg = arg; } void -zfs_rangelock_fini(rangelock_t *rl) +zfs_rangelock_fini(zfs_rangelock_t *rl) { mutex_destroy(&rl->rl_lock); avl_destroy(&rl->rl_tree); @@ -138,14 +138,14 @@ zfs_rangelock_fini(rangelock_t *rl) * Check if a write lock can be grabbed, or wait and recheck until available. */ static void -zfs_rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) +zfs_rangelock_enter_writer(zfs_rangelock_t *rl, zfs_locked_range_t *new) { avl_tree_t *tree = &rl->rl_tree; - locked_range_t *lr; + zfs_locked_range_t *lr; avl_index_t where; uint64_t orig_off = new->lr_offset; uint64_t orig_len = new->lr_length; - rangelock_type_t orig_type = new->lr_type; + zfs_rangelock_type_t orig_type = new->lr_type; for (;;) { /* @@ -178,12 +178,12 @@ zfs_rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) if (lr != NULL) goto wait; /* already locked at same offset */ - lr = (locked_range_t *)avl_nearest(tree, where, AVL_AFTER); + lr = avl_nearest(tree, where, AVL_AFTER); if (lr != NULL && lr->lr_offset < new->lr_offset + new->lr_length) goto wait; - lr = (locked_range_t *)avl_nearest(tree, where, AVL_BEFORE); + lr = avl_nearest(tree, where, AVL_BEFORE); if (lr != NULL && lr->lr_offset + lr->lr_length > new->lr_offset) goto wait; @@ -208,10 +208,10 @@ zfs_rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) * If this is an original (non-proxy) lock then replace it by * a proxy and return the proxy. */ -static locked_range_t * -zfs_rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) +static zfs_locked_range_t * +zfs_rangelock_proxify(avl_tree_t *tree, zfs_locked_range_t *lr) { - locked_range_t *proxy; + zfs_locked_range_t *proxy; if (lr->lr_proxy) return (lr); /* already a proxy */ @@ -223,7 +223,7 @@ zfs_rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) lr->lr_count = 0; /* create a proxy range lock */ - proxy = kmem_alloc(sizeof (locked_range_t), KM_SLEEP); + proxy = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP); proxy->lr_offset = lr->lr_offset; proxy->lr_length = lr->lr_length; proxy->lr_count = 1; @@ -240,9 +240,11 @@ zfs_rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) * Split the range lock at the supplied offset * returning the *front* proxy. */ -static locked_range_t * -zfs_rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) +static zfs_locked_range_t * +zfs_rangelock_split(avl_tree_t *tree, zfs_locked_range_t *lr, uint64_t off) { + zfs_locked_range_t *rear; + ASSERT3U(lr->lr_length, >, 1); ASSERT3U(off, >, lr->lr_offset); ASSERT3U(off, <, lr->lr_offset + lr->lr_length); @@ -250,7 +252,7 @@ zfs_rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) ASSERT(lr->lr_read_wanted == B_FALSE); /* create the rear proxy range lock */ - locked_range_t *rear = kmem_alloc(sizeof (locked_range_t), KM_SLEEP); + rear = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP); rear->lr_offset = off; rear->lr_length = lr->lr_offset + lr->lr_length - off; rear->lr_count = lr->lr_count; @@ -259,7 +261,7 @@ zfs_rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) rear->lr_write_wanted = B_FALSE; rear->lr_read_wanted = B_FALSE; - locked_range_t *front = zfs_rangelock_proxify(tree, lr); + zfs_locked_range_t *front = zfs_rangelock_proxify(tree, lr); front->lr_length = off - lr->lr_offset; avl_insert_here(tree, rear, front, AVL_AFTER); @@ -272,8 +274,10 @@ zfs_rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) static void zfs_rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) { + zfs_locked_range_t *lr; + ASSERT(len != 0); - locked_range_t *lr = kmem_alloc(sizeof (locked_range_t), KM_SLEEP); + lr = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP); lr->lr_offset = off; lr->lr_length = len; lr->lr_count = 1; @@ -285,10 +289,10 @@ zfs_rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) } static void -zfs_rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, - locked_range_t *prev, avl_index_t where) +zfs_rangelock_add_reader(avl_tree_t *tree, zfs_locked_range_t *new, + zfs_locked_range_t *prev, avl_index_t where) { - locked_range_t *next; + zfs_locked_range_t *next; uint64_t off = new->lr_offset; uint64_t len = new->lr_length; @@ -375,10 +379,10 @@ zfs_rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, * Check if a reader lock can be grabbed, or wait and recheck until available. */ static void -zfs_rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) +zfs_rangelock_enter_reader(zfs_rangelock_t *rl, zfs_locked_range_t *new) { avl_tree_t *tree = &rl->rl_tree; - locked_range_t *prev, *next; + zfs_locked_range_t *prev, *next; avl_index_t where; uint64_t off = new->lr_offset; uint64_t len = new->lr_length; @@ -389,7 +393,7 @@ zfs_rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) retry: prev = avl_find(tree, new, &where); if (prev == NULL) - prev = (locked_range_t *)avl_nearest(tree, where, AVL_BEFORE); + prev = avl_nearest(tree, where, AVL_BEFORE); /* * Check the previous range for a writer lock overlap. @@ -415,7 +419,7 @@ zfs_rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) if (prev != NULL) next = AVL_NEXT(tree, prev); else - next = (locked_range_t *)avl_nearest(tree, where, AVL_AFTER); + next = avl_nearest(tree, where, AVL_AFTER); for (; next != NULL; next = AVL_NEXT(tree, next)) { if (off + len <= next->lr_offset) goto got_lock; @@ -447,13 +451,15 @@ zfs_rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) * the range lock structure for later unlocking (or reduce range if the * entire file is locked as RL_WRITER). */ -locked_range_t * -zfs_rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, - rangelock_type_t type) +zfs_locked_range_t * +zfs_rangelock_enter(zfs_rangelock_t *rl, uint64_t off, uint64_t len, + zfs_rangelock_type_t type) { + zfs_locked_range_t *new; + ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); - locked_range_t *new = kmem_alloc(sizeof (locked_range_t), KM_SLEEP); + new = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP); new->lr_rangelock = rl; new->lr_offset = off; if (len + off < off) /* overflow */ @@ -483,10 +489,10 @@ zfs_rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, } /* - * Safely free the locked_range_t. + * Safely free the zfs_locked_range_t. */ static void -zfs_rangelock_free(locked_range_t *lr) +zfs_rangelock_free(zfs_locked_range_t *lr) { if (lr->lr_write_wanted) cv_destroy(&lr->lr_write_cv); @@ -494,14 +500,14 @@ zfs_rangelock_free(locked_range_t *lr) if (lr->lr_read_wanted) cv_destroy(&lr->lr_read_cv); - kmem_free(lr, sizeof (locked_range_t)); + kmem_free(lr, sizeof (zfs_locked_range_t)); } /* * Unlock a reader lock */ static void -zfs_rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, +zfs_rangelock_exit_reader(zfs_rangelock_t *rl, zfs_locked_range_t *remove, list_t *free_list) { avl_tree_t *tree = &rl->rl_tree; @@ -530,11 +536,11 @@ zfs_rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, * then decrement ref count on all proxies * that make up this range, freeing them as needed. */ - locked_range_t *lr = avl_find(tree, remove, NULL); + zfs_locked_range_t *lr = avl_find(tree, remove, NULL); ASSERT3P(lr, !=, NULL); ASSERT3U(lr->lr_count, !=, 0); ASSERT3U(lr->lr_type, ==, RL_READER); - locked_range_t *next = NULL; + zfs_locked_range_t *next = NULL; for (len = remove->lr_length; len != 0; lr = next) { len -= lr->lr_length; if (len != 0) { @@ -555,7 +561,7 @@ zfs_rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, list_insert_tail(free_list, lr); } } - kmem_free(remove, sizeof (locked_range_t)); + kmem_free(remove, sizeof (zfs_locked_range_t)); } } @@ -563,11 +569,11 @@ zfs_rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, * Unlock range and destroy range lock structure. */ void -zfs_rangelock_exit(locked_range_t *lr) +zfs_rangelock_exit(zfs_locked_range_t *lr) { - rangelock_t *rl = lr->lr_rangelock; + zfs_rangelock_t *rl = lr->lr_rangelock; list_t free_list; - locked_range_t *free_lr; + zfs_locked_range_t *free_lr; ASSERT(lr->lr_type == RL_WRITER || lr->lr_type == RL_READER); ASSERT(lr->lr_count == 1 || lr->lr_count == 0); @@ -577,8 +583,8 @@ zfs_rangelock_exit(locked_range_t *lr) * The free list is used to defer the cv_destroy() and * subsequent kmem_free until after the mutex is dropped. */ - list_create(&free_list, sizeof (locked_range_t), - offsetof(locked_range_t, lr_node)); + list_create(&free_list, sizeof (zfs_locked_range_t), + offsetof(zfs_locked_range_t, lr_node)); mutex_enter(&rl->rl_lock); if (lr->lr_type == RL_WRITER) { @@ -592,7 +598,7 @@ zfs_rangelock_exit(locked_range_t *lr) } else { /* * lock may be shared, let rangelock_exit_reader() - * release the lock and free the locked_range_t. + * release the lock and free the zfs_locked_range_t. */ zfs_rangelock_exit_reader(rl, lr, &free_list); } @@ -610,9 +616,9 @@ zfs_rangelock_exit(locked_range_t *lr) * entry in the tree. */ void -zfs_rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) +zfs_rangelock_reduce(zfs_locked_range_t *lr, uint64_t off, uint64_t len) { - rangelock_t *rl = lr->lr_rangelock; + zfs_rangelock_t *rl = lr->lr_rangelock; /* Ensure there are no other locks */ ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index d2cc8dd1d614..af45d10a2c8d 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -485,7 +485,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) /* * Lock the range against changes. */ - locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, + zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, uio->uio_loffset, uio->uio_resid, RL_READER); /* @@ -666,7 +666,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) /* * If in append mode, set the io offset pointer to eof. */ - locked_range_t *lr; + zfs_locked_range_t *lr; if (ioflag & FAPPEND) { /* * Obtain an appending range lock to guarantee file append @@ -4526,7 +4526,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) redirty_page_for_writepage(wbc, pp); unlock_page(pp); - locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, + zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, pgoff, pglen, RL_WRITER); lock_page(pp); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 59b7cd3c81d3..7770e0f687a2 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -90,7 +90,7 @@ int zfs_unlink_suspend_progress = 0; * called with the rangelock_t's rl_lock held, which avoids races. */ static void -zfs_rangelock_cb(locked_range_t *new, void *arg) +zfs_rangelock_cb(zfs_locked_range_t *new, void *arg) { znode_t *zp = arg; @@ -1477,7 +1477,7 @@ zfs_extend(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = ZTOZSB(zp); dmu_tx_t *tx; - locked_range_t *lr; + zfs_locked_range_t *lr; uint64_t newblksz; int error; @@ -1595,7 +1595,7 @@ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfsvfs_t *zfsvfs = ZTOZSB(zp); - locked_range_t *lr; + zfs_locked_range_t *lr; int error; /* @@ -1675,7 +1675,7 @@ zfs_trunc(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = ZTOZSB(zp); dmu_tx_t *tx; - locked_range_t *lr; + zfs_locked_range_t *lr; int error; sa_bulk_attr_t bulk[2]; int count = 0; diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 93719dcca5fd..2d16c44828a1 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -122,7 +122,7 @@ struct zvol_state { uint32_t zv_open_count; /* open counts */ uint32_t zv_changed; /* disk changed */ zilog_t *zv_zilog; /* ZIL handle */ - rangelock_t zv_rangelock; /* for range locking */ + zfs_rangelock_t zv_rangelock; /* for range locking */ dnode_t *zv_dn; /* dnode hold */ dev_t zv_dev; /* device id */ struct gendisk *zv_disk; /* generic disk */ @@ -720,7 +720,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, typedef struct zv_request { zvol_state_t *zv; struct bio *bio; - locked_range_t *lr; + zfs_locked_range_t *lr; } zv_request_t; static void From 1e0428709a59c0106bb30793ff8449966c679d94 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 9 Apr 2020 09:16:46 -0700 Subject: [PATCH 30/32] Linux 5.7 compat: blk_alloc_queue() Commit https://github.com/torvalds/linux/commit/3d745ea5 simplified the blk_alloc_queue() interface by updating it to take the request queue as an argument. Add a wrapper function which accepts the new arguments and internally uses the available interfaces. Other minor changes include increasing the Linux-Maximum to 5.6 now that 5.6 has been released. It was not bumped to 5.7 because this release has not yet been finalized and is still subject to change. Added local 'struct zvol_state_os *zso' variable to zvol_alloc. Reviewed-by: George Melikov Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #10181 Closes #10187 --- META | 2 +- config/kernel-make-request-fn.m4 | 49 ++++++++++++++++++++++++++------ include/linux/blkdev_compat.h | 14 +++++++++ module/zfs/zvol.c | 3 +- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/META b/META index 4871ede9cf77..35753912883e 100644 --- a/META +++ b/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS on Linux -Linux-Maximum: 5.4 +Linux-Maximum: 5.6 Linux-Minimum: 2.6.32 diff --git a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 index 86339aa0450d..d26e12f991b9 100644 --- a/config/kernel-make-request-fn.m4 +++ b/config/kernel-make-request-fn.m4 @@ -25,20 +25,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ],[ blk_queue_make_request(NULL, &make_request); ]) + + ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn], [ + #include + blk_qc_t make_request(struct request_queue *q, + struct bio *bio) { return (BLK_QC_T_NONE); } + ],[ + struct request_queue *q __attribute__ ((unused)); + q = blk_alloc_queue(make_request, NUMA_NO_NODE); + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ dnl # - dnl # Legacy API - dnl # make_request_fn returns int. + dnl # Linux 5.7 API Change + dnl # blk_alloc_queue() expects request function. dnl # - AC_MSG_CHECKING([whether make_request_fn() returns int]) - ZFS_LINUX_TEST_RESULT([make_request_fn_int], [ + AC_MSG_CHECKING([whether blk_alloc_queue() expects request function]) + ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn], [ + AC_MSG_RESULT(yes) + + dnl # Checked as part of the blk_alloc_queue_request_fn test + AC_MSG_CHECKING([whether make_request_fn() returns blk_qc_t]) AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, int, + + AC_DEFINE(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN, 1, + [blk_alloc_queue() expects request function]) + AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, [make_request_fn() return type]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1, - [Noting that make_request_fn() returns int]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, + [Noting that make_request_fn() returns blk_qc_t]) ],[ AC_MSG_RESULT(no) @@ -70,7 +86,24 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ [Noting that make_request_fn() ] [returns blk_qc_t]) ],[ - ZFS_LINUX_TEST_ERROR([make_request_fn]) + AC_MSG_RESULT(no) + + dnl # + dnl # Legacy API + dnl # make_request_fn returns int. + dnl # + AC_MSG_CHECKING( + [whether make_request_fn() returns int]) + ZFS_LINUX_TEST_RESULT([make_request_fn_int], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, int, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, + 1, [Noting that make_request_fn() ] + [returns int]) + ],[ + ZFS_LINUX_TEST_ERROR([make_request_fn]) + ]) ]) ]) ]) diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index ace461dc98f1..f3e97b77b71c 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -669,4 +669,18 @@ blk_generic_end_io_acct(struct request_queue *q, int rw, #endif } +static inline struct request_queue * +blk_generic_alloc_queue(make_request_fn make_request, int node_id) +{ +#if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN) + return (blk_alloc_queue(make_request, node_id)); +#else + struct request_queue *q = blk_alloc_queue(GFP_KERNEL); + if (q != NULL) + blk_queue_make_request(q, make_request); + + return (q); +#endif +} + #endif /* _ZFS_BLKDEV_H */ diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 2d16c44828a1..7a55403e270f 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1703,11 +1703,10 @@ zvol_alloc(dev_t dev, const char *name) mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); - zv->zv_queue = blk_alloc_queue(GFP_ATOMIC); + zv->zv_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); if (zv->zv_queue == NULL) goto out_kmem; - blk_queue_make_request(zv->zv_queue, zvol_request); blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE); /* Limit read-ahead to a single page to prevent over-prefetching. */ From 6c00cc6b5cd01b58336dcaca4becb8c291222ee1 Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Tue, 28 Apr 2020 12:53:45 -0400 Subject: [PATCH 31/32] Add missing zfs_refcount_destroy() in key_mapping_rele() Otherwise when running with reference_tracking_enable=TRUE mounting and unmounting an encrypted dataset panics with: Call Trace: dump_stack+0x66/0x90 slab_err+0xcd/0xf2 ? __kmalloc+0x174/0x260 ? __kmem_cache_shutdown+0x158/0x240 __kmem_cache_shutdown.cold+0x1d/0x115 shutdown_cache+0x11/0x140 kmem_cache_destroy+0x210/0x230 spl_kmem_cache_destroy+0x122/0x3e0 [spl] zfs_refcount_fini+0x11/0x20 [zfs] spa_fini+0x4b/0x120 [zfs] zfs_kmod_fini+0x6b/0xa0 [zfs] _fini+0xa/0x68c [zfs] __x64_sys_delete_module+0x19c/0x2b0 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reviewed-By: Brian Behlendorf Reviewed-By: Tom Caputi Signed-off-by: George Amanakis Closes #10246 --- module/zfs/dsl_crypt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 162a3613c282..581876d7778e 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1008,6 +1008,7 @@ key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag) rw_exit(&spa->spa_keystore.sk_km_lock); spa_keystore_dsl_key_rele(spa, km->km_key, km); + zfs_refcount_destroy(&km->km_refcnt); kmem_free(km, sizeof (dsl_key_mapping_t)); } From f5fd31fa849d0e0d7858e7fb14a84846c1957b13 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Wed, 29 Apr 2020 16:10:33 -0700 Subject: [PATCH 32/32] Tag zfs-0.8.4 META file and changelog updated. Signed-off-by: Tony Hutter TEST_ZIMPORT_SKIP="yes" --- META | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/META b/META index 35753912883e..bc5bbd4100c6 100644 --- a/META +++ b/META @@ -1,7 +1,7 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 0.8.3 +Version: 0.8.4 Release: 1 Release-Tags: relext License: CDDL