From bd6357ea580c21692128fbd1caec62eeabf1a957 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 09:50:54 -0700 Subject: [PATCH 01/36] github actions: Incorporate feedback on workflows Add workflows for pushes and pull requests. Signed-off-by: Greg Rose --- .github/workflows/diffdiff.py | 129 +++++++++++++++++++ .github/workflows/github-actions-demo.yml | 26 ++++ .github/workflows/process-git-request.rb | 140 +++++++++++++++++++++ .github/workflows/process-pull-request.yml | 55 ++++++++ .github/workflows/push-check_aarch64.yml | 33 +++++ .github/workflows/push-check_x86_64.yml | 33 +++++ 6 files changed, 416 insertions(+) create mode 100755 .github/workflows/diffdiff.py create mode 100644 .github/workflows/github-actions-demo.yml create mode 100644 .github/workflows/process-git-request.rb create mode 100644 .github/workflows/process-pull-request.yml create mode 100644 .github/workflows/push-check_aarch64.yml create mode 100644 .github/workflows/push-check_x86_64.yml diff --git a/.github/workflows/diffdiff.py b/.github/workflows/diffdiff.py new file mode 100755 index 000000000000..dc2c5ab0d1e9 --- /dev/null +++ b/.github/workflows/diffdiff.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# coding: utf-8 +# + +import argparse +import copy +import difflib +import io +import git +import os +import re +import subprocess +import sys +import tempfile + +verbose = False + + +def get_upstream_commit(upstream, c): + for l in c.message.splitlines(): + try: + sha = re.match('\s*commit\s+(?P\S+)', l).groups()[0].upper() + return upstream.commit(sha) + except: + True + +def get_diff(d): + dif = '' + df = False + for l in d.splitlines(): + if l[:10] == 'diff --git': + df = True + if not df: + continue + dif = dif + l + '\n' + return dif + + +def trim_unchanged_files(lines): + dl = [] + ld = 0 # Last line with a 'diff --git' we saw + hd = False # Have we seen a changed line since ld? + i = 0 + for i, l in enumerate(lines): + if l[:4] == '+++ ' or l[:4] == '--- ' : + continue + if l[0] == '+' or l[0] == '-': + hd = True + if l[:11] == ' diff --git': + if ld: # We are at a new diff now, last one started at 'ld' + if not hd: + dl.insert(0, (ld, i+1),) + ld = i + hd = False # Reset hasdiff to False as we start a new section + # and check the tail + if not hd: + dl.insert(0, (ld, i+1),) + # delete the unchanged file sections + for d in dl: + del lines[d[0]:d[1]] + return lines + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-v', action='store_true', help='Verbose') + parser.add_argument('--colour', action='store_true', help='Colorize the diff. Green for additions, red for deletions') + parser.add_argument('--commit', help='Commit in current tree to diffdiff. Default is the most recent commit.') + parser.add_argument('--upstream', help='A directory that contains the current upstream of linus kernel tree where we can find the commits we reference. Default is the current repo') + args = parser.parse_args() + + + if args.v: + verbose = True + + srcgit = git.Repo.init('.') + upstream = git.Repo.init(args.upstream) + c = srcgit.head.commit if not args.commit else srcgit.commit(args.commit) + uc = get_upstream_commit(upstream, c) + + dc = get_diff(srcgit.git.show(c)) + duc = get_diff(upstream.git.show(uc)) + + with open('c.diff', 'w') as f: + f.write(dc) + with open('u.diff', 'w') as f: + f.write(duc) + + res = subprocess.run(['diff', '-u', 'u.diff', 'c.diff'], + check=False, stdout=subprocess.PIPE) + lines = res.stdout.splitlines() + dd = [] + for l in lines: + l = str(l)[2:-1] + if l[:6] == '-index': + continue + if l[:6] == '+index': + continue + if l[:3] == '-@@': + continue + if l[:3] == '+@@': + dd.append(' ' + l[1:]) + continue + dd.append(l) + + # trim diffs for files that did not change + lines = trim_unchanged_files(dd) + + # colorize the diff + diffs = 0 + if args.colour: + dd = [] + for l in lines: + if l[0:4] != '+++ ' and l[0:4] != '--- ': + if l[0] == '+': + l = '\033[42m' + l + '\033[0m' + diffs = diffs + 1 + if l[0] == '-': + l = '\033[41m' + l + '\033[0m' + diffs = diffs + 1 + dd.append(l) + lines = dd + + + if diffs: + for l in lines: + print(l) + + sys.exit(diffs) diff --git a/.github/workflows/github-actions-demo.yml b/.github/workflows/github-actions-demo.yml new file mode 100644 index 000000000000..de3dbc4d34b9 --- /dev/null +++ b/.github/workflows/github-actions-demo.yml @@ -0,0 +1,26 @@ +name: GitHub Actions Sanity Check +run-name: ${{ github.actor }} is running actions - this runs as a sanity check 🚀 +on: + push: + branches: + - '**' + - '!mainline' + +jobs: + Explore-GitHub-Actions: + runs-on: ubuntu-latest + steps: + - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" + - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." + - name: Check out repository code + uses: actions/checkout@v4 + - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." + - run: echo "🖥️ The workflow is now ready to test your code on the runner." + - name: List files in the repository + run: | + ls ${{ github.workspace }} + df . + df / + pwd + - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/.github/workflows/process-git-request.rb b/.github/workflows/process-git-request.rb new file mode 100644 index 000000000000..a93c46ee3088 --- /dev/null +++ b/.github/workflows/process-git-request.rb @@ -0,0 +1,140 @@ +require 'open3' + +requestors = { "gvrose8192" => "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" } + +def file_prepend(file, str) + new_contents = "" + File.open(file, 'r') do |fd| + contents = fd.read + new_contents = str << contents + end + # Overwrite file but now with prepended string on it + File.open(file, 'w') do |fd| + fd.write(new_contents) + end +end + +def process_git_request(fname, target_branch, source_branch, prj_dir) + retcode = 200 #presume success +# puts "Opening file " + fname + file = File.new(fname, "w") + working_dir = prj_dir +# puts "Working Dir : " + working_dir + Dir.chdir working_dir +# puts "pwd : " + Dir.pwd + git_cmd = "git log --oneline --no-abbrev-commit origin/" + target_branch + ".." + "origin/" + source_branch +# puts git_cmd + out, err, status = Open3.capture3(git_cmd) + if status.exitstatus != 0 + puts "Command error output is " + err + file.write("Command error output is " + err) + file.close + retcode = 201 + return retcode + end + output_lines = out.split(' ') +# we just want the commit sha IDs + output_lines.each { |x| +# puts "This is output_lines " + x + upstream_diff = false + if !x[/\H/] + if x.length < 40 + next + end + git_cmd = "git show " + x + gitlog_out, gitlog_err, gitlog_status = Open3.capture3(git_cmd) + if gitlog_status.exitstatus != 0 + file.write("git show command error output is " + gitlog_err) + retcode = 201 + end + loglines = gitlog_out.lines.map(&:chomp) + lines_counted = 0 + local_diffdiff_sha = "" + upstream_diffdiff_sha = "" + loglines.each { |logline| + lines_counted = lines_counted + 1 + if lines_counted == 1 + local_commit_sha = logline.match("[0-9a-f]\{40\}") + local_diffdiff_sha = local_commit_sha.to_s +# puts "Local : " + local_diffdiff_sha + file.write("Merge Request sha: " + local_diffdiff_sha) + file.write("\n") + end + if lines_counted == 2 #email address + if !logline.downcase.include? "ciq.com" + # Bad Author + s = "error:\nBad " + logline + "\n" + puts s + file.write(s) + retcode = 201 + else + file.write("\t" + logline + "\n") + end + end + if lines_counted > 1 + if logline.downcase.include? "jira" + file.write("\t" + logline + "\n") + end + if logline.downcase.include? "upstream-diff" + upstream_diff = true + end + if logline.downcase.include? "commit" + commit_sha = logline.match("[0-9a-f]\{40\}") + upstream_diffdiff_sha = commit_sha.to_s +# puts "Upstream : " + upstream_diffdiff_sha + if (!upstream_diffdiff_sha.empty?) + file.write("\tUpstream sha: " + upstream_diffdiff_sha) + file.write("\n") + end + end + end + if lines_counted > 8 #Everything we need should be in the first 8 lines + break + end + } + if !local_diffdiff_sha.empty? && !upstream_diffdiff_sha.empty? + diff_cmd = Dir.pwd + "/.github/workflows/diffdiff.py --colour --commit " + local_diffdiff_sha + puts "diffdiff: " + diff_cmd + diff_out, diff_err, diff_status = Open3.capture3(diff_cmd) + if diff_status.exitstatus != 0 && !upstream_diff + puts "diffdiff out: " + diff_out + puts "diffdiff err: " + diff_err + retcode = 201 + file.write("error:\nCommit: " + local_diffdiff_sha + " differs with no upstream tag in commit message\n") + end + end + end + } + file.close + return retcode +end + +first_arg, *argv_in = ARGV +if argv_in.length < 5 + puts "Not enough arguments: fname, target_branch, source_branch, prj_dir, pull_request, requestor" + exit +end +fname = first_arg.to_s +fname = "tmp-" + fname +# puts "filename is " + fname +target_branch = argv_in[0].to_s +# puts "target branch is " + target_branch +source_branch = argv_in[1].to_s +# puts "source branch is " + source_branch +prj_dir = argv_in[2].to_s +# puts "project dir is " + prj_dir +pullreq = argv_in[3].to_s +# puts "pull request is " + pullreq +requestor = argv_in[4].to_s +retcode = process_git_request(fname, target_branch, source_branch, prj_dir) +if retcode != 200 + File.open(fname, 'r') do |fd| + contents = fd.read + puts contents + end + exit(1) +else + puts "Done" +end +exit(0) + diff --git a/.github/workflows/process-pull-request.yml b/.github/workflows/process-pull-request.yml new file mode 100644 index 000000000000..a4f9f43fa425 --- /dev/null +++ b/.github/workflows/process-pull-request.yml @@ -0,0 +1,55 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Pull Request Checker + +on: + pull_request: + branches: + - '**' + - '!mainline' + +permissions: + contents: read + +jobs: + test: + + runs-on: + labels: kernel-build + strategy: + matrix: + ruby-version: ['3.0'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Ruby + # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby, + # change this to (see https://github.com/ruby/setup-ruby#versioning): + uses: ruby/setup-ruby@v1 + # uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0 + with: + ruby-version: ${{ matrix.ruby-version }} + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + - name: Set up Python + uses: actions/setup-python@v5 + - name: Run tests + run: | + /usr/bin/pip3 install gitPython + python -c "import sys; import git; print(sys.version)" + git fetch origin ${{ github.base_ref }} + git fetch origin ${{ github.head_ref }} + git remote add linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + git fetch --shallow-since="3 years ago" linux + echo "Will run process-git-request.rb with:" + echo "fname = ${{ github.run_id }}" + echo "target_branch = ${{ github.base_ref }}" + echo "source_branch = ${{ github.head_ref }}" + echo "prj_dir = ${{ github.workspace }}" + echo "pull_request = ${{ github.ref }}" + echo "requestor = ${{ github.actor }}" + cd ${{ github.workspace }} + /usr/bin/ruby .github/workflows/process-git-request.rb ${{ github.run_id }} ${{ github.base_ref }} \ + ${{ github.head_ref }} ${{ github.workspace }} ${{ github.ref }} ${{ github.actor }} diff --git a/.github/workflows/push-check_aarch64.yml b/.github/workflows/push-check_aarch64.yml new file mode 100644 index 000000000000..2dda81c43aa7 --- /dev/null +++ b/.github/workflows/push-check_aarch64.yml @@ -0,0 +1,33 @@ +name: CI +on: + push: + branches: + - '**' + - '!mainline' + +jobs: + kernel-build-job: + runs-on: + labels: kernel-build-arm64 + container: + image: rockylinux:8 + env: + ROCKY_ENV: rocky8 + ports: + - 80 + options: --cpus 8 + steps: + - name: Install tools and Libraries + run: | + dnf groupinstall 'Development Tools' -y + dnf install --enablerepo=devel bc dwarves kernel-devel openssl-devel elfutils-libelf-devel -y + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Build the Kernel + run: | + git config --global --add safe.directory /__w/kernel-src-git/kernel-src-git + cp configs/kernel-4.18.0-aarch64.config .config + make olddefconfig + make -j8 diff --git a/.github/workflows/push-check_x86_64.yml b/.github/workflows/push-check_x86_64.yml new file mode 100644 index 000000000000..2aa1eb2ed4f1 --- /dev/null +++ b/.github/workflows/push-check_x86_64.yml @@ -0,0 +1,33 @@ +name: CI +on: + push: + branches: + - '**' + - '!mainline' + +jobs: + kernel-build-job: + runs-on: + labels: kernel-build + container: + image: rockylinux:8 + env: + ROCKY_ENV: rocky8 + ports: + - 80 + options: --cpus 8 + steps: + - name: Install tools and Libraries + run: | + dnf groupinstall 'Development Tools' -y + dnf install --enablerepo=devel bc dwarves kernel-devel openssl-devel elfutils-libelf-devel -y + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Build the Kernel + run: | + git config --global --add safe.directory /__w/kernel-src-git/kernel-src-git + cp configs/kernel-4.18.0-x86_64.config .config + make olddefconfig + make -j8 From c3df0adbf156b298dcd331feace66e8b9ec4b5de Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 28 Oct 2024 08:56:30 -0700 Subject: [PATCH 02/36] ionic: clean interrupt before enabling queue to avoid credit race jira VULN-5563 pre cve CVE-2024-39502 commit-author Neel Patel commit e8797a058466b60fc5a3291b92430c93ba90eaff upstream-diff some fuzz around placement of the napi_enable() call - the fix for CVE-2024-39502 will place it correctly. Clear the interrupt credits before enabling the queue rather than after to be sure that the enabled queue starts at 0 and that we don't wipe away possible credits after enabling the queue. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Neel Patel Signed-off-by: Shannon Nelson Reviewed-by: Leon Romanovsky Signed-off-by: Jakub Kicinski (cherry picked from commit e8797a058466b60fc5a3291b92430c93ba90eaff) Signed-off-by: Greg Rose --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ab27ddf10e15..49ceb07938d9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -267,6 +267,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) .oper = IONIC_Q_ENABLE, }, }; + int ret; idev = &lif->ionic->idev; dev = lif->ionic->dev; @@ -274,6 +275,16 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n", ctx.cmd.q_control.index, ctx.cmd.q_control.type); + if (qcq->flags & IONIC_QCQ_F_INTR) + ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); + + ret = ionic_adminq_post_wait(lif, &ctx); + if (ret) + return ret; + + if (qcq->napi.poll) + napi_enable(&qcq->napi); + if (qcq->flags & IONIC_QCQ_F_INTR) { irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); @@ -283,7 +294,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) IONIC_INTR_MASK_CLEAR); } - return ionic_adminq_post_wait(lif, &ctx); + return 0; } static int ionic_qcq_disable(struct ionic_lif *lif, struct ionic_qcq *qcq, int fw_err) From f443dda6438c2f61028e23bc45d08b27bee21ad3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 7 Oct 2024 12:07:28 -0700 Subject: [PATCH 03/36] ionic: fix use after netif_napi_del() jira VULN-5563 cve CVE-2024-39502 commit-author Taehee Yoo commit 79f18a41dd056115d685f3b0a419c7cd40055e13 upstream-diff There's other modifications in and around this code in the upstream patch. Also, the napi_enable from the patch gets placed on the wrong line. Remove the extra cruft and move the napi_enable() to where it should be according to the commit. When queues are started, netif_napi_add() and napi_enable() are called. If there are 4 queues and only 3 queues are used for the current configuration, only 3 queues' napi should be registered and enabled. The ionic_qcq_enable() checks whether the .poll pointer is not NULL for enabling only the using queue' napi. Unused queues' napi will not be registered by netif_napi_add(), so the .poll pointer indicates NULL. But it couldn't distinguish whether the napi was unregistered or not because netif_napi_del() doesn't reset the .poll pointer to NULL. So, ionic_qcq_enable() calls napi_enable() for the queue, which was unregistered by netif_napi_del(). Reproducer: ethtool -L rx 1 tx 1 combined 0 ethtool -L rx 0 tx 0 combined 1 ethtool -L rx 0 tx 0 combined 4 Splat looks like: kernel BUG at net/core/dev.c:6666! Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1057 Comm: kworker/3:3 Not tainted 6.10.0-rc2+ #16 Workqueue: events ionic_lif_deferred_work [ionic] RIP: 0010:napi_enable+0x3b/0x40 Code: 48 89 c2 48 83 e2 f6 80 b9 61 09 00 00 00 74 0d 48 83 bf 60 01 00 00 00 74 03 80 ce 01 f0 4f RSP: 0018:ffffb6ed83227d48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff97560cda0828 RCX: 0000000000000029 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff97560cda0a28 RBP: ffffb6ed83227d50 R08: 0000000000000400 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: ffff97560ce3c1a0 R14: 0000000000000000 R15: ffff975613ba0a20 FS: 0000000000000000(0000) GS:ffff975d5f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f734ee200 CR3: 0000000103e50000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? die+0x33/0x90 ? do_trap+0xd9/0x100 ? napi_enable+0x3b/0x40 ? do_error_trap+0x83/0xb0 ? napi_enable+0x3b/0x40 ? napi_enable+0x3b/0x40 ? exc_invalid_op+0x4e/0x70 ? napi_enable+0x3b/0x40 ? asm_exc_invalid_op+0x16/0x20 ? napi_enable+0x3b/0x40 ionic_qcq_enable+0xb7/0x180 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_start_queues+0xc4/0x290 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_link_status_check+0x11c/0x170 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_lif_deferred_work+0x129/0x280 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] process_one_work+0x145/0x360 worker_thread+0x2bb/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Taehee Yoo Reviewed-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240612060446.1754392-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski (cherry picked from commit 79f18a41dd056115d685f3b0a419c7cd40055e13) Signed-off-by: Greg Rose Conflicts: drivers/net/ethernet/pensando/ionic/ionic_lif.c --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 49ceb07938d9..0899de5dcec0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -286,9 +286,9 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) napi_enable(&qcq->napi); if (qcq->flags & IONIC_QCQ_F_INTR) { + napi_enable(&qcq->napi); irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); - napi_enable(&qcq->napi); ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_CLEAR); From d37ab595fcd252244bd162254d651c0c5c4f4598 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 17 Oct 2024 16:19:33 -0700 Subject: [PATCH 04/36] netfilter: nftables: add catch-all set element support jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit aaa31047a6d25da0fa101da1ed544e1247949b40 upstream-diff We only take one small piece of code from this patch. The netfilters folks made a big NO NO by making a commit with a new feature as well as adding some additional safety checks. Red Hat took the additional safety checks but without any of the rest of this rather large upstream patch but this commit has the necessary bits for backporting the remaining netfilter bits for this VULN ticket. This patch extends the set infrastructure to add a special catch-all set element. If the lookup fails to find an element (or range) in the set, then the catch-all element is selected. Users can specify a mapping, expression(s) and timeout to be attached to the catch-all element. This patch adds a catchall list to the set, this list might contain more than one single catch-all element (e.g. in case that the catch-all element is removed and a new one is added in the same transaction). However, most of the time, there will be either one element or no elements at all in this list. The catch-all element is identified via NFT_SET_ELEM_CATCHALL flag and such special element has no NFTA_SET_ELEM_KEY attribute. There is a new nft_set_elem_catchall object that stores a reference to the dummy catch-all element (catchall->elem) whose layout is the same of the set element type to reuse the existing set element codebase. The set size does not apply to the catch-all element, users can define a catch-all element even if the set is full. The check for valid set element flags hava been updates to report EOPNOTSUPP in case userspace requests flags that are not supported when using new userspace nftables and old kernel. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fea6a2d16778..3dd7ae16126a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4630,7 +4630,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) && + nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; @@ -5224,6 +5225,13 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx, return -ENOMEM; } +static void nft_setelem_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + set->ops->remove(net, set, elem); +} + static bool nft_setelem_valid_key_end(const struct nft_set *set, struct nlattr **nla, u32 flags) { @@ -5577,7 +5585,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return 0; err_set_full: - set->ops->remove(ctx->net, set, &elem); + nft_setelem_remove(ctx->net, set, &elem); err_element_clash: kfree(trans); err_elem_expr: From ed9224f22af7756b13dac606a8abe5e099127523 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Sun, 27 Oct 2024 10:23:51 -0700 Subject: [PATCH 05/36] netfilter: nf_tables: Remove spurious code jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 Remove spurious code that does not exist in 4.18.0-534 but is still hanging around. Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3dd7ae16126a..82de162db72d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5535,10 +5535,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { - *nft_set_ext_obj(ext) = obj; - obj->use++; - } err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs); if (err < 0) goto err_elem_expr; From 53282264e4d9ee60c1246a0d7bcfc95650a7642f Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 18 Oct 2024 14:38:08 -0700 Subject: [PATCH 06/36] netfilter: nf_tables: honor NLM_F_CREATE and NLM_F_EXCL in event notification jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 6fb721cf781808ee2ca5e737fb0592cc68de3381 upstream-diff The catchall features from previous commits were not actually backported by Red Hat, so this patch has to work around that. Include the NLM_F_CREATE and NLM_F_EXCL flags in netlink event notifications, otherwise userspace cannot distiguish between create and add commands. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 6fb721cf781808ee2ca5e737fb0592cc68de3381) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 4 +-- net/netfilter/nf_tables_api.c | 49 +++++++++++++++++++++++-------- net/netfilter/nft_quota.c | 2 +- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index afd104d4fe75..3d462d287293 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1167,8 +1167,8 @@ struct nft_object *nft_obj_lookup(const struct net *net, u8 genmask); void nft_obj_notify(struct net *net, const struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); + struct nft_object *obj, u32 portid, u32 seq, int event, + u16 flags, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 82de162db72d..cb271d49c97a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -760,6 +760,7 @@ static void nft_notify_enqueue(struct sk_buff *skb, bool report, static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -770,8 +771,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table); + event, flags, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -1493,6 +1497,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -1503,8 +1508,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table, + event, flags, ctx->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -2767,6 +2775,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -2777,6 +2786,9 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, 0, ctx->family, ctx->table, ctx->chain, rule, NULL); @@ -3810,8 +3822,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { - struct sk_buff *skb; u32 portid = ctx->portid; + struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -3822,7 +3835,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + + err = nf_tables_fill_set(skb, ctx, set, event, flags); if (err < 0) { kfree_skb(skb); goto err; @@ -5011,11 +5027,12 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, - int event, u16 flags) + int event) { struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -5025,6 +5042,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, set, elem); if (err < 0) { @@ -6492,7 +6512,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) + u16 flags, int family, int report, gfp_t gfp) { struct sk_buff *skb; int err; @@ -6516,8 +6536,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, if (skb == NULL) goto err; - err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, - table, obj, false); + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, + flags & (NLM_F_CREATE | NLM_F_EXCL), + family, table, obj, false); if (err < 0) { kfree_skb(skb); goto err; @@ -6534,7 +6555,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->family, ctx->report, GFP_KERNEL); + ctx->flags, ctx->family, ctx->report, GFP_KERNEL); } /* @@ -7148,6 +7169,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, int event) { struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -7158,8 +7180,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, - ctx->seq, event, 0, + ctx->seq, event, flags, ctx->family, flowtable); if (err < 0) { kfree_skb(skb); @@ -7999,7 +8024,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te->set->ops->activate(net, te->set, &te->elem); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_NEWSETELEM, 0); + NFT_MSG_NEWSETELEM); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8007,7 +8032,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_DELSETELEM, 0); + NFT_MSG_DELSETELEM); te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index e0c6e843cb22..cf5cf49681d7 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -63,7 +63,7 @@ static void nft_quota_obj_eval(struct nft_object *obj, if (overquota && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, - NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); + NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); } static int nft_quota_do_init(const struct nlattr * const tb[], From 80355bf1173b8fb852f72a631a6baa9b51a2a45c Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 09:40:55 -0700 Subject: [PATCH 07/36] netfilter: nf_tables: integrate pipapo into commit protocol jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 212ed75dc5fb9d1423b3942c8f872a868cda3466 upstream-diff The offsets for the code are too divergent for the upstream patch to apply cleanly, but except for that fuzz I believe it is correct. The pipapo set backend follows copy-on-update approach, maintaining one clone of the existing datastructure that is being updated. The clone and current datastructures are swapped via rcu from the commit step. The existing integration with the commit protocol is flawed because there is no operation to clean up the clone if the transaction is aborted. Moreover, the datastructure swap happens on set element activation. This patch adds two new operations for sets: commit and abort, these new operations are invoked from the commit and abort steps, after the transactions have been digested, and it updates the pipapo set backend to use it. This patch adds a new ->pending_update field to sets to maintain a list of sets that require this new commit and abort operations. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 212ed75dc5fb9d1423b3942c8f872a868cda3466) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 3 ++ net/netfilter/nf_tables_api.c | 56 +++++++++++++++++++++++++++++++ net/netfilter/nft_set_pipapo.c | 53 ++++++++++++++++++++--------- 3 files changed, 97 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d462d287293..a34c506de01a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -388,6 +388,8 @@ struct nft_set_ops { const struct nft_set_elem *elem, unsigned int flags); + void (*commit)(const struct nft_set *set); + void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, @@ -489,6 +491,7 @@ struct nft_set { u16 policy; u16 udlen; unsigned char *udata; + struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags:14, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cb271d49c97a..17fb43e24646 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4311,6 +4311,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) @@ -7878,9 +7879,24 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; @@ -8025,6 +8041,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8036,6 +8057,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -8074,6 +8100,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, net->nft.base_seq); @@ -8126,9 +8154,24 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; if (action == NFNL_ABORT_VALIDATE && @@ -8211,6 +8254,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) te = (struct nft_trans_elem *)trans->data; te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; @@ -8219,6 +8268,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -8249,6 +8303,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 60cc5e253fd1..9ae3743c0734 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1495,17 +1495,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m) } } -/** - * pipapo_reclaim_match - RCU callback to free fields from old matching data - * @rcu: RCU head - */ -static void pipapo_reclaim_match(struct rcu_head *rcu) +static void pipapo_free_match(struct nft_pipapo_match *m) { - struct nft_pipapo_match *m; int i; - m = container_of(rcu, struct nft_pipapo_match, rcu); - for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); @@ -1517,7 +1510,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) } /** - * pipapo_commit() - Replace lookup data with current working copy + * pipapo_reclaim_match - RCU callback to free fields from old matching data + * @rcu: RCU head + */ +static void pipapo_reclaim_match(struct rcu_head *rcu) +{ + struct nft_pipapo_match *m; + + m = container_of(rcu, struct nft_pipapo_match, rcu); + pipapo_free_match(m); +} + +/** + * nft_pipapo_commit() - Replace lookup data with current working copy * @set: nftables API set representation * * While at it, check if we should perform garbage collection on the working @@ -1527,7 +1532,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1552,6 +1557,26 @@ static void pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static void nft_pipapo_abort(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *new_clone, *m; + + if (!priv->dirty) + return; + + m = rcu_dereference(priv->match); + + new_clone = pipapo_clone(m); + if (IS_ERR(new_clone)) + return; + + priv->dirty = false; + + pipapo_free_match(priv->clone); + priv->clone = new_clone; +} + /** * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace @@ -1559,8 +1584,7 @@ static void pipapo_commit(const struct nft_set *set) * @elem: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently - * in use for lookups, and not directly inserted into current lookup data, so - * we'll take care of that by calling pipapo_commit() here. Both + * in use for lookups, and not directly inserted into current lookup data. Both * nft_pipapo_insert() and nft_pipapo_activate() are called once for each * element, hence we can't purpose either one as a real commit operation. */ @@ -1576,8 +1600,6 @@ static void nft_pipapo_activate(const struct net *net, nft_set_elem_change_active(net, set, &e->ext); nft_set_elem_clear_busy(&e->ext); - - pipapo_commit(set); } /** @@ -1823,7 +1845,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, if (i == m->field_count) { priv->dirty = true; pipapo_drop(m, rulemap); - pipapo_commit(set); return; } @@ -2132,6 +2153,8 @@ struct nft_set_type nft_set_pipapo_type __read_mostly = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; From 03506827183ce71edad4fa219824aad5f01b8a10 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 10:43:19 -0700 Subject: [PATCH 08/36] netfilter: nf_tables: disallow element updates of bound anonymous sets jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit c88c535b592d3baeee74009f3eceeeaf0fdd5e1b Anonymous sets come with NFT_SET_CONSTANT from userspace. Although API allows to create anonymous sets without NFT_SET_CONSTANT, it makes no sense to allow to add and to delete elements for bound anonymous sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit c88c535b592d3baeee74009f3eceeeaf0fdd5e1b) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 17fb43e24646..ba549cf76265 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5648,7 +5648,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -5853,7 +5854,9 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { From ccdc2f63a6b6bdd5c583d1fca064d721142c031f Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 12:00:03 -0700 Subject: [PATCH 09/36] netfilter: nf_tables: disallow updates of anonymous sets jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit b770283c98e0eee9133c47bc03b6cc625dc94723 Disallow updates of set timeout and garbage collection parameters for anonymous sets. Fixes: 123b99619cca ("netfilter: nf_tables: honor set timeout and garbage collection updates") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit b770283c98e0eee9133c47bc03b6cc625dc94723) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ba549cf76265..6a183e0021c9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4207,6 +4207,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + return 0; } From 11494295a242f19d0242220a87d6e06f1e497ff3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 14:04:44 -0700 Subject: [PATCH 10/36] netfilter: nf_tables: disallow timeout for anonymous sets jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit e26d3009efda338f19016df4175f354a9bd0a4ab Never used from userspace, disallow these parameters. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit e26d3009efda338f19016df4175f354a9bd0a4ab) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6a183e0021c9..f9977814bbdf 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4156,6 +4156,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); if (err) return err; @@ -4164,6 +4167,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -5348,6 +5355,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT], &timeout); if (err) From 7dd04734583497f4b12cc50e513ea7e342e77cfb Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 21 Oct 2024 14:41:41 -0700 Subject: [PATCH 11/36] netfilter: nf_tables: add nft_chain_add() jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 04b7db414490ea9254d0c1d8930ea9571f8ce9f0 upstream-diff same as most in this series, patch looks identical but has offset fuzz. This patch adds a helper function to add the chain to the hashtable and the chain list. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 04b7db414490ea9254d0c1d8930ea9571f8ce9f0) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f9977814bbdf..dbd910c5c335 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1986,6 +1986,20 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } +static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +{ + int err; + + err = rhltable_insert_key(&table->chains_ht, chain->name, + &chain->rhlhead, nft_chain_ht_params); + if (err) + return err; + + list_add_tail_rcu(&chain->list, &table->chains); + + return 0; +} + static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, u8 policy, u32 flags) { @@ -2065,16 +2079,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; - err = rhltable_insert_key(&table->chains_ht, chain->name, - &chain->rhlhead, nft_chain_ht_params); - if (err) - goto err2; - trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - rhltable_remove(&table->chains_ht, &chain->rhlhead, - nft_chain_ht_params); goto err2; } @@ -2082,9 +2089,13 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nft_is_base_chain(chain)) nft_trans_chain_policy(trans) = policy; + err = nft_chain_add(table, chain); + if (err < 0) { + nft_trans_destroy(trans); + goto err2; + } table->use++; - list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: From 5cc169dfdc0b557bc2c7970ff4d7138321025ab3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 09:34:30 -0700 Subject: [PATCH 12/36] netfilter: nf_tables: report use refcount overflow jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 1689f25924ada8fe14a4a82c38925d04994c7142 upstream-diff This cherry pick is a complete mess and I tried to follow the 5.18.0-534 code as the guiding light, but the upstream diff is a large. Overflow use refcount checks are not complete. Add helper function to deal with object reference counter tracking. Report -EMFILE in case UINT_MAX is reached. nft_use_dec() splats in case that reference counter underflows, which should not ever happen. Add nft_use_inc_restore() and nft_use_dec_restore() which are used to restore reference counter from error and abort paths. Use u32 in nft_flowtable and nft_object since helper functions cannot work on bitfields. Remove the few early incomplete checks now that the helper functions are in place and used to check for refcount overflow. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 1689f25924ada8fe14a4a82c38925d04994c7142) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 31 ++++- net/netfilter/nf_tables_api.c | 189 ++++++++++++++++++------------ net/netfilter/nft_flow_offload.c | 6 +- net/netfilter/nft_objref.c | 8 +- 4 files changed, 151 insertions(+), 83 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a34c506de01a..2865638e0a79 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1079,6 +1079,29 @@ int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); +static inline bool nft_use_inc(u32 *use) +{ + if (*use == UINT_MAX) + return false; + + (*use)++; + + return true; +} + +static inline void nft_use_dec(u32 *use) +{ + WARN_ON_ONCE((*use)-- == 0); +} + +/* For error and abort path: restore use counter to previous state. */ +static inline void nft_use_inc_restore(u32 *use) +{ + WARN_ON_ONCE(!nft_use_inc(use)); +} + +#define nft_use_dec_restore nft_use_dec + /** * struct nft_table - nf_tables table * @@ -1148,8 +1171,8 @@ struct nft_object { struct list_head list; struct rhlist_head rhlhead; struct nft_object_hash_key key; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; @@ -1249,8 +1272,8 @@ struct nft_flowtable { char *name; int hooknum; int ops_len; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; /* runtime data below here */ struct list_head hook_list ____cacheline_aligned; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dbd910c5c335..a10c3d593df0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -349,7 +349,7 @@ static int nft_delchain(struct nft_ctx *ctx) if (IS_ERR(trans)) return PTR_ERR(trans); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return 0; @@ -390,7 +390,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; @@ -490,7 +490,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) return err; nft_deactivate_next(ctx->net, set); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -522,7 +522,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; nft_deactivate_next(ctx->net, obj); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -556,7 +556,7 @@ static int nft_delflowtable(struct nft_ctx *ctx, return err; nft_deactivate_next(ctx->net, flowtable); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -2012,9 +2012,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_rule **rules; int err; - if (table->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_CHAIN_HOOK]) { struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; @@ -2079,6 +2076,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err_use; + } + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2095,10 +2097,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err2; } - table->use++; - return 0; err2: + nft_use_dec_restore(&table->use); +err_use: nf_tables_unregister_hook(net, table, chain); err1: nf_tables_chain_destroy(ctx); @@ -3158,9 +3160,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EINVAL; handle = nf_tables_alloc_handle(table); - if (chain->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); old_rule = __nft_rule_lookup(chain, pos_handle); @@ -3250,6 +3249,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } + if (!nft_use_inc(&chain->use)) { + err = -EMFILE; + goto err_release_rule; + } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { @@ -3283,7 +3287,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } kvfree(info); - chain->use++; if (flow) nft_trans_flow_rule(trans) = flow; @@ -3293,6 +3296,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err_destroy_flow_rule: + nft_use_dec_restore(&chain->use); if (flow) nft_flow_rule_destroy(flow); err_release_rule: @@ -4246,10 +4250,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (ops->privsize != NULL) size = ops->privsize(nla, &desc); + if (!nft_use_inc(&table->use)) + return -EMFILE; + set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (!set) { err = -ENOMEM; - goto err1; + goto err_alloc; } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); @@ -4339,7 +4346,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); - table->use++; + return 0; err_set_expr_alloc: @@ -4351,8 +4358,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, kfree(set->name); err_set_name: kvfree(set); -err1: +err_alloc: module_put(to_set_type(ops)->owner); + nft_use_dec_restore(&table->use); + return err; } @@ -4436,9 +4445,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (set->use == UINT_MAX) - return -EOVERFLOW; - if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -4463,10 +4469,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, return iter.err; } bind: + if (!nft_use_inc(&set->use)) + return -EMFILE; + binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); - set->use++; return 0; } @@ -4491,7 +4499,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) if (nft_set_is_anonymous(set)) nft_clear(ctx->net, set); - set->use++; + nft_use_inc_restore(&set->use); } EXPORT_SYMBOL_GPL(nf_tables_activate_set); @@ -4507,18 +4515,17 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, else list_del_rcu(&binding->list); - set->use--; + nft_use_dec(&set->use); break; case NFT_TRANS_PREPARE: if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); - - set->use--; + nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - set->use--; - /* fall through */ + nft_use_dec(&set->use); + fallthrough; default: nf_tables_unbind_set(ctx, set, binding, phase == NFT_TRANS_COMMIT); @@ -5189,7 +5196,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -5496,8 +5503,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); + obj = NULL; + goto err_parse_key_end; + } + + if (!nft_use_inc(&obj->use)) { + err = -EMFILE; + obj = NULL; goto err_parse_key_end; } + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); if (err < 0) goto err_parse_key_end; @@ -5576,6 +5591,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; + + if (obj) + *nft_set_ext_obj(ext) = obj; + if (ulen > 0) { udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; @@ -5631,14 +5650,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err_element_clash: kfree(trans); err_elem_expr: - if (obj) - obj->use--; - nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&data, desc.type); err_parse_key_end: + if (obj) + nft_use_dec_restore(&obj->use); + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); @@ -5702,11 +5721,14 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, */ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { + struct nft_chain *chain; + if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use++; + chain = data->verdict.chain; + nft_use_inc_restore(&chain->use); break; } } @@ -5721,7 +5743,7 @@ static void nft_set_elem_activate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use++; + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } static void nft_set_elem_deactivate(const struct net *net, @@ -5733,7 +5755,7 @@ static void nft_set_elem_deactivate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, @@ -6212,9 +6234,14 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + type = nft_obj_type_get(net, objtype); - if (IS_ERR(type)) - return PTR_ERR(type); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err_type; + } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { @@ -6240,7 +6267,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err4; list_add_tail_rcu(&obj->list, &table->objects); - table->use++; + return 0; err4: /* queued in transaction log */ @@ -6254,6 +6281,9 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, kfree(obj); err1: module_put(type->owner); +err_type: + nft_use_dec_restore(&table->use); + return err; } @@ -6639,8 +6669,8 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - flowtable->use--; - /* fall through */ + nft_use_dec(&flowtable->use); + fallthrough; default: return; } @@ -6871,9 +6901,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); - if (!flowtable) - return -ENOMEM; + if (!flowtable) { + err = -ENOMEM; + goto flowtable_alloc; + } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); @@ -6925,7 +6960,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err5; list_add_tail_rcu(&flowtable->list, &table->flowtables); - table->use++; return 0; err5: @@ -6942,6 +6976,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, kfree(flowtable->name); err1: kfree(flowtable); +flowtable_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -7213,7 +7250,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, - ctx->seq, event, flags, + ctx->seq, event, 0, ctx->family, flowtable); if (err < 0) { kfree_skb(skb); @@ -8051,7 +8088,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + nft_use_dec(&trans->ctx.table->use); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); @@ -8231,7 +8268,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -8239,12 +8276,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: - trans->ctx.chain->use--; + nft_use_dec_restore(&trans->ctx.chain->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), @@ -8253,7 +8290,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: - trans->ctx.chain->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) @@ -8262,7 +8299,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -8270,7 +8307,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -8308,23 +8345,23 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, - nft_trans_flowtable(trans)); - break; + nft_trans_flowtable(trans)); case NFT_MSG_DELFLOWTABLE: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); + list_del_rcu(&nft_trans_flowtable(trans)->list); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); nft_trans_destroy(trans); break; @@ -8716,8 +8753,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (!nft_use_inc(&chain->use)) + return -EMFILE; - chain->use++; data->verdict.chain = chain; break; default: @@ -8731,10 +8769,13 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { + struct nft_chain *chain; + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use--; + chain = data->verdict.chain; + nft_use_dec(&chain->use); break; } } @@ -8888,11 +8929,11 @@ int __nft_release_basechain(struct nft_ctx *ctx) nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } nft_chain_del(ctx->chain); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nf_tables_chain_destroy(ctx); return 0; @@ -8933,35 +8974,35 @@ static void __nft_release_tables(struct net *net) }; list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - ctx.family = table->family; - ctx.table = table; - list_for_each_entry(chain, &table->chains, list) { - ctx.chain = chain; - list_for_each_entry_safe(rule, nr, &chain->rules, list) { - list_del(&rule->list); - chain->use--; - nf_tables_rule_release(&ctx, rule); - } - } + ctx.family = table->family; + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + nft_use_dec(&chain->use); + nf_tables_rule_release(&ctx, rule); + } + } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { list_del(&flowtable->list); - table->use--; + nft_use_dec(&table->use); nf_tables_flowtable_destroy(flowtable); } list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); - table->use--; + nft_use_dec(&table->use); nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { nft_obj_del(obj); - table->use--; + nft_use_dec(&table->use); nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { ctx.chain = chain; nft_chain_del(chain); - table->use--; + nft_use_dec(&table->use); nf_tables_chain_destroy(&ctx); } list_del(&table->list); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 5aac5c994c8f..66619fd61ae7 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -169,8 +169,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, if (IS_ERR(flowtable)) return PTR_ERR(flowtable); + if (!nft_use_inc(&flowtable->use)) + return -EMFILE; + priv->flowtable = flowtable; - flowtable->use++; return nf_ct_netns_get(ctx->net, ctx->family); } @@ -189,7 +191,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx, { struct nft_flow_offload *priv = nft_expr_priv(expr); - priv->flowtable->use++; + nft_use_inc_restore(&priv->flowtable->use); } static void nft_flow_offload_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 54e0f94498da..b04f92607f66 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -44,8 +44,10 @@ static int nft_objref_init(const struct nft_ctx *ctx, if (IS_ERR(obj)) return -ENOENT; + if (!nft_use_inc(&obj->use)) + return -EMFILE; + nft_objref_priv(expr) = obj; - obj->use++; return 0; } @@ -74,7 +76,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx, if (phase == NFT_TRANS_COMMIT) return; - obj->use--; + nft_use_dec(&obj->use); } static void nft_objref_activate(const struct nft_ctx *ctx, @@ -82,7 +84,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx, { struct nft_object *obj = nft_objref_priv(expr); - obj->use++; + nft_use_inc_restore(&obj->use); } static struct nft_expr_type nft_objref_type; From a8fe11b4cc11e8750a2908e8d71936bbc1ce7c05 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 09:58:39 -0700 Subject: [PATCH 13/36] netfilter: nf_tables: fix spurious set element insertion failure jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit ddbd8be68941985f166f5107109a90ce13147c44 On some platforms there is a padding hole in the nft_verdict structure, between the verdict code and the chain pointer. On element insertion, if the new element clashes with an existing one and NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as the data associated with duplicated element is the same as the existing one. The data equality check uses memcmp. For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT padding area leads to spurious failure even if the verdict data is the same. This then makes the insertion fail with 'already exists' error, even though the new "key : data" matches an existing entry and userspace told the kernel that it doesn't want to receive an error indication. Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion") Signed-off-by: Florian Westphal (cherry picked from commit ddbd8be68941985f166f5107109a90ce13147c44) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a10c3d593df0..9cfdcdd18c5c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8732,6 +8732,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { From d870474e515dd6aba4ec65ea02bc6d1e1f462df1 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 10:14:44 -0700 Subject: [PATCH 14/36] netfilter: nf_tables: don't skip expired elements during walk jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 24138933b97b055d486e8064b4a1721702442a9b There is an asymmetry between commit/abort and preparation phase if the following conditions are met: 1. set is a verdict map ("1.2.3.4 : jump foo") 2. timeouts are enabled In this case, following sequence is problematic: 1. element E in set S refers to chain C 2. userspace requests removal of set S 3. kernel does a set walk to decrement chain->use count for all elements from preparation phase 4. kernel does another set walk to remove elements from the commit phase (or another walk to do a chain->use increment for all elements from abort phase) If E has already expired in 1), it will be ignored during list walk, so its use count won't have been changed. Then, when set is culled, ->destroy callback will zap the element via nf_tables_set_elem_destroy(), but this function is only safe for elements that have been deactivated earlier from the preparation phase: lack of earlier deactivate removes the element but leaks the chain use count, which results in a WARN splat when the chain gets removed later, plus a leak of the nft_chain structure. Update pipapo_get() not to skip expired elements, otherwise flush command reports bogus ENOENT errors. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 24138933b97b055d486e8064b4a1721702442a9b) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ++++ net/netfilter/nft_set_hash.c | 2 -- net/netfilter/nft_set_pipapo.c | 18 ++++++++++++------ net/netfilter/nft_set_rbtree.c | 2 -- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9cfdcdd18c5c..b7a18ef44839 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4754,8 +4754,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); struct nft_set_dump_args *args; + if (nft_set_elem_expired(ext)) + return 0; + args = container_of(iter, struct nft_set_dump_args, iter); return nf_tables_fill_setelem(args->skb, set, elem); } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 9457b5ab1164..037a2a90e4be 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -262,8 +262,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&he->ext)) - goto cont; if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 9ae3743c0734..90562786e3af 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -704,8 +704,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext) || - (genmask && + if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) goto next_match; @@ -739,8 +738,17 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, void *nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + struct nft_pipapo_elem *ret; + + ret = pipapo_get(net, set, (const u8 *)elem->key.val.data, + nft_genmask_cur(net)); + if (IS_ERR(ret)) + return ret; + + if (nft_set_elem_expired(&ret->ext)) + return ERR_PTR(-ENOENT); + + return ret; } /** @@ -1890,8 +1898,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, goto cont; e = f->mt[r].e; - if (nft_set_elem_expired(&e->ext)) - goto cont; elem.priv = e; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 250279343935..10883b337620 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -446,8 +446,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&rbe->ext)) - goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; From f2bb80587a49b300cc2a63a48586097add11ddda Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 10:19:05 -0700 Subject: [PATCH 15/36] netfilter: nftables: rename set element data activation/deactivation functions jira VULN-429 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit f8bb7889af58d8e74d2d61c76b1418230f1610fa Rename: - nft_set_elem_activate() to nft_set_elem_data_activate(). - nft_set_elem_deactivate() to nft_set_elem_data_deactivate(). To prepare for updates in the set element infrastructure to add support for the special catch-all element. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit f8bb7889af58d8e74d2d61c76b1418230f1610fa) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7a18ef44839..53fa1bae8465 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5205,8 +5205,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_set_elem_deactivate() already deals with - * the refcounting from the preparation phase. +/* Only called from commit path, nft_setelem_data_deactivate() already deals + * with the refcounting from the preparation phase. */ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, void *elem) @@ -5738,9 +5738,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) } } -static void nft_set_elem_activate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -5750,9 +5750,9 @@ static void nft_set_elem_activate(const struct net *net, nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_set_elem_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -5839,7 +5839,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; - nft_set_elem_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -5875,7 +5875,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, } set->ndeact++; - nft_set_elem_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -8333,7 +8333,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_set_elem_activate(net, te->set, &te->elem); + nft_setelem_data_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; From dbf56a22cc74e726fbb7877191975fb4389e58da Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 14:32:55 -0700 Subject: [PATCH 16/36] netfilter: nftables: add nft_pernet() helper function jira VULN-429 pre-cve CVE-2023-4244 commit-author Pablo Neira Ayuso commit d59d2f82f984df44b31c5d7837fc2f62268b7571 upstream-diff So many conflicts when trying to cherry pick this but they're all very similar and didn't have much trouble picking them out. As per previous commits in this series I've used 4.18.0-534 as the source of truth when resolving conflicts. Consolidate call to net_generic(net, nf_tables_net_id) in this wrapper function. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit d59d2f82f984df44b31c5d7837fc2f62268b7571) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 13 +++++++++++++ net/netfilter/nf_tables_api.c | 20 ++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2865638e0a79..98f9f776977b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -13,6 +13,7 @@ #include #include #include +#include struct module; @@ -1561,4 +1562,16 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...); #else static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; } #endif + +struct nftables_pernet { + unsigned int gc_seq; +}; + +extern unsigned int nf_tables_net_id; + +static inline struct nftables_pernet *nft_pernet(const struct net *net) +{ + return net_generic(net, nf_tables_net_id); +} + #endif /* _NET_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 53fa1bae8465..e972af05d6c1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -28,6 +28,9 @@ #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +unsigned int nf_tables_net_id __read_mostly; +EXPORT_SYMBOL_GPL(nf_tables_net_id); + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -3593,8 +3596,8 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { - struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); + struct nft_trans *trans; list_for_each_entry(trans, &net->nft.commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { @@ -3837,8 +3840,8 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { - u32 portid = ctx->portid; struct sk_buff *skb; + u32 portid = ctx->portid; u16 flags = 0; int err; @@ -4786,7 +4789,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = READ_ONCE(net->nft.base_seq); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &net->nft.tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -7964,6 +7967,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; @@ -8398,7 +8402,7 @@ static void nf_tables_cleanup(struct net *net) static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { - int ret = __nf_tables_abort(net, action); + struct nftables_pernet *nft_net = nft_pernet(net); mutex_unlock(&net->nft_commit_mutex); @@ -8407,6 +8411,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = nft_pernet(net); bool genid_ok; mutex_lock(&net->nft_commit_mutex); @@ -9019,6 +9024,8 @@ static void __nft_release_tables(struct net *net) static int __net_init nf_tables_init_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft_module_list); @@ -9039,11 +9046,16 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + mutex_lock(&net->nft_commit_mutex); + if (!list_empty(&net->nft.commit_list) || !list_empty(&net->nft_module_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); + __nft_release_tables(net); + mutex_unlock(&net->nft_commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft_module_list)); From 2282ecd4c2b1ed575015b6c4e123fc728c4f809f Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 22 Oct 2024 16:42:27 -0700 Subject: [PATCH 17/36] netfilter: nf_tables: GC transaction API to avoid race with control plane jira VULN-429 cve CVE-2023-4244 commit-author Pablo Neira Ayuso ' commit 5f68718b34a531a556f2f50300ead2862278da26 upstream-diff - Upstream fuzz and conflicts. Resolved by pointing to 4.18.0-534 as the source of truth. The set types rhashtable and rbtree use a GC worker to reclaim memory. From system work queue, in periodic intervals, a scan of the table is done. The major caveat here is that the nft transaction mutex is not held. This causes a race between control plane and GC when they attempt to delete the same element. We cannot grab the netlink mutex from the work queue, because the control plane has to wait for the GC work queue in case the set is to be removed, so we get following deadlock: cpu 1 cpu2 GC work transaction comes in , lock nft mutex `acquire nft mutex // BLOCKS transaction asks to remove the set set destruction calls cancel_work_sync() cancel_work_sync will now block forever, because it is waiting for the mutex the caller already owns. This patch adds a new API that deals with garbage collection in two steps: 1) Lockless GC of expired elements sets on the NFT_SET_ELEM_DEAD_BIT so they are not visible via lookup. Annotate current GC sequence in the GC transaction. Enqueue GC transaction work as soon as it is full. If ruleset is updated, then GC transaction is aborted and retried later. 2) GC work grabs the mutex. If GC sequence has changed then this GC transaction lost race with control plane, abort it as it contains stale references to objects and let GC try again later. If the ruleset is intact, then this GC transaction deactivates and removes the elements and it uses call_rcu() to destroy elements. Note that no elements are removed from GC lockless path, the _DEAD bit is set and pointers are collected. GC catchall does not remove the elements anymore too. There is a new set->dead flag that is set on to abort the GC transaction to deal with set->ops->destroy() path which removes the remaining elements in the set from commit_release, where no mutex is held. To deal with GC when mutex is held, which allows safe deactivate and removal, add sync GC API which releases the set element object via call_rcu(). This is used by rbtree and pipapo backends which also perform garbage collection from control plane path. Since element removal from sets can happen from control plane and element garbage collection/timeout, it is necessary to keep the set structure alive until all elements have been deactivated and destroyed. We cannot do a cancel_work_sync or flush_work in nft_set_destroy because its called with the transaction mutex held, but the aforementioned async work queue might be blocked on the very mutex that nft_set_destroy() callchain is sitting on. This gives us the choice of ABBA deadlock or UaF. To avoid both, add set->refs refcount_t member. The GC API can then increment the set refcount and release it once the elements have been free'd. Set backends are adapted to use the GC transaction API in a follow up patch entitled: ("netfilter: nf_tables: use gc transaction API in set backends") This is joint work with Florian Westphal. Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 62 ++++++++- net/netfilter/nf_tables_api.c | 216 ++++++++++++++++++++++++++++-- 2 files changed, 268 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 98f9f776977b..480c4f8e5e16 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -445,6 +445,7 @@ struct nft_set_elem_expr { * * @list: table set list node * @bindings: list of set bindings + * @refs: internal refcounting for async set destruction * @table: table this set belongs to * @net: netnamespace this set belongs to * @name: name of the set @@ -474,6 +475,7 @@ struct nft_set_elem_expr { struct nft_set { struct list_head list; struct list_head bindings; + refcount_t refs; struct nft_table *table; possible_net_t net; char *name; @@ -495,7 +497,8 @@ struct nft_set { struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + dead:1, genmask:2; u8 klen; u8 dlen; @@ -1444,6 +1447,32 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) clear_bit(NFT_SET_ELEM_BUSY_BIT, word); } +#define NFT_SET_ELEM_DEAD_MASK (1 << 3) + +#if defined(__LITTLE_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT 3 +#elif defined(__BIG_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) +#else +#error +#endif + +static inline void nft_set_elem_dead(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + set_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + +static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + /** * struct nft_trans - nf_tables object update in transaction * @@ -1546,6 +1575,35 @@ struct nft_trans_flowtable { #define nft_trans_flowtable(trans) \ (((struct nft_trans_flowtable *)trans->data)->flowtable) +#define NFT_TRANS_GC_BATCHCOUNT 256 + +struct nft_trans_gc { + struct list_head list; + struct net *net; + struct nft_set *set; + u32 seq; + u8 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; +}; + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_destroy(struct nft_trans_gc *trans); + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); + +void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); + +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); @@ -1564,7 +1622,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re #endif struct nftables_pernet { - unsigned int gc_seq; + unsigned int gc_seq; }; extern unsigned int nf_tables_net_id; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e972af05d6c1..6e4faf8ea255 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -35,7 +35,9 @@ static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); +static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); +static DEFINE_SPINLOCK(nf_tables_gc_list_lock); enum { NFT_VALIDATE_SKIP = 0, @@ -125,6 +127,9 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); +static void nft_trans_gc_work(struct work_struct *work); +static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); + static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, @@ -4280,6 +4285,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); + refcount_set(&set->refs, 1); set->table = table; write_pnet(&set->net, net); set->ops = ops; @@ -4368,6 +4374,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, return err; } +static void nft_set_put(struct nft_set *set) +{ + if (refcount_dec_and_test(&set->refs)) { + kfree(set->name); + kvfree(set); + } +} + static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { int i; @@ -4380,8 +4394,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) set->ops->destroy(set); module_put(to_set_type(set->ops)->owner); - kfree(set->name); - kvfree(set); + nft_set_put(set); } static int nf_tables_delset(struct net *net, struct sock *nlsk, @@ -5753,9 +5766,9 @@ static void nft_setelem_data_activate(const struct net *net, nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_setelem_data_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -7810,6 +7823,181 @@ static void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } +static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, + struct nft_trans_gc *trans) +{ + void **priv = trans->priv; + unsigned int i; + + for (i = 0; i < trans->count; i++) { + struct nft_set_elem elem = { + .priv = priv[i], + }; + + nft_setelem_data_deactivate(ctx->net, trans->set, &elem); + nft_setelem_remove(ctx->net, trans->set, &elem); + } +} + +void nft_trans_gc_destroy(struct nft_trans_gc *trans) +{ + nft_set_put(trans->set); + put_net(trans->net); + kfree(trans); +} + +static void nft_trans_gc_trans_free(struct rcu_head *rcu) +{ + struct nft_set_elem elem = {}; + struct nft_trans_gc *trans; + struct nft_ctx ctx = {}; + unsigned int i; + + trans = container_of(rcu, struct nft_trans_gc, rcu); + ctx.net = read_pnet(&trans->set->net); + + for (i = 0; i < trans->count; i++) { + elem.priv = trans->priv[i]; + atomic_dec(&trans->set->nelems); + + nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + } + + nft_trans_gc_destroy(trans); +} + +static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) +{ + struct nftables_pernet *nft_net; + struct nft_ctx ctx = {}; + struct net *net; + + net = trans->net; + nft_net = nft_pernet(net); + + mutex_lock(&net->nft_commit_mutex); + + /* Check for race with transaction, otherwise this batch refers to + * stale objects that might not be there anymore. Skip transaction if + * set has been destroyed from control plane transaction in case gc + * worker loses race. + */ + if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { + mutex_unlock(&net->nft_commit_mutex); + return false; + } + + ctx.net = trans->net; + ctx.table = trans->set->table; + + nft_trans_gc_setelem_remove(&ctx, trans); + mutex_unlock(&net->nft_commit_mutex); + + return true; +} + +static void nft_trans_gc_work(struct work_struct *work) +{ + struct nft_trans_gc *trans, *next; + LIST_HEAD(trans_gc_list); + + spin_lock(&nf_tables_destroy_list_lock); + list_splice_init(&nf_tables_gc_list, &trans_gc_list); + spin_unlock(&nf_tables_destroy_list_lock); + + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); + if (!nft_trans_gc_work_done(trans)) { + nft_trans_gc_destroy(trans); + continue; + } + call_rcu(&trans->rcu, nft_trans_gc_trans_free); + } +} + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp) +{ + struct net *net = read_pnet(&set->net); + struct nft_trans_gc *trans; + + trans = kzalloc(sizeof(*trans), gfp); + if (!trans) + return NULL; + + refcount_inc(&set->refs); + trans->set = set; + trans->net = get_net(net); + trans->seq = gc_seq; + + return trans; +} + +void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) +{ + trans->priv[trans->count++] = priv; +} + +static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) +{ + spin_lock(&nf_tables_gc_list_lock); + list_add_tail(&trans->list, &nf_tables_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + schedule_work(&trans_gc_work); +} + +static int nft_trans_gc_space(struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp) +{ + if (nft_trans_gc_space(gc)) + return gc; + + nft_trans_gc_queue_work(gc); + + return nft_trans_gc_alloc(gc->set, gc_seq, gfp); +} + +void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) +{ + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + nft_trans_gc_queue_work(trans); +} + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) +{ + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) + return NULL; + + if (nft_trans_gc_space(gc)) + return gc; + + call_rcu(&gc->rcu, nft_trans_gc_trans_free); + + return nft_trans_gc_alloc(gc->set, 0, gfp); +} + +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) +{ + WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); + + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + call_rcu(&trans->rcu, nft_trans_gc_trans_free); +} + static void nf_tables_module_autoload_cleanup(struct net *net) { struct nft_module_request *req, *next; @@ -8103,6 +8291,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); @@ -8127,7 +8316,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM); - te->set->ops->remove(net, te->set, &te->elem); + nft_setelem_remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; if (te->set->ops->commit && @@ -8178,6 +8367,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, net->nft.base_seq); + nf_tables_commit_release(net); return 0; @@ -8284,7 +8474,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELCHAIN: - nft_use_inc_restore(&trans->ctx.chain->use); + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; @@ -8367,6 +8557,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans)); + break; case NFT_MSG_DELFLOWTABLE: nft_use_inc_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); @@ -8411,7 +8602,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, static bool nf_tables_valid_genid(struct net *net, u32 genid) { - struct nftables_pernet *nft_net = nft_pernet(net); bool genid_ok; mutex_lock(&net->nft_commit_mutex); @@ -9033,6 +9223,7 @@ static int __net_init nf_tables_init_net(struct net *net) mutex_init(&net->nft_commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; + nft_net->gc_seq = 0; return 0; } @@ -9062,10 +9253,18 @@ static void __net_exit nf_tables_exit_net(struct net *net) WARN_ON_ONCE(!list_empty(&net->nft_notify_list)); } +static void nf_tables_exit_batch(struct list_head *net_exit_list) +{ + flush_work(&trans_gc_work); +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .exit_batch = nf_tables_exit_batch, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) @@ -9128,6 +9327,7 @@ static void __exit nf_tables_module_exit(void) nft_chain_filter_fini(); nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); From b6e3acae2e48fe5e47c8673466c311df8515db11 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 11:11:26 -0700 Subject: [PATCH 18/36] netfilter: nf_tables: adapt set backend to use GC transaction API jira VULN-429 cve CVE-2023-4244 commit-author Pablo Neira Ayuso ' commit f6c383b8c31a93752a52697f8430a71dcbc46adf upstream-diff - Upstream fuzz and conflicts. Resolved by pointing to 4.18.0-534 as the source of truth. Previous commiit 5d235d6ce75c is completely overwritten by this commit so we're not backporting it. Use the GC transaction API to replace the old and buggy gc API and the busy mark approach. No set elements are removed from async garbage collection anymore, instead the _DEAD bit is set on so the set element is not visible from lookup path anymore. Async GC enqueues transaction work that might be aborted and retried later. rbtree and pipapo set backends does not set on the _DEAD bit from the sync GC path since this runs in control plane path where mutex is held. In this case, set elements are deactivated, removed and then released via RCU callback, sync GC never fails. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Fixes: 9d0982927e79 ("netfilter: nft_hash: add support for timeouts") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit f6c383b8c31a93752a52697f8430a71dcbc46adf) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 8 +++ net/netfilter/nft_set_hash.c | 68 +++++++++++++++-------- net/netfilter/nft_set_pipapo.c | 45 ++++++++++++---- net/netfilter/nft_set_rbtree.c | 98 +++++++++++++++++++++------------- 4 files changed, 150 insertions(+), 69 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6e4faf8ea255..8ecb5b9bfce8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5777,6 +5777,7 @@ void nft_setelem_data_deactivate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); } +EXPORT_SYMBOL_GPL(nft_setelem_data_deactivate); static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) @@ -7845,6 +7846,7 @@ void nft_trans_gc_destroy(struct nft_trans_gc *trans) put_net(trans->net); kfree(trans); } +EXPORT_SYMBOL_GPL(nft_trans_gc_destroy); static void nft_trans_gc_trans_free(struct rcu_head *rcu) { @@ -7932,11 +7934,13 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, return trans; } +EXPORT_SYMBOL_GPL(nft_trans_gc_alloc); void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) { trans->priv[trans->count++] = priv; } +EXPORT_SYMBOL_GPL(nft_trans_gc_elem_add); static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) { @@ -7962,6 +7966,7 @@ struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, return nft_trans_gc_alloc(gc->set, gc_seq, gfp); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) { @@ -7972,6 +7977,7 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) nft_trans_gc_queue_work(trans); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done); struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { @@ -7985,6 +7991,7 @@ struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) return nft_trans_gc_alloc(gc->set, 0, gfp); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) { @@ -7997,6 +8004,7 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } +EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync_done); static void nf_tables_module_autoload_cleanup(struct net *net) { diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 037a2a90e4be..e8d10bba3050 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -62,6 +62,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; + if (nft_set_elem_is_dead(&he->ext)) + return 1; if (nft_set_elem_expired(&he->ext)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) @@ -190,7 +192,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, struct nft_rhash_elem *he = elem->priv; nft_set_elem_change_active(net, set, &he->ext); - nft_set_elem_clear_busy(&he->ext); } static bool nft_rhash_flush(const struct net *net, @@ -198,12 +199,9 @@ static bool nft_rhash_flush(const struct net *net, { struct nft_rhash_elem *he = priv; - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) { - nft_set_elem_change_active(net, set, &he->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &he->ext); + + return true; } static void *nft_rhash_deactivate(const struct net *net, @@ -220,9 +218,8 @@ static void *nft_rhash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); - if (he != NULL && - !nft_rhash_flush(net, set, he)) - he = NULL; + if (he) + nft_set_elem_change_active(net, set, &he->ext); rcu_read_unlock(); @@ -296,25 +293,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, static void nft_rhash_gc(struct work_struct *work) { + struct nftables_pernet *nft_net; struct nft_set *set; struct nft_rhash_elem *he; struct nft_rhash *priv; - struct nft_set_gc_batch *gcb = NULL; struct rhashtable_iter hti; + struct nft_trans_gc *gc; + struct net *net; + u32 gc_seq; priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) - break; + if (PTR_ERR(he) != -EAGAIN) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } continue; } + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + + if (nft_set_elem_is_dead(&he->ext)) + goto dead_elem; + if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) && nft_rhash_expr_needs_gc_run(set, &he->ext)) goto needs_gc_run; @@ -322,20 +342,23 @@ static void nft_rhash_gc(struct work_struct *work) if (!nft_set_elem_expired(&he->ext)) continue; needs_gc_run: - if (nft_set_elem_mark_busy(&he->ext)) - continue; + nft_set_elem_dead(&he->ext); +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb == NULL) - break; - rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, he); + nft_trans_gc_elem_add(gc, he); } + +try_later: rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); - nft_set_gc_batch_complete(gcb); + if (gc) + nft_trans_gc_queue_async_done(gc); + +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } @@ -386,7 +409,6 @@ static void nft_rhash_destroy(const struct nft_set *set) struct nft_rhash *priv = nft_set_priv(set); cancel_delayed_work_sync(&priv->gc_work); - rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)set); } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 90562786e3af..b7138112825b 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1436,20 +1436,38 @@ static void pipapo_drop(struct nft_pipapo_match *m, } } +static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, + struct nft_pipapo_elem *e) + +{ + struct nft_set_elem elem = { + .priv = e, + }; + + nft_setelem_data_deactivate(net, set, &elem); +} + /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) +static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) { + struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); int rules_f0, first_rule = 0; + struct nft_pipapo_elem *e; + struct nft_trans_gc *gc; + + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (!gc) + return; while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; struct nft_pipapo_field *f; - struct nft_pipapo_elem *e; int i, start, rules_fx; start = first_rule; @@ -1469,13 +1487,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) f--; i--; e = f->mt[rulemap[i].to].e; - if (nft_set_elem_expired(&e->ext) && - !nft_set_elem_mark_busy(&e->ext)) { + + /* synchronous gc never fails, there is no need to set on + * NFT_SET_ELEM_DEAD_BIT. + */ + if (nft_set_elem_expired(&e->ext)) { priv->dirty = true; - pipapo_drop(m, rulemap); - rcu_barrier(); - nft_set_elem_destroy(set, e, true); + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (!gc) + break; + + nft_pipapo_gc_deactivate(net, set, e); + pipapo_drop(m, rulemap); + nft_trans_gc_elem_add(gc, e); /* And check again current first rule, which is now the * first we haven't checked. @@ -1485,7 +1510,10 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) } } - priv->last_gc = jiffies; + if (gc) { + nft_trans_gc_queue_sync_done(gc); + priv->last_gc = jiffies; + } } /** @@ -1607,7 +1635,6 @@ static void nft_pipapo_activate(const struct net *net, return; nft_set_elem_change_active(net, set, &e->ext); - nft_set_elem_clear_busy(&e->ext); } /** diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 10883b337620..59c3bcc6fc82 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -47,6 +47,12 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; } +static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) +{ + return nft_set_elem_expired(&rbe->ext) || + nft_set_elem_is_dead(&rbe->ext); +} + static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext, unsigned int seq) @@ -83,7 +89,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set continue; } - if (nft_set_elem_expired(&rbe->ext)) + if (nft_rbtree_elem_expired(rbe)) return false; if (nft_rbtree_interval_end(rbe)) { @@ -101,7 +107,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && - !nft_set_elem_expired(&interval->ext) && + !nft_rbtree_elem_expired(interval) && nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; @@ -376,7 +382,6 @@ static void nft_rbtree_activate(const struct net *net, struct nft_rbtree_elem *rbe = elem->priv; nft_set_elem_change_active(net, set, &rbe->ext); - nft_set_elem_clear_busy(&rbe->ext); } static bool nft_rbtree_flush(const struct net *net, @@ -384,12 +389,9 @@ static bool nft_rbtree_flush(const struct net *net, { struct nft_rbtree_elem *rbe = priv; - if (!nft_set_elem_mark_busy(&rbe->ext) || - !nft_is_active(net, &rbe->ext)) { - nft_set_elem_change_active(net, set, &rbe->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &rbe->ext); + + return true; } static void *nft_rbtree_deactivate(const struct net *net, @@ -464,58 +466,80 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, static void nft_rbtree_gc(struct work_struct *work) { - struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; - struct nft_set_gc_batch *gcb = NULL; + struct nft_rbtree_elem *rbe, *rbe_end = NULL; + struct nftables_pernet *nft_net; struct nft_rbtree *priv; + struct nft_trans_gc *gc; struct rb_node *node; struct nft_set *set; + unsigned int gc_seq; + struct net *net; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (nft_set_elem_is_dead(&rbe->ext)) + goto dead_elem; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + nft_set_elem_dead(&rbe->ext); + + if (!rbe_end) continue; - if (rbe_prev) { - rb_erase(&rbe_prev->node, &priv->root); - rbe_prev = NULL; - } - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (!gcb) - break; - - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - rbe_prev = rbe; - - if (rbe_end) { - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe_end); - rb_erase(&rbe_end->node, &priv->root); - rbe_end = NULL; - } - node = rb_next(node); - if (!node) - break; + nft_set_elem_dead(&rbe_end->ext); + + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; + + nft_trans_gc_elem_add(gc, rbe_end); + rbe_end = NULL; +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; + + nft_trans_gc_elem_add(gc, rbe); } - if (rbe_prev) - rb_erase(&rbe_prev->node, &priv->root); + +try_later: write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); - nft_set_gc_batch_complete(gcb); - + if (gc) + nft_trans_gc_queue_async_done(gc); +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } From 0584d4eab4ea990e4ddef84228771d19d5fa4924 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 13:14:39 -0700 Subject: [PATCH 19/36] netfilter: nf_tables: remove busy mark and gc batch API jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 upstream-diff cherry-pick occassionally pulls in big blobs of unrelated crap. I had to excise significant portions of code in the process of resolving the conflicts. As per usual in this netfilter series I have relied on 4.18.0-534 code as a source of truth. Ditch it, it has been replace it by the GC transaction API and it has no clients anymore. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit a2dd0233cbc4d8a0abb5f64487487ffc9265beb5) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 97 +------------------------------ net/netfilter/nf_tables_api.c | 27 +-------- 2 files changed, 4 insertions(+), 120 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 480c4f8e5e16..e82111eda80d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -740,62 +740,6 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); -/** - * struct nft_set_gc_batch_head - nf_tables set garbage collection batch - * - * @rcu: rcu head - * @set: set the elements belong to - * @cnt: count of elements - */ -struct nft_set_gc_batch_head { - struct rcu_head rcu; - const struct nft_set *set; - unsigned int cnt; -}; - -#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ - sizeof(struct nft_set_gc_batch_head)) / \ - sizeof(void *)) - -/** - * struct nft_set_gc_batch - nf_tables set garbage collection batch - * - * @head: GC batch head - * @elems: garbage collection elements - */ -struct nft_set_gc_batch { - struct nft_set_gc_batch_head head; - void *elems[NFT_SET_GC_BATCH_SIZE]; -}; - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp); -void nft_set_gc_batch_release(struct rcu_head *rcu); - -static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) -{ - if (gcb != NULL) - call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); -} - -static inline struct nft_set_gc_batch * -nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, - gfp_t gfp) -{ - if (gcb != NULL) { - if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) - return gcb; - nft_set_gc_batch_complete(gcb); - } - return nft_set_gc_batch_alloc(set, gfp); -} - -static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, - void *elem) -{ - gcb->elems[gcb->head.cnt++] = elem; -} - struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type @@ -1412,47 +1356,12 @@ static inline void nft_set_elem_change_active(const struct net *net, ext->genmask ^= nft_genmask_next(net); } -/* - * We use a free bit in the genmask field to indicate the element - * is busy, meaning it is currently being processed either by - * the netlink API or GC. - * - * Even though the genmask is only a single byte wide, this works - * because the extension structure if fully constant once initialized, - * so there are no non-atomic write accesses unless it is already - * marked busy. - */ -#define NFT_SET_ELEM_BUSY_MASK (1 << 2) - -#if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT 2 -#elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) -#else -#error -#endif - -static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) -{ - unsigned long *word = (unsigned long *)ext; - - BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); - return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); -} - -static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) -{ - unsigned long *word = (unsigned long *)ext; - - clear_bit(NFT_SET_ELEM_BUSY_BIT, word); -} - -#define NFT_SET_ELEM_DEAD_MASK (1 << 3) +#define NFT_SET_ELEM_DEAD_MASK (1 << 2) #if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_DEAD_BIT 3 +#define NFT_SET_ELEM_DEAD_BIT 2 #elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) #else #error #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8ecb5b9bfce8..dc8ea91abde5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5630,7 +5630,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_elem_expr; } - ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; + ext->genmask = nft_genmask_cur(ctx->net); err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { @@ -5947,31 +5947,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, return err; } -void nft_set_gc_batch_release(struct rcu_head *rcu) -{ - struct nft_set_gc_batch *gcb; - unsigned int i; - - gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); - for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); - kfree(gcb); -} -EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp) -{ - struct nft_set_gc_batch *gcb; - - gcb = kzalloc(sizeof(*gcb), gfp); - if (gcb == NULL) - return gcb; - gcb->head.set = set; - return gcb; -} -EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); - /* * Stateful objects */ From 912589507c513d3bff135dee3970c1d0b93e0ac8 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 13:29:08 -0700 Subject: [PATCH 20/36] netfilter: nf_tables: fix false-positive lockdep splat jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit b9f052dc68f69dac89fe1e24693354c033daa091 upstream-diff Had to synch to the use of inline from the 4.18.0-534 and also found an upstream diff for lockdep_is_held that I also matched to the 4.18.0-534 kernel code. ->abort invocation may cause splat on debug kernels: WARNING: suspicious RCU usage net/netfilter/nft_set_pipapo.c:1697 suspicious rcu_dereference_check() usage! [..] rcu_scheduler_active = 2, debug_locks = 1 1 lock held by nft/133554: [..] (nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid [..] lockdep_rcu_suspicious+0x1ad/0x260 nft_pipapo_abort+0x145/0x180 __nf_tables_abort+0x5359/0x63d0 nf_tables_abort+0x24/0x40 nfnetlink_rcv+0x1a0a/0x22c0 netlink_unicast+0x73c/0x900 netlink_sendmsg+0x7f0/0xc20 ____sys_sendmsg+0x48d/0x760 Transaction mutex is held, so parallel updates are not possible. Switch to _protected and check mutex is held for lockdep enabled builds. Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") Signed-off-by: Florian Westphal (cherry picked from commit b9f052dc68f69dac89fe1e24693354c033daa091) Signed-off-by: Greg Rose --- net/netfilter/nft_set_pipapo.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index b7138112825b..113771935e5f 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1593,6 +1593,17 @@ static void nft_pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static bool inline nft_pipapo_transaction_mutex_held(const struct nft_set *set) +{ +#ifdef CONFIG_PROVE_LOCKING + const struct net *net = read_pnet(&set->net); + + return lockdep_is_held(&net->nft_commit_mutex); +#else + return true; +#endif +} + static void nft_pipapo_abort(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); @@ -1601,7 +1612,7 @@ static void nft_pipapo_abort(const struct nft_set *set) if (!priv->dirty) return; - m = rcu_dereference(priv->match); + m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); new_clone = pipapo_clone(m); if (IS_ERR(new_clone)) From b80380b7e61b543d9d6eb9adb72390878087b6a7 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:28:04 -0700 Subject: [PATCH 21/36] netfilter: nf_tables: fix kdoc warnings after gc rework jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 08713cb006b6f07434f276c5ee214fb20c7fd965 Jakub Kicinski says: We've got some new kdoc warnings here: net/netfilter/nft_set_pipapo.c:1557: warning: Function parameter or member '_set' not described in 'pipapo_gc' net/netfilter/nft_set_pipapo.c:1557: warning: Excess function parameter 'set' description in 'pipapo_gc' include/net/netfilter/nf_tables.h:577: warning: Function parameter or member 'dead' not described in 'nft_set' Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20230810104638.746e46f1@kernel.org/ Signed-off-by: Florian Westphal (cherry picked from commit 08713cb006b6f07434f276c5ee214fb20c7fd965) Signed-off-by: Greg Rose (cherry picked from commit ddcae6925219c35588313d4f84e103e8a885e457) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nft_set_pipapo.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e82111eda80d..7a4dfb109390 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -467,6 +467,7 @@ struct nft_set_elem_expr { * @expr: stateful expression * @ops: set ops * @flags: set flags + * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 113771935e5f..538f71042489 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1449,7 +1449,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @set: nftables API set representation + * @_set: nftables API set representation * @m: Matching data */ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) From ef414a3282ee36985b6e9aaa786f5f90c732991e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 13:35:39 -0700 Subject: [PATCH 22/36] netfilter: nf_tables: don't fail inserts if duplicate has expired jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 7845914f45f066497ac75b30c50dbc735e84e884 nftables selftests fail: run-tests.sh testcases/sets/0044interval_overlap_0 Expected: 0-2 . 0-3, got: W: [FAILED] ./testcases/sets/0044interval_overlap_0: got 1 Insertion must ignore duplicate but expired entries. Moreover, there is a strange asymmetry in nft_pipapo_activate: It refetches the current element, whereas the other ->activate callbacks (bitmap, hash, rhash, rbtree) use elem->priv. Same for .remove: other set implementations take elem->priv, nft_pipapo_remove fetches elem->priv, then does a relookup, remove this. I suspect this was the reason for the change that prompted the removal of the expired check in pipapo_get() in the first place, but skipping exired elements there makes no sense to me, this helper is used for normal get requests, insertions (duplicate check) and deactivate callback. In first two cases expired elements must be skipped. For ->deactivate(), this gets called for DELSETELEM, so it seems to me that expired elements should be skipped as well, i.e. delete request should fail with -ENOENT error. Fixes: 24138933b97b ("netfilter: nf_tables: don't skip expired elements during walk") Signed-off-by: Florian Westphal (cherry picked from commit 7845914f45f066497ac75b30c50dbc735e84e884) Signed-off-by: Greg Rose --- net/netfilter/nft_set_pipapo.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 538f71042489..743f55327811 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -704,6 +704,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { + if (nft_set_elem_expired(&f->mt[b].e->ext)) + goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) goto next_match; @@ -738,17 +740,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, void *nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - struct nft_pipapo_elem *ret; - - ret = pipapo_get(net, set, (const u8 *)elem->key.val.data, + return pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); - if (IS_ERR(ret)) - return ret; - - if (nft_set_elem_expired(&ret->ext)) - return ERR_PTR(-ENOENT); - - return ret; } /** @@ -1639,11 +1632,7 @@ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_pipapo_elem *e; - - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0); - if (IS_ERR(e)) - return; + struct nft_pipapo_elem *e = elem->priv; nft_set_elem_change_active(net, set, &e->ext); } @@ -1853,10 +1842,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, data = (const u8 *)nft_set_ext_key(&e->ext); - e = pipapo_get(net, set, data, 0); - if (IS_ERR(e)) - return; - while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *match_start, *match_end; From 7a82bf4ba1cc94e7b2083aada70a0163f30d499c Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 14:11:25 -0700 Subject: [PATCH 23/36] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc upstream-diff There's a lot of fuzz and code differences - resolved in favor of the 534 release code. Netlink event path is missing a synchronization point with GC transactions. Add GC sequence number update to netns release path and netlink event path, any GC transaction losing race will be discarded. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit 6a33d8b73dfac0a41f3877894b38082bd0c9a5bc) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dc8ea91abde5..72e59619c592 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8136,10 +8136,27 @@ static void nft_set_commit_update(struct list_head *set_update_list) } } +static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) +{ + unsigned int gc_seq; + + /* Bump gc counter, it becomes odd, this is the busy mark. */ + gc_seq = READ_ONCE(nft_net->gc_seq); + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + + return gc_seq; +} + +static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) +{ + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; @@ -8195,6 +8212,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) WRITE_ONCE(net->nft.base_seq, base_seq); + gc_seq = nft_gc_seq_begin(nft_net); + /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); @@ -8351,6 +8370,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, net->nft.base_seq); + nft_gc_seq_end(nft_net, gc_seq); nf_tables_commit_release(net); return 0; @@ -8498,7 +8518,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(net, te->set, &te->elem); + nft_setelem_remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); if (te->set->ops->abort && @@ -9221,15 +9241,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); + unsigned int gc_seq; mutex_lock(&net->nft_commit_mutex); + gc_seq = nft_gc_seq_begin(nft_net); + if (!list_empty(&net->nft.commit_list) || - !list_empty(&net->nft_module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + !list_empty(&net->nft_module_list)) + __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); + nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&net->nft_commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft_module_list)); From 903d9428a24dbf47d9e223a678fcc3f054aa017b Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 14:43:36 -0700 Subject: [PATCH 24/36] netfilter: nf_tables: GC transaction race with netns dismantle jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 02c6c24402bf1c1e986899c14ba22a10b510916b Use maybe_get_net() since GC workqueue might race with netns exit path. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit 02c6c24402bf1c1e986899c14ba22a10b510916b) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 72e59619c592..12b8ceebdc5a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7902,9 +7902,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, if (!trans) return NULL; + trans->net = maybe_get_net(net); + if (!trans->net) { + kfree(trans); + return NULL; + } + refcount_inc(&set->refs); trans->set = set; - trans->net = get_net(net); trans->seq = gc_seq; return trans; @@ -8161,7 +8166,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; - unsigned int base_seq; LIST_HEAD(adl); int err; From 70c4cda1549a20656acaf8a7c31165034550a3a5 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 29 Oct 2024 15:32:54 -0700 Subject: [PATCH 25/36] netfilter: nf_tables: GC transaction race with abort path jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 720344340fb9be2765bbaab7b292ece0a4570eae upstream-diff Some minor differences due to pernet goo - not important. Abort path is missing a synchronization point with GC transactions. Add GC sequence number hence any GC transaction losing race will be discarded. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry-picked from commit 720344340fb9be2765bbaab7b292ece0a4570eae) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 12b8ceebdc5a..70e9576bf502 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8601,6 +8601,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); + unsigned int gc_seq; + int ret; + + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&net->nft_commit_mutex); From ff4e14df9b389cea3461971ec24b8f4529c9d220 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:48:15 -0700 Subject: [PATCH 26/36] netfilter: nf_tables: use correct lock to protect gc_list jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e Use nf_tables_gc_list_lock spinlock, not nf_tables_destroy_list_lock to protect the gc list. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 70e9576bf502..75c7770c2ea9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7878,9 +7878,9 @@ static void nft_trans_gc_work(struct work_struct *work) struct nft_trans_gc *trans, *next; LIST_HEAD(trans_gc_list); - spin_lock(&nf_tables_destroy_list_lock); + spin_lock(&nf_tables_gc_list_lock); list_splice_init(&nf_tables_gc_list, &trans_gc_list); - spin_unlock(&nf_tables_destroy_list_lock); + spin_unlock(&nf_tables_gc_list_lock); list_for_each_entry_safe(trans, next, &trans_gc_list, list) { list_del(&trans->list); From 6f85cfcc451fe8de8fe9c250bf9ccc7439599f13 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:53:32 -0700 Subject: [PATCH 27/36] netfilter: nf_tables: fix out of memory error handling jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 5e1be4cdc98c989d5387ce94ff15b5ad06a5b681 upstream-diff Using the 4.18.0-534 code as an example. Several instances of pipapo_resize() don't propagate allocation failures, this causes a crash when fault injection is enabled for gfp_kernel slabs. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio (cherry picked from commit 5e1be4cdc98c989d5387ce94ff15b5ad06a5b681) Signed-off-by: Greg Rose --- net/netfilter/nft_set_pipapo.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 743f55327811..0c4983ad9621 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -850,12 +850,14 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group, static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret; + int rule = f->rules, group, ret; - ret = pipapo_resize(f, f->rules - 1, f->rules); + ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) return ret; + f->rules++; + for (group = 0; group < f->groups; group++) { int i, v; u8 mask; @@ -995,7 +997,9 @@ static int pipapo_expand(struct nft_pipapo_field *f, step++; if (step >= len) { if (!masks) { - pipapo_insert(f, base, 0); + err = pipapo_insert(f, base, 0); + if (err < 0) + return err; masks = 1; } goto out; @@ -1151,6 +1155,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, f->groups * NFT_PIPAPO_GROUP_BITS); } + if (ret < 0) + return ret; + if (f->bsize > bsize_max) bsize_max = f->bsize; From e128fc409da42a6ed1cb5c58312ad995455dd319 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 16:59:06 -0700 Subject: [PATCH 28/36] netfilter: nf_tables: defer gc run if previous batch is still pending jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit 8e51830e29e12670b4c10df070a4ea4c9593e961 Don't queue more gc work, else we may queue the same elements multiple times. If an element is flagged as dead, this can mean that either the previous gc request was invalidated/discarded by a transaction or that the previous request is still pending in the system work queue. The latter will happen if the gc interval is set to a very low value, e.g. 1ms, and system work queue is backlogged. The sets refcount is 1 if no previous gc requeusts are queued, so add a helper for this and skip gc run if old requests are pending. Add a helper for this and skip the gc run in this case. Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") Signed-off-by: Florian Westphal Reviewed-by: Pablo Neira Ayuso (cherry picked from commit 8e51830e29e12670b4c10df070a4ea4c9593e961) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 5 +++++ net/netfilter/nft_set_hash.c | 3 +++ net/netfilter/nft_set_rbtree.c | 3 +++ 3 files changed, 11 insertions(+) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7a4dfb109390..f8983f8f3790 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -519,6 +519,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline bool nft_set_gc_is_pending(const struct nft_set *s) +{ + return refcount_read(&s->refs) != 1; +} + static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e8d10bba3050..dd915a16a37d 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -308,6 +308,9 @@ static void nft_rhash_gc(struct work_struct *work) nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); + if (nft_set_gc_is_pending(set)) + goto done; + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 59c3bcc6fc82..3d1e8f0d7a67 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -481,6 +481,9 @@ static void nft_rbtree_gc(struct work_struct *work) nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); + if (nft_set_gc_is_pending(set)) + goto done; + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; From 46108885ab166372a4a8dafb4bb3eadc86f00f58 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 17:03:56 -0700 Subject: [PATCH 29/36] netfilter: nf_tables: fix nft_trans type confusion jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit e3c361b8acd636f5fe80c02849ca175201edf10c upstream-diff - Some cruft in nft_rule_lookup_byid() - resolved by using branch 4.18.0-534 as the source of truth. nft_trans_FOO objects all share a common nft_trans base structure, but trailing fields depend on the real object size. Access is only safe after trans->msg_type check. Check for rule type first. Found by code inspection. Fixes: 1a94e38d254b ("netfilter: nf_tables: add NFTA_RULE_ID attribute") Signed-off-by: Florian Westphal (cherry picked from commit e3c361b8acd636f5fe80c02849ca175201edf10c) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 75c7770c2ea9..390167eaab27 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3330,12 +3330,10 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &net->nft.commit_list, list) { - struct nft_rule *rule = nft_trans_rule(trans); - if (trans->msg_type == NFT_MSG_NEWRULE && trans->ctx.chain == chain && id == nft_trans_rule_id(trans)) - return rule; + return nft_trans_rule(trans); } return ERR_PTR(-ENOENT); } From 7e580ea97a25aeed16b717618f78fc1e3ea45657 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:22:36 -0700 Subject: [PATCH 30/36] netfilter: nf_tables: disallow element removal on anonymous sets jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 23a3bfd4ba7acd36abf52b78605f61b21bdac216 Anonymous sets need to be populated once at creation and then they are bound to rule since 938154b93be8 ("netfilter: nf_tables: reject unbound anonymous set before commit phase"), otherwise transaction reports EINVAL. Userspace does not need to delete elements of anonymous sets that are not yet bound, reject this with EOPNOTSUPP. From flush command path, skip anonymous sets, they are expected to be bound already. Otherwise, EINVAL is hit at the end of this transaction for unbound sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 23a3bfd4ba7acd36abf52b78605f61b21bdac216) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 390167eaab27..3045a1f1c4ef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1139,8 +1139,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -5921,8 +5920,10 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && - (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { From 07f22cf2c1affec05bea2965fd1b6878d47f51cd Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:28:42 -0700 Subject: [PATCH 31/36] netfilter: nftables: update table flags from the commit phase jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 0ce7cf4127f14078ca598ba9700d813178a59409 Do not update table flags from the preparation phase. Store the flags update into the transaction, then update the flags from the commit phase. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 0ce7cf4127f14078ca598ba9700d813178a59409) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 9 ++++++--- net/netfilter/nf_tables_api.c | 31 ++++++++++++++++--------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f8983f8f3790..a578cf9fbff2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1449,13 +1449,16 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - bool enable; + u8 state; + u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_enable(trans) \ - (((struct nft_trans_table *)trans->data)->enable) +#define nft_trans_table_state(trans) \ + (((struct nft_trans_table *)trans->data)->state) +#define nft_trans_table_flags(trans) \ + (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3045a1f1c4ef..9c0ef774b2ac 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -945,6 +945,12 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) nft_table_disable(net, table, 0); } +enum { + NFT_TABLE_STATE_UNCHANGED = 0, + NFT_TABLE_STATE_DORMANT, + NFT_TABLE_STATE_WAKEUP +}; + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -968,19 +974,17 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_enable(trans) = false; + nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) - nft_trans_table_enable(trans) = true; - else - ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; } if (ret < 0) goto err; + nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -8226,11 +8230,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + + trans->ctx.table->flags = nft_trans_table_flags(trans); } else { nft_clear(net, trans->ctx.table); } @@ -8452,11 +8455,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + nf_tables_table_disable(net, trans->ctx.table); + nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); From fba8a39bde1303daf9f209a6cd5010af068f8445 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:39:50 -0700 Subject: [PATCH 32/36] netfilter: nf_tables: fix table flag updates jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 179d9ba5559a756f4322583388b3213fe4e391b0 upstream-diff Again some cruft around an upstream commit that Red Hat did not take - using 4.18.0-534 as the source of truth for the commit. The dormant flag need to be updated from the preparation phase, otherwise, two consecutive requests to dorm a table in the same batch might try to remove the same hooks twice, resulting in the following warning: hook not found, pf 3 num 0 WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 Modules linked in: CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") to restore the previous behaviour. However, there is still another problem: A batch containing a series of dorm-wakeup-dorm table and vice-versa also trigger the warning above since hook unregistration happens from the preparation phase, while hook registration occurs from the commit phase. To fix this problem, this patch adds two internal flags to annotate the original dormant flag status which are __NFT_TABLE_F_WAS_DORMANT and __NFT_TABLE_F_WAS_AWAKEN, to restore it from the abort path. The __NFT_TABLE_F_UPDATE bitmask allows to handle the dormant flag update with one single transaction. Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 179d9ba5559a756f4322583388b3213fe4e391b0) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 6 --- include/uapi/linux/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 59 ++++++++++++++++-------- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a578cf9fbff2..b658ac8fb817 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1449,16 +1449,10 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - u8 state; - u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_state(trans) \ - (((struct nft_trans_table *)trans->data)->state) -#define nft_trans_table_flags(trans) \ - (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 3e9dcd1da600..a59f04845b2d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -164,6 +164,7 @@ enum nft_hook_attributes { enum nft_table_flags { NFT_TABLE_F_DORMANT = 0x1, }; +#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT) /** * enum nft_table_attributes - nf_tables table netlink attributes diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9c0ef774b2ac..30519b8bec9b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -739,7 +739,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, + htonl(table->flags & NFT_TABLE_F_MASK)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) @@ -942,20 +943,22 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) static void nf_tables_table_disable(struct net *net, struct nft_table *table) { + table->flags &= ~NFT_TABLE_F_DORMANT; nft_table_disable(net, table, 0); + table->flags |= NFT_TABLE_F_DORMANT; } -enum { - NFT_TABLE_STATE_UNCHANGED = 0, - NFT_TABLE_STATE_DORMANT, - NFT_TABLE_STATE_WAKEUP -}; +#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) +#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) +#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; u32 flags; - int ret = 0; + int ret; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; @@ -974,21 +977,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; + ctx->table->flags |= NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) + ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) - nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { + ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret < 0) + goto err_register_hooks; + + ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; + } } - if (ret < 0) - goto err; - nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; -err: + +err_register_hooks: nft_trans_destroy(trans); return ret; } @@ -8230,10 +8239,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags = nft_trans_table_flags(trans); + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; } else { nft_clear(net, trans->ctx.table); } @@ -8455,9 +8468,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { nf_tables_table_disable(net, trans->ctx.table); - + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + } + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); From d72c9866044536a4c537d95ef32efba2be336396 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:44:56 -0700 Subject: [PATCH 33/36] netfilter: nf_tables: disable toggling dormant table state more than once jira VULN-597 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Florian Westphal commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 upstream-diff Onced again cherry-pick pulls in unrelated cruft, the patch itself is fine - as per usual the source of truth is 4.18.0-534 nft -f -< Cc: Bing-Jhong Billy Jheng Cc: info@starlabs.sg Signed-off-by: Florian Westphal (cherry picked from commit c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 30519b8bec9b..d6420b54aa5f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -970,6 +970,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; + /* No dormant off/on/off/on games in single transaction */ + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) From 6eca119ec92d1247ff94e8475adee22f06a87e97 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 23 Oct 2024 18:57:05 -0700 Subject: [PATCH 34/36] netfilter: nf_tables: fix memleak when more than 255 elements expired jira VULN-597 cve CVE-2023-52581 commit-author Florian Westphal commit cf5000a7787cbc10341091d37245a42c119d26c5 upstream-diff some cruft around GPL symbol exports When more than 255 elements expired we're supposed to switch to a new gc container structure. This never happens: u8 type will wrap before reaching the boundary and nft_trans_gc_space() always returns true. This means we recycle the initial gc container structure and lose track of the elements that came before. While at it, don't deref 'gc' after we've passed it to call_rcu. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal (cherry picked from commit cf5000a7787cbc10341091d37245a42c119d26c5) Signed-off-by: Greg Rose --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b658ac8fb817..e6d1d28352c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1494,7 +1494,7 @@ struct nft_trans_gc { struct net *net; struct nft_set *set; u32 seq; - u8 count; + u16 count; void *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d6420b54aa5f..090c9388d1d1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7955,12 +7955,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans) struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { + struct nft_set *set; + if (nft_trans_gc_space(gc)) return gc; + set = gc->set; nft_trans_gc_queue_work(gc); - return nft_trans_gc_alloc(gc->set, gc_seq, gfp); + return nft_trans_gc_alloc(set, gc_seq, gfp); } EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async); @@ -7977,15 +7980,18 @@ EXPORT_SYMBOL_GPL(nft_trans_gc_queue_async_done); struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) { + struct nft_set *set; + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) return NULL; if (nft_trans_gc_space(gc)) return gc; + set = gc->set; call_rcu(&gc->rcu, nft_trans_gc_trans_free); - return nft_trans_gc_alloc(gc->set, 0, gfp); + return nft_trans_gc_alloc(set, 0, gfp); } EXPORT_SYMBOL_GPL(nft_trans_gc_queue_sync); From 74e2238231cdff877275582cbaa2065b0f5876b3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Sat, 26 Oct 2024 11:00:57 -0700 Subject: [PATCH 35/36] netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout jira VULN-835 cve CVE-2024-26643 commit-author Pablo Neira Ayuso commit 552705a3650bbf46a22b1adedc1b04181490fc36 While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 552705a3650bbf46a22b1adedc1b04181490fc36) Signed-off-by: Greg Rose --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 090c9388d1d1..d1b46734044f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4517,6 +4517,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); From b7b6632c179f2e15ea74a6f16840efcf68aaac61 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 31 Oct 2024 16:11:36 -0700 Subject: [PATCH 36/36] netfilter: nf_tables: set backend .flush always succeeds jira VULN-835 subsystem-sync netfilter:nf_tables 4.18.0-534 commit-author Pablo Neira Ayuso commit 6509a2e410c3cb36c78a0a85c6102debe171337e upstream-diff - A conflict in nft_pipapo_flush resolved by favoring the 4.18.0-0-534 tagged code. .flush is always successful since this results from iterating over the set elements to toggle mark the element as inactive in the next generation. Signed-off-by: Pablo Neira Ayuso (cherry picked from commit 6509a2e410c3cb36c78a0a85c6102debe171337e) Signed-off-by: Greg Rose Conflicts: net/netfilter/nft_set_pipapo.c --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 9 +-------- net/netfilter/nft_set_bitmap.c | 4 +--- net/netfilter/nft_set_hash.c | 7 ++----- net/netfilter/nft_set_pipapo.c | 5 ++--- net/netfilter/nft_set_rbtree.c | 4 +--- 6 files changed, 8 insertions(+), 23 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6d1d28352c7..6f5b0acc79a9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -375,7 +375,7 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*flush)(const struct net *net, + void (*flush)(const struct net *net, const struct nft_set *set, void *priv); void (*remove)(const struct net *net, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d1b46734044f..d9346fab34f9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5894,17 +5894,13 @@ static int nft_flush_set(const struct nft_ctx *ctx, struct nft_set_elem *elem) { struct nft_trans *trans; - int err; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, sizeof(struct nft_trans_elem), GFP_ATOMIC); if (!trans) return -ENOMEM; - if (!set->ops->flush(ctx->net, set, elem->priv)) { - err = -ENOENT; - goto err1; - } + set->ops->flush(ctx->net, set, elem->priv); set->ndeact++; nft_setelem_data_deactivate(ctx->net, set, elem); @@ -5913,9 +5909,6 @@ static int nft_flush_set(const struct nft_ctx *ctx, list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err1: - kfree(trans); - return err; } static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b127826b5d7f..ba9acdb055cd 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -176,7 +176,7 @@ static void nft_bitmap_activate(const struct net *net, nft_set_elem_change_active(net, set, &be->ext); } -static bool nft_bitmap_flush(const struct net *net, +static void nft_bitmap_flush(const struct net *net, const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); @@ -188,8 +188,6 @@ static bool nft_bitmap_flush(const struct net *net, /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - - return true; } static void *nft_bitmap_deactivate(const struct net *net, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index dd915a16a37d..24467ab02ef2 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -194,14 +194,12 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_rhash_flush(const struct net *net, +static void nft_rhash_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_rhash_elem *he = priv; nft_set_elem_change_active(net, set, &he->ext); - - return true; } static void *nft_rhash_deactivate(const struct net *net, @@ -567,13 +565,12 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_hash_flush(const struct net *net, +static void nft_hash_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_hash_elem *he = priv; nft_set_elem_change_active(net, set, &he->ext); - return true; } static void *nft_hash_deactivate(const struct net *net, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 0c4983ad9621..63263bf94806 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1706,13 +1706,12 @@ static void *nft_pipapo_deactivate(const struct net *net, * * Return: true if element was found and deactivated. */ -static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, +static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, void *elem) { struct nft_pipapo_elem *e = elem; - return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext), - &e->ext); + nft_set_elem_change_active(net, set, &e->ext); } /** diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3d1e8f0d7a67..a2d0d18630ff 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -384,14 +384,12 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_flush(const struct net *net, +static void nft_rbtree_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_rbtree_elem *rbe = priv; nft_set_elem_change_active(net, set, &rbe->ext); - - return true; } static void *nft_rbtree_deactivate(const struct net *net,