From a6ebdb70b5028cf88c622850899358d62ef69631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 15:30:21 +0100 Subject: [PATCH 1/7] tree-wide: use -EBADF for fd initialization -1 was used everywhere, but -EBADF or -EBADFD started being used in various places. Let's make things consistent in the new style. Note that there are two candidates: EBADF 9 Bad file descriptor EBADFD 77 File descriptor in bad state Since we're initializating the fd, we're just assigning a value that means "no fd yet", so it's just a bad file descriptor, and the first errno fits better. If instead we had a valid file descriptor that became invalid because of some operation or state change, the other errno would fit better. In some places, initialization is dropped if unnecessary. Partially backported from commit https://github.com/systemd/systemd/commit/254d1313ae5a69c08c9b93032aaaf3d6083cfc07 (cherry picked from commit 254d1313ae5a69c08c9b93032aaaf3d6083cfc07) Related: RHEL-33815 --- src/udev/udev-node.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 9e34ea6b01f..1990282c754 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -281,7 +281,7 @@ static int stack_directory_update(sd_device *dev, int fd, bool add) { } static int stack_directory_open(const char *dirname) { - _cleanup_close_ int fd = -1; + _cleanup_close_ int fd = -EBADF; int r; assert(dirname); @@ -298,7 +298,7 @@ static int stack_directory_open(const char *dirname) { } static int stack_directory_lock(int dirfd) { - _cleanup_close_ int fd = -1; + _cleanup_close_ int fd = -EBADF; assert(dirfd >= 0); @@ -387,7 +387,7 @@ static int stack_directory_get_name(const char *slink, char **ret) { static int link_update(sd_device *dev, const char *slink, bool add) { _cleanup_free_ char *dirname = NULL, *devnode = NULL; - _cleanup_close_ int dirfd = -1, lockfd = -1; + _cleanup_close_ int dirfd = -EBADF, lockfd = -EBADF; int r; assert(dev); @@ -624,7 +624,7 @@ int udev_node_apply_permissions( OrderedHashmap *seclabel_list) { const char *devnode; - _cleanup_close_ int node_fd = -1; + _cleanup_close_ int node_fd = -EBADF; int r; assert(dev); @@ -654,7 +654,7 @@ int static_node_apply_permissions( char **tags) { _cleanup_free_ char *unescaped_filename = NULL; - _cleanup_close_ int node_fd = -1; + _cleanup_close_ int node_fd = -EBADF; const char *devnode; struct stat stats; int r; From 41f6c5305e746c9e656efa935a71f268983748c6 Mon Sep 17 00:00:00 2001 From: Franck Bui Date: Tue, 3 Jan 2023 18:34:11 +0100 Subject: [PATCH 2/7] udev: merge link_directory_lock() into link_directory_open() These 2 operations are inseparable. (cherry picked from commit c9032f910c02288d08cac68f266f869cb1f66f64) Related: RHEL-33815 --- src/udev/udev-node.c | 71 ++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 1990282c754..ef6f604478a 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -280,38 +280,6 @@ static int stack_directory_update(sd_device *dev, int fd, bool add) { return 1; /* Updated. */ } -static int stack_directory_open(const char *dirname) { - _cleanup_close_ int fd = -EBADF; - int r; - - assert(dirname); - - r = mkdir_parents(dirname, 0755); - if (r < 0) - return r; - - fd = open_mkdir_at(AT_FDCWD, dirname, O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW | O_RDONLY, 0755); - if (fd < 0) - return fd; - - return TAKE_FD(fd); -} - -static int stack_directory_lock(int dirfd) { - _cleanup_close_ int fd = -EBADF; - - assert(dirfd >= 0); - - fd = openat(dirfd, ".lock", O_CLOEXEC | O_NOFOLLOW | O_RDONLY | O_CREAT, 0600); - if (fd < 0) - return -errno; - - if (flock(fd, LOCK_EX) < 0) - return -errno; - - return TAKE_FD(fd); -} - size_t udev_node_escape_path(const char *src, char *dest, size_t size) { size_t i, j; uint64_t h; @@ -385,6 +353,35 @@ static int stack_directory_get_name(const char *slink, char **ret) { return 0; } +static int stack_directory_open(sd_device *dev, const char *dirname, int *ret_dirfd, int *ret_lockfd) { + _cleanup_close_ int dirfd = -EBADF, lockfd = -EBADF; + int r; + + assert(dev); + assert(dirname); + assert(ret_dirfd); + assert(ret_lockfd); + + r = mkdir_parents(dirname, 0755); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to create stack directory '%s': %m", dirname); + + dirfd = open_mkdir_at(AT_FDCWD, dirname, O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW | O_RDONLY, 0755); + if (dirfd < 0) + return log_device_debug_errno(dev, dirfd, "Failed to open stack directory '%s': %m", dirname); + + lockfd = openat(dirfd, ".lock", O_CLOEXEC | O_NOFOLLOW | O_RDONLY | O_CREAT, 0600); + if (lockfd < 0) + return log_device_debug_errno(dev, errno, "Failed to create lock file for stack directory '%s': %m", dirname); + + if (flock(lockfd, LOCK_EX) < 0) + return log_device_debug_errno(dev, errno, "Failed to place a lock on lock file for '%s': %m", dirname); + + *ret_dirfd = TAKE_FD(dirfd); + *ret_lockfd = TAKE_FD(lockfd); + return 0; +} + static int link_update(sd_device *dev, const char *slink, bool add) { _cleanup_free_ char *dirname = NULL, *devnode = NULL; _cleanup_close_ int dirfd = -EBADF, lockfd = -EBADF; @@ -397,13 +394,9 @@ static int link_update(sd_device *dev, const char *slink, bool add) { if (r < 0) return log_device_debug_errno(dev, r, "Failed to build stack directory name for '%s': %m", slink); - dirfd = stack_directory_open(dirname); - if (dirfd < 0) - return log_device_debug_errno(dev, dirfd, "Failed to open stack directory '%s': %m", dirname); - - lockfd = stack_directory_lock(dirfd); - if (lockfd < 0) - return log_device_debug_errno(dev, lockfd, "Failed to lock stack directory '%s': %m", dirname); + r = stack_directory_open(dev, dirname, &dirfd, &lockfd); + if (r < 0) + return r; r = stack_directory_update(dev, dirfd, add); if (r < 0) From 9d4f18e49d71549a225f105f86199743a9424c08 Mon Sep 17 00:00:00 2001 From: Franck Bui Date: Wed, 14 Dec 2022 19:04:16 +0100 Subject: [PATCH 3/7] udev: let stack_directory_open() convert a slink into a dirname itself We likely always want to open the directory via a slink. There's currently only one caller so it doesn't make any difference in practice but I think it's still nicer. No functional change. (cherry picked from commit 72a459adc4077047af20762e25a7a8739e81def1) Related: RHEL-33815 --- src/udev/udev-node.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index ef6f604478a..47761289334 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -11,6 +11,7 @@ #include "dirent-util.h" #include "escape.h" #include "fd-util.h" +#include "fileio.h" #include "format-util.h" #include "fs-util.h" #include "hexdecoct.h" @@ -175,14 +176,14 @@ static int stack_directory_read_one(int dirfd, const char *id, bool is_symlink, return 1; /* Updated */ } -static int stack_directory_find_prioritized_devnode(sd_device *dev, const char *dirname, bool add, char **ret) { +static int stack_directory_find_prioritized_devnode(sd_device *dev, int dirfd, bool add, char **ret) { _cleanup_closedir_ DIR *dir = NULL; _cleanup_free_ char *devnode = NULL; int r, priority = 0; const char *id; assert(dev); - assert(dirname); + assert(dirfd >= 0); assert(ret); /* Find device node of device with highest priority. This returns 1 if a device found, 0 if no @@ -204,7 +205,7 @@ static int stack_directory_find_prioritized_devnode(sd_device *dev, const char * return -ENOMEM; } - dir = opendir(dirname); + dir = xopendirat(dirfd, ".", O_NOFOLLOW); if (!dir) return -errno; @@ -223,9 +224,9 @@ static int stack_directory_find_prioritized_devnode(sd_device *dev, const char * if (!IN_SET(de->d_type, DT_LNK, DT_REG)) continue; - r = stack_directory_read_one(dirfd(dir), de->d_name, /* is_symlink = */ de->d_type == DT_LNK, &devnode, &priority); + r = stack_directory_read_one(dirfd, de->d_name, /* is_symlink = */ de->d_type == DT_LNK, &devnode, &priority); if (r < 0) { - log_debug_errno(r, "Failed to read '%s/%s', ignoring: %m", dirname, de->d_name); + log_debug_errno(r, "Failed to read '%s', ignoring: %m", de->d_name); continue; } } @@ -353,15 +354,20 @@ static int stack_directory_get_name(const char *slink, char **ret) { return 0; } -static int stack_directory_open(sd_device *dev, const char *dirname, int *ret_dirfd, int *ret_lockfd) { +static int stack_directory_open(sd_device *dev, const char *slink, int *ret_dirfd, int *ret_lockfd) { _cleanup_close_ int dirfd = -EBADF, lockfd = -EBADF; + _cleanup_free_ char *dirname = NULL; int r; assert(dev); - assert(dirname); + assert(slink); assert(ret_dirfd); assert(ret_lockfd); + r = stack_directory_get_name(slink, &dirname); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to build stack directory name for '%s': %m", slink); + r = mkdir_parents(dirname, 0755); if (r < 0) return log_device_debug_errno(dev, r, "Failed to create stack directory '%s': %m", dirname); @@ -375,7 +381,7 @@ static int stack_directory_open(sd_device *dev, const char *dirname, int *ret_di return log_device_debug_errno(dev, errno, "Failed to create lock file for stack directory '%s': %m", dirname); if (flock(lockfd, LOCK_EX) < 0) - return log_device_debug_errno(dev, errno, "Failed to place a lock on lock file for '%s': %m", dirname); + return log_device_debug_errno(dev, errno, "Failed to place a lock on lock file for %s: %m", dirname); *ret_dirfd = TAKE_FD(dirfd); *ret_lockfd = TAKE_FD(lockfd); @@ -383,26 +389,22 @@ static int stack_directory_open(sd_device *dev, const char *dirname, int *ret_di } static int link_update(sd_device *dev, const char *slink, bool add) { - _cleanup_free_ char *dirname = NULL, *devnode = NULL; _cleanup_close_ int dirfd = -EBADF, lockfd = -EBADF; + _cleanup_free_ char *devnode = NULL; int r; assert(dev); assert(slink); - r = stack_directory_get_name(slink, &dirname); - if (r < 0) - return log_device_debug_errno(dev, r, "Failed to build stack directory name for '%s': %m", slink); - - r = stack_directory_open(dev, dirname, &dirfd, &lockfd); + r = stack_directory_open(dev, slink, &dirfd, &lockfd); if (r < 0) return r; r = stack_directory_update(dev, dirfd, add); if (r < 0) - return log_device_debug_errno(dev, r, "Failed to update stack directory '%s': %m", dirname); + return log_device_debug_errno(dev, r, "Failed to update stack directory for '%s': %m", slink); - r = stack_directory_find_prioritized_devnode(dev, dirname, add, &devnode); + r = stack_directory_find_prioritized_devnode(dev, dirfd, add, &devnode); if (r < 0) return log_device_debug_errno(dev, r, "Failed to determine device node with the highest priority for '%s': %m", slink); if (r > 0) From 229f1f2e09d3af629bd72876e40d3f37ca88d376 Mon Sep 17 00:00:00 2001 From: Franck Bui Date: Tue, 3 Jan 2023 17:38:59 +0100 Subject: [PATCH 4/7] udev: return ENODEV if link_directory_read_one() can't find the devnode That's usually the errno code we return when a device cannot be found because it's been unplugged. (cherry picked from commit e8a54a4e759517aea2cb5c91f4195a24168054d0) Related: RHEL-33815 --- src/udev/udev-node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 47761289334..e1e16950509 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -133,7 +133,7 @@ static int stack_directory_read_one(int dirfd, const char *id, bool is_symlink, * symlink will be removed during processing the event. The check is just for shortening the * timespan that the symlink points to a non-existing device node. */ if (access(colon + 1, F_OK) < 0) - return -errno; + return -ENODEV; r = safe_atoi(buf, &tmp_prio); if (r < 0) From cf232509e12162fd3664502b88d568f46a0a88da Mon Sep 17 00:00:00 2001 From: Franck Bui Date: Wed, 4 Jan 2023 14:59:00 +0100 Subject: [PATCH 5/7] udev: simplify a bit stack_directory_find_prioritized_devnode() And make the new format the one we expect as it should replace the old one pretty quickly. (cherry picked from commit 6d90488acb35479965707d3be17f98abf74f13d5) Related: RHEL-33815 --- src/udev/udev-node.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index e1e16950509..1596497da89 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -104,7 +104,8 @@ static int node_symlink(sd_device *dev, const char *devnode, const char *slink) return 0; } -static int stack_directory_read_one(int dirfd, const char *id, bool is_symlink, char **devnode, int *priority) { +static int stack_directory_read_one(int dirfd, const char *id, char **devnode, int *priority) { + _cleanup_free_ char *buf = NULL; int tmp_prio, r; assert(dirfd >= 0); @@ -112,15 +113,14 @@ static int stack_directory_read_one(int dirfd, const char *id, bool is_symlink, assert(devnode); assert(priority); - if (is_symlink) { - _cleanup_free_ char *buf = NULL; - char *colon; + /* First, let's try to read the entry with the new format, which should replace the old format pretty + * quickly. */ - /* New format. The devnode and priority can be obtained from symlink. */ + r = readlinkat_malloc(dirfd, id, &buf); + if (r >= 0) { + char *colon; - r = readlinkat_malloc(dirfd, id, &buf); - if (r < 0) - return r; + /* With the new format, the devnode and priority can be obtained from symlink itself. */ colon = strchr(buf, ':'); if (!colon || colon == buf) @@ -146,7 +146,7 @@ static int stack_directory_read_one(int dirfd, const char *id, bool is_symlink, if (r < 0) return r; - } else { + } else if (r == -EINVAL) { /* Not a symlink ? try the old format */ _cleanup_(sd_device_unrefp) sd_device *dev = NULL; const char *val; @@ -170,7 +170,9 @@ static int stack_directory_read_one(int dirfd, const char *id, bool is_symlink, r = free_and_strdup(devnode, val); if (r < 0) return r; - } + + } else + return r == -ENOENT ? -ENODEV : r; *priority = tmp_prio; return 1; /* Updated */ @@ -213,22 +215,15 @@ static int stack_directory_find_prioritized_devnode(sd_device *dev, int dirfd, b if (r < 0) return r; - FOREACH_DIRENT_ALL(de, dir, break) { - if (de->d_name[0] == '.') - continue; + FOREACH_DIRENT(de, dir, break) { /* skip ourself */ if (streq(de->d_name, id)) continue; - if (!IN_SET(de->d_type, DT_LNK, DT_REG)) - continue; - - r = stack_directory_read_one(dirfd, de->d_name, /* is_symlink = */ de->d_type == DT_LNK, &devnode, &priority); - if (r < 0) { + r = stack_directory_read_one(dirfd, de->d_name, &devnode, &priority); + if (r < 0 && r != -ENODEV) log_debug_errno(r, "Failed to read '%s', ignoring: %m", de->d_name); - continue; - } } *ret = TAKE_PTR(devnode); From 623d77b6055ad70ff1545e531b2adf8961ad59ed Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 13 Jan 2023 13:25:43 +0900 Subject: [PATCH 6/7] udev-node: optimize device node symlink creation If multiple devices requested the same device node symlink with the same priority, then previously we read O(N^2) of files saved in /run/udev/links. This makes if the requested symlink already exists with equal or higher priority, then the symlink is kept, and skip to read all existing files, except for one related to the current device node, in /run/udev/links. Hence, the total amount of file read becomes O(N). This improves performance of testcase_simultaneous_events_2 added by the previous commit about 30%. Before (32.8 sec): ``` ## 3 iterations start: 11:13:44.690953163 ## 3 iterations end: 11:14:17.493974927 ``` After (23.8 sec): ``` ## 3 iterations start: 11:17:53.869938387 ## 3 iterations end: 11:18:17.624268345 ``` This is based on the idea and analysis by Franck Bui. Replaces #25839. Co-authored-by: Franck Bui (cherry picked from commit 331aa7aa15ee5dd12b369b276f575d521435eb52) Resolves: RHEL-33815 --- src/udev/udev-node.c | 90 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 1596497da89..6527551853c 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -383,10 +383,42 @@ static int stack_directory_open(sd_device *dev, const char *slink, int *ret_dirf return 0; } +static int node_get_current(const char *slink, int dirfd, char **ret_id, int *ret_prio) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + _cleanup_free_ char *id_dup = NULL; + const char *id; + int r; + + assert(slink); + assert(dirfd >= 0); + assert(ret_id); + + r = sd_device_new_from_devname(&dev, slink); + if (r < 0) + return r; + + r = device_get_device_id(dev, &id); + if (r < 0) + return r; + + id_dup = strdup(id); + if (!id_dup) + return -ENOMEM; + + if (ret_prio) { + r = stack_directory_read_one(dirfd, id, NULL, ret_prio); + if (r < 0) + return r; + } + + *ret_id = TAKE_PTR(id_dup); + return 0; +} + static int link_update(sd_device *dev, const char *slink, bool add) { + _cleanup_free_ char *current_id = NULL, *devnode = NULL; _cleanup_close_ int dirfd = -EBADF, lockfd = -EBADF; - _cleanup_free_ char *devnode = NULL; - int r; + int r, current_prio; assert(dev); assert(slink); @@ -395,10 +427,64 @@ static int link_update(sd_device *dev, const char *slink, bool add) { if (r < 0) return r; + r = node_get_current(slink, dirfd, ¤t_id, add ? ¤t_prio : NULL); + if (r < 0 && !ERRNO_IS_DEVICE_ABSENT(r)) + return log_device_debug_errno(dev, r, "Failed to get the current device node priority for '%s': %m", slink); + r = stack_directory_update(dev, dirfd, add); if (r < 0) return log_device_debug_errno(dev, r, "Failed to update stack directory for '%s': %m", slink); + if (current_id) { + const char *id; + + r = device_get_device_id(dev, &id); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to get device id: %m"); + + if (add) { + int prio; + + r = device_get_devlink_priority(dev, &prio); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to get devlink priority: %m"); + + if (streq(current_id, id)) { + if (current_prio <= prio) + /* The devlink is ours and already exists, and the new priority is + * equal or higher than the previous. Hence, it is not necessary to + * recreate it. */ + return 0; + + /* The devlink priority is downgraded. Another device may have a higher + * priority now. Let's find the device node with the highest priority. */ + } else { + if (current_prio >= prio) + /* The devlink with equal or higher priority already exists and is + * owned by another device. Hence, it is not necessary to recreate it. */ + return 0; + + /* This device has a higher priority than the current. Let's create the + * devlink to our device node. */ + return node_symlink(dev, NULL, slink); + } + + } else { + if (!streq(current_id, id)) + /* The devlink already exists and is owned by another device. Hence, it is + * not necessary to recreate it. */ + return 0; + + /* The current devlink is ours, and the target device will be removed. Hence, we need + * to search the device that has the highest priority. and update the devlink. */ + } + } else { + /* The requested devlink does not exist, or the target device does not exist and the devlink + * points to a non-existing device. Let's search the deivce that has the highest priority, + * and update the devlink. */ + ; + } + r = stack_directory_find_prioritized_devnode(dev, dirfd, add, &devnode); if (r < 0) return log_device_debug_errno(dev, r, "Failed to determine device node with the highest priority for '%s': %m", slink); From 3b3ba0ebdc4b9ba24d0d6db102983aa23f0fa7f1 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Mon, 30 Oct 2023 13:31:23 +0900 Subject: [PATCH 7/7] udev: update devlink with the newer device node even when priority is equivalent Several udev rules depends on the previous behavior, i.e. that udev replaces the devlink with the newer device node when the priority is equivalent. Let's relax the optimization done by 331aa7aa15ee5dd12b369b276f575d521435eb52. Follow-up for 331aa7aa15ee5dd12b369b276f575d521435eb52. Note, the offending commit drops O(N) of file reads per uevent, and this commit does not change the computational order. So, hopefully the performance impact of this change is small enough. Fixes #28141. (cherry picked from commit 7ec5ce5673d82818448ac36def7e0c7f7ca44805) Resolves: RHEL-33815 --- src/udev/udev-node.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 6527551853c..b8f2a14755a 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -459,13 +459,13 @@ static int link_update(sd_device *dev, const char *slink, bool add) { /* The devlink priority is downgraded. Another device may have a higher * priority now. Let's find the device node with the highest priority. */ } else { - if (current_prio >= prio) - /* The devlink with equal or higher priority already exists and is - * owned by another device. Hence, it is not necessary to recreate it. */ + if (current_prio > prio) + /* The devlink with a higher priority already exists and is owned by + * another device. Hence, it is not necessary to recreate it. */ return 0; - /* This device has a higher priority than the current. Let's create the - * devlink to our device node. */ + /* This device has the equal or a higher priority than the current. Let's + * create the devlink to our device node. */ return node_symlink(dev, NULL, slink); }