Skip to content

Commit ac1d28e

Browse files
committed
exec: Find ARG_MAX with binary search after E2BIG
Previously we would shrink the command by one argument at a time until a successful execution. This is okay if the ARG_MAX estimate is just a little bit off, but is terribly slow when it's off by a lot. One situation where it's very far off is when a 32-bit build of bfs launches a 64-bit binary. In this case, bfs thinks the argv pointers are 4 bytes, while they actually take up 8 bytes. The performance is quite bad: $ time ./bfs-64 ~/code/linux -exec echo {} + >/dev/null ./bfs-64 ~/code/linux -exec echo {} + > /dev/null 0.03s user 0.07s system 99% cpu 0.095 total $ time ./bfs-32 ~/code/linux -exec echo {} + >/dev/null ./bfs-32 ~/code/linux -exec echo {} + > /dev/null 0.08s user 10.33s system 100% cpu 10.390 total After this change, performance is much better: $ time ./bfs-32 ~/code/linux -exec echo {} + >/dev/null ./bfs-32 ~/code/linux -exec echo {} + > /dev/null 0.03s user 0.08s system 99% cpu 0.110 total
1 parent 9b50ada commit ac1d28e

File tree

2 files changed

+67
-14
lines changed

2 files changed

+67
-14
lines changed

exec.c

+65-14
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, .
5959
va_end(args);
6060
}
6161

62-
extern char **environ;
63-
6462
/** Determine the size of a single argument, for comparison to arg_max. */
6563
static size_t bfs_exec_arg_size(const char *arg) {
6664
return sizeof(arg) + strlen(arg) + 1;
@@ -79,6 +77,7 @@ static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) {
7977
}
8078

8179
// We have to share space with the environment variables
80+
extern char **environ;
8281
for (char **envp = environ; *envp; ++envp) {
8382
arg_max -= bfs_exec_arg_size(*envp);
8483
}
@@ -131,6 +130,7 @@ struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs
131130
execbuf->argv_cap = 0;
132131
execbuf->arg_size = 0;
133132
execbuf->arg_max = 0;
133+
execbuf->arg_min = 0;
134134
execbuf->wd_fd = -1;
135135
execbuf->wd_path = NULL;
136136
execbuf->wd_len = 0;
@@ -183,6 +183,7 @@ struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs
183183
execbuf->argc = execbuf->tmpl_argc - 1;
184184

185185
execbuf->arg_max = bfs_exec_arg_max(execbuf);
186+
execbuf->arg_min = execbuf->arg_max;
186187
}
187188

188189
return execbuf;
@@ -461,6 +462,54 @@ static bool bfs_exec_args_remain(const struct bfs_exec *execbuf) {
461462
return execbuf->argc >= execbuf->tmpl_argc;
462463
}
463464

465+
/** Compute the current ARG_MAX estimate for binary search. */
466+
static size_t bfs_exec_estimate_max(const struct bfs_exec *execbuf) {
467+
size_t min = execbuf->arg_min;
468+
size_t max = execbuf->arg_max;
469+
return min + (max - min)/2;
470+
}
471+
472+
/** Update the ARG_MAX lower bound from a successful execution. */
473+
static void bfs_exec_update_min(struct bfs_exec *execbuf) {
474+
if (execbuf->arg_size > execbuf->arg_min) {
475+
execbuf->arg_min = execbuf->arg_size;
476+
477+
// Don't let min exceed max
478+
if (execbuf->arg_min > execbuf->arg_max) {
479+
execbuf->arg_min = execbuf->arg_max;
480+
}
481+
482+
size_t estimate = bfs_exec_estimate_max(execbuf);
483+
bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n",
484+
execbuf->arg_min, execbuf->arg_max, estimate);
485+
}
486+
}
487+
488+
/** Update the ARG_MAX upper bound from a failed execution. */
489+
static size_t bfs_exec_update_max(struct bfs_exec *execbuf) {
490+
bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n");
491+
492+
if (execbuf->arg_size < execbuf->arg_max) {
493+
execbuf->arg_max = execbuf->arg_size;
494+
495+
// Don't let min exceed max
496+
if (execbuf->arg_min > execbuf->arg_max) {
497+
execbuf->arg_min = execbuf->arg_max;
498+
}
499+
}
500+
501+
if (execbuf->arg_size <= execbuf->arg_min) {
502+
// Lower bound was wrong, restart binary search.
503+
execbuf->arg_min = 0;
504+
}
505+
506+
// Binary search for a more precise bound
507+
size_t estimate = bfs_exec_estimate_max(execbuf);
508+
bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n",
509+
execbuf->arg_min, execbuf->arg_max, estimate);
510+
return estimate;
511+
}
512+
464513
/** Execute the pending command from a BFS_EXEC_MULTI execbuf. */
465514
static int bfs_exec_flush(struct bfs_exec *execbuf) {
466515
int ret = 0, error = 0;
@@ -470,30 +519,31 @@ static int bfs_exec_flush(struct bfs_exec *execbuf) {
470519
execbuf->argv[execbuf->argc] = NULL;
471520
ret = bfs_exec_spawn(execbuf);
472521
error = errno;
473-
if (ret == 0 || error != E2BIG) {
522+
if (ret == 0) {
523+
bfs_exec_update_min(execbuf);
524+
break;
525+
} else if (error != E2BIG) {
474526
break;
475527
}
476528

477529
// Try to recover from E2BIG by trying fewer and fewer arguments
478530
// until they fit
479-
bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n");
480-
execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1];
481-
execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]);
482-
--execbuf->argc;
531+
size_t new_max = bfs_exec_update_max(execbuf);
532+
while (execbuf->arg_size > new_max) {
533+
execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1];
534+
execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]);
535+
--execbuf->argc;
536+
}
483537
}
484-
size_t new_argc = execbuf->argc;
485-
size_t new_size = execbuf->arg_size;
486538

539+
size_t new_argc = execbuf->argc;
487540
for (size_t i = execbuf->tmpl_argc - 1; i < new_argc; ++i) {
488541
free(execbuf->argv[i]);
489542
}
490543
execbuf->argc = execbuf->tmpl_argc - 1;
491544
execbuf->arg_size = 0;
492545

493546
if (new_argc < orig_argc) {
494-
execbuf->arg_max = new_size;
495-
bfs_exec_debug(execbuf, "ARG_MAX: %zu\n", execbuf->arg_max);
496-
497547
// If we recovered from E2BIG, there are unused arguments at the
498548
// end of the list
499549
for (size_t i = new_argc + 1; i <= orig_argc; ++i) {
@@ -526,10 +576,11 @@ static bool bfs_exec_changed_dirs(const struct bfs_exec *execbuf, const struct B
526576

527577
/** Check if we need to flush the execbuf because we're too big. */
528578
static bool bfs_exec_would_overflow(const struct bfs_exec *execbuf, const char *arg) {
579+
size_t arg_max = bfs_exec_estimate_max(execbuf);
529580
size_t next_size = execbuf->arg_size + bfs_exec_arg_size(arg);
530-
if (next_size > execbuf->arg_max) {
581+
if (next_size > arg_max) {
531582
bfs_exec_debug(execbuf, "Command size (%zu) would exceed maximum (%zu), executing buffered command\n",
532-
next_size, execbuf->arg_max);
583+
next_size, arg_max);
533584
return true;
534585
}
535586

exec.h

+2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ struct bfs_exec {
6363
size_t arg_size;
6464
/** Maximum arg_size before E2BIG. */
6565
size_t arg_max;
66+
/** Lower bound for arg_max. */
67+
size_t arg_min;
6668

6769
/** A file descriptor for the working directory, for BFS_EXEC_CHDIR. */
6870
int wd_fd;

0 commit comments

Comments
 (0)