From 26ae1b03568b1e8a8134dd7e58c42ef9b8a5b9e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Jan 2024 21:02:02 -1000 Subject: [PATCH] scx: Make scx_exit_info fields dynamically allocated scx_exit_info currently embeds all message buffers. This isn't great in that it makes the size of the structs a part of the ABI and wastes memory when scx is not in use. As the contents are accessed with bpf_probe_read_kernel_str(), the buffers can be moved outside the struct. This change requires the scx scheduler to be rebuilt but doesn't require code changes. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 17 ++++----- kernel/sched/ext.c | 80 +++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 29 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index ae552129931a9..f4870bd5cd073 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -18,9 +18,6 @@ struct cgroup; enum scx_consts { SCX_OPS_NAME_LEN = 128, - SCX_EXIT_REASON_LEN = 128, - SCX_EXIT_BT_LEN = 64, - SCX_EXIT_MSG_LEN = 1024, SCX_SLICE_DFL = 20 * NSEC_PER_MSEC, SCX_SLICE_INF = U64_MAX, /* infinite, implies nohz */ @@ -74,14 +71,16 @@ enum scx_exit_kind { struct scx_exit_info { /* %SCX_EXIT_* - broad category of the exit reason */ enum scx_exit_kind kind; + /* textual representation of the above */ - char reason[SCX_EXIT_REASON_LEN]; - /* number of entries in the backtrace */ - u32 bt_len; + const char *reason; + /* backtrace if exiting due to an error */ - unsigned long bt[SCX_EXIT_BT_LEN]; - /* extra message */ - char msg[SCX_EXIT_MSG_LEN]; + unsigned long *bt; + u32 bt_len; + + /* informational message */ + char *msg; }; /* sched_ext_ops.flags */ diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 52457f65aa572..4373dac429ea3 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -18,6 +18,9 @@ enum scx_internal_consts { SCX_DSP_DFL_MAX_BATCH = 32, SCX_DSP_MAX_LOOPS = 32, SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ, + + SCX_EXIT_BT_LEN = 64, + SCX_EXIT_MSG_LEN = 1024, }; enum scx_ops_enable_state { @@ -113,7 +116,7 @@ struct static_key_false scx_has_op[SCX_OPI_END] = { [0 ... SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT }; static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE); -static struct scx_exit_info scx_exit_info; +static struct scx_exit_info *scx_exit_info; static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0); @@ -3207,14 +3210,39 @@ static void scx_ops_bypass(bool bypass) } } +static void free_exit_info(struct scx_exit_info *ei) +{ + kfree(ei->msg); + kfree(ei->bt); + kfree(ei); +} + +static struct scx_exit_info *alloc_exit_info(void) +{ + struct scx_exit_info *ei; + + ei = kzalloc(sizeof(*ei), GFP_KERNEL); + if (!ei) + return NULL; + + ei->bt = kcalloc(sizeof(ei->bt[0]), SCX_EXIT_BT_LEN, GFP_KERNEL); + ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL); + + if (!ei->bt || !ei->msg) { + free_exit_info(ei); + return NULL; + } + + return ei; +} + static void scx_ops_disable_workfn(struct kthread_work *work) { - struct scx_exit_info *ei = &scx_exit_info; + struct scx_exit_info *ei = scx_exit_info; struct scx_task_iter sti; struct task_struct *p; struct rhashtable_iter rht_iter; struct scx_dispatch_q *dsq; - const char *reason; int i, kind; kind = atomic_read(&scx_exit_kind); @@ -3229,32 +3257,30 @@ static void scx_ops_disable_workfn(struct kthread_work *work) if (atomic_try_cmpxchg(&scx_exit_kind, &kind, SCX_EXIT_DONE)) break; } + ei->kind = kind; cancel_delayed_work_sync(&scx_watchdog_work); - switch (kind) { + switch (ei->kind) { case SCX_EXIT_UNREG: - reason = "BPF scheduler unregistered"; + ei->reason = "BPF scheduler unregistered"; break; case SCX_EXIT_SYSRQ: - reason = "disabled by sysrq-S"; + ei->reason = "disabled by sysrq-S"; break; case SCX_EXIT_ERROR: - reason = "runtime error"; + ei->reason = "runtime error"; break; case SCX_EXIT_ERROR_BPF: - reason = "scx_bpf_error"; + ei->reason = "scx_bpf_error"; break; case SCX_EXIT_ERROR_STALL: - reason = "runnable task stall"; + ei->reason = "runnable task stall"; break; default: - reason = ""; + ei->reason = ""; } - ei->kind = kind; - strlcpy(ei->reason, reason, sizeof(ei->reason)); - /* guarantee forward progress by bypassing scx_ops */ scx_ops_bypass(true); @@ -3264,7 +3290,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work) break; case SCX_OPS_DISABLED: pr_warn("sched_ext: ops error detected without ops (%s)\n", - scx_exit_info.msg); + scx_exit_info->msg); WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_DISABLED) != SCX_OPS_DISABLING); goto done; @@ -3365,6 +3391,9 @@ static void scx_ops_disable_workfn(struct kthread_work *work) scx_dsp_buf = NULL; scx_dsp_max_batch = 0; + free_exit_info(scx_exit_info); + scx_exit_info = NULL; + mutex_unlock(&scx_ops_enable_mutex); WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_DISABLED) != @@ -3411,17 +3440,17 @@ static DEFINE_IRQ_WORK(scx_ops_error_irq_work, scx_ops_error_irq_workfn); __printf(2, 3) void scx_ops_error_kind(enum scx_exit_kind kind, const char *fmt, ...) { - struct scx_exit_info *ei = &scx_exit_info; + struct scx_exit_info *ei = scx_exit_info; int none = SCX_EXIT_NONE; va_list args; if (!atomic_try_cmpxchg(&scx_exit_kind, &none, kind)) return; - ei->bt_len = stack_trace_save(ei->bt, ARRAY_SIZE(ei->bt), 1); + ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1); va_start(args, fmt); - vscnprintf(ei->msg, ARRAY_SIZE(ei->msg), fmt, args); + vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args); va_end(args); irq_work_queue(&scx_ops_error_irq_work); @@ -3474,6 +3503,12 @@ static int scx_ops_enable(struct sched_ext_ops *ops) goto err; } + scx_exit_info = alloc_exit_info(); + if (!scx_exit_info) { + ret = -ENOMEM; + goto err; + } + scx_root_kobj = kzalloc(sizeof(*scx_root_kobj), GFP_KERNEL); if (!scx_root_kobj) { ret = -ENOMEM; @@ -3494,7 +3529,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops) WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_PREPPING) != SCX_OPS_DISABLED); - memset(&scx_exit_info, 0, sizeof(scx_exit_info)); atomic_set(&scx_exit_kind, SCX_EXIT_NONE); scx_warned_zero_slice = false; @@ -3706,8 +3740,14 @@ static int scx_ops_enable(struct sched_ext_ops *ops) return 0; err: - kfree(scx_root_kobj); - scx_root_kobj = NULL; + if (scx_root_kobj) { + kfree(scx_root_kobj); + scx_root_kobj = NULL; + } + if (scx_exit_info) { + free_exit_info(scx_exit_info); + scx_exit_info = NULL; + } mutex_unlock(&scx_ops_enable_mutex); return ret;