Skip to content

Commit

Permalink
Add mode option for moving to root cgroup
Browse files Browse the repository at this point in the history
Previously it was only possible to disable moving the process to the root
cgroup. With the addition of cgroup v2 things got more complicated,
especially when used with systemd logging.

Option -P is changed so that it takes an argument: on, off or auto. The new
default is auto, which first tries to set RR priority and moves the process
to the root cgroup only when setting the priority fails. "on" always moves
the process to the root cgroup. "off" never moves it.

Signed-off-by: Jan Friesse <[email protected]>
  • Loading branch information
jfriesse committed Jul 19, 2021
1 parent 133092c commit 7f47cd5
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 25 deletions.
34 changes: 23 additions & 11 deletions spausedd.8
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
.\"
.\" Author: Jan Friesse <[email protected]>
.\"
.Dd May 20, 2020
.Dd Jul 15, 2021
.Dt SPAUSEDD 8
.Os
.Sh NAME
Expand All @@ -25,6 +25,7 @@
.Nm
.Op Fl dDfhp
.Op Fl m Ar steal_threshold
.Op Fl P Ar mode
.Op Fl t Ar timeout
.Sh DESCRIPTION
The
Expand Down Expand Up @@ -66,16 +67,30 @@ Run on foreground (do not daemonize - default).
Show help.
.It Fl p
Do not set RR scheduler.
.It Fl P
Do not move process to root cgroup.
.It Fl m Ar steal_threshold
Set steal threshold percent. (default is 10 if kernel information is used and
100 if VMGuestLib is used).
.It Fl P Ar mode
Set mode of moving process to root cgroup. Default is
.Cm auto
which first checks if setting of the RR scheduler is enabled. If so, it tries to set the RR scheduler.
If this fails, the process is moved to the root cgroup and setting of the RR scheduler is retried.
The other options are
.Cm on
when process is always moved to root cgroup and
.Cm off
which makes
.Nm
never try to move its pid into the root cgroup.
It's worth noting that currently (May 3 2021) cgroup v2 doesn’t yet
support control of realtime processes and the cpu controller can only be
enabled when all RT processes are in the root cgroup. So when this option
is used together with systemd, it may be impossible to make systemd options
like CPUQuota working correctly until
support control of realtime processes and, for systems with CONFIG_RT_GROUP_SCHED
kernel option enabled, the cpu controller can only be
enabled when all RT processes are in the root cgroup. So when moving to
root cgroup is disabled and used together with systemd, it may be
impossible to make systemd options like CPUQuota work correctly until
.Nm
is stopped.
Also when this option is used together with cgroup v2 and systemd
Also when moving to root cgroup is used together with cgroup v2 and systemd
it makes it impossible (most of the time) for journald to add systemd-specific
metadata (most importantly _SYSTEMD_UNIT) properly, because
.Nm
Expand All @@ -91,9 +106,6 @@ Problem is even worse because journald caches pid for some time
(approx. 5 sec) so initial
.Nm
messages have correct metadata.
.It Fl m Ar steal_threshold
Set steal threshold percent. (default is 10 if kernel information is used and
100 if VMGuestLib is used).
.It Fl t Ar timeout
Set timeout value in milliseconds (default 200).
.El
Expand Down
65 changes: 51 additions & 14 deletions spausedd.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@
#define LOG_TRACE (LOG_DEBUG + 1)
#endif

/*
 * Policy for moving the process into the root cgroup (selected by -P mode)
 */
enum move_to_root_cgroup_mode {
	/* Never move the process to the root cgroup */
	MOVE_TO_ROOT_CGROUP_MODE_OFF = 0,
	/* Always move the process to the root cgroup */
	MOVE_TO_ROOT_CGROUP_MODE_ON = 1,
	/* Move only when setting of RR scheduler fails, then retry (default) */
	MOVE_TO_ROOT_CGROUP_MODE_AUTO = 2,
};

/*
* Globals
*/
Expand Down Expand Up @@ -254,8 +260,8 @@ utils_tty_detach(void)
close(devnull);
}

static void
utils_set_rr_scheduler(void)
static int
utils_set_rr_scheduler(int silent)
{
#ifdef _POSIX_PRIORITY_SCHEDULING
int max_prio;
Expand All @@ -264,19 +270,27 @@ utils_set_rr_scheduler(void)

max_prio = sched_get_priority_max(SCHED_RR);
if (max_prio == -1) {
log_perror(LOG_WARNING, "Can't get maximum SCHED_RR priority");
return ;
if (!silent) {
log_perror(LOG_WARNING, "Can't get maximum SCHED_RR priority");
}

return (-1);
}

param.sched_priority = max_prio;
res = sched_setscheduler(0, SCHED_RR, &param);
if (res == -1) {
log_perror(LOG_WARNING, "Can't set SCHED_RR");
return ;
if (!silent) {
log_perror(LOG_WARNING, "Can't set SCHED_RR");
}

return (-1);
}
#else
log_printf(LOG_WARNING, "Platform without sched_get_priority_min");
#endif

return (0);
}

static void
Expand Down Expand Up @@ -304,9 +318,13 @@ utils_move_to_root_cgroup(void)

return ;
} else {
log_printf(LOG_DEBUG, "Moving main pid to cgroup v2 root cgroup");

cgroup_task_fname = "/sys/fs/cgroup/cgroup.procs";
}
} else {
log_printf(LOG_DEBUG, "Moving main pid to cgroup v1 root cgroup");

cgroup_task_fname = "/sys/fs/cgroup/cpu/tasks";
}
(void)fclose(f);
Expand Down Expand Up @@ -657,15 +675,15 @@ poll_run(uint64_t timeout)
static void
usage(void)
{
printf("usage: %s [-dDfhpP] [-m steal_th] [-t timeout]\n", PROGRAM_NAME);
printf("usage: %s [-dDfhp] [-m steal_th] [-P mode] [-t timeout]\n", PROGRAM_NAME);
printf("\n");
printf(" -d Display debug messages\n");
printf(" -D Run on background - daemonize\n");
printf(" -f Run foreground - do not daemonize (default)\n");
printf(" -h Show help\n");
printf(" -p Do not set RR scheduler\n");
printf(" -P Do not move process to root cgroup\n");
printf(" -m steal_th Steal percent threshold\n");
printf(" -P mode Move process to root cgroup only when needed (auto), always (on) or never (off)\n");
printf(" -t timeout Set timeout value (default: %u)\n", DEFAULT_TIMEOUT);
}

Expand All @@ -677,16 +695,17 @@ main(int argc, char **argv)
long long int tmpll;
uint64_t timeout;
int set_prio;
int move_to_root_cgroup;
enum move_to_root_cgroup_mode move_to_root_cgroup;
int silent;

foreground = 1;
timeout = DEFAULT_TIMEOUT;
set_prio = 1;
move_to_root_cgroup = 1;
move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_AUTO;
max_steal_threshold = DEFAULT_MAX_STEAL_THRESHOLD;
max_steal_threshold_user_set = 0;

while ((ch = getopt(argc, argv, "dDfhpPm:t:")) != -1) {
while ((ch = getopt(argc, argv, "dDfhpm:P:t:")) != -1) {
switch (ch) {
case 'D':
foreground = 0;
Expand Down Expand Up @@ -717,7 +736,15 @@ main(int argc, char **argv)
exit(1);
break;
case 'P':
move_to_root_cgroup = 0;
if (strcasecmp(optarg, "on") == 0) {
move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_ON;
} else if (strcasecmp(optarg, "off") == 0) {
move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_OFF;
} else if (strcasecmp(optarg, "auto") == 0) {
move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_AUTO;
} else {
errx(1, "Move to root cgroup mode %s is invalid", optarg);
}
break;
case 'p':
set_prio = 0;
Expand All @@ -737,12 +764,22 @@ main(int argc, char **argv)

utils_mlockall();

if (move_to_root_cgroup) {
if (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_ON) {
utils_move_to_root_cgroup();
}

if (set_prio) {
utils_set_rr_scheduler();
silent = (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO);

if (utils_set_rr_scheduler(silent) == -1 &&
move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO) {
/*
* Try to move process to root cgroup and try set priority again
*/
utils_move_to_root_cgroup();

(void)utils_set_rr_scheduler(0);
}
}

signal_handlers_register();
Expand Down

0 comments on commit 7f47cd5

Please sign in to comment.