From c3987547dba4c201236cad4deb96d5caf0e963db Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Tue, 21 Nov 2017 00:25:50 +0530 Subject: [PATCH] Add support for detecting docker container Add support in port library for detecting if we are running in docker container or not. This is required for cgroup related APIs in port library, such as omrsysinfo_cgroup_get_memlimit, to work properly. This is because when running in a docker container, the cgroups listed in /proc/self/cgroup do not match the cgroups of the process in the container. /proc/self/cgroup actually shows the cgroups as seen from the host. Eg if a process is running in a docker container, its /proc/PID/cgroup on docker will show something like: $ cat /proc/8364/cgroup 11:cpuset:/docker/ 10:perf_event:/docker/ 9:memory:/dockera/ ... but the actual cgroup of the process would be "/". Once we figure out that we are running in docker container, then cgroup APIs in port library would use "/" as cgroup name for reading resource limits. Note that as and when docker containers start supporting cgroup namespace, then above mentioned mismatch would also be fixed. Following issues and PRs are related to adding support for cgroup namespace in docker container: https://github.com/opencontainers/runc/pull/1184 https://github.com/opencontainers/runtime-spec/issues/62 https://github.com/opencontainers/runtime-spec/issues/66 There is no direct mechanism to detect if we are running in docker container. Most reliable mechanism that I have come across is by checking the cgroup for PID 1 (by reading /proc/1/cgroup file) for all subsystems. If all cgroups are "/", then it indicates the process is running on host system. Else we conclude it to be running in a docker container. Note that this mechanism will break when docker containers start supporting cgroup namespace. Signed-off-by: Ashutosh Mehra --- port/common/omrport.c | 2 +- port/common/omrsysinfo.c | 1 + port/unix/omrsysinfo.c | 176 +++++++++++++++++++++++++++++---------- 3 files changed, 136 insertions(+), 43 deletions(-) diff --git a/port/common/omrport.c b/port/common/omrport.c index cd62a99dc2e..44369f4996e 100644 --- a/port/common/omrport.c +++ b/port/common/omrport.c @@ -261,7 +261,7 @@ static OMRPortLibrary MasterPortLibraryTable = { omrsysinfo_cgroup_get_enabled_subsystems, /* sysinfo_cgroup_get_enabled_subsystems */ omrsysinfo_cgroup_enable_subsystems, /* sysinfo_cgroup_enable_subsystems */ omrsysinfo_cgroup_are_subsystems_enabled, /* sysinfo_cgroup_are_subsystems_enabled */ - omrsysinfo_cgroup_get_memlimit, /* sysinfo_cgroup_get_memlimit */ + omrsysinfo_cgroup_get_memlimit, /* sysinfo_cgroup_get_memlimit */ omrport_init_library, /* port_init_library */ omrport_startup_library, /* port_startup_library */ omrport_create_library, /* port_create_library */ diff --git a/port/common/omrsysinfo.c b/port/common/omrsysinfo.c index 6fffdf75063..2eb983c7c70 100644 --- a/port/common/omrsysinfo.c +++ b/port/common/omrsysinfo.c @@ -902,6 +902,7 @@ omrsysinfo_cgroup_are_subsystems_enabled(struct OMRPortLibrary *portLibrary, uin * omrsysinfo_cgroup_enable_limits() before calling this function. * When the fuction returns OMRPORT_ERROR_SYSINFO_CGROUP_UNSUPPORTED_PLATFORM, * value of *limits is unspecified. + * Note that 'limit' parameter must not be NULL. * * @param[in] portLibrary pointer to OMRPortLibrary * @param[out] limit pointer to uint64_t which successful return contains memory limit imposed by cgroup diff --git a/port/unix/omrsysinfo.c b/port/unix/omrsysinfo.c index 87c4dd94392..b4d153e5583 100644 --- a/port/unix/omrsysinfo.c +++ b/port/unix/omrsysinfo.c @@ -256,8 +256,17 @@ struct { #if defined(LINUX) #define OMR_CGROUP_V1_MOUNT_POINT "/sys/fs/cgroup" +#define ROOT_CGROUP "/" -/* Currently 12 subsystems/resource controllers are defined. +/* An entry in /proc//cgroup is of following form: + * :[,]*: + * + * An example: + * 7:cpuacct,cpu:/mycgroup + */ +#define PROC_PID_CGROUP_ENTRY_FORMAT "%d:%[^:]:%s" + +/* Currently 12 subsystems or resource controllers are defined. */ typedef enum OMRCgroupSubsystem { INVALID_SUBSYSTEM = -1, @@ -331,12 +340,14 @@ static uint16_t getPhysicalMemory(); #endif /* defined(OMRZTPF) */ #if defined(LINUX) && !defined(OMRZTPF) +static BOOLEAN isCgroupV1Available(struct OMRPortLibrary *portLibrary); static void freeCgroupEntries(struct OMRPortLibrary *portLibrary, OMRCgroupEntry *cgEntryList); static char * getCgroupNameForSubsystem(struct OMRPortLibrary *portLibrary, OMRCgroupEntry *cgEntryList, const char *subsystem); static int32_t addCgroupEntry(struct OMRPortLibrary *portLibrary, OMRCgroupEntry **cgEntryList, int32_t hierId, const char *subsystem, const char *cgroupName); -static int32_t readCgroupFile(struct OMRPortLibrary *portLibrary, int pid, OMRCgroupEntry **cgroupEntryList, uint64_t *availableSubsystems); +static int32_t readCgroupFile(struct OMRPortLibrary *portLibrary, int pid, BOOLEAN inContainer, OMRCgroupEntry **cgroupEntryList, uint64_t *availableSubsystems); static OMRCgroupSubsystem getCgroupSubsystemFromFlag(uint64_t subsystemFlag); static int32_t readCgroupSubsystemFile(struct OMRPortLibrary *portLibrary, uint64_t subsystemFlag, const char *fileName, int32_t numItemsToRead, const char *format, ...); +static int32_t isRunningInContainer(struct OMRPortLibrary *portLibrary, BOOLEAN *inContainer); #endif /* defined(LINUX) */ @@ -1701,7 +1712,7 @@ omrsysinfo_startup(struct OMRPortLibrary *portLibrary) #if defined(LINUX) && !defined(OMRZTPF) PPG_cgroupEntryList = NULL; /* To handle the case where multiple port libraries are started and shutdown, - * as done by some fvtests (eg fvtest/porttest/j9portTest.cpp) that create fake portlibrary + * as done by some fvtests (eg fvtest/porttest/j9portTest.cpp) that create fake portlibrary * to test its management and lifecycle, * we need to ensure globals like cgroupEntryListMonitor are initialized and destroyed only once. */ @@ -3314,6 +3325,34 @@ omrsysinfo_os_kernel_info(struct OMRPortLibrary *portLibrary, struct OMROSKernel #if defined(LINUX) +/** + * @internal + * Checks if cgroup v1 system is available + * + * @param[in] portLibrary pointer to OMRPortLibrary + * + * @return TRUE if cgroup v1 system is available, FALSE otherwise + */ +static BOOLEAN +isCgroupV1Available(struct OMRPortLibrary *portLibrary) +{ + struct statfs buf = {0}; + int32_t rc = 0; + BOOLEAN result = TRUE; + + /* If tmpfs is mounted on /sys/fs/cgroup, then it indicates cgroup v1 system is available */ + rc = statfs(OMR_CGROUP_V1_MOUNT_POINT, &buf); + if (0 != rc) { + portLibrary->error_set_last_error(portLibrary, errno, OMRPORT_ERROR_SYSINFO_SYS_FS_CGROUP_STATFS_FAILED); + result = FALSE; + } else if (TMPFS_MAGIC != buf.f_type) { + portLibrary->error_set_last_error_with_message_format(portLibrary, OMRPORT_ERROR_SYSINFO_SYS_FS_CGROUP_TMPFS_NOT_MOUNTED, "tmpfs is not mounted on " OMR_CGROUP_V1_MOUNT_POINT); + result = FALSE; + } + + return result; +} + /** * @internal * Free resources allocated for OMRCgroupEntry @@ -3420,21 +3459,20 @@ addCgroupEntry(struct OMRPortLibrary *portLibrary, OMRCgroupEntry **cgEntryList, * * @param[in] portLibrary pointer to OMRPortLibrary * @param[in] pid process id + * @param[in] inContainer if set to TRUE then ignore cgroup in /proc//cgroup and use ROOT_CGROUP instead * @param[out] cgroupEntryList pointer to OMRCgroupEntry *. On successful return, *cgroupEntry * points to a circular linked list. Each element of the list is populated based on the contents * of /proc//cgroup file. - * @param[out] availableSubsystems on successful return, contains bitwise-OR of flags of type OMR_CGROUP_SUBSYSTEMS_* + * @param[out] availableSubsystems on successful return, contains bitwise-OR of flags of type OMR_CGROUP_SUBSYSTEMS_* * indicating the subsystems available for use * * returns 0 on success, negative code on error */ static int32_t -readCgroupFile(struct OMRPortLibrary *portLibrary, int pid, OMRCgroupEntry **cgroupEntryList, uint64_t *availableSubsystems) +readCgroupFile(struct OMRPortLibrary *portLibrary, int pid, BOOLEAN inContainer, OMRCgroupEntry **cgroupEntryList, uint64_t *availableSubsystems) { - char cgroup[PATH_MAX]; + char cgroupFilePath[PATH_MAX]; uintptr_t requiredSize = 0; - /* This array should be large enough to read names of all subsystems. 1024 should be enough. */ - char subsystems[1024]; FILE *cgroupFile = NULL; OMRCgroupEntry *cgEntryList = NULL; uint64_t available = 0; @@ -3444,34 +3482,29 @@ readCgroupFile(struct OMRPortLibrary *portLibrary, int pid, OMRCgroupEntry **cgr requiredSize = portLibrary->str_printf(portLibrary, NULL, (uint32_t)-1, "/proc/%d/cgroup", pid); Assert_PRT_true(requiredSize <= PATH_MAX); - portLibrary->str_printf(portLibrary, cgroup, sizeof(cgroup), "/proc/%d/cgroup", pid); - cgroupFile = fopen(cgroup, "r"); + portLibrary->str_printf(portLibrary, cgroupFilePath, sizeof(cgroupFilePath), "/proc/%d/cgroup", pid); + + /* Even if 'inContainer' is TRUE, we need to parse the cgroup file to get the list of subsystems */ + cgroupFile = fopen(cgroupFilePath, "r"); if (NULL == cgroupFile) { rc = portLibrary->error_set_last_error(portLibrary, errno, OMRPORT_ERROR_SYSINFO_PROCESS_CGROUP_FILE_FOPEN_FAILED); goto _end; } while (0 == feof(cgroupFile)) { + char cgroup[PATH_MAX]; + /* This array should be large enough to read names of all subsystems. 1024 should be enough based on current supported subsystems. */ + char subsystems[1024]; char *cursor = NULL; char *separator = NULL; int32_t hierId = -1; - /* Following is the description of /proc//cgroup copied from 'man' page for proc: - * - * Each entry in /proc//cgroup is of type: - * 5:cpuacct,cpu,cpuset:/daemons - * The colon-separated fields are, from left to right: - * 1. hierarchy ID number - * 2. set of subsystems bound to the hierarchy - * 3. control group in the hierarchy to which the process belongs - */ - rc = fscanf(cgroupFile, "%d:%[^:]:%s", &hierId, subsystems, cgroup); + rc = fscanf(cgroupFile, PROC_PID_CGROUP_ENTRY_FORMAT, &hierId, subsystems, cgroup); /* Ensure we didn't overflow */ Assert_PRT_true(strlen(subsystems) < 1024); Assert_PRT_true(strlen(cgroup) < PATH_MAX); if (EOF == rc) { - rc = 0; break; } else if (3 != rc) { rc = portLibrary->error_set_last_error_with_message_format(portLibrary, OMRPORT_ERROR_SYSINFO_PROCESS_CGROUP_FILE_READ_FAILED, "unexpcted format of /proc/%d/cgroup file", pid); @@ -3490,7 +3523,12 @@ readCgroupFile(struct OMRPortLibrary *portLibrary, int pid, OMRCgroupEntry **cgr if (OMR_ARE_NO_BITS_SET(available, supportedSubsystems[i].flag) && !strcmp(cursor, supportedSubsystems[i].name) ) { - rc = addCgroupEntry(portLibrary, &cgEntryList, hierId, cursor, cgroup); + const char *cgroupToUse = cgroup; + + if (TRUE == inContainer) { + cgroupToUse = ROOT_CGROUP; + } + rc = addCgroupEntry(portLibrary, &cgEntryList, hierId, cursor, cgroupToUse); if (0 != rc) { goto _end; } @@ -3617,6 +3655,63 @@ readCgroupSubsystemFile(struct OMRPortLibrary *portLibrary, uint64_t subsystemFl return rc; } +/** + * Checks if the process is running inside container + * + * @param[in] portLibrary pointer to OMRPortLibrary + * @param[out] inContainer pointer to BOOLEAN which on successful return indicates if the process is running in container or not + * + * @return 0 on success, otherwise negative error code + */ +static int32_t +isRunningInContainer(struct OMRPortLibrary *portLibrary, BOOLEAN *inContainer) +{ + int32_t rc = 0; + + /* Assume we are not in container */ + *inContainer = FALSE; + + if (isCgroupV1Available(portLibrary)) { + /* Read PID 1's cgroup file /proc/1/cgroup and check cgroup name for each subsystem. + * If cgroup name for each subsystem points to the root cgroup "/", + * then the process is not running in a container. + * For any other cgroup name, assume we are in a container. + */ + FILE *cgroupFile = fopen("/proc/1/cgroup", "r"); + + if (NULL == cgroupFile) { + rc = portLibrary->error_set_last_error(portLibrary, errno, OMRPORT_ERROR_SYSINFO_PROCESS_CGROUP_FILE_FOPEN_FAILED); + goto _end; + } + + while (0 == feof(cgroupFile)) { + char cgroup[PATH_MAX]; + char subsystems[1024]; + int32_t hierId = -1; + + rc = fscanf(cgroupFile, PROC_PID_CGROUP_ENTRY_FORMAT, &hierId, subsystems, cgroup); + /* Ensure we didn't overflow */ + Assert_PRT_true(strlen(subsystems) < 1024); + Assert_PRT_true(strlen(cgroup) < PATH_MAX); + + if (EOF == rc) { + break; + } else if (3 != rc) { + rc = portLibrary->error_set_last_error_with_message_format(portLibrary, OMRPORT_ERROR_SYSINFO_PROCESS_CGROUP_FILE_READ_FAILED, "unexpected format of /proc/1/cgroup file"); + goto _end; + } + + if (0 != strcmp(ROOT_CGROUP, cgroup)) { + *inContainer = TRUE; + break; + } + } + rc = 0; + } +_end: + return rc; +} + #endif /* defined(LINUX) */ BOOLEAN @@ -3627,25 +3722,21 @@ omrsysinfo_cgroup_is_system_available(struct OMRPortLibrary *portLibrary) int32_t rc = OMRPORT_ERROR_SYSINFO_CGROUP_UNSUPPORTED_PLATFORM; if (NULL == PPG_cgroupEntryList) { - struct statfs buf = {0}; + if (isCgroupV1Available(portLibrary)) { + BOOLEAN inContainer = FALSE; - /* If tmpfs is mounted on /sys/fs/cgroup, then it indicates cgroup v1 system is available */ - rc = statfs(OMR_CGROUP_V1_MOUNT_POINT, &buf); - if (0 != rc) { - rc = portLibrary->error_set_last_error(portLibrary, errno, OMRPORT_ERROR_SYSINFO_SYS_FS_CGROUP_STATFS_FAILED); - goto _end; - } else if (TMPFS_MAGIC != buf.f_type) { - rc = portLibrary->error_set_last_error_with_message_format(portLibrary, OMRPORT_ERROR_SYSINFO_SYS_FS_CGROUP_TMPFS_NOT_MOUNTED, "tmpfs is not mounted on " OMR_CGROUP_V1_MOUNT_POINT); - goto _end; - } - - omrthread_monitor_enter(cgroupEntryListMonitor); - if (NULL == PPG_cgroupEntryList) { - rc = readCgroupFile(portLibrary, getpid(), &PPG_cgroupEntryList, &PPG_cgroupSubsystemsAvailable); - } - omrthread_monitor_exit(cgroupEntryListMonitor); - if (0 != rc) { - goto _end; + rc = isRunningInContainer(portLibrary, &inContainer); + if (0 != rc) { + goto _end; + } + omrthread_monitor_enter(cgroupEntryListMonitor); + if (NULL == PPG_cgroupEntryList) { + rc = readCgroupFile(portLibrary, getpid(), inContainer, &PPG_cgroupEntryList, &PPG_cgroupSubsystemsAvailable); + } + omrthread_monitor_exit(cgroupEntryListMonitor); + if (0 != rc) { + goto _end; + } } } else { rc = 0; @@ -3720,13 +3811,14 @@ int32_t omrsysinfo_cgroup_get_memlimit(struct OMRPortLibrary *portLibrary, uint64_t *limit) { int32_t rc = OMRPORT_ERROR_SYSINFO_CGROUP_UNSUPPORTED_PLATFORM; + + Assert_PRT_true(NULL != limit); + #if defined(LINUX) && !defined(OMRZTPF) uint64_t cgroupMemLimit = 0; uint64_t physicalMemLimit = 0; int32_t numItemsToRead = 1; /* memory.limit_in_bytes file contains only one integer value */ - Assert_PRT_true(NULL != limit); - rc = readCgroupSubsystemFile(portLibrary, OMR_CGROUP_SUBSYSTEM_MEMORY, "memory.limit_in_bytes", numItemsToRead, "%lu", &cgroupMemLimit); if (0 != rc) {