From 0bc428a57b87643d1092080d061b91396395f01f Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Sun, 2 Jun 2019 18:39:05 +0900 Subject: [PATCH] kubelet: new cgroup driver: "none" The "none" driver is used for running "rootless" mode on a host that does not support cgroup v2. This commit is specific to Usernetes and isn't going to be proposed to the Kubernetes upstream. So, there is no FeatureFlag for gating this driver. Signed-off-by: Akihiro Suda --- cmd/kubelet/app/options/options.go | 2 +- cmd/kubelet/app/server.go | 40 +++++++++-------- pkg/kubelet/apis/config/types.go | 2 +- pkg/kubelet/cm/cgroup_manager_linux.go | 62 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 20 deletions(-) diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 3d684e6e8e5d5..9545136c7fe60 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -489,7 +489,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig fs.StringVar(&c.ProviderID, "provider-id", c.ProviderID, "Unique identifier for identifying the node in a machine database, i.e cloudprovider") fs.BoolVar(&c.CgroupsPerQOS, "cgroups-per-qos", c.CgroupsPerQOS, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.") - fs.StringVar(&c.CgroupDriver, "cgroup-driver", c.CgroupDriver, "Driver that the kubelet uses to manipulate cgroups on the host. Possible values: 'cgroupfs', 'systemd'") + fs.StringVar(&c.CgroupDriver, "cgroup-driver", c.CgroupDriver, "Driver that the kubelet uses to manipulate cgroups on the host. Possible values: 'cgroupfs', 'systemd', 'none'") fs.StringVar(&c.CgroupRoot, "cgroup-root", c.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.") fs.StringVar(&c.CPUManagerPolicy, "cpu-manager-policy", c.CPUManagerPolicy, "CPU Manager policy to use. 
Possible values: 'none', 'static'.") fs.Var(cliflag.NewMapStringStringNoSplit(&c.CPUManagerPolicyOptions), "cpu-manager-policy-options", "A set of key=value CPU Manager policy options to use, to fine tune their behaviour. If not supplied, keep the default behaviour.") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 34bac9c53a9a5..1901f38cdb2cc 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -649,26 +649,30 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend } var cgroupRoots []string - nodeAllocatableRoot := cm.NodeAllocatableRoot(s.CgroupRoot, s.CgroupsPerQOS, s.CgroupDriver) - cgroupRoots = append(cgroupRoots, nodeAllocatableRoot) - kubeletCgroup, err := cm.GetKubeletContainer(s.KubeletCgroups) - if err != nil { - klog.InfoS("Failed to get the kubelet's cgroup. Kubelet system container metrics may be missing.", "err", err) - } else if kubeletCgroup != "" { - cgroupRoots = append(cgroupRoots, kubeletCgroup) - } + if s.CgroupDriver == "none" { + cgroupRoots = []string{"/"} + } else { + nodeAllocatableRoot := cm.NodeAllocatableRoot(s.CgroupRoot, s.CgroupsPerQOS, s.CgroupDriver) + cgroupRoots = append(cgroupRoots, nodeAllocatableRoot) + kubeletCgroup, err := cm.GetKubeletContainer(s.KubeletCgroups) + if err != nil { + klog.InfoS("Failed to get the kubelet's cgroup. Kubelet system container metrics may be missing.", "err", err) + } else if kubeletCgroup != "" { + cgroupRoots = append(cgroupRoots, kubeletCgroup) + } - runtimeCgroup, err := cm.GetRuntimeContainer(s.ContainerRuntime, s.RuntimeCgroups) - if err != nil { - klog.InfoS("Failed to get the container runtime's cgroup. 
Runtime system container metrics may be missing.", "err", err) - } else if runtimeCgroup != "" { - // RuntimeCgroups is optional, so ignore if it isn't specified - cgroupRoots = append(cgroupRoots, runtimeCgroup) - } + runtimeCgroup, err := cm.GetRuntimeContainer(s.ContainerRuntime, s.RuntimeCgroups) + if err != nil { + klog.InfoS("Failed to get the container runtime's cgroup. Runtime system container metrics may be missing.", "err", err) + } else if runtimeCgroup != "" { + // RuntimeCgroups is optional, so ignore if it isn't specified + cgroupRoots = append(cgroupRoots, runtimeCgroup) + } - if s.SystemCgroups != "" { - // SystemCgroups is optional, so ignore if it isn't specified - cgroupRoots = append(cgroupRoots, s.SystemCgroups) + if s.SystemCgroups != "" { + // SystemCgroups is optional, so ignore if it isn't specified + cgroupRoots = append(cgroupRoots, s.SystemCgroups) + } } if kubeDeps.CAdvisorInterface == nil { diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index 84074eaf56695..7e983eb169001 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -216,7 +216,7 @@ type KubeletConfiguration struct { // And all Burstable and BestEffort pods are brought up under their // specific top level QoS cgroup. CgroupsPerQOS bool - // driver that the kubelet uses to manipulate cgroups on the host (cgroupfs or systemd) + // driver that the kubelet uses to manipulate cgroups on the host (cgroupfs, systemd, none) CgroupDriver string // CPUManagerPolicy is the name of the policy to use. // Requires the CPUManager feature gate to be enabled. 
diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index 95df22c4a300d..06d84c1fcd5e1 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -50,6 +50,9 @@ const ( libcontainerCgroupfs libcontainerCgroupManagerType = "cgroupfs" // libcontainerSystemd means use libcontainer with systemd libcontainerSystemd libcontainerCgroupManagerType = "systemd" + // noneDriver is the name of the "NOP" driver, which is used when + // cgroup is not accessible + noneDriver = "none" // systemdSuffix is the cgroup name suffix for systemd systemdSuffix string = ".slice" // MemoryMin is memory.min for cgroup v2 @@ -194,6 +197,13 @@ var _ CgroupManager = &cgroupManagerImpl{} // NewCgroupManager is a factory method that returns a CgroupManager func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { + if cgroupDriver == noneDriver { + // The patch for the "none" driver isn't going to be merged to the Kubernetes upstream, + // so it is not worth adding a FeatureGate for this. 
+ cm := &noneCgroupManager{} + cm.init() + return cm + } managerType := libcontainerCgroupfs if cgroupDriver == string(libcontainerSystemd) { managerType = libcontainerSystemd @@ -623,3 +633,55 @@ func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) { val, err := fscommon.GetCgroupParamUint(path, file) return int64(val), err } + +type noneCgroupManager struct { + names map[string]struct{} +} + +func (m *noneCgroupManager) init() { + m.names = make(map[string]struct{}) +} + +func (m *noneCgroupManager) Create(c *CgroupConfig) error { + name := m.Name(c.Name) + m.names[name] = struct{}{} + return nil +} + +func (m *noneCgroupManager) Destroy(c *CgroupConfig) error { + name := m.Name(c.Name) + delete(m.names, name) + return nil +} + +func (m *noneCgroupManager) Update(c *CgroupConfig) error { + name := m.Name(c.Name) + m.names[name] = struct{}{} + return nil +} + +func (m *noneCgroupManager) Exists(cgname CgroupName) bool { + name := m.Name(cgname) + _, ok := m.names[name] + return ok +} + +func (m *noneCgroupManager) Name(cgname CgroupName) string { + return cgname.ToCgroupfs() +} + +func (m *noneCgroupManager) CgroupName(name string) CgroupName { + return ParseCgroupfsToCgroupName(name) +} + +func (m *noneCgroupManager) Pids(_ CgroupName) []int { + return nil +} + +func (m *noneCgroupManager) ReduceCPULimits(cgroupName CgroupName) error { + return nil +} + +func (m *noneCgroupManager) MemoryUsage(name CgroupName) (int64, error) { + return 0, nil +}