From 3662c4677ffa15f6625c5af92dde6660fca0b8cb Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Thu, 6 Dec 2018 16:47:06 +0100 Subject: [PATCH 1/9] WIP: Add shm_size option Add possibility to mount an extra memory volume to increase available shm. --- pkg/cluster/k8sres.go | 50 ++++++++++++++++++++++++++++++-- pkg/util/config/config.go | 1 + pkg/util/constants/postgresql.go | 3 ++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index b775ee636..01c17a5a8 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -407,6 +407,7 @@ func generatePodTemplate( podServiceAccountName string, kubeIAMRole string, priorityClassName string, + sharedMemory string, ) (*v1.PodTemplateSpec, error) { terminateGracePeriodSeconds := terminateGracePeriod @@ -420,6 +421,13 @@ func generatePodTemplate( Tolerations: *tolerationsSpec, } + if sharedMemory != "" { + err := addShmVolume(&podSpec, sharedMemory) + if err != nil { + return nil, err + } + } + if nodeAffinity != nil { podSpec.Affinity = nodeAffinity } @@ -733,7 +741,12 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeMounts := generateVolumeMounts() // generate the spilo container - spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, spiloEnvVars, volumeMounts) + spiloContainer := generateSpiloContainer(c.containerName(), + &effectiveDockerImage, + resourceRequirements, + spiloEnvVars, + volumeMounts, + ) // resolve conflicts between operator-global and per-cluster sidecards sideCars := c.mergeSidecars(spec.Sidecars) @@ -775,7 +788,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State int64(c.OpConfig.PodTerminateGracePeriod.Seconds()), c.OpConfig.PodServiceAccountName, c.OpConfig.KubeIAMRole, - effectivePodPriorityClassName); err != nil { + effectivePodPriorityClassName, + c.OpConfig.SharedMemory); err != nil { return nil, fmt.Errorf("could not generate pod template: %v", err) } @@ -882,6 +896,38 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 { return newcur } +// To avoid issues with limited /dev/shm inside docker environment, when +// PostgreSQL can't allocate enough of dsa segments from it, we can +// mount an extra memory volume +// +// see https://docs.okd.io/latest/dev_guide/shared_memory.html +func addShmVolume(podSpec *v1.PodSpec, sharedMemory string) error { + quantity, err := resource.ParseQuantity(sharedMemory) + if err != nil { + return fmt.Errorf("could not parse shm_size: %v", err) + } + + podSpec.Volumes = []v1.Volume{ + v1.Volume{ + Name: constants.ShmVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: "Memory", + SizeLimit: &quantity, + }, + }, + }, + } + mounts := append(podSpec.Containers[0].VolumeMounts, + v1.VolumeMount{ + Name: constants.ShmVolumeName, + MountPath: constants.ShmVolumePath, + }) + podSpec.Containers[0].VolumeMounts = mounts + + return nil +} + func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) { var storageClassName *string diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 2bd7924ad..f37dd2a53 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,6 +38,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` + SharedMemory string `name:"shm_size" default:""` } // Auth describes authentication specific configuration parameters diff --git a/pkg/util/constants/postgresql.go b/pkg/util/constants/postgresql.go index 7556e8858..e39fd423f 100644 --- a/pkg/util/constants/postgresql.go +++ b/pkg/util/constants/postgresql.go @@ -10,4 +10,7 @@ const ( PostgresConnectRetryTimeout = 2 * time.Minute PostgresConnectTimeout = 15 * time.Second + + ShmVolumeName = "dshm" + ShmVolumePath = "/dev/shm" ) From 28c5fc161da2de16027d82df7393741b7b05d059 Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Wed, 19 Dec 2018 14:06:23 +0100 Subject: [PATCH 2/9] Add tests --- pkg/cluster/k8sres.go | 24 ++++--- pkg/cluster/k8sres_test.go | 109 +++++++++++++++++++++++++++++++ pkg/util/constants/postgresql.go | 1 + 3 files changed, 125 insertions(+), 9 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 01c17a5a8..4961cad47 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -907,23 +907,29 @@ func addShmVolume(podSpec *v1.PodSpec, sharedMemory string) error { return fmt.Errorf("could not parse shm_size: %v", err) } - podSpec.Volumes = []v1.Volume{ - v1.Volume{ - Name: constants.ShmVolumeName, - VolumeSource: v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{ - Medium: "Memory", - SizeLimit: &quantity, - }, + if quantity.Value() <= constants.MinShmSize { + return fmt.Errorf("Requested shm_size %v is less than minimal allowed %v", + quantity, constants.MinShmSize) + } + + volumes := append(podSpec.Volumes, v1.Volume{ + Name: constants.ShmVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: "Memory", + SizeLimit: &quantity, }, }, - } + }) + mounts := append(podSpec.Containers[0].VolumeMounts, v1.VolumeMount{ Name: constants.ShmVolumeName, MountPath: constants.ShmVolumePath, }) + podSpec.Containers[0].VolumeMounts = mounts + podSpec.Volumes = volumes return nil } diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 12e145c04..b9ada232d 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1,8 +1,11 @@ package cluster import ( + "k8s.io/api/core/v1" + acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/util/config" + "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "github.com/zalando-incubator/postgres-operator/pkg/util/k8sutil" "testing" ) @@ -75,3 +78,109 @@ func TestCreateLoadBalancerLogic(t *testing.T) { } } } + +func TestShmVolume(t *testing.T) { + testName := "TestShmVolume" + tests := []struct { + subTest string + podSpec *v1.PodSpec + shmSize string + shmPos int + err bool + }{ + { + subTest: "empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{}, + }, + }, + }, + shmSize: "512M", + shmPos: 0, + err: false, + }, + { + subTest: "non empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{v1.Volume{}}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + v1.VolumeMount{}, + }, + }, + }, + }, + shmSize: "512M", + shmPos: 1, + err: false, + }, + { + subTest: "invalid shm size (negative)", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{}, + }, + }, + }, + shmSize: "-512MB", + shmPos: 0, + err: true, + }, + { + subTest: "invalid shm size", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{}, + }, + }, + }, + shmSize: "invalid", + shmPos: 0, + err: true, + }, + { + subTest: "less than minimal", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{}, + }, + }, + }, + shmSize: "1MB", + shmPos: 0, + err: true, + }, + } + for _, tt := range tests { + err := addShmVolume(tt.podSpec, tt.shmSize) + if err != nil { + if !tt.err { + t.Errorf("%s %s: Unexpected error: %#v", testName, tt.subTest, err) + } + + continue + } + + volumeName := tt.podSpec.Volumes[tt.shmPos].Name + volumeMountName := tt.podSpec.Containers[0].VolumeMounts[tt.shmPos].Name + + if volumeName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected volume %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeName) + } + if volumeMountName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected mount %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeMountName) + } + } +} diff --git a/pkg/util/constants/postgresql.go b/pkg/util/constants/postgresql.go index e39fd423f..fa7a51b04 100644 --- a/pkg/util/constants/postgresql.go +++ b/pkg/util/constants/postgresql.go @@ -13,4 +13,5 @@ const ( ShmVolumeName = "dshm" ShmVolumePath = "/dev/shm" + MinShmSize = 64 * 1024 * 1024 ) From dd05678c80e703873fad00c4156f44795336c41e Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Wed, 19 Dec 2018 16:18:24 +0100 Subject: [PATCH 3/9] K8S doesn't support SizeLimit on EmptyDir Or at least not yet: https://github.com/kubernetes/kubernetes/pull/43823 https://github.com/kubernetes/kubernetes/issues/63126 --- pkg/apis/acid.zalan.do/v1/postgresql_type.go | 1 + pkg/apis/acid.zalan.do/v1/util_test.go | 6 +- pkg/cluster/k8sres.go | 28 ++-------- pkg/cluster/k8sres_test.go | 59 ++------------------ pkg/util/config/config.go | 1 - 5 files changed, 14 insertions(+), 81 deletions(-) diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index aedb0512f..c0477a5cc 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -51,6 +51,7 @@ type PostgresSpec struct { Tolerations []v1.Toleration `json:"tolerations,omitempty"` Sidecars []Sidecar `json:"sidecars,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` + ShmVolume bool `json:"shmVolume,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/acid.zalan.do/v1/util_test.go b/pkg/apis/acid.zalan.do/v1/util_test.go index b6a27542c..01be31e88 100644 --- a/pkg/apis/acid.zalan.do/v1/util_test.go +++ b/pkg/apis/acid.zalan.do/v1/util_test.go @@ -499,7 +499,7 @@ func TestMarshal(t *testing.T) { t.Errorf("Marshal error: %v", err) } if !bytes.Equal(m, tt.marshal) { - t.Errorf("Marshal Postgresql expected: %q, got: %q", string(tt.marshal), string(m)) + t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(tt.marshal), string(m)) } } } @@ -507,11 +507,11 @@ func TestMarshal(t *testing.T) { func TestPostgresMeta(t *testing.T) { for _, tt := range unmarshalCluster { if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { - t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a) + t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a) } if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) { - t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ObjectMeta, a) + t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a) } } } diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 4961cad47..8a6d83918 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -407,7 +407,7 @@ func generatePodTemplate( podServiceAccountName string, kubeIAMRole string, priorityClassName string, - sharedMemory string, + shmVolume bool, ) (*v1.PodTemplateSpec, error) { terminateGracePeriodSeconds := terminateGracePeriod @@ -421,11 +421,8 @@ func generatePodTemplate( Tolerations: *tolerationsSpec, } - if sharedMemory != "" { - err := addShmVolume(&podSpec, sharedMemory) - if err != nil { - return nil, err - } + if shmVolume == true { + addShmVolume(&podSpec) } if nodeAffinity != nil { @@ -789,7 +786,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State c.OpConfig.PodServiceAccountName, c.OpConfig.KubeIAMRole, effectivePodPriorityClassName, - c.OpConfig.SharedMemory); err != nil { + spec.ShmVolume); err != nil { return nil, fmt.Errorf("could not generate pod template: %v", err) } @@ -901,23 +898,12 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 { // mount an extra memory volume // // see https://docs.okd.io/latest/dev_guide/shared_memory.html -func addShmVolume(podSpec *v1.PodSpec, sharedMemory string) error { - quantity, err := resource.ParseQuantity(sharedMemory) - if err != nil { - return fmt.Errorf("could not parse shm_size: %v", err) - } - - if quantity.Value() <= constants.MinShmSize { - return fmt.Errorf("Requested shm_size %v is less than minimal allowed %v", - quantity, constants.MinShmSize) - } - +func addShmVolume(podSpec *v1.PodSpec) { volumes := append(podSpec.Volumes, v1.Volume{ Name: constants.ShmVolumeName, VolumeSource: v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{ - Medium: "Memory", - SizeLimit: &quantity, + Medium: "Memory", }, }, }) @@ -930,8 +916,6 @@ func addShmVolume(podSpec *v1.PodSpec, sharedMemory string) error { podSpec.Containers[0].VolumeMounts = mounts podSpec.Volumes = volumes - - return nil } func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) { diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index b9ada232d..4ee6f43de 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -84,9 +84,7 @@ func TestShmVolume(t *testing.T) { tests := []struct { subTest string podSpec *v1.PodSpec - shmSize string shmPos int - err bool }{ { subTest: "empty PodSpec", @@ -98,9 +96,7 @@ func TestShmVolume(t *testing.T) { }, }, }, - shmSize: "512M", - shmPos: 0, - err: false, + shmPos: 0, }, { subTest: "non empty PodSpec", @@ -114,60 +110,13 @@ func TestShmVolume(t *testing.T) { }, }, }, - shmSize: "512M", - shmPos: 1, - err: false, - }, - { - subTest: "invalid shm size (negative)", - podSpec: &v1.PodSpec{ - Volumes: []v1.Volume{}, - Containers: []v1.Container{ - v1.Container{ - VolumeMounts: []v1.VolumeMount{}, - }, - }, - }, - shmSize: "-512MB", - shmPos: 0, - err: true, - }, - { - subTest: "invalid shm size", - podSpec: &v1.PodSpec{ - Volumes: []v1.Volume{}, - Containers: []v1.Container{ - v1.Container{ - VolumeMounts: []v1.VolumeMount{}, - }, - }, - }, - shmSize: "invalid", - shmPos: 0, - err: true, - }, - { - subTest: "less than minimal", - podSpec: &v1.PodSpec{ - Volumes: []v1.Volume{}, - Containers: []v1.Container{ - v1.Container{ - VolumeMounts: []v1.VolumeMount{}, - }, - }, - }, - shmSize: "1MB", - shmPos: 0, - err: true, + shmPos: 1, }, } for _, tt := range tests { - err := addShmVolume(tt.podSpec, tt.shmSize) + err := addShmVolume(tt.podSpec) if err != nil { - if !tt.err { - t.Errorf("%s %s: Unexpected error: %#v", testName, tt.subTest, err) - } - + t.Errorf("%s %s: Unexpected error: %#v", testName, tt.subTest, err) continue } diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index f37dd2a53..2bd7924ad 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,7 +38,6 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` - SharedMemory string `name:"shm_size" default:""` } // Auth describes authentication specific configuration parameters From 1da087829e9cfe3204b59368f606fbfd63762941 Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Wed, 19 Dec 2018 17:19:53 +0100 Subject: [PATCH 4/9] Add documentation --- docs/reference/cluster_manifest.md | 7 +++++++ manifests/complete-postgres-manifest.yaml | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 75de35097..e05fb3ca5 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -97,6 +97,12 @@ Those are parameters grouped directly under the `spec` key in the manifest. is taken from the `pod_priority_class_name` operator parameter, if not set then the default priority class is taken. The priority class itself must be defined in advance. +* **shmVolume** + Start a database pod without limitations on shm memory. By default docker + limit `/dev/shm` to `64M`, which could be not enough if PostgreSQL uses + parallel workers heavily. If this option is enabled, to the target database + pod will be mounted a new tmpfs volume to remove this limitation. + ## Postgres parameters Those parameters are grouped under the `postgresql` top-level key. @@ -112,6 +118,7 @@ Those parameters are grouped under the `postgresql` top-level key. cluster. Optional (Spilo automatically sets reasonable defaults for parameters like work_mem or max_connections). + ## Patroni parameters Those parameters are grouped under the `patroni` top-level key. See the [patroni diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index e0f76e4d4..37e06cfa8 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -13,12 +13,13 @@ spec: - superuser - createdb enableMasterLoadBalancer: true - enableReplicaLoadBalancer: true + enableReplicaLoadBalancer: true allowedSourceRanges: # load balancers' source ranges for both master and replica services - 127.0.0.1/32 databases: foo: zalando #Expert section + shmVolume: true postgresql: version: "10" parameters: From 1bd1456a12bfe38c3997851ecb44a18dc2ba080e Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Wed, 19 Dec 2018 18:05:55 +0100 Subject: [PATCH 5/9] Fix usage of removed err variable --- pkg/cluster/k8sres.go | 2 +- pkg/cluster/k8sres_test.go | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 8a6d83918..a718c1985 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -421,7 +421,7 @@ func generatePodTemplate( Tolerations: *tolerationsSpec, } - if shmVolume == true { + if shmVolume { addShmVolume(&podSpec) } diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 4ee6f43de..92946ab2b 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -114,11 +114,7 @@ func TestShmVolume(t *testing.T) { }, } for _, tt := range tests { - err := addShmVolume(tt.podSpec) - if err != nil { - t.Errorf("%s %s: Unexpected error: %#v", testName, tt.subTest, err) - continue - } + addShmVolume(tt.podSpec) volumeName := tt.podSpec.Volumes[tt.shmPos].Name volumeMountName := tt.podSpec.Containers[0].VolumeMounts[tt.shmPos].Name From 3f56aec0605db1b2c630df78f5bc2899a61c25be Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Thu, 20 Dec 2018 16:04:20 +0100 Subject: [PATCH 6/9] Add ShmVolume to the configmap as a global configuration option --- pkg/apis/acid.zalan.do/v1/postgresql_type.go | 2 +- pkg/cluster/k8sres.go | 13 ++++++++++++- pkg/util/config/config.go | 1 + pkg/util/constants/postgresql.go | 1 - 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index c0477a5cc..e480abda7 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -51,7 +51,7 @@ type PostgresSpec struct { Tolerations []v1.Toleration `json:"tolerations,omitempty"` Sidecars []Sidecar `json:"sidecars,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` - ShmVolume bool `json:"shmVolume,omitempty"` + ShmVolume *bool `json:"shmVolume,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index a718c1985..78556d0d6 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -18,6 +18,7 @@ import ( acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" + "github.com/zalando-incubator/postgres-operator/pkg/util/config" "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "k8s.io/apimachinery/pkg/labels" ) @@ -396,6 +397,16 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, return nil, nil } +// Check whether or not we're requested to mount an shm volume, +// taking into account that PostgreSQL manifest has precedence. +func mountShmVolumeNeeded(opConfig config.Config, pgSpec *acidv1.PostgresSpec) bool { + if pgSpec.ShmVolume != nil { + return *pgSpec.ShmVolume + } + + return opConfig.ShmVolume +} + func generatePodTemplate( namespace string, labels labels.Set, @@ -786,7 +797,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State c.OpConfig.PodServiceAccountName, c.OpConfig.KubeIAMRole, effectivePodPriorityClassName, - spec.ShmVolume); err != nil { + mountShmVolumeNeeded(c.OpConfig, spec)); err != nil { return nil, fmt.Errorf("could not generate pod template: %v", err) } diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 2bd7924ad..1767dc2d4 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,6 +38,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` + ShmVolume bool `name:"shm_volume" default:"true"` } // Auth describes authentication specific configuration parameters diff --git a/pkg/util/constants/postgresql.go b/pkg/util/constants/postgresql.go index fa7a51b04..e39fd423f 100644 --- a/pkg/util/constants/postgresql.go +++ b/pkg/util/constants/postgresql.go @@ -13,5 +13,4 @@ const ( ShmVolumeName = "dshm" ShmVolumePath = "/dev/shm" - MinShmSize = 64 * 1024 * 1024 ) From 9207c878fb346b320f8e88068e3c61d5e87e9f15 Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Fri, 21 Dec 2018 10:52:27 +0100 Subject: [PATCH 7/9] Add documentation About operator config and adjust wording in manifest docs --- docs/reference/cluster_manifest.md | 14 ++++++++++---- docs/reference/operator_parameters.md | 8 ++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index e05fb3ca5..2bc20a1ca 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -96,12 +96,18 @@ Those are parameters grouped directly under the `spec` key in the manifest. that should be assigned to the cluster pods. When not specified, the value is taken from the `pod_priority_class_name` operator parameter, if not set then the default priority class is taken. The priority class itself must be defined in advance. - + * **shmVolume** Start a database pod without limitations on shm memory. By default docker - limit `/dev/shm` to `64M`, which could be not enough if PostgreSQL uses - parallel workers heavily. If this option is enabled, to the target database - pod will be mounted a new tmpfs volume to remove this limitation. + limit `/dev/shm` to `64M` (see e.g. the [docker + issue](https://github.com/docker-library/postgres/issues/416), which could be + not enough if PostgreSQL uses parallel workers heavily. If this option is + present and value is `true`, to the target database pod will be mounted a new + tmpfs volume to remove this limitation. If it's not present, the decision + about mounting a volume will be made based on operator configuration + (`shm_volume`, which is `true` by default). It it's present and value is + `false`, then no volume will be mounted no matter how operator was configured + (so you can override the operator configuration). ## Postgres parameters diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 47f67228c..a2d5ebeb5 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -224,6 +224,14 @@ CRD-based configuration. * **set_memory_request_to_limit** Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers (Postgres, Scalyr sidecar, and other sidecars). The default is `false`. +* **shm_volume** + Instruct operator to start any new database pod without limitations on shm + memory. If this option is enabled, to the target database pod will be mounted + a new tmpfs volume to remove shm memory limitation (see e.g. the [docker + issue](https://github.com/docker-library/postgres/issues/416)). This option + is global for an operator object, and can be overwritten by `shmVolume` + parameter from Postgres manifest. The default is `true` + ## Operator timeouts This set of parameters define various timeouts related to some operator From 93e1977d31a171e76072ce363dbe9180ba9d6a83 Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Fri, 21 Dec 2018 10:55:18 +0100 Subject: [PATCH 8/9] Make postgres container access more obvious --- pkg/cluster/k8sres.go | 3 ++- pkg/util/constants/kubernetes.go | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 78556d0d6..6a3a052bd 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -919,7 +919,8 @@ func addShmVolume(podSpec *v1.PodSpec) { }, }) - mounts := append(podSpec.Containers[0].VolumeMounts, + pgIdx := constants.PostgresContainerIdx + mounts := append(podSpec.Containers[pgIdx].VolumeMounts, v1.VolumeMount{ Name: constants.ShmVolumeName, MountPath: constants.ShmVolumePath, diff --git a/pkg/util/constants/kubernetes.go b/pkg/util/constants/kubernetes.go index 2604f124d..a4ea73e80 100644 --- a/pkg/util/constants/kubernetes.go +++ b/pkg/util/constants/kubernetes.go @@ -5,6 +5,7 @@ import "time" // General kubernetes-related constants const ( PostgresContainerName = "postgres" + PostgresContainerIdx = 0 K8sAPIPath = "/apis" StatefulsetDeletionInterval = 1 * time.Second StatefulsetDeletionTimeout = 30 * time.Second From c2fb3b48bd359cad2f2feecaf7844bca9dec0689 Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Fri, 21 Dec 2018 15:29:07 +0100 Subject: [PATCH 9/9] Rename shmVolume to enableShmVolume --- docs/reference/cluster_manifest.md | 8 ++++---- docs/reference/operator_parameters.md | 4 ++-- manifests/complete-postgres-manifest.yaml | 2 +- pkg/apis/acid.zalan.do/v1/postgresql_type.go | 2 +- pkg/util/config/config.go | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 2bc20a1ca..efc850aff 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -97,7 +97,7 @@ Those are parameters grouped directly under the `spec` key in the manifest. is taken from the `pod_priority_class_name` operator parameter, if not set then the default priority class is taken. The priority class itself must be defined in advance. -* **shmVolume** +* **enableShmVolume** Start a database pod without limitations on shm memory. By default docker limit `/dev/shm` to `64M` (see e.g. the [docker issue](https://github.com/docker-library/postgres/issues/416), which could be @@ -105,9 +105,9 @@ Those are parameters grouped directly under the `spec` key in the manifest. present and value is `true`, to the target database pod will be mounted a new tmpfs volume to remove this limitation. If it's not present, the decision about mounting a volume will be made based on operator configuration - (`shm_volume`, which is `true` by default). It it's present and value is - `false`, then no volume will be mounted no matter how operator was configured - (so you can override the operator configuration). + (`enable_shm_volume`, which is `true` by default). It it's present and value + is `false`, then no volume will be mounted no matter how operator was + configured (so you can override the operator configuration). ## Postgres parameters diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index a2d5ebeb5..3f96b450c 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -224,12 +224,12 @@ CRD-based configuration. * **set_memory_request_to_limit** Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers (Postgres, Scalyr sidecar, and other sidecars). The default is `false`. -* **shm_volume** +* **enable_shm_volume** Instruct operator to start any new database pod without limitations on shm memory. If this option is enabled, to the target database pod will be mounted a new tmpfs volume to remove shm memory limitation (see e.g. the [docker issue](https://github.com/docker-library/postgres/issues/416)). This option - is global for an operator object, and can be overwritten by `shmVolume` + is global for an operator object, and can be overwritten by `enableShmVolume` parameter from Postgres manifest. The default is `true` ## Operator timeouts diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 37e06cfa8..c5f80f373 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -19,7 +19,7 @@ spec: databases: foo: zalando #Expert section - shmVolume: true + enableShmVolume: true postgresql: version: "10" parameters: diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index e480abda7..2a8f60f71 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -51,7 +51,7 @@ type PostgresSpec struct { Tolerations []v1.Toleration `json:"tolerations,omitempty"` Sidecars []Sidecar `json:"sidecars,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` - ShmVolume *bool `json:"shmVolume,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 1767dc2d4..d855e0a2a 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,7 +38,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` - ShmVolume bool `name:"shm_volume" default:"true"` + ShmVolume bool `name:"enable_shm_volume" default:"true"` } // Auth describes authentication specific configuration parameters