forked from GoogleCloudPlatform/gke-rolling-updates-demo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcluster_ops.sh
executable file
·388 lines (339 loc) · 11.6 KB
/
cluster_ops.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#!/usr/bin/env bash
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -euo pipefail
# "---------------------------------------------------------"
# "- -"
# "- rolling updates expand contract -"
# "- -"
# "- this poc demonstrates the use of the expand -"
# "- and contract pattern for upgrading gke clusters, -"
# "- the pattern works by increasing the node pool -"
# "- size prior to the upgrade to provide additional -"
# "- headroom while upgrading, once the upgrade is -"
# "- complete the node pool is restored to its -"
# "- original size -"
# "- -"
# "---------------------------------------------------------"
## Locate the script and repository directories, then load the properties file.
# SCRIPT_HOME: absolute path of the directory containing this script.
# REPO_HOME:   absolute path of that directory's parent.
SCRIPT_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# shellcheck source=.env
. "${REPO_HOME}/.env"

# Fall back to the demo's cluster name when the properties file left
# CLUSTER_NAME unset or empty; only the fallback value is exported here.
if [[ -z "${CLUSTER_NAME:-}" ]]; then
  export CLUSTER_NAME="expand-contract-upgrade"
fi
################ functions ####################
## Print the usage text and terminate the script.
# Outputs: the help text on stderr (this is an error path, so stdout is
#          left clean for callers that capture it).
# Exits:   always exits with status 1.
usage() {
  {
    echo ""
    echo " Checking valid parameter passed to script ....."
    echo ""
    cat <<EOM
USAGE: $(basename "$0") <action> [N]
Where the <action> can be:
auto
create
upgrade-control
upgrade-nodes
resize <N>
delete
N - The Number of nodes per zone to set the default node pool during resize
EOM
  } >&2
  exit 1
}
## Verify that the command-line tools this script drives can be found.
# Outputs: a progress banner on stdout; on failure, a message on stderr
#          naming the first missing tool.
# Exits:   with status 1 as soon as gcloud or kubectl is not found.
check_dependencies() {
  local tool

  printf '\n%s\n\n' "Checking dependencies are installed ....."

  for tool in gcloud kubectl; do
    if ! command -v "${tool}" >/dev/null 2>&1; then
      echo >&2 "I require ${tool} but it's not installed. Aborting."
      exit 1
    fi
  done
}
## Verify that GCLOUD_PROJECT appears in the caller's project list.
# Globals: GCLOUD_PROJECT (read)
# Outputs: a progress banner on stdout; an explanation on stderr when the
#          project is not found.
# Exits:   with status 1 when the project is not listed.
check_project() {
  echo ""
  echo "Checking the project specified for the demo exists ....."
  echo ""

  local exists
  # Compare the first column (the project ID) for exact equality. The project
  # ID is passed with -v instead of being spliced into an awk regex, so an ID
  # that is merely a substring of another row (e.g. "demo" vs "my-demo", or a
  # display name containing it) can no longer produce a false result.
  # awk reads the whole listing (no early exit) so the producer never gets a
  # SIGPIPE, which would be fatal under `set -o pipefail`.
  exists=$(gcloud projects list \
    | awk -v wanted="${GCLOUD_PROJECT}" \
      '$1 == wanted { found = 1 } END { if (found) print wanted }')

  if [[ "${exists}" != "${GCLOUD_PROJECT}" ]]; then
    {
      echo ""
      echo "the ${GCLOUD_PROJECT} project does not exist"
      echo "please update properties file with "
      echo "a valid project"
      echo ""
    } >&2
    exit 1
  fi
}
## Enable one Google Cloud service unless the service listing already shows it.
# Globals:   GCLOUD_PROJECT (read)
# Arguments: $1 - service name, e.g. compute.googleapis.com
#            $2 - human-readable label used in the progress message
_enable_api_if_disabled() {
  local service=$1
  local label=$2
  local listed

  # stderr is captured together with stdout (as the original code did), so
  # any extra diagnostic text makes the comparison below fail and the service
  # is (re-)enabled.
  listed=$(gcloud services list --project="${GCLOUD_PROJECT}" \
    --format='value(serviceConfig.name)' \
    --filter="serviceConfig.name:${service}" 2>&1)

  if [[ "${listed}" != "${service}" ]]; then
    echo "Enabling the ${label} API"
    gcloud services enable "${service}" --project="${GCLOUD_PROJECT}"
  fi
}

## Make sure the Compute Engine and Kubernetes Engine APIs are enabled.
# Globals: GCLOUD_PROJECT (read)
# Outputs: a progress banner, plus one line per API that has to be enabled.
check_apis() {
  echo ""
  echo "Checking the appropriate API's are enabled ....."
  echo ""

  _enable_api_if_disabled "compute.googleapis.com" "Compute Engine"
  _enable_api_if_disabled "container.googleapis.com" "Kubernetes Engine"
}
# Resolve GCLOUD_REGION.
# When the active gcloud configuration has a compute/region, that value is
# used (and exported), replacing anything loaded earlier. Otherwise
# GCLOUD_REGION must already be set and non-empty, or the script stops.
# gcloud may report an unset property as the literal "(unset)" or may leave
# stdout empty, so both are treated as "no default configured"; an empty
# result must never overwrite a value that was already provided.
GCLOUD_REGION_DEFAULT=$(gcloud config get-value compute/region)
if [[ -z "${GCLOUD_REGION_DEFAULT}" \
  || "${GCLOUD_REGION_DEFAULT}" == "(unset)" ]]; then
  # check if defined in env file
  if [[ -z "${GCLOUD_REGION:-}" ]]; then
    echo >&2 "GCLOUD_REGION is not set"
    exit 1
  fi
else
  GCLOUD_REGION="${GCLOUD_REGION_DEFAULT}"
  export GCLOUD_REGION
fi

# Resolve GCLOUD_PROJECT using the same rules as GCLOUD_REGION above.
GCLOUD_PROJECT_DEFAULT=$(gcloud config get-value project)
if [[ -z "${GCLOUD_PROJECT_DEFAULT}" \
  || "${GCLOUD_PROJECT_DEFAULT}" == "(unset)" ]]; then
  # check if defined in env file
  if [[ -z "${GCLOUD_PROJECT:-}" ]]; then
    echo >&2 "GCLOUD_PROJECT is not set"
    exit 1
  fi
else
  GCLOUD_PROJECT="${GCLOUD_PROJECT_DEFAULT}"
  export GCLOUD_PROJECT
fi
## Create the GKE cluster and fetch kubectl credentials for it.
# Globals: CLUSTER_NAME, MACHINE_TYPE, NUM_NODES, K8S_VER, GCLOUD_PROJECT,
#          GCLOUD_REGION (all read)
# Outputs: a progress banner followed by whatever gcloud prints.
create_cluster() {
  printf '\n%s\n\n' "Building a GKE cluster ....."

  # Step 1: create the cluster.
  local -a create_flags=(
    --machine-type "${MACHINE_TYPE}"
    --num-nodes "${NUM_NODES}"
    --cluster-version "${K8S_VER}"
    --project "${GCLOUD_PROJECT}"
    --region "${GCLOUD_REGION}"
  )
  gcloud container clusters create "${CLUSTER_NAME}" "${create_flags[@]}"

  # Step 2: acquire the kubectl credentials for the new cluster.
  local -a credential_flags=(
    --region "${GCLOUD_REGION}"
    --project "${GCLOUD_PROJECT}"
  )
  gcloud container clusters get-credentials "${CLUSTER_NAME}" \
    "${credential_flags[@]}"
}
## Creates the Shakespeare index and loads the sample data.
# Opens a temporary kubectl port-forward to the elasticsearch service, sends
# the index mapping and the bulk data with curl, then closes the port-forward.
# Globals: REPO_HOME (read)
# Outputs: progress messages on stdout; a failure message on stderr.
# Returns: 0 on success, otherwise the exit status of the curl call that
#          failed. The port-forward is stopped on both paths.
load_data() {
  local forward_pid
  local rc=0

  echo "Setting up port-forward to Elasticsearch client"
  # Only stdout is discarded, which is what the previous `1>&2>/dev/null`
  # amounted to; errors from kubectl stay visible.
  kubectl -n default port-forward svc/elasticsearch 9200 >/dev/null &
  forward_pid=$!

  # Wait a few seconds for the connection to establish, as the command above
  # does not block.
  sleep 5

  echo "Creating the Shakespeare index"
  # The mapping file creates the index and sets the metadata needed by
  # Elasticsearch to parse the actual data.
  curl -H "Content-Type: application/json" \
    -X PUT \
    -d @"${REPO_HOME}/data/mapping.json" \
    'http://localhost:9200/shakespeare' || rc=$?

  if (( rc == 0 )); then
    # The response does not include a trailing newline.
    echo ""

    # Here we load the actual data.
    echo "Loading Shakespeare sample data into Elasticsearch"
    curl -H "Content-Type: application/x-ndjson" \
      -X POST \
      --data-binary @"${REPO_HOME}/data/shakespeare.json" \
      'http://localhost:9200/shakespeare/doc/_bulk?pretty' > /dev/null || rc=$?
  fi

  # Stop exactly the port-forward started above (not every child of the
  # script), and do so whether or not the load succeeded. Both commands are
  # best-effort because the process may already have exited.
  kill "${forward_pid}" 2>/dev/null || true
  wait "${forward_pid}" 2>/dev/null || true

  if (( rc != 0 )); then
    echo >&2 "Failed to load the sample data into Elasticsearch"
    return "${rc}"
  fi

  echo "Sample data successfully loaded!"
}
## Installs the Elasticsearch cluster and loads the sample data.
# Applies the manifests under ${REPO_HOME}/manifests in a fixed order; after
# the master, client and data workloads it blocks on `kubectl rollout status`
# before moving on. Finishes by calling load_data.
# Globals: REPO_HOME (read)
setup_app() {
  local manifest_dir="${REPO_HOME}/manifests"
  local manifest

  echo "Installing Elasticsearch Cluster"

  for manifest in \
    es-discovery-svc \
    es-svc \
    es-master-pdb \
    es-master \
    es-client-pdb \
    es-client \
    es-data-svc \
    es-data-pdb \
    es-data-stateful; do
    kubectl -n default create -f "${manifest_dir}/${manifest}.yaml"

    # Workload manifests must finish rolling out before the next step.
    case "${manifest}" in
      es-master | es-client | es-data-stateful)
        kubectl -n default rollout status -f "${manifest_dir}/${manifest}.yaml"
        ;;
    esac
  done

  load_data
}
## Uninstall the Elasticsearch application and its persistent volume claims.
# Globals: CLUSTER_NAME, GCLOUD_REGION, GCLOUD_PROJECT, REPO_HOME (all read)
# Outputs: progress messages plus the remaining PVC/PV listings.
uninstall_app() {
  local -r padding=30
  local grace=0
  local reported
  local value
  local wait_seconds

  # Get the credentials again just in case the cluster is left over from a
  # previous run and the credentials have not been pulled yet.
  gcloud container clusters get-credentials "${CLUSTER_NAME}" \
    --region "${GCLOUD_REGION}" \
    --project "${GCLOUD_PROJECT}"

  echo "Uninstalling Elasticsearch Cluster"
  # Best-effort: some of the resources may already be gone.
  kubectl -n default delete -f "${REPO_HOME}"/manifests/ || true

  # You have to wait the default pod grace period before you can delete the
  # PVCs.
  reported=$(kubectl --namespace default get sts -l component=elasticsearch,role=data -o jsonpath='{..terminationGracePeriodSeconds}')

  # The recursive jsonpath can yield no value or several space-separated
  # values; feeding that straight into $(( )) is a syntax error for the
  # multi-value case. Use the largest numeric value, or 0 when none is
  # reported. The expansion is deliberately unquoted to split on whitespace.
  for value in ${reported}; do
    if [[ "${value}" =~ ^[0-9]+$ ]] && (( 10#${value} > grace )); then
      grace=$(( 10#${value} ))
    fi
  done

  wait_seconds=$(( grace + padding ))
  echo "Sleeping ${wait_seconds} seconds before deleting PVCs. The default pod grace period."
  sleep "${wait_seconds}"

  # Deleting and/or scaling a StatefulSet down will not delete the volumes
  # associated with the StatefulSet. This is done to ensure data safety, which
  # is generally more valuable than an automatic purge of all related
  # StatefulSet resources.
  echo "Delete PVCs..."
  kubectl -n default delete pvc -l component=elasticsearch,role=data || true

  echo "Delete PVs..."
  # kubectl delete pv $(kubectl get pv --all-namespaces | grep es-data | awk '{ print $1}')
  echo "kubectl get pvc --all-namespaces"
  kubectl -n default get pvc --all-namespaces
  echo "kubectl get pv --all-namespaces"
  kubectl -n default get pv --all-namespaces
}
## Resize the cluster's node pool.
# Globals:   CLUSTER_NAME, GCLOUD_REGION, GCLOUD_PROJECT (all read)
# Arguments: $1 - target node count passed to `gcloud ... resize --size`;
#                 must be a non-negative integer
# Returns:   1 (without calling gcloud) when the size is missing or not a
#            non-negative integer; otherwise the status of gcloud.
resize_node_pool() {
  # Default to empty so a missing argument is reported clearly instead of
  # tripping `set -u` with an "unbound variable" error.
  local size=${1:-}

  if [[ ! "${size}" =~ ^[0-9]+$ ]]; then
    echo >&2 "resize_node_pool: node count must be a non-negative integer, got '${size}'"
    return 1
  fi

  echo ""
  echo "Resizing the node pool to ${size} nodes ....."
  echo ""
  gcloud container clusters resize "${CLUSTER_NAME}" \
    --size "${size}" \
    --region "${GCLOUD_REGION}" \
    --project "${GCLOUD_PROJECT}" \
    --quiet
}
## Upgrade the cluster's control plane to NEW_K8S_VER.
# Globals: CLUSTER_NAME, NEW_K8S_VER, GCLOUD_REGION, GCLOUD_PROJECT (all read)
# Outputs: a progress banner followed by whatever gcloud prints.
upgrade_control() {
  printf '\n%s\n\n' "Upgrading the K8s control plane ....."

  # --master restricts the upgrade to the control plane; --quiet skips the
  # interactive confirmation.
  local -a upgrade_flags=(
    --cluster-version="${NEW_K8S_VER}"
    --region "${GCLOUD_REGION}"
    --project "${GCLOUD_PROJECT}"
    --master
    --quiet
  )
  gcloud container clusters upgrade "${CLUSTER_NAME}" "${upgrade_flags[@]}"
}
## Upgrade the cluster's nodes to NEW_K8S_VER.
# Globals: CLUSTER_NAME, NEW_K8S_VER, GCLOUD_REGION, GCLOUD_PROJECT (all read)
# Outputs: a progress banner followed by whatever gcloud prints.
upgrade_nodes() {
  local banner="Upgrading the K8s nodes ....."

  printf '\n%s\n\n' "${banner}"

  # Same command as the control-plane upgrade, minus --master.
  gcloud container clusters upgrade "${CLUSTER_NAME}" \
    --cluster-version="${NEW_K8S_VER}" \
    --region "${GCLOUD_REGION}" --project "${GCLOUD_PROJECT}" \
    --quiet
}
## Tear down the demo: remove the application first, then the cluster.
# Outputs: a progress banner followed by the output of both steps.
tear_down() {
  printf '\n%s\n\n' "Tearing down the infrastructure ....."

  uninstall_app
  delete_cluster
}
## Delete the cluster if it exists.
# `gcloud container clusters describe` is used as the existence probe; when
# it fails nothing is deleted. Before deleting, waits (up to 5 checks, 60
# seconds apart) for the cluster to be listed as RUNNING, since it may still
# be upgrading; the delete is attempted afterwards either way.
# Globals: CLUSTER_NAME, GCLOUD_PROJECT, GCLOUD_REGION (all read)
delete_cluster() {
  local -r max_attempts=5
  local attempts=0
  local running

  if gcloud container clusters describe "${CLUSTER_NAME}" \
    --project "${GCLOUD_PROJECT}" \
    --region "${GCLOUD_REGION}"; then
    # Cluster might be still upgrading. Wait up to 5 mins and then delete it.
    while (( attempts < max_attempts )); do
      # Query the same project/region as every other command here, rather
      # than whatever the active gcloud configuration happens to point at.
      # A failing listing is treated as "not running yet".
      running=$(gcloud container clusters list \
        --project "${GCLOUD_PROJECT}" \
        --region "${GCLOUD_REGION}" \
        --filter="STATUS:RUNNING AND NAME:${CLUSTER_NAME}" \
        --format='value(name)') || running=""
      if [[ -n "${running}" ]]; then
        break
      fi
      echo "Waiting for cluster upgrade to finish..."
      sleep 60
      attempts=$(( attempts + 1 ))
    done

    gcloud container clusters delete "${CLUSTER_NAME}" \
      --project "${GCLOUD_PROJECT}" \
      --region "${GCLOUD_REGION}" \
      --quiet
  fi
}
# After the node pool is expanded, the control plane instances will likely be
# vertically scaled automatically by Kubernetes Engine to handle the increased
# load of more instances. When the control plane is upgrading, no other cluster
# modifications can occur.
#
## Block until a running control-plane (master) upgrade has finished.
# Looks up the first operation that is both of type UPGRADE_MASTER and in
# state RUNNING and, if there is one, waits for it to complete.
# Globals: GCLOUD_PROJECT, GCLOUD_REGION (read)
wait_for_upgrade() {
  local operations
  local op_id

  echo "Checking for master upgrade"
  # Both conditions live in ONE filter expression: when --filter is given
  # twice only the last one is applied, which would match any running
  # operation regardless of its type.
  operations=$(gcloud container operations list \
    --project "${GCLOUD_PROJECT}" \
    --region "${GCLOUD_REGION}" \
    --filter 'TYPE=UPGRADE_MASTER AND STATUS=RUNNING' \
    --format 'value(name)')

  # Keep only the first line. Done in the shell rather than with `head -n1`
  # so the producer cannot be hit by SIGPIPE under `set -o pipefail`.
  op_id=${operations%%$'\n'*}

  if [[ "${op_id}" =~ ^operation-.* ]]; then
    echo "Master upgrade in process. Waiting until complete..."
    gcloud container operations wait "${op_id}" \
      --project "${GCLOUD_PROJECT}" \
      --region "${GCLOUD_REGION}"
  fi
}
## Run the whole expand-and-contract demo end to end.
# Creates the cluster and application, grows the node pool, upgrades the
# control plane and then the nodes, shrinks the pool again and finally runs
# the validation script that sits next to this one.
# Globals: SCRIPT_HOME (read)
auto() {
  local -r expanded_size=2
  local -r contracted_size=1

  create_cluster
  setup_app

  # Expand.
  resize_node_pool "${expanded_size}"
  # Unfortunate race condition here, a little sleep should be enough
  sleep 10
  wait_for_upgrade

  # Upgrade the control plane first, then the nodes.
  upgrade_control
  wait_for_upgrade
  upgrade_nodes

  # Contract and verify.
  resize_node_pool "${contracted_size}"
  "${SCRIPT_HOME}/validate.sh"
}
################ execution ####################
# validate script called correctly
if [[ $# -lt 1 ]]; then
  usage
fi

ACTION=$1

# 'resize' takes the node count as a second argument. Check for it up front:
# otherwise the "$2" below aborts with an "unbound variable" error under
# `set -u`, and only after the project/API checks have already run.
if [[ "${ACTION}" == "resize" && $# -lt 2 ]]; then
  usage
fi

# check dependencies installed
check_dependencies
# check project exist
check_project
# check apis enabled
check_apis

# dispatch the requested action; anything unknown prints the usage text
case "${ACTION}" in
  auto)
    auto
    ;;
  create)
    create_cluster
    setup_app
    ;;
  resize)
    resize_node_pool "$2"
    ;;
  upgrade-control)
    upgrade_control
    ;;
  upgrade-nodes)
    upgrade_nodes
    ;;
  delete)
    tear_down
    ;;
  *)
    usage
    ;;
esac