46
46
name : " $(BUILD_POOL_NAME_DEFAULT)"
47
47
strategy :
48
48
matrix :
49
+ npm_linux_amd64 :
50
+ arch : amd64
51
+ name : npm
52
+ os : linux
49
53
npm_windows2022_amd64 :
50
54
arch : amd64
51
55
name : npm
74
78
FQDN : empty
75
79
strategy :
76
80
matrix :
77
- v2-windows :
78
- PROFILE : " scale-win"
81
+ # v2-linux:
82
+ # PROFILE: "sc-lin"
83
+ # NUM_NETPOLS: 800
84
+ # INITIAL_CONNECTIVITY_TIMEOUT: 60
85
+ ws22 :
86
+ PROFILE : " sc-ws22"
87
+ NUM_NETPOLS : 50
88
+ INITIAL_CONNECTIVITY_TIMEOUT : 720
79
89
steps :
80
90
- checkout : self
81
91
- bash : |
@@ -115,44 +125,46 @@ jobs:
115
125
az extension add --name aks-preview
116
126
az extension update --name aks-preview
117
127
118
- export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
119
-
120
- echo "Creating resource group named $CLUSTER_NAME"
121
- az group create --name $CLUSTER_NAME -l $(LOCATION) -o table
128
+ echo "Creating resource group named $(RESOURCE_GROUP)"
129
+ az group create --name $(RESOURCE_GROUP) -l $(LOCATION) -o table
122
130
123
- echo "Creating resource group named $CLUSTER_NAME"
131
+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
132
+ echo "Creating cluster named $CLUSTER_NAME"
124
133
az aks create \
125
- --resource-group $CLUSTER_NAME \
134
+ --resource-group $(RESOURCE_GROUP) \
126
135
--name $CLUSTER_NAME \
127
136
--generate-ssh-keys \
128
137
--windows-admin-username e2eadmin \
129
138
--windows-admin-password alpha@numeric!password2 \
130
139
--network-plugin azure \
131
140
--vm-set-type VirtualMachineScaleSets \
132
141
--node-vm-size Standard_D4s_v3 \
133
- --node-count 1
134
-
135
- # don't schedule anything on the linux system pool
136
- echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
137
- az aks nodepool update \
138
- --cluster-name $CLUSTER_NAME \
139
- -g $CLUSTER_NAME \
140
- -n nodepool1 \
141
- --node-taints CriticalAddonsOnly=true:NoSchedule
142
-
143
- echo "Adding Windows nodepool to $CLUSTER_NAME"
144
- az aks nodepool add \
145
- --resource-group $CLUSTER_NAME \
146
- --cluster-name $CLUSTER_NAME \
147
- --name awin22 \
148
- --os-type Windows \
149
- --os-sku Windows2022 \
150
- --node-vm-size Standard_D4s_v3 \
151
142
--node-count 1 \
152
143
--max-pods 100
153
144
145
+ if [[ $(PROFILE) == *ws22 ]]; then
146
+ # don't schedule anything on the linux system pool
147
+ echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
148
+ az aks nodepool update \
149
+ --cluster-name $CLUSTER_NAME \
150
+ -g $(RESOURCE_GROUP) \
151
+ -n nodepool1 \
152
+ --node-taints CriticalAddonsOnly=true:NoSchedule
153
+
154
+ echo "Adding Windows nodepool to $CLUSTER_NAME"
155
+ az aks nodepool add \
156
+ --resource-group $(RESOURCE_GROUP) \
157
+ --cluster-name $CLUSTER_NAME \
158
+ --name awin22 \
159
+ --os-type Windows \
160
+ --os-sku Windows2022 \
161
+ --node-vm-size Standard_D4s_v3 \
162
+ --node-count 1 \
163
+ --max-pods 100
164
+ fi
165
+
154
166
echo "Getting credentials to $CLUSTER_NAME"
155
- az aks get-credentials -g $CLUSTER_NAME -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
167
+ az aks get-credentials -g $(RESOURCE_GROUP) -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
156
168
mkdir -p ~/.kube/
157
169
cp ./kubeconfig ~/.kube/config
158
170
@@ -168,28 +180,42 @@ jobs:
168
180
set -e
169
181
170
182
# deploy azure-npm
171
- cp $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml azure-npm.yaml
172
- # set higher memory limit
183
+ cp $(Pipeline.Workspace)/s/npm/azure-npm.yaml azure-npm.yaml
173
184
sed -i 's/memory: 300Mi/memory: 1000Mi/g' azure-npm.yaml
174
185
kubectl apply -f azure-npm.yaml
175
186
187
+ cp $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml azure-npm-win.yaml
188
+ # set higher memory limit
189
+ sed -i 's/memory: 300Mi/memory: 1000Mi/g' azure-npm-win.yaml
190
+ kubectl apply -f azure-npm-win.yaml
191
+
176
192
# swap azure-npm image with one built during run
193
+ kubectl set image daemonset/azure-npm -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:linux-amd64-$(TAG)
177
194
kubectl set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)
178
195
179
- sleep 5s
196
+ sleep 30s
180
197
echo "waiting for NPM to start running..."
181
- kubectl wait --for=condition=Ready pod -l k8s-app=azure-npm -n kube-system --timeout=20m
198
+ kubectl wait --for=condition=Ready pod -l k8s-app=azure-npm -n kube-system --timeout=15m || {
199
+ kubectl describe pod -n kube-system -l k8s-app=azure-npm
200
+ echo "##vso[task.logissue type=error]NPM failed to start running"
201
+ exit 1
202
+ }
182
203
echo "sleep 3m to let NPM restart in case of bootup failure due to HNS errors"
183
204
sleep 3m
184
205
185
206
kubectl get po -n kube-system -owide -A
186
207
187
- echo "labeling Windows nodes for scale test"
188
- kubectl get node -o wide | grep "Windows Server 2022 Datacenter" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
208
+ if [[ $(PROFILE) == *ws22 ]]; then
209
+ echo "labeling Windows nodes for scale test"
210
+ kubectl get node -o wide | grep "Windows Server 2022 Datacenter" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
211
+ else
212
+ echo "labeling Linux nodes for scale test"
213
+ kubectl get node -o wide | grep "Ubuntu" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
214
+ fi
189
215
190
216
export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
191
217
echo "Showing cluster status for $CLUSTER_NAME"
192
- FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv`
218
+ FQDN=`az aks show -n $CLUSTER_NAME -g $(RESOURCE_GROUP) --query fqdn -o tsv`
193
219
echo "##vso[task.setvariable variable=FQDN]$FQDN"
194
220
195
221
- task : AzureCLI@2
@@ -202,15 +228,16 @@ jobs:
202
228
condition : succeeded()
203
229
inlineScript : |
204
230
set -e
205
- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
231
+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
232
+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
206
233
./kwok --kubeconfig ~/.kube/config \
207
234
--cidr=155.0.0.0/16 \
208
235
--node-ip=155.0.0.1 \
209
236
--manage-all-nodes=false \
210
237
--manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
211
238
--manage-nodes-with-label-selector= \
212
239
--disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
213
- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-scale-up.log &
240
+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-scale-up.log &
214
241
kwok_pid=$!
215
242
216
243
# 20 kwok nodes
@@ -229,8 +256,8 @@ jobs:
229
256
--max-real-pods-per-node=30 \
230
257
--num-real-deployments=10 \
231
258
--num-real-replicas=3 \
232
- --num-network-policies=50 \
233
- --num-unapplied-network-policies=50 \
259
+ --num-network-policies=$(NUM_NETPOLS) \
260
+ --num-unapplied-network-policies=$(NUM_NETPOLS) \
234
261
--num-unique-labels-per-pod=2 \
235
262
--num-unique-labels-per-deployment=2 \
236
263
--num-shared-labels-per-pod=10
@@ -248,28 +275,30 @@ jobs:
248
275
condition : succeeded()
249
276
inlineScript : |
250
277
set -e
251
- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
278
+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
279
+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
252
280
./kwok --kubeconfig ~/.kube/config \
253
281
--cidr=155.0.0.0/16 \
254
282
--node-ip=155.0.0.1 \
255
283
--manage-all-nodes=false \
256
284
--manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
257
285
--manage-nodes-with-label-selector= \
258
286
--disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
259
- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-bootup-latency.log &
287
+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-bootup-latency.log &
260
288
kwok_pid=$!
261
289
262
290
kubectl rollout restart -n kube-system ds azure-npm-win
263
291
echo "sleeping 3 minutes to allow NPM pods to restart after scale-up..."
264
292
sleep 3m
265
293
266
294
cd $(Pipeline.Workspace)/s/test/scale/connectivity/
295
+ # notes for Windows:
267
296
# initial connectivity should be established within 15 minutes of NPM restart (12 minute timeout since we already waited 3 minutes above)
268
297
# adding new network policy to all 30 Pods should happen within 30 seconds
269
298
set +e
270
299
./test-connectivity.sh --kubectl-binary=$kubectlPath \
271
300
--num-scale-pods-to-verify=all \
272
- --max-wait-for-initial-connectivity=$((12*60) ) \
301
+ --max-wait-for-initial-connectivity=$(INITIAL_CONNECTIVITY_TIMEOUT ) \
273
302
--max-wait-after-adding-netpol=30
274
303
rc=$?
275
304
if [[ $rc != 0 ]]; then
@@ -286,18 +315,19 @@ jobs:
286
315
scriptType : " bash"
287
316
scriptLocation : " inlineScript"
288
317
failOnStderr : true
289
- # condition: succeeded()
318
+ condition : succeeded()
290
319
inlineScript : |
291
320
set -e
292
- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
321
+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
322
+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
293
323
./kwok --kubeconfig ~/.kube/config \
294
324
--cidr=155.0.0.0/16 \
295
325
--node-ip=155.0.0.1 \
296
326
--manage-all-nodes=false \
297
327
--manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
298
328
--manage-nodes-with-label-selector= \
299
329
--disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
300
- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-crud.log &
330
+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-crud.log &
301
331
kwok_pid=$!
302
332
303
333
# will delete scale-test and connectivity-test namespaces from previous run
@@ -342,15 +372,16 @@ jobs:
342
372
condition : succeeded()
343
373
inlineScript : |
344
374
set -e
345
- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
375
+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
376
+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
346
377
./kwok --kubeconfig ~/.kube/config \
347
378
--cidr=155.0.0.0/16 \
348
379
--node-ip=155.0.0.1 \
349
380
--manage-all-nodes=false \
350
381
--manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
351
382
--manage-nodes-with-label-selector= \
352
383
--disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
353
- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-crud-connectivity.log &
384
+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-crud-connectivity.log &
354
385
kwok_pid=$!
355
386
356
387
cd $(Pipeline.Workspace)/s/test/scale/connectivity/
@@ -371,14 +402,15 @@ jobs:
371
402
372
403
- bash : |
373
404
export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
374
- cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME
375
405
echo "Getting cluster state for $CLUSTER_NAME"
376
406
mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
377
- kubectl get pods -n kube-system | grep npm
378
- kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt
379
- # capture any previous logs in case there was a crash
380
- npmPodList=`kubectl get pods -n kube-system | grep npm | awk '{print $1}'`
407
+ kubectl get pods -n kube-system -owide | grep npm | grep -v kwok
408
+ npmPodList=`kubectl get pods -n kube-system -owide | grep npm | grep -v kwok | awk '{print $1}'`
381
409
for npmPod in $npmPodList; do
410
+ logFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE)-$npmPod.txt
411
+ kubectl logs -n kube-system $npmPod > $logFile
412
+
413
+ # capture any previous logs in case there was a crash
382
414
previousLogFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/previous-npm-logs_$(PROFILE).txt
383
415
kubectl logs -n kube-system $npmPod -p > $previousLogFile
384
416
if [[ $? -ne 0 ]]; then
@@ -413,6 +445,7 @@ jobs:
413
445
azureSubscription : $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
414
446
scriptType : " bash"
415
447
scriptLocation : " inlineScript"
448
+ condition : succeeded()
416
449
inlineScript : |
417
450
echo Deleting $(RESOURCE_GROUP)
418
451
az group delete -n $(RESOURCE_GROUP) --yes
0 commit comments