-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadjust
executable file
·1467 lines (1273 loc) · 62.5 KB
/
adjust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
from __future__ import print_function
import copy
import importlib
import sys
import os
import errno
import subprocess
import time
import datetime
import hashlib
from collections.abc import Iterable
import json
import yaml
# import signal
from adjust import Adjust, AdjustError
json_enc = json.JSONEncoder(separators=(",", ":")).encode  # compact JSON encoder (no whitespace), used for kubectl patch payloads
# === constants
DESC_FILE = "./config.yaml"  # user-provided driver descriptor, loaded by read_desc()
EXCLUDE_LABEL = "optune.ai/exclude"  # deployments carrying this label with a non-0 value are skipped
Gi = 1024 * 1024 * 1024  # bytes per gibibyte
MEM_STEP = 128 * 1024 * 1024  # minimal useful increment in mem limit/reserve, bytes
CPU_STEP = 0.0125  # 1.25% of a core (even though 1 millicore is the highest resolution supported by k8s)
MAX_MEM = 4 * Gi  # bytes, may be overridden to higher limit
MAX_CPU = 4.0  # cores
# MAX_REPLICAS = 1000 # arbitrary, TBD
FORCED_RESTART_ANN = "servo.opsani.com/forceRestartAt"  # pod annotation to set for forced restart
# the k8s obj to which we make queries/updates:
DEPLOYMENT = "deployment"
# DEPLOYMENT = "deployment.v1.apps" # new, not supported in 1.8 (it has v1beta1)
RESOURCE_MAP = {"mem": "memory", "cpu": "cpu"}  # driver setting name -> k8s resource name
# top-level keys in config data that are not printed on --query
EXCLUDE_FROM_QUERY = ["driver", "update_annotation", "force_restart"]
class ConfigError(Exception):  # user-provided descriptor not readable
    """Raised when the user-provided config descriptor is missing, unreadable, or contains invalid values."""
    pass
def import_encoder_base():
    """Import and return the ``encoders.base`` module, re-raising a descriptive ImportError on failure."""
    try:
        module = importlib.import_module("encoders.base")
    except ImportError:
        raise ImportError("Unable to import base for encoders when handling `command` section.")
    return module
# === compute hash of arbitrary data struct
# (copied inline from skopos/.../plugins/spec_hash_helper.py)
def _dbg(*data):
    """DEBUG helper: append raw data tuples to the skopos plugin debug log."""
    with open("/skopos/plugins/dbg.log", "a") as log_file:
        print(data, file=log_file)
def get_hash(data):
    """md5 hash of Python data. This is limited to scalars that are convertible to string and container
    structures (list, dict) containing such scalars. Some data items are not distinguishable, if they have
    the same representation as a string, e.g., hash(b'None') == hash('None') == hash(None)"""
    digest = hashlib.md5()
    dump_container(data, digest.update)
    return digest.hexdigest()
def dump_container(c, func):
    """Stream the contents of a container through *func* as utf-8 byte chunks,
    in a repeatable order (sorted dict keys), suitable e.g. for hashing."""
    if isinstance(c, dict):
        func(b"{")
        for key in sorted(c):  # sorted so the byte stream is deterministic
            func("{}:".format(key).encode("utf-8"))
            dump_container(c[key], func)
            func(b",")
        func(b"}")
    elif isinstance(c, list):
        func(b"[")
        for item in c:
            dump_container(item, func)
            func(b",")
        func(b"]")
    else:
        # scalar: pass bytes through untouched, encode everything else via str()
        if isinstance(c, bytes):
            chunk = c
        elif isinstance(c, str):
            chunk = c.encode("utf-8")
        else:
            chunk = str(c).encode("utf-8")
        func(chunk)
# ===
def kubectl(namespace, *args):
    """Build the full kubectl argument list for *args*, adding namespace/server/token/TLS
    options from the OPTUNE_* environment variables. Returns the list for subprocess use."""
    cmd = ["kubectl"]
    if not bool(int(os.environ.get("OPTUNE_USE_DEFAULT_NAMESPACE", "0"))):
        cmd.append("--namespace=" + namespace)
    # append conditional args as provided by env vars
    server = os.getenv("OPTUNE_K8S_SERVER")
    if server is not None:
        cmd.append("--server=" + server)
    token = os.getenv("OPTUNE_K8S_TOKEN")
    if token is not None:
        cmd.append("--token=" + token)
    if bool(int(os.getenv("OPTUNE_K8S_SKIP_TLS_VERIFY", "0"))):
        cmd.append("--insecure-skip-tls-verify=true")
    dbg_txt = "DEBUG: ns='{}', env='{}', r='{}', args='{}'".format(
        namespace, os.environ.get("OPTUNE_USE_DEFAULT_NAMESPACE", "???"), cmd, list(args)
    )
    # patch commands are always traced to stderr; everything else only when debug logging is on
    if args[0] == "patch":
        print(dbg_txt, file=sys.stderr)
    else:
        dbg_log(dbg_txt)
    return cmd + list(args)
def k_get(namespace, qry):
    """run kubectl get and return parsed json output"""
    # accept either a single query token or a list of tokens
    if not isinstance(qry, list):
        qry = [qry]
    # this will raise exception if it fails:
    output = subprocess.check_output(kubectl(namespace, "get", "--output=json", *qry))
    output = output.decode("utf-8")
    output = json.loads(output)
    return output
def k_patch(namespace, typ, obj, patchstr):
    """run kubectl patch and return parsed json output"""
    # typ: k8s object type (e.g. DEPLOYMENT); obj: object name; patchstr: JSON patch payload
    # this will raise exception if it fails:
    cmd = kubectl(namespace, "patch", "--output=json", typ, obj, "-p", patchstr)
    output = subprocess.check_output(cmd)
    output = output.decode("utf-8")
    output = json.loads(output)
    return output
def read_desc():
    """load the user-defined descriptor, returning a dictionary of the contents under the k8s top-level key, if any

    Raises:
        ConfigError: missing/unreadable file, YAML syntax error, or invalid option values.
        AssertionError: structural problems with the descriptor.
          NOTE(review): asserts are stripped under ``python -O``; consider converting to ConfigError.
    """
    try:
        # context manager guarantees the descriptor file handle is closed even on parse errors
        with open(DESC_FILE) as f:
            desc = yaml.safe_load(f)
    except IOError as e:
        if e.errno == errno.ENOENT:
            raise ConfigError("configuration file {} does not exist".format(DESC_FILE))
        raise ConfigError("cannot read configuration from {}: {}".format(DESC_FILE, e.strerror))
    except yaml.error.YAMLError as e:
        raise ConfigError("syntax error in {}: {}".format(DESC_FILE, str(e)))
    refer_tip = "You can refer to a sample configuration in README.md."
    assert bool(desc), "Configuration file is empty."
    driver_key = "k8s"
    if os.environ.get("OPTUNE_USE_DRIVER_NAME", False):
        # allow keying the config off the driver executable's name instead of "k8s"
        driver_key = os.path.basename(__file__)
    # BUG FIX: arguments were previously passed as (DESC_FILE, refer_tip, driver_key),
    # printing the README tip where the key name belongs and vice versa.
    assert driver_key in desc and desc[driver_key], (
        "No configuration is defined for K8s driver in config file {}. "
        'Please set up configuration for deployments under key "{}". '
        "{}".format(DESC_FILE, driver_key, refer_tip)
    )
    desc = desc[driver_key]
    assert (
        "application" in desc and desc["application"]
    ), 'Section "application" was not defined in a configuration file. {}'.format(refer_tip)
    assert (
        "components" in desc["application"] and desc["application"]["components"] is not None
    ), 'Section "components" was not defined in a configuration file section "application". {}'.format(refer_tip)
    assert desc["application"][
        "components"
    ], "No components were defined in a configuration file. " "Please define at least one component. {}".format(
        refer_tip
    )
    comps = desc["application"]["components"]
    # count how many components declare "replicas" per deployment; only one is allowed
    replicas_tracker = {}
    for name, comp in comps.items():
        settings = comp.get("settings", {})
        # sub-setting validation
        validate_setting_configs(name, settings)
        # cross-component validation
        if "replicas" in settings:
            dep_name = comp.get("deployment", name)
            dep_name = dep_name.split("/")[0]  # if no '/', this just gets the whole name
            replicas_tracker[dep_name] = replicas_tracker.get(dep_name, 0) + 1
    if len(replicas_tracker) < sum(replicas_tracker.values()):
        rotten_deps = [dep for dep, count in replicas_tracker.items() if count > 1]
        # raise ConfigError (an Exception subclass, so callers catching Exception still work)
        # for consistency with the other configuration failures in this function
        raise ConfigError(
            'Several components in the same deployment have "replicas" defined. Affected deployments: {}. '
            'Please, keep only one "replicas" per deployment.'.format(", ".join(rotten_deps))
        )
    ann_key = desc.get("update_annotation", None)
    if ann_key is not None:
        assert isinstance(ann_key, str), "'update_annotation' must have a string value"
    if "force_restart" in desc:
        v = desc["force_restart"]
        if isinstance(v, str):
            # accept "0"/"1"-style strings; anything non-numeric is a config error
            try:
                v = bool(int(v))
            except Exception:
                raise ConfigError("'force_restart' must be boolean or convertible to integer/boolean")
        desc["force_restart"] = v
    return desc
def validate_setting_configs(name, settings):
    """Per-setting sanity checks for component *name*: when a mem/cpu setting selects
    'request_min_limit' it must also declare a positive 'limit_min'."""
    for setting_name, cfg in settings.items():
        if setting_name not in ("mem", "cpu"):
            continue
        if cfg.get("selector") != "request_min_limit":
            continue
        if cfg.get("limit_min", 0) > 0:
            continue
        err_str = "Component {name} configuration was malformed; limit_min > 0 required when selector == request_min_limit. Found: {val}"
        raise ConfigError(err_str.format(name=name, val=cfg.get("limit_min")))
def numval(v, minv, maxv, step=1.0, pinn=None):
    """Build a linear 'range' setting-description dict; 'pinned' is included only when pinn is given."""
    description = {
        "value": v,
        "min": minv,
        "max": maxv,
        "step": step,
        "type": "range",
    }
    if pinn is not None:
        description["pinned"] = bool(pinn)
    return description
def cpuunits(s):
    """Convert a CPU quantity string into cores (float); 'm' (millicores) is the only unit suffix k8s uses."""
    return float(s[:-1]) / 1000.0 if s[-1] == "m" else float(s)
# valid mem units: E, P, T, G, M, K, Ei, Pi, Ti, Gi, Mi, Ki
# nb: 'm' suffix found after setting 0.7Gi
mumap = {
    "E": 1000 ** 6,
    "P": 1000 ** 5,
    "T": 1000 ** 4,
    "G": 1000 ** 3,
    "M": 1000 ** 2,
    "K": 1000,
    "m": 1000 ** -1,
    "Ei": 1024 ** 6,
    "Pi": 1024 ** 5,
    "Ti": 1024 ** 4,
    "Gi": 1024 ** 3,
    "Mi": 1024 ** 2,
    "Ki": 1024,
}
def memunits(s):
    """Convert a memory quantity string (with an optional unit suffix from mumap) into bytes (float)."""
    for suffix in mumap:
        if s.endswith(suffix):
            return float(s[: -len(suffix)]) * mumap[suffix]
    # no recognized suffix: plain number of bytes
    return float(s)
def check_setting(name, settings):
    """Assert that *settings* is iterable and does not already contain *name*
    (guards against the same setting being defined in more than one config section)."""
    assert isinstance(settings, Iterable), 'Object "settings" passed to check_setting() is not iterable.'
    # fixed typo in the user-facing message: "has been define" -> "has been defined"
    assert name not in settings, (
        'Setting "{}" has been defined more than once. '
        "Please, check other config sections for setting duplicates.".format(name)
    )
def encoder_setting_name(setting_name, encoder_config):
    """Prepend the encoder's optional 'setting_prefix' to *setting_name*."""
    return "{}{}".format(encoder_config.get("setting_prefix", ""), setting_name)
def describe_encoder(value, config, exception_context="a describe phase of an encoder"):
    """Yield ``(prefixed_setting_name, setting_descriptor)`` pairs produced by the configured
    encoder for the current *value*. Encoder failures are re-raised as a generic Exception
    tagged with *exception_context*."""
    encoder_base = import_encoder_base()
    try:
        settings = encoder_base.describe(config, value or "")
        for name, setting in settings.items():
            yield (encoder_setting_name(name, config), setting)
    except Exception as e:
        # narrowed from BaseException: converting KeyboardInterrupt/SystemExit into a
        # generic Exception would make the process impossible to interrupt cleanly
        raise Exception("Error while handling {}: {}".format(exception_context, str(e))) from e
def encode_encoder(settings, config, expected_type=None, exception_context="an encode phase of an encoder"):
    """Run the configured encoder over *settings*, returning ``(encoded_value, encoded_setting_names)``.

    When a 'setting_prefix' is configured, only settings bearing the prefix are passed through,
    with the prefix removed. Failures are re-raised as a generic Exception tagged with
    *exception_context*."""
    encoder_base = import_encoder_base()
    try:
        sanitized_settings = settings
        prefix = config.get("setting_prefix")
        if prefix:
            # BUG FIX: previous code used str.lstrip(prefix), which strips any leading
            # characters found in the prefix *character set*, not the prefix string itself
            # (e.g. "e_extra".lstrip("e_") -> "xtra"). Slice the exact prefix off instead.
            sanitized_settings = {
                name[len(prefix):]: value
                for name, value in settings.items()
                if name.startswith(prefix)
            }
        encoded_value, encoded_settings = encoder_base.encode(config, sanitized_settings, expected_type=expected_type)
        encoded_settings = [encoder_setting_name(name, config) for name in encoded_settings]
        return encoded_value, encoded_settings
    except Exception as e:
        # narrowed from BaseException: don't convert KeyboardInterrupt/SystemExit
        raise Exception("Error while handling {}: {}".format(exception_context, str(e))) from e
def islookinglikerangesetting(s):
    """Heuristic: the dict carries any of the range-descriptor keys."""
    return any(key in s for key in ("min", "max", "step"))
def islookinglikeenumsetting(s):
    """Heuristic: the dict carries an enum 'values' key."""
    return "values" in s
def israngesetting(s):
    """True when the setting is explicitly typed 'range' or merely shaped like one."""
    return s.get("type") == "range" or islookinglikerangesetting(s)
def isenumsetting(s):
    """True when the setting is explicitly typed 'enum' or merely shaped like one."""
    return s.get("type") == "enum" or islookinglikeenumsetting(s)
def issetting(s):
    """True when *s* is a dict that describes either a range or an enum setting."""
    return isinstance(s, dict) and (israngesetting(s) or isenumsetting(s))
def get_rsrc(desc_settings, cont_resources, sn):
    """Return the current raw value (quantity string) of resource *sn* ('mem' or 'cpu') from a
    container's 'resources' dict, honoring the setting's configured 'selector':
    - 'request', 'both', 'request_min_limit': prefer requests, fall back to limits (warning printed
      only when falling back is unexpected)
    - 'limit': prefer limits, fall back to requests (with a warning)
    Returns "0" when neither value is set (treated as 'unlimited' downstream)."""
    rn = RESOURCE_MAP[sn]  # map driver setting name to the k8s resource name
    selector = desc_settings.get(sn, {}).get("selector", "both")
    if selector in ["request", "both", "request_min_limit"]:
        val = cont_resources.get("requests", {}).get(rn)
        if val is None:
            # requested value missing: fall back to the limit, warning the operator
            val = cont_resources.get("limits", {}).get(rn)
            if val is not None:
                Adjust.print_json_error(
                    error="warning",
                    cl=None,
                    message='Using the non-selected value "limit" for resource "{}" as the selected value is not set'.format(
                        sn
                    ),
                )
            else:
                val = "0"
    else:  # selector == 'limit'
        val = cont_resources.get("limits", {}).get(rn)
        if val is None:
            # limit missing: fall back to the request
            val = cont_resources.get("requests", {}).get(rn)
            if val is not None:
                if selector == "limit":
                    Adjust.print_json_error(
                        error="warning",
                        cl=None,
                        message='Using the non-selected value "request" for resource "{}" as the selected value is not set'.format(
                            sn
                        ),
                    )
                # else: don't print warning for 'both'
            else:
                val = "0"
    return val
def get_latest_rs(appname, labels, deployment):
    """Return the newest replicaset (by deployment.kubernetes.io/revision annotation)
    owned by *deployment*, selecting among replicasets matching *labels* in namespace
    *appname*. Raises AdjustError when none is found."""
    all_rs = k_get(appname, ["-l", labels, "rs"])["items"]
    dep_uid = deployment.get("metadata", {}).get("uid")
    owned = [
        rs
        for rs in all_rs
        if any(
            owner.get("uid") is not None and owner.get("uid") == dep_uid
            for owner in rs.get("metadata", {}).get("ownerReferences", [])
        )
    ]
    if not owned:
        raise AdjustError("Unable to locate replicaset(s) for deployment. Found replica_sets: {}".format(all_rs))
    def _revision(rs):
        # missing annotation sorts lowest
        return int(rs.get("metadata", {}).get("annotations", {}).get("deployment.kubernetes.io/revision", -1))
    return max(owned, key=_revision)
def get_latest_pods(appname, labels, replicaset, pod_debug=False):
    """Return the pods in namespace *appname* matching *labels* that are owned by
    *replicaset*. When pod_debug is set, the raw 'kubectl get pods' output is dumped to stderr."""
    if pod_debug:
        raw = subprocess.check_output(kubectl(appname, "get", "-l", labels, "pods"))
        print("DEBUG pods: \n{}".format(raw.decode("utf-8")), file=sys.stderr)
    rs_uid = replicaset.get("metadata", {}).get("uid")
    pods = k_get(appname, ["-l", labels, "pods"])
    owned = []
    for pod in pods["items"]:
        owners = pod.get("metadata", {}).get("ownerReferences", [])
        if any(o.get("uid") is not None and o.get("uid") == rs_uid for o in owners):
            owned.append(pod)
    return owned
def raw_query(appname, desc, pod_debug=False):
    """
    Read the list of deployments in a namespace and fill in data into desc.
    Both the input 'desc' and the return value are in the 'settings query response' format.
    NOTE only 'cpu', 'memory' and 'replicas' settings are filled in even if not present in desc.
    Other settings must have a description in 'desc' to be returned.

    Returns a 3-tuple: (filled-in desc, raw deployment list, per-deployment restart counts).
    """
    desc = copy.deepcopy(desc)
    app = desc["application"]
    comps = app["components"]
    cfg = desc.pop(
        "control", {}
    )  # FIXME TODO - query doesn't receive data from remote, only the local cfg can be used; where in the data should the "control" section really be?? note, [userdata][deployment] sub-keys for specifying the 'reference' app means we have to have that 'reference' as a single deployment and it has to be excluded from enumeration as an 'adjustable' component, using the whitelist.
    refapp = cfg.get("userdata", {}).get("deployment", None)
    mon_data = {}
    # --- optional 'reference app' handling: recursively query a second, renamed deployment
    # in the same namespace and record its spec/version/runtime ids for comparison ---
    if refapp:
        d2 = desc.copy()
        c2 = copy.deepcopy(cfg)
        c2["userdata"].pop("deployment", None)
        d2["control"] = c2
        if (
            len(comps) != 1
        ):  # 'reference app' works only with single-component (due to the use of deployment name as 'component name' and having both apps in the same namespace)
            raise AdjustError(
                "operation with reference app not possible when multiple components are defined",
                status="aborted",
                reason="ref-app-unavailable",
            )
        refcomps = {refapp: comps[list(comps.keys())[0]]}
        d2["application"] = {
            "components": refcomps
        }  # single component, renamed (so we pick the 'reference deployment' in the same namespace)
        try:
            refqry, _, _ = raw_query(appname, d2)
        except AdjustError as e:
            raise AdjustError(str(e), status="aborted", reason="ref-app-unavailable")
        # let other exceptions go unchanged
        # TODO: maybe something better than a sum is needed here, some multi-component scale events could end up modifying scale counts without changing the overall sum
        replicas_sum = sum((c["settings"]["replicas"]["value"] for c in refqry["application"]["components"].values()))
        refqry = refqry["monitoring"]  # we don't need other data from refqry any more
        mon_data = {
            "ref_spec_id": refqry["spec_id"],
            "ref_version_id": refqry["version_id"],
            "ref_runtime_count": replicas_sum,
        }
        if refqry.get("runtime_id"):
            mon_data["ref_runtime_id"] = refqry["runtime_id"]
    # --- enumerate deployments in the namespace ---
    deployments = k_get(appname, DEPLOYMENT)
    # note d["Kind"] should be "List"
    deps_list = deployments["items"]
    if (
        not deps_list
    ):  # NOTE we don't distinguish the case when the namespace doesn't exist at all or is just empty (k8s will return an empty list whether or not it exists)
        raise AdjustError(
            "application '{}' does not exist or has no components".format(appname),
            status="aborted",
            reason="app-unavailable",
        )  # NOTE not a documented 'reason'
    deps_dict = {dep["metadata"]["name"]: dep for dep in deps_list}
    raw_specs = {}  # per-deployment pod template specs (hashed into spec_id)
    imgs = {}  # per-component container images (hashed into version_id)
    runtime_ids = {}  # per-deployment pod uids (hashed into runtime_id)
    restart_counts = {}  # per-deployment container restart counters
    # ?? TODO: is it possible to have an item in 'd' with "kind" other than "Deployment"? (likely no)
    # is it possible to have replicas == 0 (and how do we represent that, if at all)
    for full_comp_name, comp_data in comps.items():
        # component may map to "deployment" or "deployment/container"
        dep_name = comp_data.get("deployment", full_comp_name)
        cont_name = None
        if "/" in dep_name:
            dep_name, cont_name = dep_name.split("/")
        assert (
            dep_name in deps_dict
        ), 'Could not find deployment "{}" defined for component "{}" in namespace "{}".' "".format(
            dep_name, full_comp_name, appname
        )
        dep = deps_dict[dep_name]
        conts = dep["spec"]["template"]["spec"]["containers"]
        if cont_name is not None:
            contsd = {c["name"]: c for c in conts}
            assert cont_name in contsd, (
                'Could not find container with name "{}" in deployment "{}" '
                'for component "{}" in namespace "{}".'
                "".format(cont_name, dep_name, full_comp_name, appname)
            )
            cont = contsd[cont_name]
        else:
            # no container specified: use the first one
            cont = conts[0]
        # skip if excluded by label
        try:
            if bool(int(dep["metadata"].get("labels", {}).get(EXCLUDE_LABEL, "0"))):  # string value of 1 (non-0)
                continue
        except ValueError as e:  # int() is the only thing that should trigger exceptions here
            # TODO add warning to annotations to be returned
            print(
                "failed to parse exclude label for deployment {}: {}: {}; ignored".format(
                    dep_name, type(e).__name__, str(e)
                ),
                file=sys.stderr,
            )
            # pass # fall through, ignore unparseable label
        # selector for pods, NOTE this relies on having a equality-based label selector,
        # k8s seems to support other types, I don't know what's being used in practice.
        try:
            sel = dep["spec"]["selector"]["matchLabels"]
        except KeyError:
            raise AdjustError(
                "only deployments with matchLabels selector are supported, found selector: {}".format(
                    repr(dep["spec"].get("selector", {}))
                ),
                status="aborted",
                reason="app-unavailable",
            )  # NOTE not a documented 'reason'
        # convert to string suitable for 'kubect -l labelsel'
        sel = ",".join(("{}={}".format(k, v) for k, v in sel.items()))
        # list of pods, for runtime_id
        try:
            latest_rs = get_latest_rs(appname=appname, labels=sel, deployment=dep)
            pods = get_latest_pods(appname, sel, latest_rs, pod_debug)
            # NOTE: "Terminating" is not an actual phase on the pod status. More info here: https://github.com/kubernetes/kubernetes/issues/22839
            non_terminating = [pod for pod in pods if not pod["metadata"].get("deletionTimestamp")]
            runtime_ids[dep_name] = [pod["metadata"]["uid"] for pod in non_terminating]
            restart_counts[dep_name] = [
                {
                    "pod+container": "{}+{}".format(pod["metadata"]["name"], cont_stat["name"]),
                    "restartCount": cont_stat["restartCount"],
                }
                for pod in pods
                for cont_stat in pod["status"].get("containerStatuses", [])
            ]
        except subprocess.CalledProcessError as e:
            # TODO: re-implement graceful failure
            # Adjust.print_json_error(error="warning", cl="CalledProcessError", message='Unable to retrieve pods: {}. Output: {}'.format(e, e.output))
            raise AdjustError(
                "Unable to get pods for deployment {}: rc {}, output: {}".format(dep_name, e.returncode, e.output),
                status="aborted",
                reason="app-unavailable",
            )
        # extract deployment settings
        # NOTE: generation, resourceVersion and uid can help detect changes
        # (also, to check PG's k8s code in oco)
        replicas = dep["spec"]["replicas"]
        tmplt_spec = dep["spec"]["template"]["spec"]
        raw_specs[dep_name] = tmplt_spec  # save for later, used to checksum all specs
        # name, env, resources (limits { cpu, memory }, requests { cpu, memory })
        # FIXME: what to do if there's no mem reserve or limits defined? (a namespace can have a default mem limit, but that's not necessarily set, either)
        # (for now, we give the limit as 0, treated as 'unlimited' - AFAIK)
        imgs[full_comp_name] = cont["image"]  # FIXME, is this always defined?
        comp = comps[full_comp_name] = comps[full_comp_name] or {}
        settings = comp["settings"] = comp.setdefault("settings", {}) or {}
        # mem/cpu are reported only when explicitly configured; replicas by default
        read_mem = settings and "mem" in settings
        read_cpu = settings and "cpu" in settings
        read_replicas = not settings or "replicas" in settings
        res = cont.get("resources")
        if res:
            if read_mem:
                mem_val = get_rsrc(desc_settings=settings, cont_resources=res, sn="mem")
                # (value, min, max, step) all in GiB
                settings["mem"] = numval(
                    v=memunits(mem_val) / Gi,
                    minv=(settings.get("mem") or {}).get("min", MEM_STEP / Gi),
                    maxv=(settings.get("mem") or {}).get("max", MAX_MEM / Gi),
                    step=(settings.get("mem") or {}).get("step", MEM_STEP / Gi),
                    pinn=(settings.get("mem") or {}).get("pinned", None),
                )
            if read_cpu:
                cpu_val = get_rsrc(desc_settings=settings, cont_resources=res, sn="cpu")
                # (value, min, max, step), all in CPU cores
                settings["cpu"] = numval(
                    v=cpuunits(cpu_val),
                    minv=(settings.get("cpu") or {}).get("min", CPU_STEP),
                    maxv=(settings.get("cpu") or {}).get("max", MAX_CPU),
                    step=(settings.get("cpu") or {}).get("step", CPU_STEP),
                    pinn=(settings.get("cpu") or {}).get("pinned", None),
                )
        else:
            # no resources section on the container: report configured settings with value=None
            if read_mem:
                settings["mem"]["type"] = "range"
                settings["mem"]["value"] = None
            if read_cpu:
                settings["cpu"]["type"] = "range"
                settings["cpu"]["value"] = None
        # TODO: adjust min/max to include current values, (e.g., increase mem_max to at least current if current > max)
        # set replicas: FIXME: can't actually be set for each container (the pod as a whole is replicated); for now we have no way of expressing this limitation in the setting descriptions
        # note: setting min=max=current replicas, since there is no way to know what is allowed; use override descriptor to loosen range
        if read_replicas:
            settings["replicas"] = numval(
                v=replicas,
                minv=(settings.get("replicas") or {}).get("min", replicas),
                maxv=(settings.get("replicas") or {}).get("max", replicas),
                step=(settings.get("replicas") or {}).get("step", 1),
                pinn=(settings.get("replicas") or {}).get("pinned", None),
            )
        # current settings of custom env vars (NB: type conv needed for numeric values!)
        cont_env_list = cont.get("env", [])
        # include only vars for which the keys 'name' and 'value' are defined
        cont_env_dict = {i["name"]: i["value"] for i in cont_env_list if "name" in i and "value" in i}
        env = comp.get("env")
        if env:
            for en, ev in env.items():
                check_setting(en, settings)
                assert isinstance(ev, dict), 'Setting "{}" in section "env" of a config file is not a dictionary.'
                if "encoder" in ev:
                    for name, setting in describe_encoder(
                        cont_env_dict.get(en),
                        ev["encoder"],
                        exception_context="an environment variable {}" "".format(en),
                    ):
                        check_setting(name, settings)
                        settings[name] = setting
                if issetting(ev):
                    defval = ev.pop("default", None)
                    val = cont_env_dict.get(en, defval)
                    val = float(val) if israngesetting(ev) and isinstance(val, (int, str)) else val
                    assert val is not None, (
                        'Environment variable "{}" does not have a current value defined and '
                        "neither it has a default value specified in a config file. "
                        "Please, set current value for this variable or adjust the "
                        "configuration file to include its default value."
                        "".format(en)
                    )
                    val = {**ev, "value": val}
                    settings[en] = val
            # Remove section "env" from final descriptor
            del comp["env"]
        command = comp.get("command")
        if command:
            if command.get("encoder"):
                for name, setting in describe_encoder(
                    cont.get("command", []), command["encoder"], exception_context="a command section"
                ):
                    check_setting(name, settings)
                    settings[name] = setting
            # Remove section "command" from final descriptor
            del comp["command"]
    # if runtime_ids:
    mon_data["runtime_id"] = get_hash(runtime_ids)
    # app state data
    # (NOTE we strip the component names because our (single-component) 'reference' app will necessarily have a different component name)
    # this should be resolved by complete re-work, if we are to support 'reference' app in a way that allows multiple components
    raw_specs = [raw_specs[k] for k in sorted(raw_specs.keys())]
    imgs = [imgs[k] for k in sorted(imgs.keys())]
    mon_data.update(
        {
            "spec_id": get_hash(raw_specs),
            "version_id": get_hash(imgs),
            # "runtime_count": replicas_sum
        }
    )
    desc["monitoring"] = mon_data
    return desc, deps_list, restart_counts
# DEBUG:
def ydump(fn, data):
    """DEBUG helper: dump *data* as YAML into file *fn*."""
    # context manager guarantees the handle is closed even if yaml.dump raises
    # (the previous open/close pair leaked the handle on exceptions)
    with open(fn, "w") as f:
        yaml.dump(data, f)
def dbg_log(*args):
    """Print debug output to stderr, but only when the TDR_DEBUG_LOG env var is set (non-empty)."""
    if not os.getenv("TDR_DEBUG_LOG"):
        return
    print(*args, file=sys.stderr)
def query(appname, desc):
    """Return only the settings-query response from raw_query, discarding the deployment
    list and restart counts."""
    settings_response, _deps, _restarts = raw_query(appname, desc)
    return settings_response
class Waiter(object):
    """Poll-and-wait helper for a condition with a deadline.

    Usage:
        w = Waiter(max_time, delay)
        while w.wait():
            if test_condition(): break
        if w.expired:
            raise Hell
    """
    def __init__(self, timeout, delay=1):
        self.timefn = time.time  # change that on windows to time.clock
        self.start = self.timefn()
        self.end = self.start + timeout
        self.delay = delay
        self.expired = False
    def wait(self):
        """Sleep one delay interval and return True while time remains; False once the deadline passed."""
        self.expired = self.end < self.timefn()
        if self.expired:
            return False
        time.sleep(self.delay)  # TODO: add support for increasing delay over time
        return True
def test_dep_generation(dep, g, ge=False):
    """check if the deployment status indicates it has been updated to the given generation number"""
    # ge=True relaxes the check to observedGeneration >= g (any later generation counts)
    if ge:
        return dep["status"]["observedGeneration"] >= g
    return dep["status"]["observedGeneration"] == g
def test_dep_progress(dep):
    """check if the deployment object 'dep' has reached final successful status
    ('dep' should be the data returned by 'kubectl get deployment' or the equivalent API call, e.g.,
    GET /apis/(....)/namespaces/:ns/deployments/my-deployment-name).
    This tests the conditions[] array and the replica counts and converts the data to a simplified status, as follows:
    - if the deployment appears to be in progress and k8s is still waiting for updates from the controlled objects (replicasets and their pods),
      return a tuple (x, ""), where x is the fraction of the updated instances (0.0 .. 1.0, excluding 1.0).
    - if the deployment has completed, return (1.0, "")
    - if the deployment has stalled or failed, return (x, "(errormsg)"), with an indication of the
      detected failure (NOTE: in k8s, the 'stall' is never final and could be unblocked by change
      of resources or other modifications of the cluster not related to the deployment in question,
      but we assume that the system is operating under stable conditions and there won't be anyone
      or anything that can unblock such a stall)
    """
    dbg_log("test_dep_progress:")
    spec_replicas = dep["spec"]["replicas"]  # this is what we expect as target
    progress_final = None  # NOTE(review): unused — candidate for removal
    dep_status = dep["status"]
    # build the pod label selector from the deployment's matchLabels
    sel = dep["spec"]["selector"]["matchLabels"]
    sel = ",".join(("{}={}".format(k, v) for k, v in sel.items()))
    latest_rs = get_latest_rs(dep["metadata"]["namespace"], sel, dep)
    rs_status = latest_rs["status"]
    # --- scan the deployment conditions for completion / failure signals ---
    for co in dep_status["conditions"]:
        dbg_log(
            "... condition type {}, reason {}, status {}, message {}".format(
                co.get("type"), co.get("reason"), co.get("status"), co.get("message")
            )
        )
        if co["type"] == "Progressing":
            if co["status"] == "True" and co["reason"] == "NewReplicaSetAvailable":
                # if the replica set was updated, test the replica counts
                if dep_status.get("updatedReplicas", None) == spec_replicas:  # update complete, check other counts
                    if (
                        rs_status.get("availableReplicas", None) == spec_replicas
                        and rs_status.get("readyReplicas", None) == spec_replicas
                    ):
                        return (1.0, "")  # done
            elif co["status"] == "False":  # failed
                return (
                    dep_status.get("updatedReplicas", 0) / spec_replicas,
                    co["reason"] + ", " + co.get("message", ""),
                )
            # otherwise, assume in-progress
        elif co["type"] == "ReplicaFailure":
            # note if this status is found, we report failure early here, before k8s times out
            return (dep_status.get("updatedReplicas", 0) / spec_replicas, co["reason"] + ", " + co.get("message", ""))
    # no errors and not complete yet, assume in-progress
    # (NOTE if "Progressing" condition isn't found, but updated replicas is good, we will return 100% progress; in this case check that other counts are correct, as well!
    if spec_replicas == 0:  # If dep is being destroyed
        progress = 1.0 if dep_status.get("replicas", 0) == 0 else 0.99 / dep_status.get("replicas", 0)
    else:
        progress = dep_status.get("updatedReplicas", 0) / spec_replicas
    if progress == 1.0:
        if (
            rs_status.get("availableReplicas", None) == spec_replicas
            and rs_status.get("readyReplicas", None) == spec_replicas
        ):
            return (1.0, "")  # all good
        progress = 0.99  # available/ready counts aren't there - don't report 100%, wait loop will continue until ready or time out
    # check for pod restarts
    pods = get_latest_pods(dep["metadata"]["namespace"], sel, latest_rs)
    if progress == 1.0 and spec_replicas == 0:
        # scale-to-zero: progress tracks remaining pods
        progress = 1.0 if len(pods) == 0 else 0.99 / len(pods)
    restart_counts = [
        {
            "pod+container": "{}+{}".format(pod["metadata"]["name"], cont_stat["name"]),
            "restartCount": cont_stat["restartCount"],
        }
        for pod in pods
        for cont_stat in pod["status"].get("containerStatuses", [])
        if cont_stat["restartCount"] > 0
    ]
    if restart_counts and spec_replicas > 0:
        # crash-looping containers count as a failure, even if the rollout itself progressed
        return (
            progress,
            "component(s) crash restart detected on deployment {}: {}".format(dep["metadata"]["name"], restart_counts),
        )
    return (progress, "")
def compare_settings(patch, dep):
    """Test select parts of a deployment patch against an actual deployment object.

    Return None if they match, or a string detailing the first difference found.

    Only spec/template/spec/containers/:N:/resources/limits and
    .../resources/requests are compared. If the patch has a 'None' setting for a
    resource, the corresponding value in the deployment is not checked
    (patching to None means 'delete', not sure if K8s deletes or sets to a
    default value in this case).
    """
    try:
        p_containers = patch["spec"]["template"]["spec"]["containers"]
    except KeyError:
        # patch does not set any resource, return OK
        return None
    # KeyError here not caught (propagate as fatal error)
    d_containers = dep["spec"]["template"]["spec"]["containers"]
    # convert arrays to maps keyed by container name, for easy matching
    p_containers = {x["name"]: x for x in p_containers}
    d_containers = {x["name"]: x for x in d_containers}
    # compare
    for k, v in p_containers.items():
        if not v.get("resources"):  # patch didn't set resources
            continue
        c = d_containers.get(k)
        if not c or not c.get("resources"):  # patch has resources, but dep doesn't: mismatch
            msg = "no resources in deployment for container '{}'".format(k)
            print("compare_settings: " + msg, file=sys.stderr)
            return msg
        p_rsrc = v["resources"]
        d_rsrc = c["resources"]
        print("compare_settings: cname={}, comparing: {} <> {}".format(k, repr(p_rsrc), repr(d_rsrc)), file=sys.stderr)
        for rtype, rd in p_rsrc.items():  # rtype is "requests" or "limits"
            for rname, rvalue in rd.items():
                if rvalue is None:  # don't compare if patch says None
                    continue
                try:
                    dep_rvalue = d_rsrc[rtype][rname]
                except KeyError:  # patched value not present in dep - mismatch
                    msg = "{}.{}.{}: no value".format(k, rtype, rname)
                    print("compare_settings: " + msg, file=sys.stderr)
                    return msg
                # normalize known unit formats before comparing; fall back to a
                # direct comparison for any other resource name (previously 'r'
                # was left unset or stale here, causing an UnboundLocalError or
                # a wrong comparison result for e.g. extended resources)
                if rname == "memory":
                    r = memunits(rvalue) == memunits(dep_rvalue)
                elif rname == "cpu":
                    r = cpuunits(rvalue) == cpuunits(dep_rvalue)
                else:
                    r = rvalue == dep_rvalue
                if not r:
                    msg = "compare_settings: {}.{}.{}: {}!={}".format(k, rtype, rname, rvalue, dep_rvalue)
                    print("compare_settings: " + msg, file=sys.stderr)
                    return msg
    return None
# FIXME: observed a patch trigger spontaneous reduction in replica count! (happened when update was attempted without replica count changes and 2nd replica was not schedulable according to k8s)
# NOTE: update of 'observedGeneration' does not mean that the 'deployment' object is done updating; also checking readyReplicas or availableReplicas in status does not help (these numbers may be for OLD replicas, if the new replicas cannot be started at all). We check for a 'Progressing' condition with a specific 'reason' code as an indication that the deployment is fully updated.
# The 'kubectl rollout status' command relies only on the deployment object - therefore info in it should be sufficient to track progress.
# ? do we need to use --to-revision with the undo command?
# FIXME: cpu request above 0.05 fails for 2 replicas on minikube. Not understood. (NOTE also that setting cpu_limit without specifying request causes request to be set to the same value, except if limit is very low - in that case, request isn't set at all)
def wait_for_update(appname, obj, patch_gen, print_progress, c=0, t=1, wait_for_progress=40, phase="", cmp_=None):
    """Wait for a patch to a deployment to take effect.

    appname -- the k8s namespace
    obj -- the deployment name
    patch_gen -- the object generation immediately after the patch was applied
        (should come from a k8s obj with "kind":"Deployment")
    print_progress -- callable(percent, message) used to report progress
    c, t -- index and total count of objects being adjusted; this object's
        progress is mapped into its 1/t slice of the 0..100% range
    wait_for_progress -- seconds to wait for the rollout to complete
    phase -- free-form label included in error messages
    cmp_ -- optional patch dict; if a concurrent update is detected when the
        rollout completes, the live deployment is re-checked against it with
        compare_settings()

    Raises AdjustError if the object update or the rollout times out, if the
    rollout fails, or if a concurrent update overwrote a controlled setting.
    """
    wait_for_gen = 15  # time to wait for object update ('observedGeneration')
    # wait_for_progress = 40 # time to wait for rollout to complete
    part = 1.0 / float(t)  # fraction of the total progress range owned by this object
    m = "updating {}".format(obj)
    dbg_log("waiting for update: deployment {}, generation {}".format(obj, patch_gen))
    # NOTE: best to implement this with a 'watch', not using an API poll!
    # ?watch=1 & resourceVersion = metadata[resourceVersion], timeoutSeconds=t,
    # --raw=''
    # GET /apis/apps/v1/namespaces/{namespace}/deployments
    t0 = time.time()
    w = Waiter(wait_for_gen, 2)
    r = None
    # phase 1: poll until the server has observed at least our generation
    while w.wait():
        # NOTE: no progress prints here, this wait should be short
        r = k_get(appname, DEPLOYMENT + "/" + obj)
        # ydump("tst_wait{}_output_{}.yaml".format(rc,obj),r) ; rc = rc+1
        if test_dep_generation(r, patch_gen, ge=True):
            break
    r0 = r  # first object seen at/after our generation; used for the concurrent-update check below
    if r:
        print(
            "DEBUG: waited {}s for k8s object update, expected g = {}, g now = {}".format(
                time.time() - t0, patch_gen, r["status"]["observedGeneration"]
            ),
            file=sys.stderr,
        )
    if w.expired:
        raise AdjustError(
            "update of {} failed, timed out waiting for k8s object update".format(obj),
            status="failed",
            reason="adjust-failed",
        )
    dbg_log("waiting for progress: deployment {}, generation {}".format(obj, patch_gen))
    # phase 2: poll the rollout until it completes, fails, or times out
    p = 0.0  # rollout progress fraction, updated by test_dep_progress()
    m = "waiting for progress from k8s {}".format(obj)
    w = Waiter(wait_for_progress, 2)
    c = float(c)
    err = "(wait skipped)"
    while w.wait():
        r = k_get(appname, DEPLOYMENT + "/" + obj)
        # NOTE: reports p from the previous iteration (progress lags by one poll)
        print_progress(int((c + p) * part * 100), m)
        p, err = test_dep_progress(r)
        if p == 1.0:
            if not test_dep_generation(r0, patch_gen) and cmp_:
                # if generation did not match exactly, there has been another update besides ours,
                # compare the configuration to the expected one and fail if a controlled setting was changed
                print(
                    "WARNING: detected concurrent update during adjust, re-checking settings",
                    file=sys.stderr,
                    flush=True,
                )
                diff = compare_settings(cmp_, r)
                if diff:
                    raise AdjustError("deployment was modified unexpectedly: " + diff, reason="overwritten")
            return  # all done
        if err:
            break
    # loop ended without completing: either an error was reported or we timed out
    status = "rejected"
    reason = "start-failed"
    err_text = "during {}; update of {} failed: timed out waiting for replicas to come up, status: {}".format(
        phase, obj, err
    )
    if "component(s) crash restart detected" in err:
        reason = "unstable"
        err_text = "during {}; {}".format(phase, err)
    raise AdjustError(err_text, status=status, reason=reason)
def set_rsrc(cont, cp, sn, sv, desc_settings):
    """Write the patched value(s) for one resource setting into a container patch.

    cont -- the current container spec (read-only, used to see which resource
        fields already exist)
    cp -- the container patch being built (mutated in place)
    sn -- setting name (key into RESOURCE_MAP, e.g. "mem" or "cpu")
    sv -- numeric setting value
    desc_settings -- the setting descriptor; its "selector" key chooses which
        of requests/limits to set: "request", "limit", "request_min_limit",
        or the default "both"
    """
    rn = RESOURCE_MAP[sn]
    sv_str = rsrc_str(sn, sv)
    selector = desc_settings.get("selector", "both")

    def patch_section(section, value):
        # set resources.<section>.<rn> in the patch, creating parents as needed
        cp.setdefault("resources", {}).setdefault(section, {})[rn] = value

    def dep_has(section):
        # does the live container spec already carry this resource in <section>?
        return cont.get("resources", {}).get(section, {}).get(rn) is not None

    if selector == "request":
        patch_section("requests", sv_str)
        if dep_has("limits"):
            patch_section("limits", None)  # remove corresponding limit if it exists
    elif selector == "limit":
        patch_section("limits", sv_str)
        if dep_has("requests"):
            patch_section("requests", None)  # remove corresponding request if it exists
    elif selector == "request_min_limit":
        # limit follows the value, but never drops below the configured floor
        floor = max(desc_settings["limit_min"], sv)
        patch_section("requests", sv_str)
        patch_section("limits", rsrc_str(sn, floor))
    else:  # "both"
        patch_section("requests", sv_str)
        patch_section("limits", sv_str)
def rsrc_str(setting_name, value):
    """Format a numeric setting value as a k8s resource quantity string.

    Values are rounded to 5 decimal places. Memory ("mem") is represented
    internally in GiB, so it gets a "Gi" suffix; everything else is rendered
    as a plain number.
    """
    quantity = str(round(value, 5))
    return quantity + "Gi" if setting_name == "mem" else quantity
def _value(x):
if isinstance(x, dict) and "value" in x:
return x["value"]
return x
def add_meta(patch, key, data):
    """add json-encoded data under 'metadata.annotations[key]' in patch"""
    metadata = patch.setdefault("metadata", {})
    annotations = metadata.setdefault("annotations", {})
    annotations[key] = json_enc(data)
def add_pod_meta(patch, key, data):
    """add json-encoded data under 'spec.template.metadata.annotations[key]' in patch"""
    template = patch.setdefault("spec", {}).setdefault("template", {})
    annotations = template.setdefault("metadata", {}).setdefault("annotations", {})
    annotations[key] = json_enc(data)
def update(appname, desc, data, print_progress):
adjust_on = desc.get("adjust_on", False)
if adjust_on:
try:
# nosec: ast.literal_eval would not work here and the eval has been constrained
should_adjust = eval(adjust_on, {"__builtins__": None}, {"data": data}) # nosec