-
Notifications
You must be signed in to change notification settings - Fork 14.6k
/
Copy pathrun_generate_constraints.py
executable file
·458 lines (420 loc) · 17.8 KB
/
run_generate_constraints.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import json
import os
import sys
from dataclasses import dataclass
from datetime import datetime
from functools import cached_property
from pathlib import Path
from typing import TextIO
import requests
from click import Choice
from in_container_utils import click, console, run_command
# Directory two levels above the folder that holds this script.
AIRFLOW_SOURCES = Path(__file__).resolve().parent.parent.parent
# Both values are taken from the environment, with a fallback when the variable is not set.
DEFAULT_BRANCH = os.getenv("DEFAULT_BRANCH", "main")
PYTHON_VERSION = os.getenv("PYTHON_MAJOR_MINOR_VERSION", "3.8")
GENERATED_PROVIDER_DEPENDENCIES_FILE = AIRFLOW_SOURCES.joinpath("generated", "provider_dependencies.json")
# Parsed at import time, so the JSON file has to exist whenever this module is loaded.
ALL_PROVIDER_DEPENDENCIES = json.loads(GENERATED_PROVIDER_DEPENDENCIES_FILE.read_text())
# Timestamp (local time, ISO-8601) embedded into the headers of the generated constraints files.
now = datetime.now().isoformat()
# Comment header written at the top of the constraints file by generate_constraints_no_providers.
# This is an f-string, so the timestamp and branch name are fixed when the module is imported.
NO_PROVIDERS_CONSTRAINTS_PREFIX = f"""
#
# This constraints file was automatically generated on {now}
# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow.
# This variant of constraints install just the 'bare' 'apache-airflow' package build from the HEAD of
# the branch, without installing any of the providers.
#
# Those constraints represent the "newest" dependencies airflow could use, if providers did not limit
# Airflow in any way.
#
"""
# Comment header written at the top of the constraints file by generate_constraints_source_providers.
# This is an f-string, so the timestamp and branch name are fixed when the module is imported.
SOURCE_PROVIDERS_CONSTRAINTS_PREFIX = f"""
#
# This constraints file was automatically generated on {now}
# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow.
# This variant of constraints install uses the HEAD of the branch version of both
# 'apache-airflow' package and all available community provider packages.
#
# Those constraints represent the dependencies that are used by all pull requests when they are build in CI.
# They represent "latest" and greatest set of constraints that HEAD of the "apache-airflow" package should
# Install with "HEAD" of providers. Those are the only constraints that are used by our CI builds.
#
"""
# Comment header written at the top of the constraints file by generate_constraints_pypi_providers.
# This is an f-string, so the timestamp, branch name and Python version are fixed when the module
# is imported. The Python version in the prose comes from PYTHON_VERSION, the same value used in the
# constraints URL below, and the sample pin uses "==", the exact-version operator pip accepts.
PYPI_PROVIDERS_CONSTRAINTS_PREFIX = f"""
#
# This constraints file was automatically generated on {now}
# via "eager-upgrade" mechanism of PIP. For the "{DEFAULT_BRANCH}" branch of Airflow.
# This variant of constraints install uses the HEAD of the branch version for 'apache-airflow' but installs
# the providers from PIP-released packages at the moment of the constraint generation.
#
# Those constraints are actually those that regular users use to install released version of Airflow.
# We also use those constraints after "apache-airflow" is released and the constraints are tagged with
# "constraints-X.Y.Z" tag to build the production image for that version.
#
# This constraints file is meant to be used only in the "apache-airflow" installation command and not
# in all subsequent pip commands. By using a constraints.txt file, we ensure that solely the Airflow
# installation step is reproducible. Subsequent pip commands may install packages that would have
# been incompatible with the constraints used in Airflow reproducible installation step. Finally, pip
# commands that might change the installed version of apache-airflow should include "apache-airflow==X.Y.Z"
# in the list of install targets to prevent Airflow accidental upgrade or downgrade.
#
# Typical installation process of airflow for Python {PYTHON_VERSION} is (with random selection of extras and custom
# dependencies added), usually consists of two steps:
#
# 1. Reproducible installation of airflow with selected providers (note constraints are used):
#
# pip install "apache-airflow[celery,cncf.kubernetes,google,amazon,snowflake]==X.Y.Z" \\
# --constraint \\
# "https://raw.githubusercontent.com/apache/airflow/constraints-X.Y.Z/constraints-{PYTHON_VERSION}.txt"
#
# 2. Installing own dependencies that are potentially not matching the constraints (note constraints are not
# used, and apache-airflow==X.Y.Z is used to make sure there is no accidental airflow upgrade/downgrade.
#
# pip install "apache-airflow==X.Y.Z" "snowflake-connector-python[pandas]==N.M.O"
#
"""
@dataclass
class ConfigParams:
    """
    Settings for one constraints-generation run, plus the output paths derived from them.

    Every generated file lives in ``/files/constraints-<python>``; that directory is created
    the first time any of the path properties is read.
    """

    # One of the supported constraints modes; used as the stem of the generated file names.
    airflow_constraints_mode: str
    # Space-separated provider ids to be installed from locally built packages.
    chicken_egg_providers: str
    # "<owner>/<repo>" on GitHub from which the reference constraints are downloaded.
    constraints_github_repository: str
    # Branch of that repository holding the reference constraints.
    default_constraints_branch: str
    # Passed through to run_command as its github_actions argument.
    github_actions: bool
    # Space-separated extra requirements appended to the eager-upgrade pip install.
    eager_upgrade_additional_requirements: str
    # Python major.minor version; part of the directory name and of every file name.
    python: str

    @cached_property
    def constraints_dir(self) -> Path:
        """Return the output directory, creating it (and missing parents) if necessary."""
        output_dir = Path("/files", f"constraints-{self.python}")
        output_dir.mkdir(parents=True, exist_ok=True)
        return output_dir

    def _in_constraints_dir(self, file_name: str) -> Path:
        """Return the path of ``file_name`` inside the output directory."""
        return self.constraints_dir.joinpath(file_name)

    @cached_property
    def latest_constraints_file(self) -> Path:
        """Path under which the downloaded reference constraints are stored."""
        return self._in_constraints_dir(f"original-{self.airflow_constraints_mode}-{self.python}.txt")

    @cached_property
    def constraints_diff_file(self) -> Path:
        """Path of the markdown report describing what changed."""
        return self._in_constraints_dir(f"diff-{self.airflow_constraints_mode}-{self.python}.md")

    @cached_property
    def current_constraints_file(self) -> Path:
        """Path of the freshly generated constraints file."""
        return self._in_constraints_dir(f"{self.airflow_constraints_mode}-{self.python}.txt")
def install_local_airflow_with_eager_upgrade(
    config_params: ConfigParams, eager_upgrade_additional_requirements: str
) -> None:
    """
    Install Airflow from the local sources in editable mode with the ``all-core`` extra.

    The install runs from ``AIRFLOW_SOURCES`` with ``--upgrade --upgrade-strategy eager``.

    :param config_params: run configuration; only ``github_actions`` is read here
    :param eager_upgrade_additional_requirements: whitespace-separated extra requirements passed
        to the same ``pip install`` call; an empty or unset value adds nothing
    """
    # split() without a separator drops empty chunks, whereas split(" ") would turn an empty value
    # or doubled spaces into "" arguments for pip. `or ""` covers an option that was never set.
    additional_requirements = (eager_upgrade_additional_requirements or "").split()
    run_command(
        [
            "pip",
            "install",
            "--root-user-action",
            "ignore",
            "-e",
            ".[all-core]",
            *additional_requirements,
            "--upgrade",
            "--upgrade-strategy",
            "eager",
        ],
        github_actions=config_params.github_actions,
        cwd=AIRFLOW_SOURCES,
        check=True,
    )
def freeze_packages_to_file(config_params: ConfigParams, file: TextIO) -> None:
    """
    Append the sorted output of ``pip freeze`` to ``file``, one requirement per line.

    Lines are left out when they are blank, contain ``@``, or start with one of the
    prefixes listed in ``excluded_prefixes`` below.

    :param config_params: run configuration; only ``github_actions`` is read here
    :param file: writable text stream that receives the lines and is flushed at the end
    """
    console.print(f"[bright_blue]Freezing constraints to file: {file.name}")
    freeze_result = run_command(
        ["pip", "freeze"],
        github_actions=config_params.github_actions,
        text=True,
        check=True,
        capture_output=True,
    )
    excluded_prefixes = ("apache_airflow", "apache-airflow==", "/opt/airflow", "#", "-e")
    kept_entries = [
        entry
        for entry in sorted(freeze_result.stdout.split("\n"))
        if not entry.startswith(excluded_prefixes) and "@" not in entry and entry.strip() != ""
    ]
    for entry in kept_entries:
        file.write(entry)
        file.write("\n")
    file.flush()
    console.print(f"[green]Constraints generated to file: {file.name}. Wrote {len(kept_entries)} lines")
def download_latest_constraint_file(config_params: ConfigParams):
    """
    Fetch the reference constraints from GitHub and save them to ``latest_constraints_file``.

    The URL is assembled from the configured repository, branch, constraints mode and Python
    version. An HTTP error status raises via ``raise_for_status``.
    """
    remote_file_name = f"{config_params.airflow_constraints_mode}-{config_params.python}.txt"
    constraints_url = "/".join(
        [
            "https://raw.githubusercontent.com",
            f"{config_params.constraints_github_repository}",
            f"{config_params.default_constraints_branch}",
            remote_file_name,
        ]
    )
    console.print(f"[bright_blue]Downloading constraints file from {constraints_url}")
    response = requests.get(constraints_url, timeout=60)
    response.raise_for_status()
    with config_params.latest_constraints_file.open("w") as constraints_file:
        constraints_file.write(response.text)
    console.print(f"[green]Downloaded constraints file from {constraints_url} to {constraints_file.name}")
def diff_constraints(config_params: ConfigParams) -> None:
    """
    Compare the downloaded constraints with the generated ones and report the result.

    ``diff`` runs once with colours, straight to the console. If it exits with 0 any existing
    diff report is removed; otherwise ``diff`` runs again without colours and its captured
    output is saved as a fenced block in ``constraints_diff_file``.
    """
    console.print("[bright_blue]Diffing constraints files")
    previous_path = config_params.latest_constraints_file.as_posix()
    generated_path = config_params.current_constraints_file.as_posix()

    def diff_command(color_mode: str) -> list[str]:
        # Both runs differ only in how colours are handled.
        return ["diff", "--ignore-matching-lines=#", f"--color={color_mode}", previous_path, generated_path]

    # github_actions=False: always shows output directly in CI without folded group
    console_run = run_command(diff_command("always"), github_actions=False, check=False)
    if console_run.returncode == 0:
        console.print("[green]No changes in constraints files. exiting")
        config_params.constraints_diff_file.unlink(missing_ok=True)
        return

    captured_run = run_command(
        diff_command("never"),
        github_actions=config_params.github_actions,
        check=False,
        text=True,
        capture_output=True,
    )
    report_parts = [
        f"Dependencies {config_params.airflow_constraints_mode} updated "
        f"for Python {config_params.python}\n\n",
        "```diff\n",
        captured_run.stdout,
        "```\n",
    ]
    with config_params.constraints_diff_file.open("w") as diff_file:
        for part in report_parts:
            diff_file.write(part)
    console.print(f"[green]Diff generated to file: {config_params.constraints_diff_file}")
def uninstall_all_packages(config_params: ConfigParams) -> None:
    """
    Uninstall every package reported by ``pip freeze`` except the ones that must stay.

    A line is kept (not uninstalled) when it starts with ``apache-airflow``, ``/opt/airflow``,
    ``#`` or ``-e``. For every other line the text before ``==`` is passed to ``pip uninstall``.

    When nothing is left to uninstall the function returns without calling ``pip uninstall``,
    because pip exits with an error when given no requirement (or an empty one).

    :param config_params: run configuration; only ``github_actions`` is read here
    """
    console.print("[bright_blue]Uninstall All PIP packages")
    result = run_command(
        ["pip", "freeze"],
        github_actions=config_params.github_actions,
        cwd=AIRFLOW_SOURCES,
        text=True,
        check=True,
        capture_output=True,
    )
    # The "apache-airflow" prefix already matches "apache-airflow==..." lines; it is kept in the
    # tuple only so the list of protected prefixes reads the same as before.
    protected_prefixes = ("apache-airflow", "apache-airflow==", "/opt/airflow", "#", "-e")
    all_installed_packages = [
        dep.split("==")[0]
        for dep in result.stdout.strip().split("\n")
        # Empty output splits into [""]; blank entries must never reach pip.
        if dep.strip() and not dep.startswith(protected_prefixes)
    ]
    if not all_installed_packages:
        console.print("[yellow]No packages to uninstall.")
        return
    run_command(
        ["pip", "uninstall", "--root-user-action", "ignore", "-y", *all_installed_packages],
        github_actions=config_params.github_actions,
        cwd=AIRFLOW_SOURCES,
        text=True,
        check=True,
    )
def get_all_active_provider_packages() -> list[str]:
    """
    Return distribution names of all providers whose ``state`` is ``"ready"``.

    Each name is ``apache-airflow-providers-`` followed by the provider id with dots
    replaced by dashes, in the order the providers appear in ``ALL_PROVIDER_DEPENDENCIES``.
    """
    active_packages = []
    for provider_id, provider_info in ALL_PROVIDER_DEPENDENCIES.items():
        if provider_info["state"] != "ready":
            continue
        active_packages.append("apache-airflow-providers-" + provider_id.replace(".", "-"))
    return active_packages
def generate_constraints_source_providers(config_params: ConfigParams) -> None:
    """
    Write constraints for the packages as they are currently installed.

    The output file starts with the "source providers" header, followed by the frozen package
    list. Afterwards the reference constraints are downloaded and compared with the new file.
    """
    output_path = config_params.current_constraints_file
    with output_path.open("w") as output:
        output.write(SOURCE_PROVIDERS_CONSTRAINTS_PREFIX)
        freeze_packages_to_file(config_params, output)
    download_latest_constraint_file(config_params)
    diff_constraints(config_params)
def generate_constraints_pypi_providers(config_params: ConfigParams) -> None:
    """
    Generates constraints with provider installed from PyPI. This is the default constraints file
    used in production/release builds when we install providers from PyPI and when tagged, those
    providers are used by our users to install Airflow in reproducible way.

    Every active provider is handled in one of two ways:

    * "chicken egg" providers (listed in ``config_params.chicken_egg_providers``) are installed
      from wheels found in ``/dist`` and are never looked up on PyPI;
    * all other providers are installed by name when PyPI answers the lookup with status 200.

    Airflow with ``all-core`` extra, the selected providers and any additional eager-upgrade
    requirements are then installed in one eager-upgrade ``pip install`` call, the result is
    frozen into the constraints file and compared with the downloaded reference constraints.

    :param config_params: configuration of the current run
    """
    dist_dir = Path("/dist")
    all_provider_packages = get_all_active_provider_packages()
    packages_to_install = []
    console.print("[bright_blue]Installing Airflow with PyPI providers with eager upgrade")
    # split() without a separator ignores repeated/leading/trailing spaces. With split(" ") an empty
    # chunk would produce the bare prefix "apache-airflow-providers-", which matches EVERY provider
    # and would silently keep all of them from being installed from PyPI.
    chicken_egg_prefixes = [
        f"apache-airflow-providers-{chicken_egg_provider.replace('.','-')}"
        for chicken_egg_provider in (config_params.chicken_egg_providers or "").split()
    ]
    if chicken_egg_prefixes:
        console.print(
            f"[bright_blue]Checking if {chicken_egg_prefixes} are available in local dist folder "
            f"as chicken egg providers)"
        )
    # str.startswith needs a tuple; with an empty tuple it is simply False for every package.
    chicken_egg_prefix_tuple = tuple(chicken_egg_prefixes)
    for provider_package in all_provider_packages:
        if provider_package.startswith(chicken_egg_prefix_tuple):
            glob_pattern = f"{provider_package.replace('-','_')}-*.whl"
            console.print(
                f"[bright_blue]Checking if {provider_package} is available in local dist folder "
                f"with {glob_pattern} pattern"
            )
            local_wheels = list(dist_dir.glob(glob_pattern))
            for file in local_wheels:
                console.print(
                    f"[yellow]Installing {file.name} from local dist folder as it is "
                    f"a chicken egg provider"
                )
                packages_to_install.append(file.as_posix())
            # Report the provider as skipped only when no wheel matched. A for/else without
            # `break` runs its else branch every time, so it cannot express this condition.
            if not local_wheels:
                console.print(
                    f"[yellow]Skipping {provider_package} as it is not found in dist folder to install."
                )
            # Chicken egg providers are never looked up on PyPI.
            continue
        console.print(f"[bright_blue]Checking if {provider_package} is available in PyPI: ... ", end="")
        r = requests.head(f"https://pypi.org/pypi/{provider_package}/json", timeout=60)
        if r.status_code == 200:
            console.print("[green]OK")
            packages_to_install.append(provider_package)
        else:
            console.print("[yellow]NOK. Skipping.")
    # An unset (None) or empty option must not add "" arguments to the pip command line.
    additional_requirements = (config_params.eager_upgrade_additional_requirements or "").split()
    run_command(
        cmd=[
            "pip",
            "install",
            "--root-user-action",
            "ignore",
            ".[all-core]",
            *packages_to_install,
            *additional_requirements,
            "--upgrade",
            "--upgrade-strategy",
            "eager",
        ],
        github_actions=config_params.github_actions,
        check=True,
    )
    console.print("[success]Installed airflow with PyPI providers with eager upgrade.")
    with config_params.current_constraints_file.open("w") as constraints_file:
        constraints_file.write(PYPI_PROVIDERS_CONSTRAINTS_PREFIX)
        freeze_packages_to_file(config_params, constraints_file)
    download_latest_constraint_file(config_params)
    diff_constraints(config_params)
def generate_constraints_no_providers(config_params: ConfigParams) -> None:
    """
    Write constraints for Airflow installed with the ``all-core`` extra only.

    Installed packages are uninstalled first, then Airflow is installed from the local sources
    with eager upgrade. The frozen result is written after the "no providers" header, and the
    reference constraints are downloaded and compared with it.
    """
    uninstall_all_packages(config_params)
    console.print(
        "[bright_blue]Installing airflow with [all-core] extras only "
        "with eager upgrade in installable mode."
    )
    install_local_airflow_with_eager_upgrade(
        config_params=config_params,
        eager_upgrade_additional_requirements=config_params.eager_upgrade_additional_requirements,
    )
    console.print("[success]Installed airflow with [all-core] extras only with eager upgrade.")
    output_path = config_params.current_constraints_file
    with output_path.open("w") as output:
        output.write(NO_PROVIDERS_CONSTRAINTS_PREFIX)
        freeze_packages_to_file(config_params, output)
    download_latest_constraint_file(config_params)
    diff_constraints(config_params)
# Values accepted by --airflow-constraints-mode; each one is mapped to a generator function below.
ALLOWED_CONSTRAINTS_MODES = ["constraints", "constraints-source-providers", "constraints-no-providers"]


# Command-line entry point. Every option can also be supplied through the environment variable
# named in its `envvar`. The explanation is kept in comments rather than a docstring, because
# click would show a docstring as the command's --help text.
@click.command()
@click.option(
    "--airflow-constraints-mode",
    type=Choice(ALLOWED_CONSTRAINTS_MODES),
    required=True,
    envvar="AIRFLOW_CONSTRAINTS_MODE",
    help="Mode of constraints to generate",
)
@click.option(
    "--chicken-egg-providers",
    envvar="CHICKEN_EGG_PROVIDERS",
    help="Providers that should be installed from packages built from current sources.",
)
@click.option(
    "--constraints-github-repository",
    default="apache/airflow",
    show_default=True,
    envvar="CONSTRAINTS_GITHUB_REPOSITORY",
    help="GitHub repository to get constraints from",
)
@click.option(
    "--default-constraints-branch",
    default="constraints-main",
    show_default=True,
    envvar="DEFAULT_CONSTRAINTS_BRANCH",
    help="Branch to get constraints from",
)
@click.option(
    "--eager-upgrade-additional-requirements",
    envvar="EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS",
    help="Additional requirements to add to eager upgrade",
)
@click.option(
    "--github-actions",
    is_flag=True,
    default=False,
    show_default=True,
    envvar="GITHUB_ACTIONS",
    help="Running in GitHub Actions",
)
@click.option(
    "--python",
    required=True,
    envvar="PYTHON_MAJOR_MINOR_VERSION",
    help="Python major.minor version",
)
def generate_constraints(
    airflow_constraints_mode: str,
    chicken_egg_providers: str,
    constraints_github_repository: str,
    default_constraints_branch: str,
    eager_upgrade_additional_requirements: str,
    github_actions: bool,
    python: str,
) -> None:
    config_params = ConfigParams(
        airflow_constraints_mode=airflow_constraints_mode,
        chicken_egg_providers=chicken_egg_providers,
        constraints_github_repository=constraints_github_repository,
        default_constraints_branch=default_constraints_branch,
        github_actions=github_actions,
        eager_upgrade_additional_requirements=eager_upgrade_additional_requirements,
        python=python,
    )
    # One generator per supported mode; anything else is reported and ends the process with 1.
    generators_by_mode = {
        "constraints-source-providers": generate_constraints_source_providers,
        "constraints": generate_constraints_pypi_providers,
        "constraints-no-providers": generate_constraints_no_providers,
    }
    generator = generators_by_mode.get(airflow_constraints_mode)
    if generator is None:
        console.print(f"[red]Unknown constraints mode: {airflow_constraints_mode}")
        sys.exit(1)
    generator(config_params)
    console.print("[green]Generated constraints:")
    # List every .txt file found (recursively) in the output directory.
    for generated_file in config_params.constraints_dir.rglob("*.txt"):
        console.print(generated_file.as_posix())
    console.print()


if __name__ == "__main__":
    generate_constraints()