Skip to content

Commit

Permalink
Switch to hermetic python
Browse files Browse the repository at this point in the history
Allows retiring almost all of the MIN_PYs. (orjson fails to import on 3.12)
Removes the check python version wrapper 3dddf20
  • Loading branch information
cpsauer committed Jan 6, 2024
1 parent 6d58fa6 commit 0e5b1aa
Show file tree
Hide file tree
Showing 14 changed files with 255 additions and 118 deletions.
2 changes: 0 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-ast
exclude: ^check_python_version\.template\.py$ # Template for bazel creates syntax error
- id: debug-statements
exclude: ^check_python_version\.template\.py$ # Template for bazel creates syntax error
- id: mixed-line-ending
- id: check-case-conflict
- id: fix-byte-order-marker
Expand Down
7 changes: 5 additions & 2 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ filegroup(
name = "bzl_srcs_for_stardoc",
visibility = ["//visibility:public"],
srcs = glob(["**/*.bzl"]) + [
"@bazel_tools//tools:bzl_srcs"
"@bazel_tools//tools:bzl_srcs",
"@hedron_compile_commands_pip//:requirements.bzl",
"@python_3_11//:defs.bzl",
"@rules_python//:bzl",
],
)

Expand All @@ -25,7 +28,7 @@ filegroup(
# Implementation:
# If you are looking into the implementation, start with the overview in ImplementationReadme.md.

exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
exports_files(["refresh.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.

cc_binary(
name = "print_args",
Expand Down
20 changes: 20 additions & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
module(name = "hedron_compile_commands")

use_extension("//:workspace_setup.bzl", "hedron_compile_commands_extension")
use_extension("//:workspace_setup_transitive.bzl", "hedron_compile_commands_extension")
use_extension("//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_extension")
use_extension("//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_extension")

# While we're supporting the WORKSPACE, we need to load rules_python through its WORKSPACE mechanism because the (currently unstable) bzlmod APIs differ just enough that loads would fail if you tried to support both at the same time.
# But this is how you'd load rules_python from bzlmod:
# bazel_dep(name = "rules_python", version = "0.27.1")
# python = use_extension("@rules_python//python/extensions:python.bzl", "python")
# python.toolchain(
# python_version = "3.11",
# )
# use_repo(python, "python_versions")
# pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
# pip.parse(
# hub_name = "hedron_compile_commands_pip",
# # Available versions are listed in @rules_python//python:versions.bzl.
# python_version = "3.11",
# requirements_lock = "//:requirements.txt",
# )
# use_repo(pip, "hedron_compile_commands_pip")
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ http_archive(
)
load("@hedron_compile_commands//:workspace_setup.bzl", "hedron_compile_commands_setup")
hedron_compile_commands_setup()
load("@hedron_compile_commands//:workspace_setup_transitive.bzl", "hedron_compile_commands_setup_transitive")
hedron_compile_commands_setup_transitive()
load("@hedron_compile_commands//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive")
hedron_compile_commands_setup_transitive_transitive()
load("@hedron_compile_commands//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive_transitive")
hedron_compile_commands_setup_transitive_transitive_transitive()
```

#### Either way: Get Updates via Renovate
Expand Down
6 changes: 6 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ workspace(name = "hedron_compile_commands")

load("@hedron_compile_commands//:workspace_setup.bzl", "hedron_compile_commands_setup")
hedron_compile_commands_setup()
load("@hedron_compile_commands//:workspace_setup_transitive.bzl", "hedron_compile_commands_setup_transitive")
hedron_compile_commands_setup_transitive()
load("@hedron_compile_commands//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive")
hedron_compile_commands_setup_transitive_transitive()
load("@hedron_compile_commands//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive_transitive")
hedron_compile_commands_setup_transitive_transitive_transitive()
16 changes: 0 additions & 16 deletions check_python_version.template.py

This file was deleted.

100 changes: 37 additions & 63 deletions refresh.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,13 @@
"""


# This file requires python 3.6, which is enforced by check_python_version.template.py
# 3.6 backwards compatibility required by @zhanyong-wan in https://github.com/hedronvision/bazel-compile-commands-extractor/issues/111.
# 3.7 backwards compatibility required by @lummax in https://github.com/hedronvision/bazel-compile-commands-extractor/pull/27.
# ^ Try to contact before upgrading.
# When adding things could be cleaner if we had a higher minimum version, please add a comment with MIN_PY=3.<v>.
# Similarly, when upgrading, please search for that MIN_PY= tag.


import concurrent.futures
import enum
import functools # MIN_PY=3.9: Replace `functools.lru_cache(maxsize=None)` with `functools.cache`.
import functools
import itertools
import json
import locale
import orjson # orjson is much faster than the standard library's json module (1.9 seconds vs 6.6 seconds for a ~140 MB file). See https://github.com/hedronvision/bazel-compile-commands-extractor/pull/118
import os
import pathlib
import re
Expand All @@ -35,7 +28,7 @@
import tempfile
import time
import types
import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> list[str]
import typing


@enum.unique
Expand Down Expand Up @@ -96,14 +89,12 @@ def _print_header_finding_warning_once():
_print_header_finding_warning_once.has_logged = False


@functools.lru_cache(maxsize=None)
@functools.lru_cache
def _get_bazel_cached_action_keys():
"""Gets the set of actionKeys cached in bazel-out."""
action_cache_process = subprocess.run(
['bazel', 'dump', '--action_cache'],
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
encoding=locale.getpreferredencoding(),
check=True, # Should always succeed.
)
Expand Down Expand Up @@ -157,7 +148,7 @@ def _parse_headers_from_makefile_deps(d_file_content: str, source_path_for_sanit
return set(headers)


@functools.lru_cache(maxsize=None)
@functools.lru_cache
def _get_cached_modified_time(path: str):
"""Returns 0 if the file doesn't exist.
Expand Down Expand Up @@ -202,7 +193,7 @@ def _is_nvcc(path: str):
return os.path.basename(path).startswith('nvcc')


def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_key: str):
def _get_headers_gcc(compile_args: list[str], source_path: str, action_key: str):
"""Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.)
Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache.
Expand Down Expand Up @@ -257,9 +248,7 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke

header_search_process = _subprocess_run_spilling_over_to_param_file_if_needed( # Note: gcc/clang can be run from Windows, too.
header_cmd,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors and carry on.
)
Expand Down Expand Up @@ -350,7 +339,7 @@ def windows_list2cmdline(seq):
return ''.join(result)


def _subprocess_run_spilling_over_to_param_file_if_needed(command: typing.List[str], **kwargs):
def _subprocess_run_spilling_over_to_param_file_if_needed(command: list[str], **kwargs):
"""Same as subprocess.run, but it handles the case where the command line length is exceeded on Windows and we need a param file."""

# On non-Windows, we have to run directly via a special case.
Expand Down Expand Up @@ -378,7 +367,7 @@ def _subprocess_run_spilling_over_to_param_file_if_needed(command: typing.List[s
raise


def _get_headers_msvc(compile_args: typing.List[str], source_path: str):
def _get_headers_msvc(compile_args: list[str], source_path: str):
"""Gets the headers used by a particular compile command that uses msvc argument formatting (including clang-cl.)
Relatively slow. Requires running the C preprocessor.
Expand Down Expand Up @@ -453,29 +442,19 @@ def _get_headers_msvc(compile_args: typing.List[str], source_path: str):
return headers, should_cache


def _is_relative_to(sub: pathlib.PurePath, parent: pathlib.PurePath):
"""Determine if one path is relative to another."""
# MIN_PY=3.9: Eliminate helper in favor of `PurePath.is_relative_to()`.
try:
sub.relative_to(parent)
except ValueError:
return False
return True


def _file_is_in_main_workspace_and_not_external(file_str: str):
file_path = pathlib.PurePath(file_str)
if file_path.is_absolute():
workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
if not _is_relative_to(file_path, workspace_absolute):
if not file_path.is_relative_to(workspace_absolute):
return False
file_path = file_path.relative_to(workspace_absolute)
# You can now assume that the path is relative to the workspace.
# [Already assuming that relative paths are relative to the main workspace.]

# some/file.h, but not external/some/file.h
# also allows for things like bazel-out/generated/file.h
if _is_relative_to(file_path, pathlib.PurePath("external")):
if file_path.is_relative_to(pathlib.PurePath("external")):
return False

# ... but, ignore files in e.g. bazel-out/<configuration>/bin/external/
Expand Down Expand Up @@ -532,7 +511,7 @@ def _get_headers(compile_action, source_path: str):
cache_last_modified = os.path.getmtime(cache_file_path) # Do before opening just as a basic hedge against concurrent write, even though we won't handle the concurrent delete case perfectly.
try:
with open(cache_file_path) as cache_file:
action_key, cached_headers = json.load(cache_file)
action_key, cached_headers = orjson.loads(cache_file.read())
except json.JSONDecodeError:
# Corrupted cache, which can happen if, for example, the user kills the program, since writes aren't atomic.
# But if it is the result of a bug, we want to print it before it's overwritten, so it can be reported
Expand Down Expand Up @@ -561,8 +540,11 @@ def _get_headers(compile_action, source_path: str):
# Cache for future use
if output_file and should_cache:
os.makedirs(os.path.dirname(cache_file_path), exist_ok=True)
with open(cache_file_path, 'w') as cache_file:
json.dump((compile_action.actionKey, list(headers)), cache_file)
with open(cache_file_path, 'wb') as cache_file:
cache_file.write(orjson.dumps(
(compile_action.actionKey, list(headers)),
option=orjson.OPT_INDENT_2,
))
elif not headers and cached_headers: # If we failed to get headers, we'll fall back on a stale cache.
headers = set(cached_headers)

Expand Down Expand Up @@ -678,7 +660,7 @@ def _get_files(compile_action):
_get_files.extensions_to_language_args = {ext : flag for exts, flag in _get_files.extensions_to_language_args.items() for ext in exts} # Flatten map for easier use


@functools.lru_cache(maxsize=None)
@functools.lru_cache
def _get_apple_SDKROOT(SDK_name: str):
"""Get path to xcode-select'd root for the given OS."""
SDKROOT_maybe_versioned = subprocess.check_output(
Expand All @@ -696,7 +678,7 @@ def _get_apple_SDKROOT(SDK_name: str):
# Traditionally stored in SDKROOT environment variable, but not provided by Bazel. See https://github.com/bazelbuild/bazel/issues/12852


def _get_apple_platform(compile_args: typing.List[str]):
def _get_apple_platform(compile_args: list[str]):
"""Figure out which Apple platform a command is for.
Is the name used by Xcode in the SDK files, not the marketing name.
Expand All @@ -710,15 +692,15 @@ def _get_apple_platform(compile_args: typing.List[str]):
return None


@functools.lru_cache(maxsize=None)
@functools.lru_cache
def _get_apple_DEVELOPER_DIR():
"""Get path to xcode-select'd developer directory."""
return subprocess.check_output(('xcode-select', '--print-path'), encoding=locale.getpreferredencoding()).rstrip()
# Unless xcode-select has been invoked (like for a beta) we'd expect, e.g., '/Applications/Xcode.app/Contents/Developer' or '/Library/Developer/CommandLineTools'.
# Traditionally stored in DEVELOPER_DIR environment variable, but not provided by Bazel. See https://github.com/bazelbuild/bazel/issues/12852


def _apple_platform_patch(compile_args: typing.List[str]):
def _apple_platform_patch(compile_args: list[str]):
"""De-Bazel the command into something clangd can parse.
This function has fixes specific to Apple platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not.
Expand Down Expand Up @@ -768,9 +750,7 @@ def _emscripten_platform_patch(compile_action):
# On Windows, it fails to spawn the subprocess when the path uses forward slashes as a separator.
# Here, we convert emcc driver path to use the native path separator.
[str(emcc_driver)] + compile_action.arguments[1:],
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
env=environment,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors and carry on.
Expand All @@ -784,7 +764,7 @@ def _emscripten_platform_patch(compile_action):
end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
args = lines[begin_args_idx + 1:end_args_idx]
clang_driver = pathlib.PurePath(args[0])
if _is_relative_to(clang_driver, workspace_absolute):
if clang_driver.is_relative_to(workspace_absolute):
args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
return args

Expand All @@ -793,7 +773,7 @@ def _emscripten_platform_patch(compile_action):
END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='


def _all_platform_patch(compile_args: typing.List[str]):
def _all_platform_patch(compile_args: list[str]):
"""Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
# clangd writes module cache files to the wrong place
# Without this fix, you get tons of module caches dumped into the VSCode root folder.
Expand Down Expand Up @@ -844,7 +824,7 @@ def _all_platform_patch(compile_args: typing.List[str]):
return compile_args


def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]:
def _nvcc_patch(compile_args: list[str]) -> list[str]:
"""Apply fixes to args to nvcc.
Basically remove everything that's an nvcc arg that is not also a clang arg, converting what we can.
Expand Down Expand Up @@ -1087,9 +1067,7 @@ def _convert_compile_commands(aquery_output):

# Process each action from Bazelisms -> file paths and their clang commands
# Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved
with concurrent.futures.ThreadPoolExecutor(
max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
) as threadpool:
with concurrent.futures.ThreadPoolExecutor() as threadpool:
outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)

# Yield as compile_commands.json entries
Expand Down Expand Up @@ -1175,9 +1153,7 @@ def _get_commands(target: str, flags: str):

aquery_process = subprocess.run(
aquery_args,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors from `bazel aquery` and carry on.
)
Expand Down Expand Up @@ -1235,20 +1211,20 @@ def _ensure_external_workspaces_link_exists():
dest = (pathlib.Path('bazel-out').resolve()/'../../../external').resolve()

# Handle problem cases where //external exists
if os.path.lexists(source):
if os.path.lexists(source): # MIN_PY=3.12: use source.exists(follow_symlinks=False), here and elsewhere.
# Detect symlinks or Windows junctions
# This seemed to be the cleanest way to detect both.
# Note that os.path.islink doesn't detect junctions.
try:
current_dest = os.readlink(source) # MIN_PY=3.9 source.readlink()
current_dest = source.readlink()
except OSError:
log_error(f">>> //external already exists, but it isn't a {'junction' if is_windows else 'symlink'}. //external is reserved by Bazel and needed for this tool. Please rename or delete your existing //external and rerun. More details in the README if you want them.") # Don't auto delete in case the user has something important there.
sys.exit(1)

# Normalize the path for matching
# First, workaround a gross case where Windows readlink returns extended path, starting with \\?\, causing the match to fail
if is_windows and current_dest.startswith('\\\\?\\'):
current_dest = current_dest[4:] # MIN_PY=3.9 stripprefix
if is_windows:
current_dest = current_dest.removeprefix('\\\\?\\')
current_dest = pathlib.Path(current_dest)

if dest != current_dest:
Expand Down Expand Up @@ -1336,7 +1312,7 @@ def _ensure_cwd_is_workspace_root():
os.chdir(workspace_root)


def main():
if __name__ == '__main__':
_ensure_cwd_is_workspace_root()
_ensure_gitignore_entries_exist()
_ensure_external_workspaces_link_exists()
Expand All @@ -1357,10 +1333,8 @@ def main():
sys.exit(1)

# Chain output into compile_commands.json
with open('compile_commands.json', 'w') as output_file:
json.dump(
with open('compile_commands.json', 'wb') as output_file:
output_file.write(orjson.dumps(
compile_command_entries,
output_file,
indent=2, # Yay, human readability!
check_circular=False # For speed.
)
option=orjson.OPT_INDENT_2,
))
Loading

1 comment on commit 0e5b1aa

@Attempt3035
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @cpsauer!
Looks like some big changes in this commit! Noticed this broke bzlmod support, haven't had a chance to look too far into it but just wanted to see if you were aware! Let me know if there's anything you'd like me to work on!

Please sign in to comment.