Skip to content

Commit

Permalink
mypy_test.py: Allow non-types dependencies (#9408)
Browse files Browse the repository at this point in the history
The approach is pretty similar to the approach I took in #9382 for regr_test.py: dynamically create virtual environments for testing stubs packages in isolation. However, since mypy_test.py tests all of typeshed's stubs in succession (and since lots of typeshed's stubs packages depend on other typeshed stubs packages), the performance issues with creating a virtual environment for testing each stubs package are even more severe than with regr_test.py.

I mitigate the performance issues associated with dynamically creating virtual environments in two ways:

- Dynamically creating venvs is mostly slow due to I/O bottlenecks. Creating the virtual environments can be sped up dramatically by creating them concurrently in a threadpool. The same goes for pip installing the requirements into the venvs -- though unfortunately, we have to use pip with --no-cache-dir, as the pip cache gets easily corrupted if you try to do concurrent reads and writes to the pip cache.

- If types-pycocotools requires numpy>=1 and types-D3DShot also requires numpy>=1 (for example), there's no need to create 2 virtual environments. The same requirements have been specified, so they can share a virtual environment between them. This means we don't have to create nearly so many virtual environments.
  • Loading branch information
AlexWaygood authored Jan 7, 2023
1 parent 2c9816e commit 4d6fda9
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 78 deletions.
269 changes: 193 additions & 76 deletions tests/mypy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@
from __future__ import annotations

import argparse
import concurrent.futures
import os
import re
import subprocess
import sys
import tempfile
from contextlib import redirect_stderr, redirect_stdout
import time
from collections import defaultdict
from dataclasses import dataclass
from io import StringIO
from itertools import product
from pathlib import Path
from threading import Lock
from typing import TYPE_CHECKING, Any, NamedTuple

if TYPE_CHECKING:
Expand All @@ -23,17 +26,22 @@
import tomli
from utils import (
VERSIONS_RE as VERSION_LINE_RE,
PackageDependencies,
VenvInfo,
colored,
get_gitignore_spec,
get_mypy_req,
get_recursive_requirements,
make_venv,
print_error,
print_success_msg,
spec_matches_path,
strip_comments,
)

# Fail early if mypy isn't installed
try:
from mypy.api import run as mypy_run
import mypy # noqa: F401
except ImportError:
print_error("Cannot import mypy. Did you install it?")
sys.exit(1)
Expand Down Expand Up @@ -108,7 +116,7 @@ class TestConfig:

def log(args: TestConfig, *varargs: object) -> None:
if args.verbose >= 2:
print(*varargs)
print(colored(" ".join(map(str, varargs)), "blue"))


def match(path: Path, args: TestConfig) -> bool:
Expand Down Expand Up @@ -204,7 +212,19 @@ def add_configuration(configurations: list[MypyDistConf], distribution: str) ->
configurations.append(MypyDistConf(module_name, values.copy()))


def run_mypy(args: TestConfig, configurations: list[MypyDistConf], files: list[Path], *, testing_stdlib: bool) -> ReturnCode:
def run_mypy(
args: TestConfig,
configurations: list[MypyDistConf],
files: list[Path],
*,
testing_stdlib: bool,
non_types_dependencies: bool,
venv_info: VenvInfo,
mypypath: str | None = None,
) -> ReturnCode:
env_vars = dict(os.environ)
if mypypath is not None:
env_vars["MYPYPATH"] = mypypath
with tempfile.NamedTemporaryFile("w+") as temp:
temp.write("[mypy]\n")
for dist_conf in configurations:
Expand All @@ -213,57 +233,49 @@ def run_mypy(args: TestConfig, configurations: list[MypyDistConf], files: list[P
temp.write(f"{k} = {v}\n")
temp.flush()

flags = get_mypy_flags(args, temp.name, testing_stdlib=testing_stdlib)
flags = [
"--python-version",
args.version,
"--show-traceback",
"--warn-incomplete-stub",
"--no-error-summary",
"--platform",
args.platform,
"--custom-typeshed-dir",
str(Path(__file__).parent.parent),
"--strict",
# Stub completion is checked by pyright (--allow-*-defs)
"--allow-untyped-defs",
"--allow-incomplete-defs",
"--allow-subclassing-any", # TODO: Do we still need this now that non-types dependencies are allowed? (#5768)
"--enable-error-code",
"ignore-without-code",
"--config-file",
temp.name,
]
if not testing_stdlib:
flags.append("--explicit-package-bases")
if not non_types_dependencies:
flags.append("--no-site-packages")

mypy_args = [*flags, *map(str, files)]
mypy_command = [venv_info.python_exe, "-m", "mypy"] + mypy_args
if args.verbose:
print("running mypy", " ".join(mypy_args))
stdout_redirect, stderr_redirect = StringIO(), StringIO()
with redirect_stdout(stdout_redirect), redirect_stderr(stderr_redirect):
returned_stdout, returned_stderr, exit_code = mypy_run(mypy_args)

if exit_code:
print(colored(f"running {' '.join(mypy_command)}", "blue"))
result = subprocess.run(mypy_command, capture_output=True, text=True, env=env_vars)
if result.returncode:
print_error("failure\n")
captured_stdout = stdout_redirect.getvalue()
captured_stderr = stderr_redirect.getvalue()
if returned_stderr:
print_error(returned_stderr)
if captured_stderr:
print_error(captured_stderr)
if returned_stdout:
print_error(returned_stdout)
if captured_stdout:
print_error(captured_stdout, end="")
if result.stdout:
print_error(result.stdout)
if result.stderr:
print_error(result.stderr)
if non_types_dependencies and args.verbose:
print("Ran with the following environment:")
subprocess.run([venv_info.pip_exe, "freeze", "--all"])
print()
else:
print_success_msg()
return exit_code


def get_mypy_flags(args: TestConfig, temp_name: str, *, testing_stdlib: bool) -> list[str]:
flags = [
"--python-version",
args.version,
"--show-traceback",
"--warn-incomplete-stub",
"--show-error-codes",
"--no-error-summary",
"--platform",
args.platform,
"--no-site-packages",
"--custom-typeshed-dir",
str(Path(__file__).parent.parent),
"--strict",
# Stub completion is checked by pyright (--allow-*-defs)
"--allow-untyped-defs",
"--allow-incomplete-defs",
"--allow-subclassing-any", # Needed until we can use non-types dependencies #5768
"--enable-error-code",
"ignore-without-code",
"--config-file",
temp_name,
]
if not testing_stdlib:
flags.append("--explicit-package-bases")
return flags
return result.returncode


def add_third_party_files(
Expand Down Expand Up @@ -298,7 +310,9 @@ class TestResults(NamedTuple):
files_checked: int


def test_third_party_distribution(distribution: str, args: TestConfig) -> TestResults:
def test_third_party_distribution(
distribution: str, args: TestConfig, venv_info: VenvInfo, *, non_types_dependencies: bool
) -> TestResults:
"""Test the stubs of a third-party distribution.
Return a tuple, where the first element indicates mypy's return code
Expand All @@ -313,20 +327,24 @@ def test_third_party_distribution(distribution: str, args: TestConfig) -> TestRe
if not files and args.filter:
return TestResults(0, 0)

print(f"testing {distribution} ({len(files)} files)... ", end="")
print(f"testing {distribution} ({len(files)} files)... ", end="", flush=True)

if not files:
print_error("no files found")
sys.exit(1)

prev_mypypath = os.getenv("MYPYPATH")
os.environ["MYPYPATH"] = os.pathsep.join(str(Path("stubs", dist)) for dist in seen_dists)
code = run_mypy(args, configurations, files, testing_stdlib=False)
if prev_mypypath is None:
del os.environ["MYPYPATH"]
else:
os.environ["MYPYPATH"] = prev_mypypath

mypypath = os.pathsep.join(str(Path("stubs", dist)) for dist in seen_dists)
if args.verbose:
print(colored(f"\n{mypypath=}", "blue"))
code = run_mypy(
args,
configurations,
files,
venv_info=venv_info,
mypypath=mypypath,
testing_stdlib=False,
non_types_dependencies=non_types_dependencies,
)
return TestResults(code, len(files))


Expand All @@ -343,19 +361,105 @@ def test_stdlib(code: int, args: TestConfig) -> TestResults:
add_files(files, (stdlib / name), args)

if files:
print(f"Testing stdlib ({len(files)} files)...")
print("Running mypy " + " ".join(get_mypy_flags(args, "/tmp/...", testing_stdlib=True)))
this_code = run_mypy(args, [], files, testing_stdlib=True)
print(f"Testing stdlib ({len(files)} files)...", end="", flush=True)
# We don't actually need pip for the stdlib testing
venv_info = VenvInfo(pip_exe="", python_exe=sys.executable)
this_code = run_mypy(args, [], files, venv_info=venv_info, testing_stdlib=True, non_types_dependencies=False)
code = max(code, this_code)

return TestResults(code, len(files))


def test_third_party_stubs(code: int, args: TestConfig) -> TestResults:
_PRINT_LOCK = Lock()
_DISTRIBUTION_TO_VENV_MAPPING: dict[str, VenvInfo] = {}


def setup_venv_for_external_requirements_set(requirements_set: frozenset[str], tempdir: Path) -> tuple[frozenset[str], VenvInfo]:
venv_dir = tempdir / f".venv-{hash(requirements_set)}"
return requirements_set, make_venv(venv_dir)


def install_requirements_for_venv(venv_info: VenvInfo, args: TestConfig, external_requirements: frozenset[str]) -> None:
# Use --no-cache-dir to avoid issues with concurrent read/writes to the cache
pip_command = [venv_info.pip_exe, "install", get_mypy_req(), *sorted(external_requirements), "--no-cache-dir"]
if args.verbose:
with _PRINT_LOCK:
print(colored(f"Running {pip_command}", "blue"))
try:
subprocess.run(pip_command, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as e:
print(e.stderr)
raise


def setup_virtual_environments(distributions: dict[str, PackageDependencies], args: TestConfig, tempdir: Path) -> None:
# We don't actually need pip if there aren't any external dependencies
no_external_dependencies_venv = VenvInfo(pip_exe="", python_exe=sys.executable)
external_requirements_to_distributions: defaultdict[frozenset[str], list[str]] = defaultdict(list)
num_pkgs_with_external_reqs = 0

for distribution_name, requirements in distributions.items():
if requirements.external_pkgs:
num_pkgs_with_external_reqs += 1
external_requirements = frozenset(requirements.external_pkgs)
external_requirements_to_distributions[external_requirements].append(distribution_name)
else:
_DISTRIBUTION_TO_VENV_MAPPING[distribution_name] = no_external_dependencies_venv

if num_pkgs_with_external_reqs == 0:
if args.verbose:
print(colored("No additional venvs are required to be set up", "blue"))
return

requirements_sets_to_venvs: dict[frozenset[str], VenvInfo] = {}

if args.verbose:
num_venvs = len(external_requirements_to_distributions)
msg = (
f"Setting up {num_venvs} venv{'s' if num_venvs != 1 else ''} "
f"for {num_pkgs_with_external_reqs} "
f"distribution{'s' if num_pkgs_with_external_reqs != 1 else ''}... "
)
print(colored(msg, "blue"), end="", flush=True)
venv_start_time = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
venv_info_futures = [
executor.submit(setup_venv_for_external_requirements_set, requirements_set, tempdir)
for requirements_set in external_requirements_to_distributions
]
for venv_info_future in concurrent.futures.as_completed(venv_info_futures):
requirements_set, venv_info = venv_info_future.result()
requirements_sets_to_venvs[requirements_set] = venv_info

if args.verbose:
venv_elapsed_time = time.perf_counter() - venv_start_time
print(colored(f"took {venv_elapsed_time:.2f} seconds", "blue"))
pip_start_time = time.perf_counter()

# Limit workers to 10 at a time, since this makes network requests
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
pip_install_futures = [
executor.submit(install_requirements_for_venv, venv_info, args, requirements_set)
for requirements_set, venv_info in requirements_sets_to_venvs.items()
]
concurrent.futures.wait(pip_install_futures)

if args.verbose:
pip_elapsed_time = time.perf_counter() - pip_start_time
msg = f"Combined time for installing requirements across all venvs: {pip_elapsed_time:.2f} seconds"
print(colored(msg, "blue"))

for requirements_set, distribution_list in external_requirements_to_distributions.items():
venv_to_use = requirements_sets_to_venvs[requirements_set]
_DISTRIBUTION_TO_VENV_MAPPING.update(dict.fromkeys(distribution_list, venv_to_use))


def test_third_party_stubs(code: int, args: TestConfig, tempdir: Path) -> TestResults:
print("Testing third-party packages...")
print("Running mypy " + " ".join(get_mypy_flags(args, "/tmp/...", testing_stdlib=False)))
files_checked = 0
gitignore_spec = get_gitignore_spec()
distributions_to_check: dict[str, PackageDependencies] = {}

for distribution in sorted(os.listdir("stubs")):
distribution_path = Path("stubs", distribution)
Expand All @@ -368,14 +472,25 @@ def test_third_party_stubs(code: int, args: TestConfig) -> TestResults:
or Path("stubs") in args.filter
or any(distribution_path in path.parents for path in args.filter)
):
this_code, checked = test_third_party_distribution(distribution, args)
code = max(code, this_code)
files_checked += checked
distributions_to_check[distribution] = get_recursive_requirements(distribution)

if not _DISTRIBUTION_TO_VENV_MAPPING:
setup_virtual_environments(distributions_to_check, args, tempdir)

assert _DISTRIBUTION_TO_VENV_MAPPING.keys() == distributions_to_check.keys()

for distribution, venv_info in _DISTRIBUTION_TO_VENV_MAPPING.items():
non_types_dependencies = venv_info.python_exe != sys.executable
this_code, checked = test_third_party_distribution(
distribution, args, venv_info=venv_info, non_types_dependencies=non_types_dependencies
)
code = max(code, this_code)
files_checked += checked

return TestResults(code, files_checked)


def test_typeshed(code: int, args: TestConfig) -> TestResults:
def test_typeshed(code: int, args: TestConfig, tempdir: Path) -> TestResults:
print(f"*** Testing Python {args.version} on {args.platform}")
files_checked_this_version = 0
stdlib_dir, stubs_dir = Path("stdlib"), Path("stubs")
Expand All @@ -385,7 +500,7 @@ def test_typeshed(code: int, args: TestConfig) -> TestResults:
print()

if stubs_dir in args.filter or any(stubs_dir in path.parents for path in args.filter):
code, third_party_files_checked = test_third_party_stubs(code, args)
code, third_party_files_checked = test_third_party_stubs(code, args, tempdir)
files_checked_this_version += third_party_files_checked
print()

Expand All @@ -400,10 +515,12 @@ def main() -> None:
exclude = args.exclude or []
code = 0
total_files_checked = 0
for version, platform in product(versions, platforms):
config = TestConfig(args.verbose, filter, exclude, version, platform)
code, files_checked_this_version = test_typeshed(code, args=config)
total_files_checked += files_checked_this_version
with tempfile.TemporaryDirectory() as td:
td_path = Path(td)
for version, platform in product(versions, platforms):
config = TestConfig(args.verbose, filter, exclude, version, platform)
code, files_checked_this_version = test_typeshed(code, args=config, tempdir=td_path)
total_files_checked += files_checked_this_version
if code:
print_error(f"--- exit status {code}, {total_files_checked} files checked ---")
sys.exit(code)
Expand All @@ -417,5 +534,5 @@ def main() -> None:
try:
main()
except KeyboardInterrupt:
print_error("\n\n!!!\nTest aborted due to KeyboardInterrupt\n!!!")
print_error("\n\nTest aborted due to KeyboardInterrupt!")
sys.exit(1)
Loading

0 comments on commit 4d6fda9

Please sign in to comment.