From 32fff52df8b3f22933b1e6a9236c3cc75dde0e60 Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Tue, 18 Jun 2024 05:33:02 -0700 Subject: [PATCH] Rewrite symlinks for vendored repositories To make sure symlinks work correctly, Bazel uses the following strategy to rewrite symlinks in the vendored source: - Create a symlink `/bazel-external` that points to `$(bazel info output_base)/external`. It is refreshed by every Bazel command automatically. - For the vendored source, rewrite all symlinks that originally point to a path under `$(bazel info output_base)/external` to a relative path under `/bazel-external`. Fixes https://github.com/bazelbuild/bazel/issues/22303 Closes #22723. PiperOrigin-RevId: 644349481 Change-Id: I853ac0ea5405f0cf58431e988d727e690cbbb013 --- site/en/external/vendor.md | 59 ++++++++- .../com/google/devtools/build/lib/bazel/BUILD | 2 + .../lib/bazel/BazelRepositoryModule.java | 31 +++++ .../devtools/build/lib/bazel/bzlmod/BUILD | 3 + .../build/lib/bazel/bzlmod/VendorManager.java | 117 +++++++++++++----- .../build/lib/profiler/ProfilerTask.java | 1 + src/test/py/bazel/bzlmod/bazel_vendor_test.py | 92 ++++++++++++-- src/test/py/bazel/test_base.py | 13 +- 8 files changed, 277 insertions(+), 41 deletions(-) diff --git a/site/en/external/vendor.md b/site/en/external/vendor.md index 17caccca5c20e9..3b0a0eb348885a 100644 --- a/site/en/external/vendor.md +++ b/site/en/external/vendor.md @@ -141,8 +141,8 @@ always excluded from vendoring. Bazel fetches external dependencies of a project under `$(bazel info output_base)/external`. Vendoring external dependencies means moving out -relevant files and directories to a given vendor directory and use the vendored -source for later builds. +relevant files and directories to the given vendor directory and use the +vendored source for later builds. The content being vendored includes: @@ -160,3 +160,58 @@ is pinned in the VENDOR.bazel file. If a user does change the vendored source without pinning the repo, the changed vendored source will be used, but it will be overwritten if its existing marker file is outdated and the repo is vendored again. + +### Vendor registry files {:#vendor-registry-files} + +Bazel has to perform the Bazel module resolution in order to fetch external +dependencies, which may require accessing registry files through internet. To +achieve offline build, Bazel vendors all registry files fetched from +network under the `/_registries` directory. + +### Vendor symlinks {:#vendor-symlinks} + +External repositories may contain symlinks pointing to other files or +directories. To make sure symlinks work correctly, Bazel uses the following +strategy to rewrite symlinks in the vendored source: + +- Create a symlink `/bazel-external` that points to `$(bazel info + output_base)/external`. It is refreshed by every Bazel command + automatically. +- For the vendored source, rewrite all symlinks that originally point to a + path under `$(bazel info output_base)/external` to a relative path under + `/bazel-external`. + +For example, if the original symlink is + +```none +/repo_foo~/link => $(bazel info output_base)/external/repo_bar~/file +``` + +It will be rewritten to + +```none +/repo_foo~/link => ../../bazel-external/repo_bar~/file +``` + +where + +```none +/bazel-external => $(bazel info output_base)/external # This might be new if output base is changed +``` + +Since `/bazel-external` is generated by Bazel automatically, it's +recommended to add it to `.gitignore` or equivalent to avoid checking it in. + +With this strategy, symlinks in the vendored source should work correctly even +after the vendored source is moved to another location or the bazel output base +is changed. + +Note: symlinks that point to an absolute path outside of $(bazel info +output_base)/external are not rewritten. Therefore, it could still break +cross-machine compatibility. + +Note: On Windows, vendoring symlinks only works with +[`--windows_enable_symlinks`][windows_enable_symlinks] +flag enabled. + +[windows_enable_symlinks]: /reference/command-line-reference#flag--windows_enable_symlinks diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/BUILD index 6d8abb60844550..815dee82de214c 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/BUILD @@ -34,6 +34,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:resolution", "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:resolution_impl", "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:tidy_impl", + "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:vendor", "//src/main/java/com/google/devtools/build/lib/bazel/commands", "//src/main/java/com/google/devtools/build/lib/bazel/repository", "//src/main/java/com/google/devtools/build/lib/bazel/repository:repository_options", @@ -57,6 +58,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository", "//src/main/java/com/google/devtools/build/lib/util:abrupt_exit_exception", "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", + "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/common/options", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java index ee8000f43b2d5b..b04d25024fd25c 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java @@ -55,6 +55,7 @@ import com.google.devtools.build.lib.bazel.bzlmod.SingleExtensionFunction; import com.google.devtools.build.lib.bazel.bzlmod.SingleExtensionUsagesFunction; import com.google.devtools.build.lib.bazel.bzlmod.VendorFileFunction; +import com.google.devtools.build.lib.bazel.bzlmod.VendorManager; import com.google.devtools.build.lib.bazel.bzlmod.YankedVersionsFunction; import com.google.devtools.build.lib.bazel.bzlmod.YankedVersionsUtil; import com.google.devtools.build.lib.bazel.commands.FetchCommand; @@ -82,6 +83,7 @@ import com.google.devtools.build.lib.bazel.rules.android.AndroidSdkRepositoryFunction; import com.google.devtools.build.lib.bazel.rules.android.AndroidSdkRepositoryRule; import com.google.devtools.build.lib.clock.Clock; +import com.google.devtools.build.lib.cmdline.LabelConstants; import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.pkgcache.PackageOptions; @@ -114,7 +116,9 @@ import com.google.devtools.build.lib.starlarkbuildapi.repository.RepositoryBootstrap; import com.google.devtools.build.lib.util.AbruptExitException; import com.google.devtools.build.lib.util.DetailedExitCode; +import com.google.devtools.build.lib.util.OS; import com.google.devtools.build.lib.vfs.FileSystem; +import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Root; @@ -507,6 +511,33 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { vendorDirectory = Optional.ofNullable(repoOptions.vendorDirectory) .map(vendorDirectory -> env.getWorkspace().getRelative(vendorDirectory)); + + if (vendorDirectory.isPresent()) { + try { + Path externalRoot = + env.getOutputBase().getRelative(LabelConstants.EXTERNAL_PATH_PREFIX); + FileSystemUtils.ensureSymbolicLink( + vendorDirectory.get().getChild(VendorManager.EXTERNAL_ROOT_SYMLINK_NAME), + externalRoot); + if (OS.getCurrent() == OS.WINDOWS) { + // On Windows, symlinks are resolved differently. + // Given /repo_foo/link, + // where /repo_foo points to /repo_foo in vendor mode + // and repo_foo/link points to a relative path ../bazel-external/repo_bar/data. + // Windows won't resolve `repo_foo` before resolving `link`, which causes + // /repo_foo/link to be resolved to /bazel-external/repo_bar/data + // To work around this, we create a symlink /bazel-external -> . + FileSystemUtils.ensureSymbolicLink( + externalRoot.getChild(VendorManager.EXTERNAL_ROOT_SYMLINK_NAME), externalRoot); + } + } catch (IOException e) { + env.getReporter() + .handle( + Event.error( + "Failed to create symlink to external repo root under vendor directory: " + + e.getMessage())); + } + } } if (repoOptions.registries != null && !repoOptions.registries.isEmpty()) { diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD index d9126a21439682..1c4ed82a6c99e0 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD @@ -52,7 +52,10 @@ java_library( deps = [ "//src/main/java/com/google/devtools/build/lib/bazel/repository/downloader", "//src/main/java/com/google/devtools/build/lib/cmdline", + "//src/main/java/com/google/devtools/build/lib/profiler", + "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/vfs", + "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//third_party:guava", ], ) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorManager.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorManager.java index 23b0ab71e923ac..88bdc61b33fdc8 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorManager.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorManager.java @@ -20,12 +20,19 @@ import com.google.common.hash.Hasher; import com.google.devtools.build.lib.bazel.repository.downloader.Checksum; import com.google.devtools.build.lib.cmdline.RepositoryName; +import com.google.devtools.build.lib.profiler.Profiler; +import com.google.devtools.build.lib.profiler.ProfilerTask; +import com.google.devtools.build.lib.profiler.SilentCloseable; +import com.google.devtools.build.lib.util.OS; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.PathFragment; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLDecoder; +import java.nio.file.Files; +import java.util.Collection; import java.util.Locale; import java.util.Objects; @@ -34,6 +41,8 @@ public class VendorManager { private static final String REGISTRIES_DIR = "_registries"; + public static final String EXTERNAL_ROOT_SYMLINK_NAME = "bazel-external"; + private final Path vendorDirectory; public VendorManager(Path vendorDirectory) { @@ -56,38 +65,88 @@ public void vendorRepos(Path externalRepoRoot, ImmutableList rep } for (RepositoryName repo : reposToVendor) { - Path repoUnderExternal = externalRepoRoot.getChild(repo.getName()); - Path repoUnderVendor = vendorDirectory.getChild(repo.getName()); - // This could happen when running the vendor command twice without changing anything. - if (repoUnderExternal.isSymbolicLink() - && repoUnderExternal.resolveSymbolicLinks().equals(repoUnderVendor)) { - continue; - } + try (SilentCloseable c = + Profiler.instance().profile(ProfilerTask.REPOSITORY_VENDOR, repo.toString())) { + Path repoUnderExternal = externalRepoRoot.getChild(repo.getName()); + Path repoUnderVendor = vendorDirectory.getChild(repo.getName()); + // This could happen when running the vendor command twice without changing anything. + if (repoUnderExternal.isSymbolicLink() + && repoUnderExternal.resolveSymbolicLinks().equals(repoUnderVendor)) { + continue; + } + + // At this point, the repo should exist under external dir, but check if the vendor src is + // already up-to-date. + Path markerUnderExternal = externalRepoRoot.getChild(repo.getMarkerFileName()); + Path markerUnderVendor = vendorDirectory.getChild(repo.getMarkerFileName()); + if (isRepoUpToDate(markerUnderVendor, markerUnderExternal)) { + continue; + } - // At this point, the repo should exist under external dir, but check if the vendor src is - // already up-to-date. - Path markerUnderExternal = externalRepoRoot.getChild(repo.getMarkerFileName()); - Path markerUnderVendor = vendorDirectory.getChild(repo.getMarkerFileName()); - if (isRepoUpToDate(markerUnderVendor, markerUnderExternal)) { - continue; + // Actually vendor the repo: + // 1. Clean up existing marker file and vendor dir. + markerUnderVendor.delete(); + repoUnderVendor.deleteTree(); + repoUnderVendor.createDirectory(); + // 2. Move the marker file to a temporary one under vendor dir. + Path tMarker = vendorDirectory.getChild(repo.getMarkerFileName() + ".tmp"); + FileSystemUtils.moveFile(markerUnderExternal, tMarker); + // 3. Move the external repo to vendor dir. It's fine if this step fails or is interrupted, + // because the marker file under external is gone anyway. + FileSystemUtils.moveTreesBelow(repoUnderExternal, repoUnderVendor); + // 4. Re-plant symlinks pointing a path under the external root to a relative path + // to make sure the vendor src keep working after being moved or output base changed + replantSymlinks(repoUnderVendor, externalRepoRoot); + // 5. Rename the temporary marker file after the move is done. + tMarker.renameTo(markerUnderVendor); + // 6. Leave a symlink in external dir to keep things working. + repoUnderExternal.deleteTree(); + FileSystemUtils.ensureSymbolicLink(repoUnderExternal, repoUnderVendor); } + } + } - // Actually vendor the repo: - // 1. Clean up existing marker file and vendor dir. - markerUnderVendor.delete(); - repoUnderVendor.deleteTree(); - repoUnderVendor.createDirectory(); - // 2. Move the marker file to a temporary one under vendor dir. - Path tMarker = vendorDirectory.getChild(repo.getMarkerFileName() + ".tmp"); - FileSystemUtils.moveFile(markerUnderExternal, tMarker); - // 3. Move the external repo to vendor dir. It's fine if this step fails or is interrupted, - // because the marker file under external is gone anyway. - FileSystemUtils.moveTreesBelow(repoUnderExternal, repoUnderVendor); - // 4. Rename to temporary marker file after the move is done. - tMarker.renameTo(markerUnderVendor); - // 5. Leave a symlink in external dir. - repoUnderExternal.deleteTree(); - FileSystemUtils.ensureSymbolicLink(repoUnderExternal, repoUnderVendor); + /** + * Replants the symlinks under the specified repository directory. + * + *

Re-write symlinks that originally pointing to a path under the external root to a relative + * path pointing to an external root symlink under the vendor directory. + * + * @param repoUnderVendor The path to the repository directory under the vendor directory. + * @param externalRepoRoot The path to the root of external repositories. + * @throws IOException If an I/O error occurs while replanting the symlinks. + */ + private void replantSymlinks(Path repoUnderVendor, Path externalRepoRoot) throws IOException { + try { + Collection symlinks = + FileSystemUtils.traverseTree(repoUnderVendor, Path::isSymbolicLink); + Path externalSymlinkUnderVendor = vendorDirectory.getChild(EXTERNAL_ROOT_SYMLINK_NAME); + FileSystemUtils.ensureSymbolicLink(externalSymlinkUnderVendor, externalRepoRoot); + for (Path symlink : symlinks) { + PathFragment target = symlink.readSymbolicLink(); + if (!target.startsWith(externalRepoRoot.asFragment())) { + // TODO: print a warning for absolute symlinks? + continue; + } + PathFragment newTarget = + PathFragment.create( + "../".repeat(symlink.relativeTo(vendorDirectory).segmentCount() - 1)) + .getRelative(EXTERNAL_ROOT_SYMLINK_NAME) + .getRelative(target.relativeTo(externalRepoRoot.asFragment())); + if (OS.getCurrent() == OS.WINDOWS) { + // On Windows, FileSystemUtils.ensureSymbolicLink always resolves paths to absolute path. + // Use Files.createSymbolicLink here instead to preserve relative target path. + symlink.delete(); + Files.createSymbolicLink( + java.nio.file.Path.of(symlink.getPathString()), + java.nio.file.Path.of(newTarget.getPathString())); + } else { + FileSystemUtils.ensureSymbolicLink(symlink, newTarget); + } + } + } catch (IOException e) { + throw new IOException( + String.format("Failed to rewrite symlinks under %s: ", repoUnderVendor), e); } } diff --git a/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java b/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java index 2024b49cfe2ce9..3444eb5bea1b06 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java @@ -99,6 +99,7 @@ public enum ProfilerTask { CONFLICT_CHECK("Conflict checking"), DYNAMIC_LOCK("Acquiring dynamic execution output lock", Threshold.FIFTY_MILLIS), REPOSITORY_FETCH("Fetching repository"), + REPOSITORY_VENDOR("Vendoring repository"), UNKNOWN("Unknown event"); private static class Threshold { diff --git a/src/test/py/bazel/bzlmod/bazel_vendor_test.py b/src/test/py/bazel/bzlmod/bazel_vendor_test.py index c93adee75995c4..813c32d7025f5d 100644 --- a/src/test/py/bazel/bzlmod/bazel_vendor_test.py +++ b/src/test/py/bazel/bzlmod/bazel_vendor_test.py @@ -164,10 +164,7 @@ def testVendoringMultipleTimes(self): _, stdout, _ = self.RunBazel(['info', 'output_base']) repo_path = stdout[0] + '/external/aaa~' - if self.IsWindows(): - self.assertTrue(self.IsJunction(repo_path)) - else: - self.assertTrue(os.path.islink(repo_path)) + self.AssertPathIsSymlink(repo_path) def testVendorRepo(self): self.main_registry.createCcModule('aaa', '1.0').createCcModule( @@ -659,10 +656,7 @@ def testBuildVendoredTargetOffline(self): _, stdout, _ = self.RunBazel(['info', 'output_base']) for repo in ['aaa~', 'bbb~']: repo_path = stdout[0] + '/external/' + repo - if self.IsWindows(): - self.assertTrue(self.IsJunction(repo_path)) - else: - self.assertTrue(os.path.islink(repo_path)) + self.AssertPathIsSymlink(repo_path) def testVendorConflictRegistryFile(self): self.main_registry.createCcModule('aaa', '1.0').createCcModule( @@ -699,6 +693,88 @@ def testVendorConflictRegistryFile(self): stderr, ) + def testVendorRepoWithSymlinks(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'ext = use_extension("extension.bzl", "ext")', + 'use_repo(ext, "foo", "bar")', + ], + ) + abs_foo = self.ScratchFile('abs', ['Hello from abs!']).replace('\\', '/') + self.ScratchFile( + 'extension.bzl', + [ + 'def _repo_foo_impl(ctx):', + ' ctx.file("REPO.bazel")', + ' ctx.file("data", "Hello from foo!\\n")', + # Symlink to an absolute path outside of external root + f' ctx.symlink("{abs_foo}", "sym_abs")', + # Symlink to a file in the same repo + ' ctx.symlink("data", "sym_foo")', + # Symlink to a file in another repo + ' ctx.symlink(ctx.path(Label("@bar//:data")), "sym_bar")', + # Symlink to a directory in another repo + ' ctx.symlink("../_main~ext~bar/pkg", "sym_pkg")', + ( + ' ctx.file("BUILD", "exports_files([\'sym_abs\',' + " 'sym_foo','sym_bar', 'sym_pkg/data'])\")" + ), + 'repo_foo = repository_rule(implementation=_repo_foo_impl)', + '', + 'def _repo_bar_impl(ctx):', + ' ctx.file("REPO.bazel")', + ' ctx.file("data", "Hello from bar!\\n")', + ' ctx.file("pkg/data", "Hello from pkg bar!\\n")', + ' ctx.file("BUILD", "exports_files([\'data\'])")', + 'repo_bar = repository_rule(implementation=_repo_bar_impl)', + '', + 'def _ext_impl(ctx):', + ' repo_foo(name="foo")', + ' repo_bar(name="bar")', + 'ext = module_extension(implementation=_ext_impl)', + ], + ) + self.ScratchFile( + 'BUILD', + [ + 'genrule(', + ' name = "print_paths",', + ( + ' srcs = ["@foo//:sym_abs", "@foo//:sym_foo",' + ' "@foo//:sym_bar", "@foo//:sym_pkg/data"],' + ), + ' outs = ["output.txt"],', + ' cmd = "cat $(SRCS) > $@",', + ')', + ], + ) + self.RunBazel(['vendor', '--vendor_dir=vendor', '--repo=@foo']) + self.RunBazel(['clean', '--expunge']) + self.AssertPathIsSymlink(self._test_cwd + '/vendor/bazel-external') + + # Move the vendor directory to a new location and use a new output base, + # it should still work + os.rename(self._test_cwd + '/vendor', self._test_cwd + '/vendor_new') + output_base = tempfile.mkdtemp(dir=self._tests_root) + self.RunBazel([ + f'--output_base={output_base}', + 'build', + '//:print_paths', + '--vendor_dir=vendor_new', + '--verbose_failures', + ]) + _, stdout, _ = self.RunBazel( + [f'--output_base={output_base}', 'info', 'output_base'] + ) + self.AssertPathIsSymlink(stdout[0] + '/external/_main~ext~foo') + output = os.path.join(self._test_cwd, './bazel-bin/output.txt') + self.AssertFileContentContains( + output, + 'Hello from abs!\nHello from foo!\nHello from bar!\nHello from pkg' + ' bar!\n', + ) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/test_base.py b/src/test/py/bazel/test_base.py index fc0936b1729612..691817a3cde342 100644 --- a/src/test/py/bazel/test_base.py +++ b/src/test/py/bazel/test_base.py @@ -212,6 +212,15 @@ def AssertFileContentNotContains(self, file_path, entry): if entry in f.read(): self.fail('File "%s" does contain "%s"' % (file_path, entry)) + def AssertPathIsSymlink(self, path): + if self.IsWindows(): + self.assertTrue( + self.IsReparsePoint(path), + "Path '%s' is not a symlink or junction" % path, + ) + else: + self.assertTrue(os.path.islink(path), "Path '%s' is not a symlink" % path) + def CreateWorkspaceWithDefaultRepos(self, path, lines=None): rule_definition = [ 'load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")' @@ -274,8 +283,8 @@ def IsLinux(): """Returns true if the current platform is Linux.""" return sys.platform.startswith('linux') - def IsJunction(self, path): - """Returns whether a folder is a junction or not. Used with Windows folders. + def IsReparsePoint(self, path): + """Returns whether a path is a reparse point (symlink or junction) on Windows. Args: path: string; an absolute path to a folder e.g. "C://foo/bar/aaa"