Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Git repo cache #7424

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/test/shell/bazel/skylark_git_repository_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ function set_up() {
tar zxf outer-planets-repo.tar.gz
tar zxf refetch-repo.tar.gz

local cache_dir=/tmp/bazel_cache
rm -rf $cache_dir

# Fix environment variables for a hermetic use of git.
export GIT_CONFIG_NOSYSTEM=1
export GIT_CONFIG_NOGLOBAL=1
Expand Down Expand Up @@ -158,6 +161,27 @@ EOF
expect_log "Pluto is a dwarf planet"
}

# This test:
# 1. Creates a git_repository rule with some commit hash
# 2. Make sure worksapce can be built and repository cache is updated
function test_git_repositry_cache_is_populated() {
local pluto_repo_dir=$TEST_TMPDIR/repos/pluto
local cache_dir=/tmp/bazel_cache
local commit_hash="52f9a3f87a2dd17ae0e5847bbae9734f09354afd"


export BAZEL_GIT_REPOSITORY_CACHE=$cache_dir

# Do not run tests if the git worktree command does not exist.
# Testing without if check - is windows timing out due to that?
# if git worktree --help; then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this condition commented out? Should it be there? Or should we actually run the test even in this case, and just skip the verification that the correct hash ended up in the cache.

do_git_repository_test $commit_hash

cache_commit_hash="$(cat $cache_dir/worktrees/pluto/HEAD)"
assert_equals $commit_hash $cache_commit_hash
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A verification of caching usually verifies that we can do a clone from cache only in an "offline" situation, if we successfully filled the cache first, and attempt to check out a fixed hash (not a branch). I don't see your test verifying that main property of a cache.

# fi
}

function test_git_repository() {
do_git_repository_test "52f9a3f87a2dd17ae0e5847bbae9734f09354afd"
}
Expand Down Expand Up @@ -640,7 +664,7 @@ EOF

bazel fetch //planets:planet-info >& $TEST_log \
|| echo "Expect run to fail."
expect_log "error cloning"
expect_log "Error cloning"
}

run_suite "skylark git_repository tests"
123 changes: 99 additions & 24 deletions tools/build_defs/repo/git.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,70 @@

load(":utils.bzl", "patch", "update_attrs", "workspace_and_buildfile")


def _if_debug(cond, st, what="Action"):
"Print if 'cond'."
if cond:
print("{} returned {}\n{}\n----\n{}".format(what, st.return_code, st.stdout, st.stderr))

def _setup_cache(ctx, git_cache):
# Set-up git_cache git repository.
bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash"
st = ctx.execute([bash_exe, "-c", """set -ex
mkdir -p {git_cache}
git -C {git_cache} init --bare || :
""".format(git_cache=git_cache)], environment = ctx.os.environ)
_if_debug(cond=ctx.attr.verbose, st=st, what='Init')
if st.return_code:
fail("Error ... {}:\n{}\n---\n{}".format(ctx.name, st.stdout, st.stderr))

def _get_repository_from_cache(ctx, directory, ref, git_cache):
bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash"
exec_result = ctx.execute([bash_exe, "-c", """
cd {working_dir}
set -ex
rm -rf '{directory}' '{dir_link}'
git -C {git_cache} worktree prune
git -C {git_cache} worktree add '{directory}' {ref}
""".format(
working_dir = ctx.path(".").dirname,
dir_link = ctx.path("."),
directory = directory,
ref = ref,
git_cache = git_cache,
)], environment = ctx.os.environ)
_if_debug(cond=ctx.attr.verbose, st=exec_result, what='Checkout')

return exec_result

def _populate_cache(ctx, git_cache, remote_name, ref, shallow):
# 'remote add x' must be done only if x does not exist
bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash"
st = ctx.execute([bash_exe, "-c", """set -ex
git -C {git_cache} remote add '{remote_name}' '{remote}' || \
git -C {git_cache} remote set-url '{remote_name}' '{remote}'
git -C {git_cache} fetch '{shallow}' '{remote_name}' {ref} || \
git -C {git_cache} fetch '{remote_name}' {ref} || \
git -C {git_cache} '{shallow}' fetch '{remote_name}' || \
git -C {git_cache} fetch '{remote_name}'
""".format(
git_cache=git_cache,
remote_name=remote_name,
remote=ctx.attr.remote,
ref=ref,
shallow=shallow)], environment = ctx.os.environ)
_if_debug(cond=ctx.attr.verbose, st=st, what='Fetching')

if st.return_code:
fail("Error fetching {}:\n{}\n----\n{}".format(ctx.name, st.stdout, st.stderr))

def _clone_or_update(ctx):
if ((not ctx.attr.tag and not ctx.attr.commit and not ctx.attr.branch) or
(ctx.attr.tag and ctx.attr.commit) or
(ctx.attr.tag and ctx.attr.branch) or
(ctx.attr.commit and ctx.attr.branch)):
fail("Exactly one of commit, tag, or branch must be provided")
remote_name = "remote_" + str(hash(ctx.attr.remote))
shallow = ""
if ctx.attr.commit:
ref = ctx.attr.commit
Expand Down Expand Up @@ -49,29 +107,45 @@ def _clone_or_update(ctx):
ctx.attr.strip_prefix if ctx.attr.strip_prefix else "None",
))
bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash"
st = ctx.execute([bash_exe, "-c", """
cd {working_dir}
set -ex
( cd {working_dir} &&
if ! ( cd '{dir_link}' && [[ "$(git rev-parse --git-dir)" == '.git' ]] ) >/dev/null 2>&1; then
rm -rf '{directory}' '{dir_link}'
git clone '{shallow}' '{remote}' '{directory}' || git clone '{remote}' '{directory}'
fi
git -C '{directory}' reset --hard {ref} || \
((git -C '{directory}' fetch '{shallow}' origin {ref}:{ref} || \
git -C '{directory}' fetch origin {ref}:{ref}) && git -C '{directory}' reset --hard {ref})
git -C '{directory}' clean -xdf )
""".format(
working_dir = ctx.path(".").dirname,
dir_link = ctx.path("."),
directory = directory,
remote = ctx.attr.remote,
ref = ref,
shallow = shallow,
)], environment = ctx.os.environ)
git_cache = ctx.os.environ.get("BAZEL_GIT_REPOSITORY_CACHE")

if git_cache:
_setup_cache(ctx, git_cache)
st = _get_repository_from_cache(ctx, directory, ref, git_cache)

if st.return_code:
_populate_cache(ctx, git_cache, remote_name, ref, shallow)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach has the problem, that git fetch is called unconditionally, and git fetch tries to contact the remote site, even if we only need a commit already in cache. That means, we cannot use the cache when being offline, which would be a main use case for a cache.

This can be avoided by first trying to check out the correct commit and only if that fails, trying to update the cache.

st = _get_repository_from_cache(ctx, directory, ref, git_cache)

if st.return_code:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we fail or fall back to using no cache?

fail("""Error checking out worktree %s. Maybe your git version is too old?. Using Git
repository caching requires at least Git 2.5.0. Her is the error we got :\n%s""" %
(ctx.name, st.stderr))
else:
st = ctx.execute([bash_exe, "-c", """
cd {working_dir}
set -ex
( cd {working_dir} &&
if ! ( cd '{dir_link}' && [[ "$(git rev-parse --git-dir)" == '.git' ]] ) >/dev/null 2>&1; then
rm -rf '{directory}' '{dir_link}'
git clone '{shallow}' '{remote}' '{directory}' || git clone '{remote}' '{directory}'
fi
git -C '{directory}' reset --hard {ref} || \
((git -C '{directory}' fetch '{shallow}' origin {ref}:{ref} || \
git -C '{directory}' fetch origin {ref}:{ref}) && git -C '{directory}' reset --hard {ref})
git -C '{directory}' clean -xdf )
""".format(
working_dir = ctx.path(".").dirname,
dir_link = ctx.path("."),
directory = directory,
remote = ctx.attr.remote,
ref = ref,
shallow = shallow,
)], environment = ctx.os.environ)

if st.return_code:
fail("Error cloning %s:\n%s" % (ctx.name, st.stderr))

if st.return_code:
fail("error cloning %s:\n%s" % (ctx.name, st.stderr))

if ctx.attr.strip_prefix:
dest_link = "{}/{}".format(directory, ctx.attr.strip_prefix)
Expand All @@ -87,8 +161,9 @@ set -ex
""".format(
directory = ctx.path("."),
)], environment = ctx.os.environ)
if st.return_code:
fail("error updating submodules %s:\n%s" % (ctx.name, st.stderr))

if st.return_code:
fail("error updating submodules %s:\n%s" % (ctx.name, st.stderr))

ctx.report_progress("Recording actual commit")

Expand Down