From 97be92e2ca72a777db2110519506f46fac57bd98 Mon Sep 17 00:00:00 2001 From: nitrocode <7775707+nitrocode@users.noreply.github.com> Date: Wed, 22 Feb 2023 15:29:29 -0600 Subject: [PATCH] feat: shallow repo clone in merge checkout strategy (#2758) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implement shallow repo clone in merge checkout strategy --------- Co-authored-by: Ilya Lukyanov Co-authored-by: Nikolai Røed Kristiansen Co-authored-by: PePe Amengual Co-authored-by: Ken Kaizu --- cmd/server.go | 13 ++- runatlantis.io/docs/checkout-strategy.md | 9 ++ runatlantis.io/docs/server-configuration.md | 9 ++ server/events/pending_plan_finder_test.go | 16 ++++ server/events/working_dir.go | 97 +++++++++++---------- server/events/working_dir_test.go | 94 ++++++++++++++++++++ server/server.go | 1 + server/user_config.go | 1 + 8 files changed, 191 insertions(+), 49 deletions(-) diff --git a/cmd/server.go b/cmd/server.go index 5065abec1a..5cf1cc096b 100644 --- a/cmd/server.go +++ b/cmd/server.go @@ -56,8 +56,9 @@ const ( BitbucketTokenFlag = "bitbucket-token" BitbucketUserFlag = "bitbucket-user" BitbucketWebhookSecretFlag = "bitbucket-webhook-secret" - ConfigFlag = "config" + CheckoutDepthFlag = "checkout-depth" CheckoutStrategyFlag = "checkout-strategy" + ConfigFlag = "config" DataDirFlag = "data-dir" DefaultTFVersionFlag = "default-tf-version" DisableApplyAllFlag = "disable-apply-all" @@ -139,6 +140,7 @@ const ( DefaultAutoplanFileList = "**/*.tf,**/*.tfvars,**/*.tfvars.json,**/terragrunt.hcl,**/.terraform.lock.hcl" DefaultAllowCommands = "version,plan,apply,unlock,approve_policies" DefaultCheckoutStrategy = "branch" + DefaultCheckoutDepth = 0 DefaultBitbucketBaseURL = bitbucketcloud.BaseURL DefaultDataDir = "~/.atlantis" DefaultExecutableName = "atlantis" @@ -531,6 +533,12 @@ var boolFlags = map[string]boolFlag{ }, } var intFlags = map[string]intFlag{ + CheckoutDepthFlag: { + description: fmt.Sprintf("Used only if --%s=merge.", CheckoutStrategyFlag) + + " How many commits to include in each of base and feature branches when cloning repository." + + " If merge base is further behind than this number of commits from any of branches heads, full fetch will be performed.", + defaultValue: DefaultCheckoutDepth, + }, ParallelPoolSize: { description: "Max size of the wait group that runs parallel plans and applies (if enabled).", defaultValue: DefaultParallelPoolSize, @@ -758,6 +766,9 @@ func (s *ServerCmd) setDefaults(c *server.UserConfig) { if c.AutoplanFileList == "" { c.AutoplanFileList = DefaultAutoplanFileList } + if c.CheckoutDepth <= 0 { + c.CheckoutDepth = DefaultCheckoutDepth + } if c.AllowCommands == "" { c.AllowCommands = DefaultAllowCommands } diff --git a/runatlantis.io/docs/checkout-strategy.md b/runatlantis.io/docs/checkout-strategy.md index d58c875235..066f7444f0 100644 --- a/runatlantis.io/docs/checkout-strategy.md +++ b/runatlantis.io/docs/checkout-strategy.md @@ -46,3 +46,12 @@ Atlantis doesn't actually commit this merge anywhere. It just uses it locally. Atlantis only performs this merge during the `terraform plan` phase. If another commit is pushed to `main` **after** Atlantis runs `plan`, nothing will happen. ::: + +To optimize cloning time, Atlantis can perform a shallow clone by specifying the `--checkout-depth` flag. The cloning is performed in a following manner: + +- Shallow clone of the default branch is performed with depth of `--checkout-depth` value of zero (full clone). +- `branch` is retrieved, including the same amount of commits. +- Merge base of the default branch and `branch` is checked for existence in the shallow clone. +- If the merge base is not present, it means that either of the branches are ahead of the merge base by more than `--checkout-depth` commits. In this case full repo history is fetched. + +If the commit history often diverges by more than the default checkout depth then the `--checkout-depth` flag should be tuned to avoid full fetches. \ No newline at end of file diff --git a/runatlantis.io/docs/server-configuration.md b/runatlantis.io/docs/server-configuration.md index 0093f69eb9..b060a50473 100644 --- a/runatlantis.io/docs/server-configuration.md +++ b/runatlantis.io/docs/server-configuration.md @@ -305,6 +305,15 @@ and set `--autoplan-modules` to `false`. This means that an attacker could spoof calls to Atlantis and cause it to perform malicious actions. ::: +### `--checkout-depth` + ```bash + atlantis server --checkout-depth=0 + # or + ATLANTIS_CHECKOUT_DEPTH=0 + ``` + The number of commits to fetch from the branch. Used if `--checkout-strategy=merge` since the `--checkout-strategy=branch` (default) checkout strategy always defaults to a shallow clone using a depth of 1. + Defaults to `0`. See [Checkout Strategy](checkout-strategy.html) for more details. + ### `--checkout-strategy` ```bash atlantis server --checkout-strategy="" diff --git a/server/events/pending_plan_finder_test.go b/server/events/pending_plan_finder_test.go index 1d7027c2bb..cecc8c9bf2 100644 --- a/server/events/pending_plan_finder_test.go +++ b/server/events/pending_plan_finder_test.go @@ -233,6 +233,22 @@ func TestPendingPlanFinder_FindPlanCheckedIn(t *testing.T) { Equals(t, 0, len(actPlans)) } +func runCmdErrCode(t *testing.T, dir string, errCode int, name string, args ...string) string { + t.Helper() + cpCmd := exec.Command(name, args...) + cpCmd.Dir = dir + cpOut, err := cpCmd.CombinedOutput() + cmd := strings.Join(append([]string{name}, args...), " ") + if err != nil { + if eerr, ok := err.(*exec.ExitError); ok { + Assert(t, errCode == eerr.ExitCode(), "unexpected exit code: want %v, got %v, running %q: %s", errCode, eerr.ExitCode(), cmd, cpCmd) + return string(cpOut) + } + } + Assert(t, false, "invalid exit code, running %q: %s", cmd, cpOut) + return string(cpOut) +} + // Test that it deletes pending plans. func TestPendingPlanFinder_DeletePlans(t *testing.T) { files := map[string]interface{}{ diff --git a/server/events/working_dir.go b/server/events/working_dir.go index bf258b0602..da821e95f0 100644 --- a/server/events/working_dir.go +++ b/server/events/working_dir.go @@ -59,6 +59,10 @@ type FileWorkspace struct { // If this is false, then we will check out the head branch from the pull // request. CheckoutMerge bool + // CheckoutDepth is how many commits of feature branch and main branch we'll + // retrieve by default. If their merge base is not retrieved with this depth, + // full fetch will be performed. Only matters if CheckoutMerge=true. + CheckoutDepth int // TestingOverrideHeadCloneURL can be used during testing to override the // URL of the head repo to be cloned. If it's empty then we clone normally. TestingOverrideHeadCloneURL string @@ -267,53 +271,8 @@ func (w *FileWorkspace) forceClone(log logging.SimpleLogging, baseCloneURL = w.TestingOverrideBaseCloneURL } - var cmds [][]string - if w.CheckoutMerge { - // NOTE: We can't do a shallow clone when we're merging because we'll - // get merge conflicts if our clone doesn't have the commits that the - // branch we're merging branched off at. - // See https://groups.google.com/forum/#!topic/git-users/v3MkuuiDJ98. - fetchRef := fmt.Sprintf("+refs/heads/%s:", p.HeadBranch) - fetchRemote := "head" - if w.GithubAppEnabled { - fetchRef = fmt.Sprintf("pull/%d/head:", p.Num) - fetchRemote = "origin" - } - cmds = [][]string{ - { - "git", "clone", "--branch", p.BaseBranch, "--single-branch", baseCloneURL, cloneDir, - }, - { - "git", "remote", "add", "head", headCloneURL, - }, - { - "git", "fetch", fetchRemote, fetchRef, - }, - } - if w.GpgNoSigningEnabled { - cmds = append(cmds, []string{ - "git", "config", "--local", "commit.gpgsign", "false", - }) - } - // We use --no-ff because we always want there to be a merge commit. - // This way, our branch will look the same regardless if the merge - // could be fast forwarded. This is useful later when we run - // git rev-parse HEAD^2 to get the head commit because it will - // always succeed whereas without --no-ff, if the merge was fast - // forwarded then git rev-parse HEAD^2 would fail. - cmds = append(cmds, []string{ - "git", "merge", "-q", "--no-ff", "-m", "atlantis-merge", "FETCH_HEAD", - }) - } else { - cmds = [][]string{ - { - "git", "clone", "--branch", p.HeadBranch, "--depth=1", "--single-branch", headCloneURL, cloneDir, - }, - } - } - - for _, args := range cmds { - cmd := exec.Command(args[0], args[1:]...) // nolint: gosec + runGit := func(args ...string) error { + cmd := exec.Command("git", args...) // nolint: gosec cmd.Dir = cloneDir // The git merge command requires these env vars are set. cmd.Env = append(os.Environ(), []string{ @@ -330,8 +289,50 @@ func (w *FileWorkspace) forceClone(log logging.SimpleLogging, return fmt.Errorf("running %s: %s: %s", cmdStr, sanitizedOutput, sanitizedErrMsg) } log.Debug("ran: %s. Output: %s", cmdStr, strings.TrimSuffix(sanitizedOutput, "\n")) + return nil + } + + if !w.CheckoutMerge { + return runGit("clone", "--depth=1", "--branch", p.HeadBranch, "--single-branch", headCloneURL, cloneDir) + } + + if err := runGit("clone", "--depth", fmt.Sprint(w.CheckoutDepth), "--branch", p.BaseBranch, "--single-branch", baseCloneURL, cloneDir); err != nil { + return err + } + if err := runGit("remote", "add", "head", headCloneURL); err != nil { + return err + } + + fetchRef := fmt.Sprintf("+refs/heads/%s:", p.HeadBranch) + fetchRemote := "head" + if w.GithubAppEnabled { + fetchRef = fmt.Sprintf("pull/%d/head:", p.Num) + fetchRemote = "origin" + } + if err := runGit("fetch", "--depth", fmt.Sprint(w.CheckoutDepth), fetchRemote, fetchRef); err != nil { + return err } - return nil + + if w.GpgNoSigningEnabled { + if err := runGit("config", "--local", "commit.gpgsign", "false"); err != nil { + return err + } + } + if err := runGit("merge-base", p.BaseBranch, "FETCH_HEAD"); err != nil { + // git merge-base returning error means that we did not receive enough commits in shallow clone. + // Fall back to retrieving full repo history. + if err := runGit("fetch", "--unshallow"); err != nil { + return err + } + } + + // We use --no-ff because we always want there to be a merge commit. + // This way, our branch will look the same regardless if the merge + // could be fast forwarded. This is useful later when we run + // git rev-parse HEAD^2 to get the head commit because it will + // always succeed whereas without --no-ff, if the merge was fast + // forwarded then git rev-parse HEAD^2 would fail. + return runGit("merge", "-q", "--no-ff", "-m", "atlantis-merge", "FETCH_HEAD") } // GetWorkingDir returns the path to the workspace for this repo and pull. diff --git a/server/events/working_dir_test.go b/server/events/working_dir_test.go index cb98d2dea1..c708c25e7b 100644 --- a/server/events/working_dir_test.go +++ b/server/events/working_dir_test.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path/filepath" + "strings" "testing" "github.com/runatlantis/atlantis/server/events" @@ -84,6 +85,7 @@ func TestClone_CheckoutMergeNoneExisting(t *testing.T) { wd := &events.FileWorkspace{ DataDir: dataDir, CheckoutMerge: true, + CheckoutDepth: 50, TestingOverrideHeadCloneURL: overrideURL, TestingOverrideBaseCloneURL: overrideURL, GpgNoSigningEnabled: true, @@ -132,6 +134,7 @@ func TestClone_CheckoutMergeNoReclone(t *testing.T) { wd := &events.FileWorkspace{ DataDir: dataDir, CheckoutMerge: true, + CheckoutDepth: 50, TestingOverrideHeadCloneURL: overrideURL, TestingOverrideBaseCloneURL: overrideURL, GpgNoSigningEnabled: true, @@ -181,6 +184,7 @@ func TestClone_CheckoutMergeNoRecloneFastForward(t *testing.T) { wd := &events.FileWorkspace{ DataDir: dataDir, CheckoutMerge: true, + CheckoutDepth: 50, TestingOverrideHeadCloneURL: overrideURL, TestingOverrideBaseCloneURL: overrideURL, GpgNoSigningEnabled: true, @@ -235,6 +239,7 @@ func TestClone_CheckoutMergeConflict(t *testing.T) { wd := &events.FileWorkspace{ DataDir: dataDir, CheckoutMerge: true, + CheckoutDepth: 50, TestingOverrideHeadCloneURL: overrideURL, TestingOverrideBaseCloneURL: overrideURL, GpgNoSigningEnabled: true, @@ -254,6 +259,93 @@ func TestClone_CheckoutMergeConflict(t *testing.T) { ErrContains(t, "exit status 1", err) } +func TestClone_CheckoutMergeShallow(t *testing.T) { + // Initialize the git repo. + repoDir := initRepo(t) + + runCmd(t, repoDir, "git", "commit", "--allow-empty", "-m", "should not be cloned") + oldCommit := strings.TrimSpace(runCmd(t, repoDir, "git", "rev-parse", "HEAD")) + + runCmd(t, repoDir, "git", "commit", "--allow-empty", "-m", "merge-base") + baseCommit := strings.TrimSpace(runCmd(t, repoDir, "git", "rev-parse", "HEAD")) + + runCmd(t, repoDir, "git", "branch", "-f", "branch", "HEAD") + + // Add a commit to branch 'branch' that's not on master. + runCmd(t, repoDir, "git", "checkout", "branch") + runCmd(t, repoDir, "touch", "branch-file") + runCmd(t, repoDir, "git", "add", "branch-file") + runCmd(t, repoDir, "git", "commit", "-m", "branch-commit") + + // Now switch back to master and advance the master branch by another + // commit. + runCmd(t, repoDir, "git", "checkout", "main") + runCmd(t, repoDir, "touch", "main-file") + runCmd(t, repoDir, "git", "add", "main-file") + runCmd(t, repoDir, "git", "commit", "-m", "main-commit") + + overrideURL := fmt.Sprintf("file://%s", repoDir) + + // Test that we don't check out full repo if using CheckoutMerge strategy + t.Run("Shallow", func(t *testing.T) { + dataDir := t.TempDir() + + wd := &events.FileWorkspace{ + DataDir: dataDir, + CheckoutMerge: true, + // retrieve two commits in each branch: + // master: master-commit, merge-base + // branch: branch-commit, merge-base + CheckoutDepth: 2, + TestingOverrideHeadCloneURL: overrideURL, + TestingOverrideBaseCloneURL: overrideURL, + GpgNoSigningEnabled: true, + } + + cloneDir, hasDiverged, err := wd.Clone(logging.NewNoopLogger(t), models.Repo{}, models.PullRequest{ + BaseRepo: models.Repo{}, + HeadBranch: "branch", + BaseBranch: "main", + }, "default") + Ok(t, err) + Equals(t, false, hasDiverged) + + gotBaseCommitType := runCmd(t, cloneDir, "git", "cat-file", "-t", baseCommit) + Assert(t, gotBaseCommitType == "commit\n", "should have merge-base in shallow repo") + gotOldCommitType := runCmdErrCode(t, cloneDir, 128, "git", "cat-file", "-t", oldCommit) + Assert(t, strings.Contains(gotOldCommitType, "could not get object info"), "should not have old commit in shallow repo") + }) + + // Test that we will check out full repo if CheckoutDepth is too small + t.Run("FullClone", func(t *testing.T) { + dataDir := t.TempDir() + + wd := &events.FileWorkspace{ + DataDir: dataDir, + CheckoutMerge: true, + // 1 is not enough to retrieve merge-base, so full clone should be performed + CheckoutDepth: 1, + TestingOverrideHeadCloneURL: overrideURL, + TestingOverrideBaseCloneURL: overrideURL, + GpgNoSigningEnabled: true, + } + + cloneDir, hasDiverged, err := wd.Clone(logging.NewNoopLogger(t), models.Repo{}, models.PullRequest{ + BaseRepo: models.Repo{}, + HeadBranch: "branch", + BaseBranch: "main", + }, "default") + Ok(t, err) + Equals(t, false, hasDiverged) + + gotBaseCommitType := runCmd(t, cloneDir, "git", "cat-file", "-t", baseCommit) + Assert(t, gotBaseCommitType == "commit\n", "should have merge-base in full repo") + gotOldCommitType := runCmd(t, cloneDir, "git", "cat-file", "-t", oldCommit) + Assert(t, gotOldCommitType == "commit\n", "should have old commit in full repo") + }) + +} + // Test that if the repo is already cloned and is at the right commit, we // don't reclone. func TestClone_NoReclone(t *testing.T) { @@ -372,6 +464,7 @@ func TestClone_MasterHasDiverged(t *testing.T) { wd := &events.FileWorkspace{ DataDir: repoDir, CheckoutMerge: true, + CheckoutDepth: 50, GpgNoSigningEnabled: true, } _, hasDiverged, err := wd.Clone(logging.NewNoopLogger(t), models.Repo{CloneURL: repoDir}, models.PullRequest{ @@ -448,6 +541,7 @@ func TestHasDiverged_MasterHasDiverged(t *testing.T) { wd := &events.FileWorkspace{ DataDir: repoDir, CheckoutMerge: true, + CheckoutDepth: 50, GpgNoSigningEnabled: true, } hasDiverged := wd.HasDiverged(logging.NewNoopLogger(t), repoDir+"/repos/0/default") diff --git a/server/server.go b/server/server.go index e1d03c908c..54e40e0d42 100644 --- a/server/server.go +++ b/server/server.go @@ -457,6 +457,7 @@ func NewServer(userConfig UserConfig, config Config) (*Server, error) { var workingDir events.WorkingDir = &events.FileWorkspace{ DataDir: userConfig.DataDir, CheckoutMerge: userConfig.CheckoutStrategy == "merge", + CheckoutDepth: userConfig.CheckoutDepth, GithubAppEnabled: githubAppEnabled, } // provide fresh tokens before clone from the GitHub Apps integration, proxy workingDir diff --git a/server/user_config.go b/server/user_config.go index 40da825014..e75f2bd5a7 100644 --- a/server/user_config.go +++ b/server/user_config.go @@ -28,6 +28,7 @@ type UserConfig struct { BitbucketToken string `mapstructure:"bitbucket-token"` BitbucketUser string `mapstructure:"bitbucket-user"` BitbucketWebhookSecret string `mapstructure:"bitbucket-webhook-secret"` + CheckoutDepth int `mapstructure:"checkout-depth"` CheckoutStrategy string `mapstructure:"checkout-strategy"` DataDir string `mapstructure:"data-dir"` DisableApplyAll bool `mapstructure:"disable-apply-all"`