Skip to content

Commit

Permalink
perf(cli): avoid use of os.Stat for checking if file exists (#7173)
Browse files Browse the repository at this point in the history
We should not be using `os.Stat` to check if files exist. It is very
expensive and gazelle has already read the full list of entries for the
directory.

The gazelle patch exposes that list. This is similar to
bazel-contrib/bazel-gazelle#1737 which was done
at airtable for similar reasons.

On one major repo this reduces the time by almost 2s which is ~15% and
well worth the patch imo.

---

### Changes are visible to end-users: no

- Searched for relevant documentation and updated as needed: yes
- Breaking change (forces users to change their own code or config): no
- Suggested release notes appear below: yes

Reduce fs io of `aspect configure` during fs traversal.

### Test plan

- Covered by existing test cases

GitOrigin-RevId: b17d52b2723d3b7e4d5ee33fc294f491062ded5a
  • Loading branch information
jbedard committed Nov 7, 2024
1 parent 8d6610d commit adb9487
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 14 deletions.
1 change: 1 addition & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ http_archive(
"//:patches/bazelbuild_bazel-gazelle_aspect-cli.patch",
"//:patches/bazelbuild_bazel-gazelle_aspect-walk-subdir.patch",
"//:patches/bazelbuild_bazel-gazelle_aspect-gitignore.patch",
"//:patches/bazelbuild_bazel-gazelle_aspect-fs-direntry.patch",
],
sha256 = "872f1532567cdc53dc8e9f4681cd45021cd6787e2bde8a022bcec24a5867ce4c",
# Ensure this version always matches the go.mod version.
Expand Down
13 changes: 11 additions & 2 deletions gazelle/common/git/gitignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package git

import (
"bufio"
"fmt"
"io"
"io/fs"
"os"
"path"
"strings"
Expand Down Expand Up @@ -36,15 +38,22 @@ func collectIgnoreFiles(c *config.Config, rel string) {
}
c.Exts[lastConfiguredExt] = rel

ents := c.Exts[common.ASPECT_DIR_ENTRIES].(map[string]fs.DirEntry)
if _, hasIgnore := ents[".gitignore"]; !hasIgnore {
return
}

// Find and add .gitignore files from this directory
ignoreFilePath := path.Join(c.RepoRoot, rel, ".gitignore")
ignoreReader, ignoreErr := os.Open(ignoreFilePath)
if ignoreErr == nil {
BazelLog.Tracef("Add ignore file %s/.gitignore", rel)
defer ignoreReader.Close()
addIgnore(c, rel, ignoreReader)
} else if !os.IsNotExist(ignoreErr) {
BazelLog.Errorf("Failed to open %s/.gitignore: %v", rel, ignoreErr)
} else {
msg := fmt.Sprintf("Failed to open %s/.gitignore: %v", rel, ignoreErr)
BazelLog.Errorf(msg)
fmt.Printf("%s\n", msg)
}
}

Expand Down
1 change: 1 addition & 0 deletions gazelle/common/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type GazelleWalkFunc func(path string) error

// Must align with patched bazel-gazelle
const ASPECT_WALKSUBDIR = "__aspect:walksubdir"
const ASPECT_DIR_ENTRIES = "__aspect:direntries"

// Read any configuration regarding walk options.
func ReadWalkConfig(c *config.Config, rel string, f *rule.File) bool {
Expand Down
2 changes: 1 addition & 1 deletion gazelle/js/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ func (c *JsGazelleConfig) GetNpmPackageGenerationMode() NpmPackageMode {

// Set the pnpm-workspace.yaml file path.
func (c *JsGazelleConfig) SetPnpmLockfile(pnpmLockPath string) {
c.pnpmLockPath = pnpmLockPath
c.pnpmLockPath = path.Clean(pnpmLockPath)
}
func (c *JsGazelleConfig) PnpmLockfile() string {
return c.pnpmLockPath
Expand Down
29 changes: 21 additions & 8 deletions gazelle/js/configure.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package gazelle
import (
"flag"
"fmt"
"io/fs"
"log"
"os"
"path"
Expand Down Expand Up @@ -89,11 +90,10 @@ func (ts *typeScriptLang) Configure(c *config.Config, rel string, f *rule.File)

if f != nil {
ts.readDirectives(c, rel, f)

// Read configurations relative to the current BUILD file.
ts.readConfigurations(c, rel)
}

ts.readConfigurations(c, rel)

common.ReadWalkConfig(c, rel, f)

git.ReadGitConfig(c, rel, f)
Expand All @@ -102,16 +102,29 @@ func (ts *typeScriptLang) Configure(c *config.Config, rel string, f *rule.File)
func (ts *typeScriptLang) readConfigurations(c *config.Config, rel string) {
config := c.Exts[LanguageName].(*JsGazelleConfig)

ents := c.Exts[common.ASPECT_DIR_ENTRIES].(map[string]fs.DirEntry)

// pnpm
lockfilePath := path.Join(c.RepoRoot, rel, config.PnpmLockfile())
if _, err := os.Stat(lockfilePath); err == nil {
ts.addPnpmLockfile(config, c.RepoName, c.RepoRoot, path.Join(rel, config.PnpmLockfile()))
pnpmLockPath := config.PnpmLockfile()
pnpmLockDir := path.Dir(pnpmLockPath)
if pnpmLockDir == "." {
// Common case of the lockfile not being within a subdirectory.
// Can use the fs.DirEntry list of this directory to check if the lockfile exists.
if ents[pnpmLockPath] != nil {
ts.addPnpmLockfile(config, c.RepoName, c.RepoRoot, path.Join(rel, pnpmLockPath))
}
} else if rootDirEntry := strings.Split(pnpmLockDir, "/")[0]; ents[rootDirEntry] != nil {
// If the lockfile is in a subdirectory then check the fs.DirEntry of the subdirectory.
// If the subdirectory exists then perform the expensive os.Stat to check if the file exists.
lockfilePath := path.Join(c.RepoRoot, rel, pnpmLockPath)
if _, err := os.Stat(lockfilePath); err == nil {
ts.addPnpmLockfile(config, c.RepoName, c.RepoRoot, path.Join(rel, pnpmLockPath))
}
}

// tsconfig
// TODO: add support for alternate tsconfig names
configPath := path.Join(c.RepoRoot, rel, config.defaultTsconfigName)
if _, err := os.Stat(configPath); err == nil {
if _, hasTsconfig := ents[config.defaultTsconfigName]; hasTsconfig {
ts.tsconfig.AddTsConfigFile(c.RepoRoot, rel, config.defaultTsconfigName)
}
}
Expand Down
3 changes: 0 additions & 3 deletions gazelle/js/typescript/tsconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,6 @@ func parseTsConfigJSONFile(parsed map[string]*TsConfig, resolver TsConfigResolve

content, err := os.ReadFile(path.Join(root, tsconfig))
if err != nil {
if os.IsNotExist(err) {
err = nil
}
return nil, err
}

Expand Down
69 changes: 69 additions & 0 deletions patches/bazelbuild_bazel-gazelle_aspect-fs-direntry.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
diff --git a/config/config.go b/config/config.go
index 1841650..f34ad5e 100644
--- a/config/config.go
+++ b/config/config.go
@@ -122,6 +122,7 @@ type MappedKind struct {

const ASPECT_WALKSUBDIR = "__aspect:walksubdir"
const ASPECT_GITIGNORE = "__aspect:gitignore"
+const ASPECT_DIR_ENTRIES = "__aspect:direntries"

func New() *Config {
return &Config{
diff --git a/walk/walk.go b/walk/walk.go
index 1ba1b47..19e99ef 100644
--- a/walk/walk.go
+++ b/walk/walk.go
@@ -144,6 +144,14 @@ func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[stri
return ents[i].Name() < ents[j].Name()
})

+ // PATCH(fs.DirEntry map) ---
+ entsMap := make(map[string]fs.DirEntry, len(trie.children))
+ for _, node := range trie.children {
+ entsMap[(*node.entry).Name()] = *node.entry
+ }
+ c.Exts[config.ASPECT_DIR_ENTRIES] = entsMap
+ // END PATCH(fs.DirEntry map) ---
+
// Absolute path to the directory being visited
dir := filepath.Join(c.RepoRoot, rel)

@@ -158,7 +166,7 @@ func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[stri
haveError = true
}

- c = configure(cexts, knownDirectives, c, rel, f)
+ configure(cexts, knownDirectives, c, rel, f)
wc := getWalkConfig(c)

if wc.isExcluded(rel) {
@@ -203,7 +211,7 @@ func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[stri
if subRel := path.Join(rel, sub); updateRels.shouldVisit(subRel, shouldUpdate) {
// PATCH ---
// Merge the returned 'subFiles' if 'mergeFiles' is true
- subFiles, mergeFiles := visit(c, cexts, knownDirectives, updateRels, trie.children[sub], wf, subRel, shouldUpdate)
+ subFiles, mergeFiles := visit(c.Clone(), cexts, knownDirectives, updateRels, trie.children[sub], wf, subRel, shouldUpdate)
if mergeFiles {
for _, f := range subFiles {
regularFiles = append(regularFiles, path.Join(sub, f))
@@ -330,10 +338,7 @@ func loadBuildFile(c *config.Config, pkg, dir string, ents []fs.DirEntry) (*rule
return rule.LoadFile(path, pkg)
}

-func configure(cexts []config.Configurer, knownDirectives map[string]bool, c *config.Config, rel string, f *rule.File) *config.Config {
- if rel != "" {
- c = c.Clone()
- }
+func configure(cexts []config.Configurer, knownDirectives map[string]bool, c *config.Config, rel string, f *rule.File) {
if f != nil {
for _, d := range f.Directives {
if !knownDirectives[d.Key] {
@@ -349,7 +354,6 @@ func configure(cexts []config.Configurer, knownDirectives map[string]bool, c *co
for _, cext := range cexts {
cext.Configure(c, rel, f)
}
- return c
}

func findGenFiles(wc *walkConfig, f *rule.File) []string {

0 comments on commit adb9487

Please sign in to comment.