Skip to content

Commit

Permalink
perf(cli): pre-compute and cache tree-sitter query metadata (#7171)
Browse files Browse the repository at this point in the history
All of our tree-sitter queries are cached; this change additionally caches
query metadata so that it is not recomputed on every query execution and,
in some cases, on every query match.

---

### Changes are visible to end-users: no

- Searched for relevant documentation and updated as needed: yes
- Breaking change (forces users to change their own code or config): no
- Suggested release notes appear below: yes

Caching of `aspect configure` tree-sitter query metadata across query
executions.

### Test plan

- Covered by existing test cases

GitOrigin-RevId: 85d25355eded802cdb408d0c24364b40d1062b08
  • Loading branch information
jbedard committed Nov 7, 2024
1 parent e34503f commit 8d6610d
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 14 deletions.
1 change: 1 addition & 0 deletions gazelle/common/treesitter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"filters.go",
"parser.go",
"queries.go",
"query.go",
"traversal.go",
],
importpath = "aspect.build/cli/gazelle/common/treesitter",
Expand Down
2 changes: 1 addition & 1 deletion gazelle/common/treesitter/filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
// Predicates implemented here:
// - eq?
// - match?
func matchesAllPredicates(q *sitter.Query, m *sitter.QueryMatch, qc *sitter.QueryCursor, input []byte) bool {
func matchesAllPredicates(q *sitterQuery, m *sitter.QueryMatch, qc *sitter.QueryCursor, input []byte) bool {
qm := &sitter.QueryMatch{
ID: m.ID,
PatternIndex: m.PatternIndex,
Expand Down
26 changes: 13 additions & 13 deletions gazelle/common/treesitter/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ import (
var ErrorsQuery = `(ERROR) @error`

// A cache of parsed queries per language
var queryCache = make(map[LanguageGrammar]map[string]*sitter.Query)
var queryCache = make(map[LanguageGrammar]map[string]*sitterQuery)
var queryMutex sync.Mutex

func parseQuery(lang LanguageGrammar, queryStr string) *sitter.Query {
func parseQuery(lang LanguageGrammar, queryStr string) *sitterQuery {
queryMutex.Lock()
defer queryMutex.Unlock()

if queryCache[lang] == nil {
queryCache[lang] = make(map[string]*sitter.Query)
queryCache[lang] = make(map[string]*sitterQuery)
}
if queryCache[lang][queryStr] == nil {
queryCache[lang][queryStr] = mustNewQuery(lang, []byte(queryStr))
queryCache[lang][queryStr] = mustNewQuery(lang, queryStr)
}

return queryCache[lang][queryStr]
Expand All @@ -39,7 +39,7 @@ func (tree TreeAst) QueryStrings(query, returnVar string) []string {

// Execute the query.
qc := sitter.NewQueryCursor()
qc.Exec(sitterQuery, rootNode)
qc.Exec(sitterQuery.q, rootNode)

// Collect string from the query results.
for {
Expand Down Expand Up @@ -82,7 +82,7 @@ func (tree TreeAst) Query(query string) <-chan ASTQueryResult {
// Execute the query.
go func() {
qc := sitter.NewQueryCursor()
qc.Exec(q, rootNode)
qc.Exec(q.q, rootNode)

for {
m, ok := qc.NextMatch()
Expand All @@ -104,7 +104,7 @@ func (tree TreeAst) Query(query string) <-chan ASTQueryResult {
return out
}

func (tree TreeAst) mapQueryMatchCaptures(m *sitter.QueryMatch, q *sitter.Query) map[string]string {
func (tree TreeAst) mapQueryMatchCaptures(m *sitter.QueryMatch, q *sitterQuery) map[string]string {
captures := make(map[string]string, len(m.Captures))
for _, c := range m.Captures {
name := q.CaptureNameForId(c.Index)
Expand All @@ -120,11 +120,11 @@ func (tree TreeAst) mapQueryMatchCaptures(m *sitter.QueryMatch, q *sitter.Query)

// Find and read the `from` QueryCapture from a QueryMatch.
// Filter matches based on captures value using "equals-{name}" vars.
func fetchQueryMatch(query *sitter.Query, name string, m *sitter.QueryMatch, sourceCode []byte) *sitter.QueryCapture {
func fetchQueryMatch(query *sitterQuery, name string, m *sitter.QueryMatch, sourceCode []byte) *sitter.QueryCapture {
var result *sitter.QueryCapture

for ci, c := range m.Captures {
cn := query.CaptureNameForId(uint32(ci))
for _, c := range m.Captures {
cn := query.CaptureNameForId(c.Index)

// Filters where a capture must equal a specific value.
if strings.HasPrefix(cn, "equals-") {
Expand All @@ -145,8 +145,8 @@ func fetchQueryMatch(query *sitter.Query, name string, m *sitter.QueryMatch, sou
return result
}

func mustNewQuery(lang LanguageGrammar, query []byte) *sitter.Query {
treeQ, err := sitter.NewQuery(query, toSitterLanguage(lang))
func mustNewTreeQuery(lang LanguageGrammar, query string) *sitter.Query {
treeQ, err := sitter.NewQuery([]byte(query), toSitterLanguage(lang))
if err != nil {
BazelLog.Fatalf("Failed to create query for %q: %v", query, err)
}
Expand All @@ -166,7 +166,7 @@ func (tree TreeAst) QueryErrors() []error {

// Execute the import query
qc := sitter.NewQueryCursor()
qc.Exec(query, node)
qc.Exec(query.q, node)

// Collect import statements from the query results
for {
Expand Down
53 changes: 53 additions & 0 deletions gazelle/common/treesitter/query.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package treesitter

import sitter "github.com/smacker/go-tree-sitter"

// sitterQuery is a basic wrapper around sitter.Query that caches the results
// of tree-sitter cgo calls. The cached slices are filled once by mustNewQuery
// and served by the accessor methods on this type.
type sitterQuery struct {
// q is the underlying tree-sitter query; it is what gets handed to
// sitter.QueryCursor.Exec when the query is run.
q *sitter.Query

// Pre-computed and cached query data.
// stringValues[i] holds q.StringValueForId(i).
stringValues []string
// captureNames[i] holds q.CaptureNameForId(i).
captureNames []string
// predicatePatterns[i] holds q.PredicatesForPattern(i).
predicatePatterns [][][]sitter.QueryPredicateStep
}

// mustNewQuery compiles query for lang via mustNewTreeQuery and wraps the
// result in a sitterQuery. The capture names, string values and per-pattern
// predicates are read from tree-sitter once here, so the sitterQuery accessors
// can answer later lookups with plain slice indexing.
//
// Compilation failures are handled by mustNewTreeQuery, not here.
func mustNewQuery(lang LanguageGrammar, query string) *sitterQuery {
	q := mustNewTreeQuery(lang, query)

	// Each count is loop-invariant and goes through the tree-sitter binding,
	// so read it once instead of re-evaluating it in every loop condition.
	captureCount := q.CaptureCount()
	captureNames := make([]string, captureCount)
	for i := uint32(0); i < captureCount; i++ {
		captureNames[i] = q.CaptureNameForId(i)
	}

	stringCount := q.StringCount()
	stringValues := make([]string, stringCount)
	for i := uint32(0); i < stringCount; i++ {
		stringValues[i] = q.StringValueForId(i)
	}

	patternCount := q.PatternCount()
	predicatePatterns := make([][][]sitter.QueryPredicateStep, patternCount)
	for i := uint32(0); i < patternCount; i++ {
		predicatePatterns[i] = q.PredicatesForPattern(i)
	}

	return &sitterQuery{
		q:                 q,
		stringValues:      stringValues,
		captureNames:      captureNames,
		predicatePatterns: predicatePatterns,
	}
}

// Cached query data accessors mirroring the tree-sitter Query signatures.

// StringValueForId returns the cached string value stored at index id.
// It mirrors sitter.Query.StringValueForId but reads from the slice filled in
// mustNewQuery. An id outside that slice causes an index-out-of-range panic.
func (q *sitterQuery) StringValueForId(id uint32) string {
	values := q.stringValues
	return values[id]
}

// CaptureNameForId returns the cached capture name stored at index id.
// It mirrors sitter.Query.CaptureNameForId but reads from the slice filled in
// mustNewQuery. An id outside that slice causes an index-out-of-range panic.
func (q *sitterQuery) CaptureNameForId(id uint32) string {
	names := q.captureNames
	return names[id]
}

// PredicatesForPattern returns the cached predicate steps for the pattern at
// patternIndex. It mirrors sitter.Query.PredicatesForPattern but reads from
// the slice filled in mustNewQuery. The returned slice is the cached one, not
// a copy. A patternIndex outside that slice causes an index-out-of-range panic.
func (q *sitterQuery) PredicatesForPattern(patternIndex uint32) [][]sitter.QueryPredicateStep {
	steps := q.predicatePatterns[patternIndex]
	return steps
}

0 comments on commit 8d6610d

Please sign in to comment.