diff --git a/pkg/filter/filter.go b/pkg/filter/filter.go index f8bddd77c..d4b9ae743 100644 --- a/pkg/filter/filter.go +++ b/pkg/filter/filter.go @@ -14,12 +14,12 @@ package filter import ( - "fmt" "regexp" "strings" "sync" "github.com/pingcap/errors" + tfilter "github.com/pingcap/tidb-tools/pkg/table-filter" selector "github.com/pingcap/tidb-tools/pkg/table-rule-selector" ) @@ -33,18 +33,7 @@ const ( ) // Table represents a table. -type Table struct { - Schema string `toml:"db-name" json:"db-name" yaml:"db-name"` - Name string `toml:"tbl-name" json:"tbl-name" yaml:"tbl-name"` -} - -// Clone clones a new filter.Table -func (t *Table) Clone() *Table { - return &Table{ - Schema: t.Schema, - Name: t.Name, - } -} +type Table = tfilter.Table // String implements the fmt.Stringer interface. func (t *Table) String() string { @@ -74,35 +63,7 @@ func (c *cache) set(key string, action ActionType) { } // Rules contains Filter rules. -type Rules struct { - DoTables []*Table `json:"do-tables" toml:"do-tables" yaml:"do-tables"` - DoDBs []string `json:"do-dbs" toml:"do-dbs" yaml:"do-dbs"` - - IgnoreTables []*Table `json:"ignore-tables" toml:"ignore-tables" yaml:"ignore-tables"` - IgnoreDBs []string `json:"ignore-dbs" toml:"ignore-dbs" yaml:"ignore-dbs"` -} - -// ToLower convert all entries to lowercase -func (r *Rules) ToLower() { - if r == nil { - return - } - - for _, table := range r.DoTables { - table.Name = strings.ToLower(table.Name) - table.Schema = strings.ToLower(table.Schema) - } - for _, table := range r.IgnoreTables { - table.Name = strings.ToLower(table.Name) - table.Schema = strings.ToLower(table.Schema) - } - for i, db := range r.IgnoreDBs { - r.IgnoreDBs[i] = strings.ToLower(db) - } - for i, db := range r.DoDBs { - r.DoDBs[i] = strings.ToLower(db) - } -} +type Rules = tfilter.MySQLReplicationRules // Filter implements whitelist and blacklist filters. 
type Filter struct { diff --git a/pkg/table-filter/README.md b/pkg/table-filter/README.md new file mode 100644 index 000000000..b66f31b0f --- /dev/null +++ b/pkg/table-filter/README.md @@ -0,0 +1,225 @@ +# Table Filter + +A table filter is an interface which determines if a table or schema should be +accepted for some process or not given its name. + +This package defines the format allowing users to specify the filter criteria +via command line or config files. This package is used by all tools in the TiDB +ecosystem. + +## Examples + +```go +package main + +import ( + "fmt" + + "github.com/pingcap/tidb-tools/pkg/table-filter" + "github.com/spf13/pflag" +) + +func main() { + args := pflag.StringArrayP("filter", "f", []string{"*.*"}, "table filter") + pflag.Parse() + + f, err := filter.Parse(*args) + if err != nil { + panic(err) + } + f = filter.CaseInsensitive(f) + + tables := []filter.Table{ + {Schema: "employees", Name: "employees"}, + {Schema: "employees", Name: "departments"}, + {Schema: "employees", Name: "dept_manager"}, + {Schema: "employees", Name: "dept_emp"}, + {Schema: "employees", Name: "titles"}, + {Schema: "employees", Name: "salaries"}, + {Schema: "AdventureWorks.Person", Name: "Person"}, + {Schema: "AdventureWorks.Person", Name: "Password"}, + {Schema: "AdventureWorks.Sales", Name: "SalesOrderDetail"}, + {Schema: "AdventureWorks.Sales", Name: "SalesOrderHeader"}, + {Schema: "AdventureWorks.Production", Name: "WorkOrder"}, + {Schema: "AdventureWorks.Production", Name: "WorkOrderRouting"}, + {Schema: "AdventureWorks.Production", Name: "ProductPhoto"}, + {Schema: "AdventureWorks.Production", Name: "TransactionHistory"}, + {Schema: "AdventureWorks.Production", Name: "TransactionHistoryArchive"}, + } + + for _, table := range tables { + fmt.Printf("%5v: %v\n", f.MatchTable(table.Schema, table.Name), table) + } +} +``` + +Try to run with `./main -f 'employee.*' -f '*.WorkOrder'` and see the result. 
+ +## Syntax + +### Whitelist + +The input to the `filter.Parse()` function is a list of table filter rules. +Each rule specifies what the fully-qualified name of the table to be accepted. + +``` +db1.tbl1 +db2.tbl2 +db3.tbl3 +``` + +A plain name must only consist of valid [identifier characters] +`[0-9a-zA-Z$_\U00000080-\U0010ffff]+`. All other ASCII characters are reserved. +Some punctuations have special meanings, described below. + +### Wildcards + +Each part of the name can be a wildcard symbol as in [fnmatch(3)]: +* `*` — matches zero or more characters +* `?` — matches one character +* `[a-z]` — matches one character between “a” and “z” inclusive +* `[!a-z]` — matches one character except “a” to “z”. + +``` +db[0-9].tbl[0-9][0-9] +data.* +*.backup_* +``` + +“Character” here means a Unicode code point, so e.g. +* U+00E9 (é) is 1 character. +* U+0065 U+0301 (é) are 2 characters. +* U+1F926 U+1F3FF U+200D U+2640 U+FE0F (🤦🏿‍♀️) are 5 characters. + +### File import + +Include an `@` at the beginning of the string to specify a file name, which +`filter.Parse()` reads every line as filter rules. + +For example, if a file `config/filter.txt` has content: + +``` +employees.* +*.WorkOrder +``` + +the following two invocations would be equivalent: + +```sh +./main -f '@config/filter.txt' +./main -f 'employees.*' -f '*.WorkOrder' +``` + +A filter file cannot further import another file. + +### Comments and blank lines + +Leading and trailing white-spaces of every line are trimmed. + +Blank lines (empty strings) are ignored. + +A leading `#` marks a comment and is ignored. +`#` not at start of line may be considered syntax error. + +### Blacklist + +An `!` at the beginning of the line means the pattern after it is used to +exclude tables from being processed. This effectively turns the filter into a +blacklist. 
+ +```ini +*.* +#^ note: must add the *.* to include all tables first +!*.Password +!employees.salaries +``` + +### Escape character + +Precede any special character by a `\` to turn it into an identifier character. + +``` +AdventureWorks\.*.* +``` + +For simplicity and future compatibility, the following sequences are prohibited: +* `\` at the end of the line after trimming whitespaces (use “`[ ]`” to match a literal whitespace at the end). +* `\` followed by any ASCII alphanumeric character (`[0-9a-zA-Z]`). In particular, C-like escape sequences like `\0`, `\r`, `\n` and `\t` currently are meaningless. + +### Quoted identifier + +Besides `\`, special characters can also be escaped by quoting using `"` or `` ` ``. + +``` +"AdventureWorks.Person".Person +`AdventureWorks.Person`.Password +``` + +Quoted identifier cannot span multiple lines. + +It is invalid to partially quote an identifier. + +``` +"this is "invalid*.* +``` + +### Regular expression + +Use `/` to delimit regular expressions: + +``` +/^db\d{2,}$/./^tbl\d{2,}$/ +``` + +These regular expressions use the [Go dialect]. The pattern is matched if the +identifier contains a substring matching the regular expression. For instance, +`/b/` matches `db01`. + +(Note: every `/` in the regex must be escaped as `\/`, including inside `[`…`]`. +You cannot place an unescaped `/` between `\Q`…`\E`.) + +[identifier characters]: https://dev.mysql.com/doc/refman/8.0/en/identifiers.html +[fnmatch(3)]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 +[Go dialect]: https://pkg.go.dev/regexp/syntax?tab=doc + +## Algorithm + +### Default behavior + +When a table name matches none of the rules in the filter list, the default +behavior is to ignore such unmatched tables. + +To build a blacklist, an explicit `*.*` must be used as the first rule, +otherwise all tables will be excluded. 
+ +```sh +# every table will be filtered out +./main -f '!*.Password' + +# only the "Password" table is filtered out, the rest are included. +./main -f '*.*' -f '!*.Password' +``` + +### Precedence + +In a filter list, if a table name matches multiple patterns, the last match +decides the outcome. For instance, given + +```ini +# rule 1 +employees.* +# rule 2 +!*.dep* +# rule 3 +*.departments +``` + +We get: + +| Table name | Rule 1 | Rule 2 | Rule 3 | Outcome | +|-----------------------|--------|--------|--------|------------------| +| irrelevant.table | | | | Default (reject) | +| employees.employees | ✓ | | | Rule 1 (accept) | +| employees.dept_emp | ✓ | ✓ | | Rule 2 (reject) | +| employees.departments | ✓ | ✓ | ✓ | Rule 3 (accept) | +| else.departments | | ✓ | ✓ | Rule 3 (accept) | diff --git a/pkg/table-filter/compat.go b/pkg/table-filter/compat.go new file mode 100644 index 000000000..c5b2adf11 --- /dev/null +++ b/pkg/table-filter/compat.go @@ -0,0 +1,279 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "errors" + "fmt" + "regexp" + "strings" +) + +// Table represents a qualified table name. +type Table struct { + // Schema is the name of the schema (database) containing this table. + Schema string `toml:"db-name" json:"db-name" yaml:"db-name"` + // Name is the unqualified table name. 
+ Name string `toml:"tbl-name" json:"tbl-name" yaml:"tbl-name"` +} + +func (t *Table) lessThan(u *Table) bool { + return t.Schema < u.Schema || t.Schema == u.Schema && t.Name < u.Name +} + +// String implements the fmt.Stringer interface. +func (t *Table) String() string { + if len(t.Name) > 0 { + return fmt.Sprintf("`%s`.`%s`", t.Schema, t.Name) + } + return fmt.Sprintf("`%s`", t.Schema) +} + +// Clone clones a new filter.Table +func (t *Table) Clone() *Table { + return &Table{ + Schema: t.Schema, + Name: t.Name, + } +} + +// MySQLReplicationRules is a set of rules based on MySQL's replication filter. +type MySQLReplicationRules struct { + // DoTables is a whitelist of tables. + DoTables []*Table `json:"do-tables" toml:"do-tables" yaml:"do-tables"` + // DoDBs is the whitelist of schemas. + DoDBs []string `json:"do-dbs" toml:"do-dbs" yaml:"do-dbs"` + + // IgnoreTables is a blacklist of tables. + IgnoreTables []*Table `json:"ignore-tables" toml:"ignore-tables" yaml:"ignore-tables"` + // IgnoreDBs is a blacklist of schemas. + IgnoreDBs []string `json:"ignore-dbs" toml:"ignore-dbs" yaml:"ignore-dbs"` +} + +// ToLower convert all entries to lowercase +// Deprecated: use `filter.CaseInsensitive` instead. 
+func (r *MySQLReplicationRules) ToLower() { + if r == nil { + return + } + + for _, table := range r.DoTables { + table.Name = strings.ToLower(table.Name) + table.Schema = strings.ToLower(table.Schema) + } + for _, table := range r.IgnoreTables { + table.Name = strings.ToLower(table.Name) + table.Schema = strings.ToLower(table.Schema) + } + for i, db := range r.IgnoreDBs { + r.IgnoreDBs[i] = strings.ToLower(db) + } + for i, db := range r.DoDBs { + r.DoDBs[i] = strings.ToLower(db) + } +} + +type schemasFilter struct { + schemas map[string]struct{} +} + +func (f schemasFilter) MatchTable(schema string, table string) bool { + return f.MatchSchema(schema) +} + +func (f schemasFilter) MatchSchema(schema string) bool { + _, ok := f.schemas[schema] + return ok +} + +func (f schemasFilter) toLower() Filter { + loweredSchemas := make(map[string]struct{}, len(f.schemas)) + for schema := range f.schemas { + loweredSchemas[strings.ToLower(schema)] = struct{}{} + } + return schemasFilter{schemas: loweredSchemas} +} + +// NewSchemasFilter creates a filter which only accepts a list of schemas. 
+func NewSchemasFilter(schemas ...string) schemasFilter { + schemaMap := make(map[string]struct{}, len(schemas)) + for _, schema := range schemas { + schemaMap[schema] = struct{}{} + } + return schemasFilter{schemas: schemaMap} +} + +type tablesFilter struct { + schemas map[string]map[string]struct{} +} + +func (f tablesFilter) MatchTable(schema string, table string) bool { + t, ok := f.schemas[schema] + if !ok { + return false + } + _, ok = t[table] + return ok +} + +func (f tablesFilter) MatchSchema(schema string) bool { + _, ok := f.schemas[schema] + return ok +} + +func (f tablesFilter) toLower() Filter { + loweredSchemas := make(map[string]map[string]struct{}, len(f.schemas)) + for schema, tables := range f.schemas { + loweredSchema := strings.ToLower(schema) + loweredTables, ok := loweredSchemas[loweredSchema] + if !ok { + loweredTables = make(map[string]struct{}, len(tables)) + } + for table := range tables { + loweredTables[strings.ToLower(table)] = struct{}{} + } + loweredSchemas[loweredSchema] = loweredTables + } + return tablesFilter{schemas: loweredSchemas} +} + +// NewTablesFilter creates a filter which only accepts a list of tables. +func NewTablesFilter(tables ...Table) Filter { + schemas := make(map[string]map[string]struct{}) + for _, table := range tables { + tbls, ok := schemas[table.Schema] + if !ok { + tbls = make(map[string]struct{}) + } + tbls[table.Name] = struct{}{} + schemas[table.Schema] = tbls + } + return tablesFilter{schemas: schemas} +} + +// bothFilter is a filter which passes if both filters in the field passes. 
+type bothFilter struct { + a Filter + b Filter +} + +func (f *bothFilter) MatchTable(schema string, table string) bool { + return f.a.MatchTable(schema, table) && f.b.MatchTable(schema, table) +} + +func (f *bothFilter) MatchSchema(schema string) bool { + return f.a.MatchSchema(schema) && f.b.MatchSchema(schema) +} + +func (f *bothFilter) toLower() Filter { + return &bothFilter{ + a: f.a.toLower(), + b: f.b.toLower(), + } +} + +var legacyWildcardReplacer = strings.NewReplacer( + `\*`, ".*", + `\?`, ".", + `\[!`, "[^", + `\[`, "[", + `\]`, "]", +) + +func matcherFromLegacyPattern(pattern string) (matcher, error) { + if len(pattern) == 0 { + return nil, errors.New("pattern cannot be empty") + } + if pattern[0] == '~' { + // this is a regexp pattern. + return newRegexpMatcher(pattern[1:]) + } + + if !strings.ContainsAny(pattern, "?*[") { + // this is a literal string. + return stringMatcher(pattern), nil + } + + // this is a wildcard. + pattern = "(?s)^" + legacyWildcardReplacer.Replace(regexp.QuoteMeta(pattern)) + "$" + return newRegexpMatcher(pattern) +} + +// ParseMySQLReplicationRules constructs up to 2 filters from the MySQLReplicationRules. +// Tables have to pass *both* filters to be processed. 
+func ParseMySQLReplicationRules(rules *MySQLReplicationRules) (Filter, error) { + schemas := rules.DoDBs + positive := true + rulesLen := len(schemas) + if rulesLen == 0 { + schemas = rules.IgnoreDBs + positive = false + rulesLen = len(schemas) + 1 + } + + schemaRules := make([]rule, 0, rulesLen) + for _, schema := range schemas { + m, err := matcherFromLegacyPattern(schema) + if err != nil { + return nil, err + } + schemaRules = append(schemaRules, rule{ + schema: m, + table: trueMatcher{}, + positive: positive, + }) + } + if !positive { + schemaRules = append(schemaRules, rule{ + schema: trueMatcher{}, + table: trueMatcher{}, + positive: true, + }) + } + + tables := rules.DoTables + positive = true + rulesLen = len(tables) + if len(tables) == 0 { + tables = rules.IgnoreTables + positive = false + rulesLen = len(tables) + 1 + } + + tableRules := make([]rule, 0, rulesLen) + for _, table := range tables { + sm, err := matcherFromLegacyPattern(table.Schema) + if err != nil { + return nil, err + } + tm, err := matcherFromLegacyPattern(table.Name) + if err != nil { + return nil, err + } + tableRules = append(tableRules, rule{ + schema: sm, + table: tm, + positive: positive, + }) + } + if !positive { + tableRules = append(tableRules, rule{ + schema: trueMatcher{}, + table: trueMatcher{}, + positive: true, + }) + } + + return &bothFilter{a: filter(schemaRules), b: filter(tableRules)}, nil +} diff --git a/pkg/table-filter/compat_test.go b/pkg/table-filter/compat_test.go new file mode 100644 index 000000000..5d0ef4798 --- /dev/null +++ b/pkg/table-filter/compat_test.go @@ -0,0 +1,243 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter_test + +import ( + . "github.com/pingcap/check" + + filter "github.com/pingcap/tidb-tools/pkg/table-filter" +) + +type compatSuite struct{} + +var _ = Suite(&compatSuite{}) + +func (s *compatSuite) TestSchemaFilter(c *C) { + sf0 := filter.CaseInsensitive(filter.NewSchemasFilter("foo?", "bar")) + c.Assert(sf0.MatchTable("foo?", "a"), IsTrue) + c.Assert(sf0.MatchTable("food", "a"), IsFalse) + c.Assert(sf0.MatchTable("bar", "b"), IsTrue) + c.Assert(sf0.MatchTable("BAR", "b"), IsTrue) + + sf1 := filter.NewSchemasFilter(`\baz`) + c.Assert(sf1.MatchSchema("baz"), IsFalse) + c.Assert(sf1.MatchSchema("Baz"), IsFalse) + c.Assert(sf1.MatchSchema(`\baz`), IsTrue) + c.Assert(sf1.MatchSchema(`\Baz`), IsFalse) + + sf2 := filter.NewSchemasFilter() + c.Assert(sf2.MatchTable("aaa", "bbb"), IsFalse) +} + +func (s *compatSuite) TestTableFilter(c *C) { + tf0 := filter.CaseInsensitive(filter.NewTablesFilter( + filter.Table{Schema: "foo?", Name: "bar*"}, + filter.Table{Schema: "BAR?", Name: "FOO*"}, + )) + c.Assert(tf0.MatchTable("foo?", "bar*"), IsTrue) + c.Assert(tf0.MatchTable("bar?", "foo*"), IsTrue) + c.Assert(tf0.MatchTable("FOO?", "BAR*"), IsTrue) + c.Assert(tf0.MatchTable("foo?", "bar"), IsFalse) + c.Assert(tf0.MatchTable("BARD", "FOO*"), IsFalse) + + tf1 := filter.NewTablesFilter( + filter.Table{Schema: `\baz`, Name: `BAR`}, + ) + c.Assert(tf1.MatchSchema("baz"), IsFalse) + c.Assert(tf1.MatchSchema("Baz"), IsFalse) + c.Assert(tf1.MatchSchema(`\baz`), IsTrue) + c.Assert(tf1.MatchSchema(`\Baz`), IsFalse) + + tf2 := filter.NewTablesFilter() + c.Assert(tf2.MatchTable("aaa", "bbb"), IsFalse) +} + +func (s *compatSuite) 
TestLegacyFilter(c *C) { + cases := []struct { + rules filter.MySQLReplicationRules + accepted []filter.Table + rejected []filter.Table + }{ + { + rules: filter.MySQLReplicationRules{}, + accepted: []filter.Table{ + {Schema: "foo", Name: "bar"}, + }, + rejected: nil, + }, + { + rules: filter.MySQLReplicationRules{ + IgnoreDBs: []string{"foo"}, + DoDBs: []string{"foo"}, + }, + accepted: []filter.Table{ + {Schema: "foo", Name: "bar"}, + }, + rejected: []filter.Table{ + {Schema: "foo1", Name: "bar"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + IgnoreDBs: []string{"foo1"}, + }, + accepted: []filter.Table{ + {Schema: "foo", Name: "bar"}, + }, + rejected: []filter.Table{ + {Schema: "foo1", Name: "bar"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + DoTables: []*filter.Table{{Schema: "foo", Name: "bar1"}}, + }, + accepted: []filter.Table{ + {Schema: "foo", Name: "bar1"}, + }, + rejected: []filter.Table{ + {Schema: "foo", Name: "bar"}, + {Schema: "foo1", Name: "bar"}, + {Schema: "foo1", Name: "bar1"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + IgnoreTables: []*filter.Table{{Schema: "foo", Name: "bar"}}, + }, + accepted: []filter.Table{ + {Schema: "foo", Name: "bar1"}, + {Schema: "foo1", Name: "bar"}, + {Schema: "foo1", Name: "bar1"}, + }, + rejected: []filter.Table{ + {Schema: "foo", Name: "bar"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + DoDBs: []string{"~^foo"}, + IgnoreTables: []*filter.Table{{Schema: "~^foo", Name: `~^sbtest-\d`}}, + }, + accepted: []filter.Table{ + {Schema: "foo", Name: "sbtest"}, + {Schema: "foo", Name: `sbtest-\d`}, + }, + rejected: []filter.Table{ + {Schema: "fff", Name: "bar"}, + {Schema: "foo1", Name: "sbtest-1"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + IgnoreDBs: []string{"foo[bar]", "baz?", `special\`}, + }, + accepted: []filter.Table{ + {Schema: "foo[bar]", Name: "1"}, + {Schema: "food", Name: "2"}, + {Schema: "fo", Name: "3"}, + {Schema: `special\\`, Name: "4"}, + {Schema: "bazzz", 
Name: "9"}, + {Schema: `special\$`, Name: "10"}, + {Schema: `afooa`, Name: "11"}, + }, + rejected: []filter.Table{ + {Schema: "foor", Name: "5"}, + {Schema: "baz?", Name: "6"}, + {Schema: "baza", Name: "7"}, + {Schema: `special\`, Name: "8"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + DoDBs: []string{`!@#$%^&*\?`}, + }, + accepted: []filter.Table{ + {Schema: `!@#$%^&abcdef\g`, Name: "1"}, + }, + rejected: []filter.Table{ + {Schema: "abcdef", Name: "2"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + DoDBs: []string{"1[!abc]", "2[^abc]", `3[\d]`}, + }, + accepted: []filter.Table{ + {Schema: "1!", Name: "1"}, + {Schema: "1z", Name: "4"}, + {Schema: "2^", Name: "3"}, + {Schema: "2a", Name: "5"}, + {Schema: "3d", Name: "6"}, + {Schema: `3\`, Name: "8"}, + }, + rejected: []filter.Table{ + {Schema: "1a", Name: "2"}, + {Schema: "30", Name: "7"}, + }, + }, + { + rules: filter.MySQLReplicationRules{ + DoDBs: []string{"foo", "bar"}, + DoTables: []*filter.Table{{Schema: "*", Name: "a"}, {Schema: "*", Name: "b"}}, + }, + accepted: []filter.Table{ + {Schema: "foo", Name: "a"}, + {Schema: "foo", Name: "b"}, + {Schema: "bar", Name: "a"}, + {Schema: "bar", Name: "b"}, + }, + rejected: []filter.Table{ + {Schema: "foo", Name: "c"}, + {Schema: "baz", Name: "a"}, + }, + }, + } + + for _, tc := range cases { + c.Log("test case =", tc.rules) + f, err := filter.ParseMySQLReplicationRules(&tc.rules) + f = filter.CaseInsensitive(f) + c.Assert(err, IsNil) + for _, tbl := range tc.accepted { + c.Assert(f.MatchTable(tbl.Schema, tbl.Name), IsTrue, Commentf("accept case %v", tbl)) + } + for _, tbl := range tc.rejected { + c.Assert(f.MatchTable(tbl.Schema, tbl.Name), IsFalse, Commentf("reject case %v", tbl)) + } + } +} + +func (s *filterSuite) TestParseLegacyFailures(c *C) { + cases := []struct { + arg string + msg string + }{ + { + arg: "[a", + msg: `error parsing regexp: missing closing \]:.*`, + }, + { + arg: "", + msg: "pattern cannot be empty", + }, + } + + for _, tc := 
range cases { + _, err := filter.ParseMySQLReplicationRules(&filter.MySQLReplicationRules{ + DoDBs: []string{tc.arg}, + }) + c.Assert(err, ErrorMatches, tc.msg, Commentf("test case = %s", tc.arg)) + } +} diff --git a/pkg/table-filter/filter.go b/pkg/table-filter/filter.go new file mode 100644 index 000000000..abe64446d --- /dev/null +++ b/pkg/table-filter/filter.go @@ -0,0 +1,109 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "strings" +) + +// Filter is a structure to check if a table should be included for processing. +type Filter interface { + // MatchTable checks if a table can be processed after applying the filter. + MatchTable(schema string, table string) bool + // MatchSchema checks if a schema can be processed after applying the filter. + MatchSchema(schema string) bool + // toLower changes the filter to compare with case-insensitive strings. + toLower() Filter +} + +// filter is a concrete implementation of Filter. +type filter []rule + +// Parse a filter from a list of serialized filter rules. The parsed filter is +// case-sensitive by default. +func Parse(args []string) (Filter, error) { + p := parser{ + rules: make([]rule, 0, len(args)), + fileName: "", + lineNum: 1, + } + + for _, arg := range args { + if err := p.parse(arg, true); err != nil { + return nil, err + } + } + + // https://github.com/golang/go/wiki/SliceTricks#reversing. 
+ rules := p.rules + for i := len(rules)/2 - 1; i >= 0; i-- { + opp := len(rules) - 1 - i + rules[i], rules[opp] = rules[opp], rules[i] + } + return filter(rules), nil +} + +// CaseInsensitive returns a new filter which is the case-insensitive version of +// the input filter. +func CaseInsensitive(f Filter) Filter { + return loweredFilter{wrapped: f.toLower()} +} + +// MatchTable checks if a table can be processed after applying the filter `f`. +func (f filter) MatchTable(schema string, table string) bool { + for _, rule := range f { + if rule.schema.matchString(schema) && rule.table.matchString(table) { + return rule.positive + } + } + return false +} + +// MatchSchema checks if a schema can be processed after applying the filter `f`. +func (f filter) MatchSchema(schema string) bool { + for _, rule := range f { + if rule.schema.matchString(schema) && (rule.positive || rule.table.matchAllStrings()) { + return rule.positive + } + } + return false +} + +func (f filter) toLower() Filter { + rules := make([]rule, 0, len(f)) + for _, r := range f { + rules = append(rules, rule{ + schema: r.schema.toLower(), + table: r.table.toLower(), + positive: r.positive, + }) + } + return filter(rules) +} + +type loweredFilter struct { + wrapped Filter +} + +func (f loweredFilter) MatchTable(schema string, table string) bool { + return f.wrapped.MatchTable(strings.ToLower(schema), strings.ToLower(table)) +} + +func (f loweredFilter) MatchSchema(schema string) bool { + return f.wrapped.MatchSchema(strings.ToLower(schema)) +} + +func (f loweredFilter) toLower() Filter { + return f +} diff --git a/pkg/table-filter/filter_test.go b/pkg/table-filter/filter_test.go new file mode 100644 index 000000000..3230056ec --- /dev/null +++ b/pkg/table-filter/filter_test.go @@ -0,0 +1,448 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter_test + +import ( + "io/ioutil" + "path/filepath" + "testing" + + . "github.com/pingcap/check" + + filter "github.com/pingcap/tidb-tools/pkg/table-filter" +) + +func Test(t *testing.T) { + TestingT(t) +} + +type filterSuite struct{} + +var _ = Suite(&filterSuite{}) + +func (s *filterSuite) TestMatchTables(c *C) { + cases := []struct { + args []string + tables []filter.Table + acceptedCS []bool + acceptedCI []bool + }{ + { + args: nil, + tables: []filter.Table{ + {Schema: "foo", Name: "bar"}, + }, + acceptedCS: []bool{false}, + acceptedCI: []bool{false}, + }, + { + args: []string{"*.*"}, + tables: []filter.Table{ + {Schema: "foo", Name: "bar"}, + }, + acceptedCS: []bool{true}, + acceptedCI: []bool{true}, + }, + { + args: []string{"foo.*"}, + tables: []filter.Table{ + {Schema: "foo", Name: "bar"}, + {Schema: "foo1", Name: "bar"}, + {Schema: "foo2", Name: "bar"}, + }, + acceptedCS: []bool{true, false, false}, + acceptedCI: []bool{true, false, false}, + }, + { + args: []string{"*.*", "!foo1.*"}, + tables: []filter.Table{ + {Schema: "foo", Name: "bar"}, + {Schema: "foo1", Name: "bar"}, + {Schema: "foo2", Name: "bar"}, + }, + acceptedCS: []bool{true, false, true}, + acceptedCI: []bool{true, false, true}, + }, + { + args: []string{"foo.bar1"}, + tables: []filter.Table{ + {Schema: "foo", Name: "bar"}, + {Schema: "foo", Name: "bar1"}, + {Schema: "fff", Name: "bar1"}, + }, + acceptedCS: []bool{false, true, false}, + acceptedCI: []bool{false, true, false}, + }, + { + args: []string{"*.*", "!foo.bar"}, + tables: []filter.Table{ + {Schema: "foo", Name: "bar"}, + {Schema: "foo", Name: "bar1"}, + {Schema: "fff", Name: 
"bar1"}, + }, + acceptedCS: []bool{false, true, true}, + acceptedCI: []bool{false, true, true}, + }, + { + args: []string{"/^foo/.*", `!/^foo/./^sbtest-\d/`}, + tables: []filter.Table{ + {Schema: "foo", Name: "sbtest"}, + {Schema: "foo1", Name: "sbtest-1"}, + {Schema: "fff", Name: "bar"}, + }, + acceptedCS: []bool{true, false, false}, + acceptedCI: []bool{true, false, false}, + }, + { + args: []string{"*.*", "!foo[bar].*", "!bar?.*", `!special\\.*`}, + tables: []filter.Table{ + {Schema: "foor", Name: "a"}, + {Schema: "foo[bar]", Name: "b"}, + {Schema: "ba", Name: "c"}, + {Schema: "bar?", Name: "d"}, + {Schema: `special\`, Name: "e"}, + {Schema: `special\\`, Name: "f"}, + {Schema: "bazzz", Name: "g"}, + {Schema: `special\$`, Name: "h"}, + {Schema: `afooa`, Name: "i"}, + }, + acceptedCS: []bool{false, true, true, false, false, true, true, true, true}, + acceptedCI: []bool{false, true, true, false, false, true, true, true, true}, + }, + { + args: []string{"*.*", "!/^FOO/.*", "!*./FoO$/"}, + tables: []filter.Table{ + {Schema: "FOO1", Name: "a"}, + {Schema: "foo2", Name: "b"}, + {Schema: "BoO3", Name: "cFoO"}, + {Schema: "Foo4", Name: "dfoo"}, + {Schema: "5", Name: "5"}, + }, + acceptedCS: []bool{false, true, false, true, true}, + acceptedCI: []bool{false, false, false, false, true}, + }, + { + args: []string{"*.*", "!a?b?./f[0-9]/"}, + tables: []filter.Table{ + {Schema: "abbd", Name: "f1"}, + {Schema: "aaaa", Name: "f2"}, + {Schema: "5", Name: "5"}, + {Schema: "abbc", Name: "fa"}, + }, + acceptedCS: []bool{false, true, true, true}, + acceptedCI: []bool{false, true, true, true}, + }, + { + args: []string{"*.*", "!/t[0-8]/.a??"}, + tables: []filter.Table{ + {Schema: "t1", Name: "a01"}, + {Schema: "t9", Name: "a02"}, + {Schema: "5", Name: "5"}, + {Schema: "t8", Name: "a001"}, + }, + acceptedCS: []bool{false, true, true, true}, + acceptedCI: []bool{false, true, true, true}, + }, + { + args: []string{"*.*", "!a*.A*"}, + tables: []filter.Table{ + {Schema: "aB", Name: "Ab"}, 
+ {Schema: "AaB", Name: "aab"}, + {Schema: "acB", Name: "Afb"}, + }, + acceptedCS: []bool{false, true, false}, + acceptedCI: []bool{false, false, false}, + }, + { + args: []string{"BAR.*"}, + tables: []filter.Table{ + {Schema: "bar", Name: "a"}, + {Schema: "BAR", Name: "a"}, + }, + acceptedCS: []bool{false, true}, + acceptedCI: []bool{true, true}, + }, + { + args: []string{"# comment", "x.y", " \t"}, + tables: []filter.Table{ + {Schema: "x", Name: "y"}, + {Schema: "y", Name: "y"}, + }, + acceptedCS: []bool{true, false}, + acceptedCI: []bool{true, false}, + }, + { + args: []string{"p_123$.45", "中文.表名"}, + tables: []filter.Table{ + {Schema: "p_123", Name: "45"}, + {Schema: "p_123$", Name: "45"}, + {Schema: "英文", Name: "表名"}, + {Schema: "中文", Name: "表名"}, + }, + acceptedCS: []bool{false, true, false, true}, + acceptedCI: []bool{false, true, false, true}, + }, + { + args: []string{`\\\..*`}, + tables: []filter.Table{ + {Schema: `\.`, Name: "a"}, + {Schema: `\\\.`, Name: "b"}, + {Schema: `\a`, Name: "c"}, + }, + acceptedCS: []bool{true, false, false}, + acceptedCI: []bool{true, false, false}, + }, + { + args: []string{"[!a-z].[^a-z]"}, + tables: []filter.Table{ + {Schema: "!", Name: "z"}, + {Schema: "!", Name: "^"}, + {Schema: "!", Name: "9"}, + {Schema: "a", Name: "z"}, + {Schema: "a", Name: "^"}, + {Schema: "a", Name: "9"}, + {Schema: "1", Name: "z"}, + {Schema: "1", Name: "^"}, + {Schema: "1", Name: "9"}, + }, + acceptedCS: []bool{true, true, false, false, false, false, true, true, false}, + acceptedCI: []bool{true, true, false, false, false, false, true, true, false}, + }, + { + args: []string{"\"some \"\"quoted\"\"\".`identifiers?`"}, + tables: []filter.Table{ + {Schema: `some "quoted"`, Name: "identifiers?"}, + {Schema: `some "quoted"`, Name: "identifiers!"}, + {Schema: `some ""quoted""`, Name: "identifiers?"}, + {Schema: `SOME "QUOTED"`, Name: "IDENTIFIERS?"}, + {Schema: "some\t\"quoted\"", Name: "identifiers?"}, + }, + acceptedCS: []bool{true, false, false, 
false, false}, + acceptedCI: []bool{true, false, false, true, false}, + }, + { + args: []string{"db*.*", "!*.cfg*", "*.cfgsample"}, + tables: []filter.Table{ + {Schema: "irrelevant", Name: "table"}, + {Schema: "db1", Name: "tbl1"}, + {Schema: "db1", Name: "cfg1"}, + {Schema: "db1", Name: "cfgsample"}, + {Schema: "else", Name: "cfgsample"}, + }, + acceptedCS: []bool{false, true, false, true, true}, + acceptedCI: []bool{false, true, false, true, true}, + }, + } + + for _, tc := range cases { + c.Log("test case =", tc.args) + fcs, err := filter.Parse(tc.args) + c.Assert(err, IsNil) + fci := filter.CaseInsensitive(fcs) + for i, tbl := range tc.tables { + c.Assert(fcs.MatchTable(tbl.Schema, tbl.Name), Equals, tc.acceptedCS[i], Commentf("cs tbl %v", tbl)) + c.Assert(fci.MatchTable(tbl.Schema, tbl.Name), Equals, tc.acceptedCI[i], Commentf("ci tbl %v", tbl)) + } + } +} + +func (s *filterSuite) TestMatchSchemas(c *C) { + cases := []struct { + args []string + schemas []string + acceptedCS []bool + acceptedCI []bool + }{ + { + args: nil, + schemas: []string{"foo"}, + acceptedCS: []bool{false}, + acceptedCI: []bool{false}, + }, + { + args: []string{"*.*"}, + schemas: []string{"foo"}, + acceptedCS: []bool{true}, + acceptedCI: []bool{true}, + }, + { + args: []string{"foo.*"}, + schemas: []string{"foo", "foo1"}, + acceptedCS: []bool{true, false}, + acceptedCI: []bool{true, false}, + }, + { + args: []string{"*.*", "!foo1.*"}, + schemas: []string{"foo", "foo1"}, + acceptedCS: []bool{true, false}, + acceptedCI: []bool{true, false}, + }, + { + args: []string{"foo.bar1"}, + schemas: []string{"foo", "foo1"}, + acceptedCS: []bool{true, false}, + acceptedCI: []bool{true, false}, + }, + { + args: []string{"*.*", "!foo.bar"}, + schemas: []string{"foo", "foo1"}, + acceptedCS: []bool{true, true}, + acceptedCI: []bool{true, true}, + }, + { + args: []string{"/^foo/.*", `!/^foo/./^sbtest-\d/`}, + schemas: []string{"foo", "foo2"}, + acceptedCS: []bool{true, true}, + acceptedCI: []bool{true, 
true}, + }, + { + args: []string{"*.*", "!FOO*.*", "!*.*FoO"}, + schemas: []string{"foo", "FOO", "foobar", "FOOBAR", "bar", "BAR"}, + acceptedCS: []bool{true, false, true, false, true, true}, + acceptedCI: []bool{false, false, false, false, true, true}, + }, + } + + for _, tc := range cases { + c.Log("test case =", tc.args) + fcs, err := filter.Parse(tc.args) + c.Assert(err, IsNil) + fci := filter.CaseInsensitive(fcs) + for i, schema := range tc.schemas { + c.Assert(fcs.MatchSchema(schema), Equals, tc.acceptedCS[i], Commentf("cs schema %s", schema)) + c.Assert(fci.MatchSchema(schema), Equals, tc.acceptedCI[i], Commentf("ci schema %s", schema)) + } + } +} + +func (s *filterSuite) TestParseFailures(c *C) { + cases := []struct { + arg string + msg string + }{ + { + arg: "/^t[0-9]+((?!_copy).)*$/.*", + msg: ".*: invalid pattern: error parsing regexp:.*", + }, + { + arg: "/^t[0-9]+sp(?=copy).*/.*", + msg: ".*: invalid pattern: error parsing regexp:.*", + }, + { + arg: "a.b.c", + msg: ".*: syntax error: stray characters after table pattern", + }, + { + arg: "a%b.c", + msg: ".*: unexpected special character '%'", + }, + { + arg: `a\tb.c`, + msg: `.*: cannot escape a letter or number \(\\t\), it is reserved for future extension`, + }, + { + arg: "[].*", + msg: ".*: syntax error: failed to parse character class", + }, + { + arg: "[!].*", + msg: `.*: invalid pattern: error parsing regexp: missing closing \]:.*`, + }, + { + arg: "[.*", + msg: `.*: syntax error: failed to parse character class`, + }, + { + arg: `[\d\D].*`, + msg: `.*: syntax error: failed to parse character class`, + }, + { + arg: "db", + msg: `.*: missing table pattern`, + }, + { + arg: "db.", + msg: `.*: syntax error: missing pattern`, + }, + { + arg: "`db`*.*", + msg: `.*: syntax error: missing '\.' 
between schema and table patterns`, + }, + { + arg: "/db.*", + msg: `.*: syntax error: incomplete regexp`, + }, + { + arg: "`db.*", + msg: `.*: syntax error: incomplete quoted identifier`, + }, + { + arg: `"db.*`, + msg: `.*: syntax error: incomplete quoted identifier`, + }, + { + arg: `db\`, + msg: `.*: syntax error: cannot place \\ at end of line`, + }, + { + arg: "db.tbl#not comment", + msg: `.*: unexpected special character '#'`, + }, + } + + for _, tc := range cases { + _, err := filter.Parse([]string{tc.arg}) + c.Assert(err, ErrorMatches, tc.msg, Commentf("test case = %s", tc.arg)) + } +} + +func (s *filterSuite) TestImport(c *C) { + dir := c.MkDir() + path1 := filepath.Join(dir, "1.txt") + path2 := filepath.Join(dir, "2.txt") + ioutil.WriteFile(path1, []byte(` + db?.tbl? + db02.tbl02 + `), 0644) + ioutil.WriteFile(path2, []byte(` + db03.tbl03 + !db4.tbl4 + `), 0644) + + f, err := filter.Parse([]string{"@" + path1, "@" + path2, "db04.tbl04"}) + c.Assert(err, IsNil) + + c.Assert(f.MatchTable("db1", "tbl1"), IsTrue) + c.Assert(f.MatchTable("db2", "tbl2"), IsTrue) + c.Assert(f.MatchTable("db3", "tbl3"), IsTrue) + c.Assert(f.MatchTable("db4", "tbl4"), IsFalse) + c.Assert(f.MatchTable("db01", "tbl01"), IsFalse) + c.Assert(f.MatchTable("db02", "tbl02"), IsTrue) + c.Assert(f.MatchTable("db03", "tbl03"), IsTrue) + c.Assert(f.MatchTable("db04", "tbl04"), IsTrue) +} + +func (s *filterSuite) TestRecursiveImport(c *C) { + dir := c.MkDir() + path3 := filepath.Join(dir, "3.txt") + path4 := filepath.Join(dir, "4.txt") + ioutil.WriteFile(path3, []byte("db1.tbl1"), 0644) + ioutil.WriteFile(path4, []byte("# comment\n\n@"+path3), 0644) + + _, err := filter.Parse([]string{"@" + path4}) + c.Assert(err, ErrorMatches, `.*4\.txt:3: importing filter files recursively is not allowed`) + + _, err = filter.Parse([]string{"@" + filepath.Join(dir, "5.txt")}) + c.Assert(err, ErrorMatches, `.*: cannot open filter file: open .*5\.txt: .*`) +} diff --git a/pkg/table-filter/matchers.go 
b/pkg/table-filter/matchers.go
new file mode 100644
index 000000000..f752ebf89
--- /dev/null
+++ b/pkg/table-filter/matchers.go
@@ -0,0 +1,97 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/pingcap/errors"
+)
+
+// rule of a filter, consisting of a schema and table pattern, and may be an
+// accept-list (positive) or deny-list (negative).
+type rule struct {
+	schema   matcher
+	table    matcher
+	positive bool
+}
+
+// matcher matches a name against a pattern.
+type matcher interface {
+	matchString(name string) bool
+	matchAllStrings() bool
+	toLower() matcher
+}
+
+// stringMatcher is a matcher with a literal string.
+type stringMatcher string
+
+func (m stringMatcher) matchString(name string) bool {
+	return string(m) == name
+}
+
+func (stringMatcher) matchAllStrings() bool {
+	return false
+}
+
+func (m stringMatcher) toLower() matcher {
+	return stringMatcher(strings.ToLower(string(m)))
+}
+
+// trueMatcher is a matcher which matches everything. The `*` pattern.
+type trueMatcher struct{}
+
+func (trueMatcher) matchString(string) bool {
+	return true
+}
+
+func (trueMatcher) matchAllStrings() bool {
+	return true
+}
+
+func (m trueMatcher) toLower() matcher {
+	return m
+}
+
+// regexpMatcher is a matcher based on a regular expression.
+type regexpMatcher struct {
+	pattern *regexp.Regexp
+}
+
+func newRegexpMatcher(pat string) (matcher, error) {
+	if pat == "(?s)^.*$" {
+		// special case for '*'
+		return trueMatcher{}, nil
+	}
+	pattern, err := regexp.Compile(pat)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	return regexpMatcher{pattern: pattern}, nil
+}
+
+func (m regexpMatcher) matchString(name string) bool {
+	return m.pattern.MatchString(name)
+}
+
+func (regexpMatcher) matchAllStrings() bool {
+	return false
+}
+
+func (m regexpMatcher) toLower() matcher {
+	pattern := regexp.MustCompile("(?i)" + m.pattern.String())
+	return regexpMatcher{pattern: pattern}
+}
diff --git a/pkg/table-filter/parser.go b/pkg/table-filter/parser.go
new file mode 100644
index 000000000..74cba9d44
--- /dev/null
+++ b/pkg/table-filter/parser.go
@@ -0,0 +1,273 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"regexp"
+	"strings"
+
+	"github.com/pingcap/errors"
+)
+
+type parser struct {
+	rules    []rule
+	fileName string
+	lineNum  int64
+}
+
+func (p *parser) wrapErrorFormat(format string) string {
+	return fmt.Sprintf("at %s:%d: %s", strings.ReplaceAll(p.fileName, "%", "%%"), p.lineNum, format)
+}
+
+func (p *parser) errorf(format string, args ...interface{}) error {
+	return errors.Errorf(p.wrapErrorFormat(format), args...)
+}
+
+func (p *parser) annotatef(err error, format string, args ...interface{}) error {
+	return errors.Annotatef(err, p.wrapErrorFormat(format), args...)
+}
+
+func (p *parser) parse(line string, canImport bool) (err error) {
+	line = strings.Trim(line, " \t")
+	if len(line) == 0 {
+		return nil
+	}
+
+	positive := true
+	switch line[0] {
+	case '#':
+		return nil
+	case '!':
+		positive = false
+		line = line[1:]
+	case '@':
+		if !canImport {
+			// FIXME: should we relax this?
+			return p.errorf("importing filter files recursively is not allowed")
+		}
+		// FIXME: can't deal with file names which ends in spaces (perhaps not a big deal)
+		return p.importFile(line[1:])
+	}
+
+	var sm, tm matcher
+
+	sm, line, err = p.parsePattern(line)
+	if err != nil {
+		return err
+	}
+	if len(line) == 0 {
+		return p.errorf("missing table pattern")
+	}
+	if line[0] != '.' {
+		return p.errorf("syntax error: missing '.' between schema and table patterns")
+	}
+
+	tm, line, err = p.parsePattern(line[1:])
+	if err != nil {
+		return err
+	}
+	if len(line) != 0 {
+		return p.errorf("syntax error: stray characters after table pattern")
+	}
+
+	p.rules = append(p.rules, rule{
+		schema:   sm,
+		table:    tm,
+		positive: positive,
+	})
+	return nil
+}
+
+func (p *parser) importFile(fileName string) error {
+	file, err := os.Open(fileName)
+	if err != nil {
+		return p.annotatef(err, "cannot open filter file")
+	}
+	defer file.Close()
+
+	oldFileName, oldLineNum := p.fileName, p.lineNum
+	p.fileName, p.lineNum = fileName, 1
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		if err := p.parse(scanner.Text(), false); err != nil {
+			return err
+		}
+		p.lineNum++
+	}
+
+	p.fileName, p.lineNum = oldFileName, oldLineNum
+
+	if err := scanner.Err(); err != nil {
+		return p.annotatef(err, "cannot read filter file")
+	}
+	return nil
+}
+
+var (
+	regexpRegexp        = regexp.MustCompile(`^/(?:\\.|[^/])+/`)
+	doubleQuotedRegexp  = regexp.MustCompile(`^"(?:""|[^"])+"`)
+	backquotedRegexp    = regexp.MustCompile("^`(?:``|[^`])+`")
+	wildcardRangeRegexp = regexp.MustCompile(`^\[!?(?:\\[^0-9a-zA-Z]|[^\\\]])+\]`)
+)
+
+func (p *parser) newRegexpMatcher(pat string) (matcher, error) {
+	m, err := newRegexpMatcher(pat)
+	if err != nil {
+		return nil, p.annotatef(err, "invalid pattern")
+	}
+	return m, nil
+}
+
+func (p *parser) parsePattern(line string) (matcher, string, error) {
+	if len(line) == 0 {
+		return nil, "", p.errorf("syntax error: missing pattern")
+	}
+
+	switch line[0] {
+	case '/':
+		// a regexp pattern
+		loc := regexpRegexp.FindStringIndex(line)
+		if len(loc) < 2 {
+			return nil, "", p.errorf("syntax error: incomplete regexp")
+		}
+		m, err := p.newRegexpMatcher(line[1 : loc[1]-1])
+		if err != nil {
+			return nil, "", err
+		}
+		return m, line[loc[1]:], nil
+
+	case '"':
+		// a double-quoted pattern
+		loc := doubleQuotedRegexp.FindStringIndex(line)
+		if len(loc) < 2 {
+			return nil, "", p.errorf("syntax error: incomplete quoted identifier")
+		}
+		name := strings.ReplaceAll(line[1:loc[1]-1], `""`, `"`)
+		return stringMatcher(name), line[loc[1]:], nil
+
+	case '`':
+		// a backquoted pattern
+		loc := backquotedRegexp.FindStringIndex(line)
+		if len(loc) < 2 {
+			return nil, "", p.errorf("syntax error: incomplete quoted identifier")
+		}
+		name := strings.ReplaceAll(line[1:loc[1]-1], "``", "`")
+		return stringMatcher(name), line[loc[1]:], nil
+
+	default:
+		// wildcard or literal string.
+		return p.parseWildcardPattern(line)
+	}
+}
+
+func isASCIIAlphanumeric(b byte) bool {
+	return '0' <= b && b <= '9' || 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z'
+}
+
+func (p *parser) parseWildcardPattern(line string) (matcher, string, error) {
+	var (
+		literalStringBuilder   strings.Builder
+		wildcardPatternBuilder strings.Builder
+		isLiteralString        = true
+		i                      = 0
+	)
+	literalStringBuilder.Grow(len(line))
+	wildcardPatternBuilder.Grow(len(line) + 6)
+	wildcardPatternBuilder.WriteString("(?s)^")
+
+parseLoop:
+	for i < len(line) {
+		c := line[i]
+		switch c {
+		case '\\':
+			// escape character
+			if i == len(line)-1 {
+				return nil, "", p.errorf(`syntax error: cannot place \ at end of line`)
+			}
+			esc := line[i+1]
+			if isASCIIAlphanumeric(esc) {
+				return nil, "", p.errorf(`cannot escape a letter or number (\%c), it is reserved for future extension`, esc)
+			}
+			if isLiteralString {
+				literalStringBuilder.WriteByte(esc)
+			}
+			if esc < 0x80 {
+				wildcardPatternBuilder.WriteByte('\\')
+			}
+			wildcardPatternBuilder.WriteByte(esc)
+
+			i += 2
+
+		case '.':
+			// table separator, end now.
+			break parseLoop
+
+		case '*':
+			// wildcard
+			isLiteralString = false
+			wildcardPatternBuilder.WriteString(".*")
+			i++
+
+		case '?':
+			isLiteralString = false
+			wildcardPatternBuilder.WriteByte('.')
+			i++
+
+		case '[':
+			// range of characters
+			isLiteralString = false
+			rangeLoc := wildcardRangeRegexp.FindStringIndex(line[i:])
+			if len(rangeLoc) < 2 {
+				return nil, "", p.errorf("syntax error: failed to parse character class")
+			}
+			end := i + rangeLoc[1]
+			switch line[i+1] { // classify by the byte after the current '[', not the line start
+			case '!':
+				wildcardPatternBuilder.WriteString("[^")
+				wildcardPatternBuilder.WriteString(line[i+2 : end])
+			case '^': // `[^` is not special in a glob pattern. escape it.
+				wildcardPatternBuilder.WriteString(`[\^`)
+				wildcardPatternBuilder.WriteString(line[i+2 : end])
+			default:
+				wildcardPatternBuilder.WriteString(line[i:end])
+			}
+			i = end
+
+		default:
+			if c == '$' || c == '_' || isASCIIAlphanumeric(c) || c >= 0x80 {
+				literalStringBuilder.WriteByte(c)
+				wildcardPatternBuilder.WriteByte(c)
+				i++
+			} else {
+				return nil, "", p.errorf("unexpected special character '%c'", c)
+			}
+		}
+	}
+
+	line = line[i:]
+	if isLiteralString {
+		return stringMatcher(literalStringBuilder.String()), line, nil
+	}
+	wildcardPatternBuilder.WriteByte('$')
+	m, err := p.newRegexpMatcher(wildcardPatternBuilder.String())
+	if err != nil {
+		return nil, "", err
+	}
+	return m, line, nil
+}