From 9f46e8dcb941d59974951cafd48e236714ac4c00 Mon Sep 17 00:00:00 2001
From: kennytm <kennytm@gmail.com>
Date: Wed, 29 Apr 2020 04:53:01 +0800
Subject: [PATCH 1/2] table-filter: implement .gitignore-style table filter

---
 pkg/filter/filter.go            |  45 +---
 pkg/table-filter/README.md      | 225 ++++++++++++++++
 pkg/table-filter/compat.go      | 271 +++++++++++++++++++
 pkg/table-filter/compat_test.go | 243 +++++++++++++++++
 pkg/table-filter/filter.go      | 109 ++++++++
 pkg/table-filter/filter_test.go | 448 ++++++++++++++++++++++++++++++++
 pkg/table-filter/matchers.go    |  96 +++++++
 pkg/table-filter/parser.go      | 273 +++++++++++++++++++
 8 files changed, 1668 insertions(+), 42 deletions(-)
 create mode 100644 pkg/table-filter/README.md
 create mode 100644 pkg/table-filter/compat.go
 create mode 100644 pkg/table-filter/compat_test.go
 create mode 100644 pkg/table-filter/filter.go
 create mode 100644 pkg/table-filter/filter_test.go
 create mode 100644 pkg/table-filter/matchers.go
 create mode 100644 pkg/table-filter/parser.go

diff --git a/pkg/filter/filter.go b/pkg/filter/filter.go
index 590dccaa5..461dc7c2b 100644
--- a/pkg/filter/filter.go
+++ b/pkg/filter/filter.go
@@ -14,12 +14,12 @@
 package filter
 
 import (
-	"fmt"
 	"regexp"
 	"strings"
 	"sync"
 
 	"github.com/pingcap/errors"
+	tfilter "github.com/pingcap/tidb-tools/pkg/table-filter"
 	selector "github.com/pingcap/tidb-tools/pkg/table-rule-selector"
 )
 
@@ -33,18 +33,7 @@ const (
 )
 
 // Table represents a table.
-type Table struct {
-	Schema string `toml:"db-name" json:"db-name" yaml:"db-name"`
-	Name   string `toml:"tbl-name" json:"tbl-name" yaml:"tbl-name"`
-}
-
-// String implements the fmt.Stringer interface.
-func (t *Table) String() string {
-	if len(t.Name) > 0 {
-		return fmt.Sprintf("`%s`.`%s`", t.Schema, t.Name)
-	}
-	return fmt.Sprintf("`%s`", t.Schema)
-}
+type Table = tfilter.Table
 
 type cache struct {
 	sync.RWMutex
@@ -66,35 +55,7 @@ func (c *cache) set(key string, action ActionType) {
 }
 
 // Rules contains Filter rules.
-type Rules struct {
-	DoTables []*Table `json:"do-tables" toml:"do-tables" yaml:"do-tables"`
-	DoDBs    []string `json:"do-dbs" toml:"do-dbs" yaml:"do-dbs"`
-
-	IgnoreTables []*Table `json:"ignore-tables" toml:"ignore-tables" yaml:"ignore-tables"`
-	IgnoreDBs    []string `json:"ignore-dbs" toml:"ignore-dbs" yaml:"ignore-dbs"`
-}
-
-// ToLower convert all entries to lowercase
-func (r *Rules) ToLower() {
-	if r == nil {
-		return
-	}
-
-	for _, table := range r.DoTables {
-		table.Name = strings.ToLower(table.Name)
-		table.Schema = strings.ToLower(table.Schema)
-	}
-	for _, table := range r.IgnoreTables {
-		table.Name = strings.ToLower(table.Name)
-		table.Schema = strings.ToLower(table.Schema)
-	}
-	for i, db := range r.IgnoreDBs {
-		r.IgnoreDBs[i] = strings.ToLower(db)
-	}
-	for i, db := range r.DoDBs {
-		r.DoDBs[i] = strings.ToLower(db)
-	}
-}
+type Rules = tfilter.MySQLReplicationRules
 
 // Filter implements whitelist and blacklist filters.
 type Filter struct {
diff --git a/pkg/table-filter/README.md b/pkg/table-filter/README.md
new file mode 100644
index 000000000..b66f31b0f
--- /dev/null
+++ b/pkg/table-filter/README.md
@@ -0,0 +1,225 @@
+# Table Filter
+
+A table filter is an interface which determines if a table or schema should be
+accepted for some process or not given its name.
+
+This package defines the format allowing users to specify the filter criteria
+via command line or config files. This package is used by all tools in the TiDB
+ecosystem.
+
+## Examples
+
+```go
+package main
+
+import (
+    "fmt"
+
+    "github.com/pingcap/tidb-tools/pkg/table-filter"
+    "github.com/spf13/pflag"
+)
+
+func main() {
+    args := pflag.StringArrayP("filter", "f", []string{"*.*"}, "table filter")
+    pflag.Parse()
+
+    f, err := filter.Parse(*args)
+    if err != nil {
+            panic(err)
+    }
+    f = filter.CaseInsensitive(f)
+
+    tables := []filter.Table{
+        {Schema: "employees", Name: "employees"},
+        {Schema: "employees", Name: "departments"},
+        {Schema: "employees", Name: "dept_manager"},
+        {Schema: "employees", Name: "dept_emp"},
+        {Schema: "employees", Name: "titles"},
+        {Schema: "employees", Name: "salaries"},
+        {Schema: "AdventureWorks.Person", Name: "Person"},
+        {Schema: "AdventureWorks.Person", Name: "Password"},
+        {Schema: "AdventureWorks.Sales", Name: "SalesOrderDetail"},
+        {Schema: "AdventureWorks.Sales", Name: "SalesOrderHeader"},
+        {Schema: "AdventureWorks.Production", Name: "WorkOrder"},
+        {Schema: "AdventureWorks.Production", Name: "WorkOrderRouting"},
+        {Schema: "AdventureWorks.Production", Name: "ProductPhoto"},
+        {Schema: "AdventureWorks.Production", Name: "TransactionHistory"},
+        {Schema: "AdventureWorks.Production", Name: "TransactionHistoryArchive"},
+    }
+
+    for _, table := range tables {
+        fmt.Printf("%5v: %v\n", f.MatchTable(table.Schema, table.Name), table)
+    }
+}
+```
+
+Try to run with `./main -f 'employees.*' -f '*.WorkOrder'` and see the result.
+
+## Syntax
+
+### Whitelist
+
+The input to the `filter.Parse()` function is a list of table filter rules.
+Each rule specifies what the fully-qualified name of the table to be accepted.
+
+```
+db1.tbl1
+db2.tbl2
+db3.tbl3
+```
+
+A plain name must only consist of valid [identifier characters]
+`[0-9a-zA-Z$_\U00000080-\U0010ffff]+`. All other ASCII characters are reserved.
+Some punctuations have special meanings, described below.
+
+### Wildcards
+
+Each part of the name can be a wildcard symbol as in [fnmatch(3)]:
+* `*` — matches zero or more characters
+* `?` — matches one character
+* `[a-z]` — matches one character between “a” and “z” inclusive
+* `[!a-z]` — matches one character except “a” to “z”.
+
+```
+db[0-9].tbl[0-9][0-9]
+data.*
+*.backup_*
+```
+
+“Character” here means a Unicode code point, so e.g.
+* U+00E9 (é) is 1 character.
+* U+0065 U+0301 (é) are 2 characters.
+* U+1F926 U+1F3FF U+200D U+2640 U+FE0F (🤦🏿‍♀️) are 5 characters.
+
+### File import
+
+Include an `@` at the beginning of the string to specify a file name, from
+which `filter.Parse()` reads every line as a filter rule.
+
+For example, if a file `config/filter.txt` has content:
+
+```
+employees.*
+*.WorkOrder
+```
+
+the following two invocations would be equivalent:
+
+```sh
+./main -f '@config/filter.txt'
+./main -f 'employees.*' -f '*.WorkOrder'
+```
+
+A filter file cannot further import another file.
+
+### Comments and blank lines
+
+Leading and trailing white-spaces of every line are trimmed.
+
+Blank lines (empty strings) are ignored.
+
+A leading `#` marks a comment and is ignored.
+A `#` that is not at the start of a line may be considered a syntax error.
+
+### Blacklist
+
+An `!` at the beginning of the line means the pattern after it is used to
+exclude tables from being processed. This effectively turns the filter into a
+blacklist.
+
+```ini
+*.*
+#^ note: must add the *.* to include all tables first
+!*.Password
+!employees.salaries
+```
+
+### Escape character
+
+Precede any special character by a `\` to turn it into an identifier character.
+
+```
+AdventureWorks\.*.*
+```
+
+For simplicity and future compatibility, the following sequences are prohibited:
+* `\` at the end of the line after trimming whitespaces (use “`[ ]`” to match a literal whitespace at the end).
+* `\` followed by any ASCII alphanumeric character (`[0-9a-zA-Z]`). In particular, C-like escape sequences like `\0`, `\r`, `\n` and `\t` currently are meaningless.
+
+### Quoted identifier
+
+Besides `\`, special characters can also be escaped by quoting using `"` or `` ` ``.
+
+```
+"AdventureWorks.Person".Person
+`AdventureWorks.Person`.Password
+```
+
+Quoted identifier cannot span multiple lines.
+
+It is invalid to partially quote an identifier.
+
+```
+"this is "invalid*.*
+```
+
+### Regular expression
+
+Use `/` to delimit regular expressions:
+
+```
+/^db\d{2,}$/./^tbl\d{2,}$/
+```
+
+These regular expressions use the [Go dialect]. The pattern is matched if the
+identifier contains a substring matching the regular expression. For instance,
+`/b/` matches `db01`.
+
+(Note: every `/` in the regex must be escaped as `\/`, including inside `[`…`]`.
+You cannot place an unescaped `/` between `\Q`…`\E`.)
+
+[identifier characters]: https://dev.mysql.com/doc/refman/8.0/en/identifiers.html
+[fnmatch(3)]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
+[Go dialect]: https://pkg.go.dev/regexp/syntax?tab=doc
+
+## Algorithm
+
+### Default behavior
+
+When a table name matches none of the rules in the filter list, the default
+behavior is to ignore such unmatched tables.
+
+To build a blacklist, an explicit `*.*` must be used as the first rule,
+otherwise all tables will be excluded.
+
+```sh
+# every table will be filtered out
+./main -f '!*.Password'
+
+# only the "Password" table is filtered out, the rest are included.
+./main -f '*.*' -f '!*.Password'
+```
+
+### Precedence
+
+In a filter list, if a table name matches multiple patterns, the last match
+decides the outcome. For instance, given
+
+```ini
+# rule 1
+employees.*
+# rule 2
+!*.dep*
+# rule 3
+*.departments
+```
+
+We get:
+
+| Table name            | Rule 1 | Rule 2 | Rule 3 | Outcome          |
+|-----------------------|--------|--------|--------|------------------|
+| irrelevant.table      |        |        |        | Default (reject) |
+| employees.employees   | ✓      |        |        | Rule 1 (accept)  |
+| employees.dept_emp    | ✓      | ✓      |        | Rule 2 (reject)  |
+| employees.departments | ✓      | ✓      | ✓      | Rule 3 (accept)  |
+| else.departments      |        | ✓      | ✓      | Rule 3 (accept)  |
diff --git a/pkg/table-filter/compat.go b/pkg/table-filter/compat.go
new file mode 100644
index 000000000..a15a4d1a3
--- /dev/null
+++ b/pkg/table-filter/compat.go
@@ -0,0 +1,271 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+	"errors"
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+// Table represents a qualified table name.
+type Table struct {
+	// Schema is the name of the schema (database) containing this table.
+	Schema string `toml:"db-name" json:"db-name" yaml:"db-name"`
+	// Name is the unqualified table name.
+	Name string `toml:"tbl-name" json:"tbl-name" yaml:"tbl-name"`
+}
+
+func (t *Table) lessThan(u *Table) bool {
+	return t.Schema < u.Schema || t.Schema == u.Schema && t.Name < u.Name
+}
+
+// String implements the fmt.Stringer interface.
+func (t *Table) String() string {
+	if len(t.Name) > 0 {
+		return fmt.Sprintf("`%s`.`%s`", t.Schema, t.Name)
+	}
+	return fmt.Sprintf("`%s`", t.Schema)
+}
+
+// MySQLReplicationRules is a set of rules based on MySQL's replication filter.
+type MySQLReplicationRules struct {
+	// DoTables is a whitelist of tables.
+	DoTables []*Table `json:"do-tables" toml:"do-tables" yaml:"do-tables"`
+	// DoDBs is the whitelist of schemas.
+	DoDBs []string `json:"do-dbs" toml:"do-dbs" yaml:"do-dbs"`
+
+	// IgnoreTables is a blacklist of tables.
+	IgnoreTables []*Table `json:"ignore-tables" toml:"ignore-tables" yaml:"ignore-tables"`
+	// IgnoreDBs is a blacklist of schemas.
+	IgnoreDBs []string `json:"ignore-dbs" toml:"ignore-dbs" yaml:"ignore-dbs"`
+}
+
+// ToLower convert all entries to lowercase
+// Deprecated: use `filter.CaseInsensitive` instead.
+func (r *MySQLReplicationRules) ToLower() {
+	if r == nil {
+		return
+	}
+
+	for _, table := range r.DoTables {
+		table.Name = strings.ToLower(table.Name)
+		table.Schema = strings.ToLower(table.Schema)
+	}
+	for _, table := range r.IgnoreTables {
+		table.Name = strings.ToLower(table.Name)
+		table.Schema = strings.ToLower(table.Schema)
+	}
+	for i, db := range r.IgnoreDBs {
+		r.IgnoreDBs[i] = strings.ToLower(db)
+	}
+	for i, db := range r.DoDBs {
+		r.DoDBs[i] = strings.ToLower(db)
+	}
+}
+
+type schemasFilter struct {
+	schemas map[string]struct{}
+}
+
+func (f schemasFilter) MatchTable(schema string, table string) bool {
+	return f.MatchSchema(schema)
+}
+
+func (f schemasFilter) MatchSchema(schema string) bool {
+	_, ok := f.schemas[schema]
+	return ok
+}
+
+func (f schemasFilter) toLower() Filter {
+	loweredSchemas := make(map[string]struct{}, len(f.schemas))
+	for schema := range f.schemas {
+		loweredSchemas[strings.ToLower(schema)] = struct{}{}
+	}
+	return schemasFilter{schemas: loweredSchemas}
+}
+
+// NewSchemasFilter creates a filter which only accepts a list of schemas.
+func NewSchemasFilter(schemas ...string) schemasFilter {
+	schemaMap := make(map[string]struct{}, len(schemas))
+	for _, schema := range schemas {
+		schemaMap[schema] = struct{}{}
+	}
+	return schemasFilter{schemas: schemaMap}
+}
+
+type tablesFilter struct {
+	schemas map[string]map[string]struct{}
+}
+
+func (f tablesFilter) MatchTable(schema string, table string) bool {
+	t, ok := f.schemas[schema]
+	if !ok {
+		return false
+	}
+	_, ok = t[table]
+	return ok
+}
+
+func (f tablesFilter) MatchSchema(schema string) bool {
+	_, ok := f.schemas[schema]
+	return ok
+}
+
+func (f tablesFilter) toLower() Filter {
+	loweredSchemas := make(map[string]map[string]struct{}, len(f.schemas))
+	for schema, tables := range f.schemas {
+		loweredSchema := strings.ToLower(schema)
+		loweredTables, ok := loweredSchemas[loweredSchema]
+		if !ok {
+			loweredTables = make(map[string]struct{}, len(tables))
+		}
+		for table := range tables {
+			loweredTables[strings.ToLower(table)] = struct{}{}
+		}
+		loweredSchemas[loweredSchema] = loweredTables
+	}
+	return tablesFilter{schemas: loweredSchemas}
+}
+
+// NewTablesFilter creates a filter which only accepts a list of tables.
+func NewTablesFilter(tables ...Table) Filter {
+	schemas := make(map[string]map[string]struct{})
+	for _, table := range tables {
+		tbls, ok := schemas[table.Schema]
+		if !ok {
+			tbls = make(map[string]struct{})
+		}
+		tbls[table.Name] = struct{}{}
+		schemas[table.Schema] = tbls
+	}
+	return tablesFilter{schemas: schemas}
+}
+
+// bothFilter is a filter which passes if both filters in the field passes.
+type bothFilter struct {
+	a Filter
+	b Filter
+}
+
+func (f *bothFilter) MatchTable(schema string, table string) bool {
+	return f.a.MatchTable(schema, table) && f.b.MatchTable(schema, table)
+}
+
+func (f *bothFilter) MatchSchema(schema string) bool {
+	return f.a.MatchSchema(schema) && f.b.MatchSchema(schema)
+}
+
+func (f *bothFilter) toLower() Filter {
+	return &bothFilter{
+		a: f.a.toLower(),
+		b: f.b.toLower(),
+	}
+}
+
+var legacyWildcardReplacer = strings.NewReplacer(
+	`\*`, ".*",
+	`\?`, ".",
+	`\[!`, "[^",
+	`\[`, "[",
+	`\]`, "]",
+)
+
+func matcherFromLegacyPattern(pattern string) (matcher, error) {
+	if len(pattern) == 0 {
+		return nil, errors.New("pattern cannot be empty")
+	}
+	if pattern[0] == '~' {
+		// this is a regexp pattern.
+		return newRegexpMatcher(pattern[1:])
+	}
+
+	if !strings.ContainsAny(pattern, "?*[") {
+		// this is a literal string.
+		return stringMatcher(pattern), nil
+	}
+
+	// this is a wildcard.
+	pattern = "(?s)^" + legacyWildcardReplacer.Replace(regexp.QuoteMeta(pattern)) + "$"
+	return newRegexpMatcher(pattern)
+}
+
+// ParseMySQLReplicationRules constructs up to 2 filters from the MySQLReplicationRules.
+// Tables have to pass *both* filters to be processed.
+func ParseMySQLReplicationRules(rules *MySQLReplicationRules) (Filter, error) {
+	schemas := rules.DoDBs
+	positive := true
+	rulesLen := len(schemas)
+	if rulesLen == 0 {
+		schemas = rules.IgnoreDBs
+		positive = false
+		rulesLen = len(schemas) + 1
+	}
+
+	schemaRules := make([]rule, 0, rulesLen)
+	for _, schema := range schemas {
+		m, err := matcherFromLegacyPattern(schema)
+		if err != nil {
+			return nil, err
+		}
+		schemaRules = append(schemaRules, rule{
+			schema:   m,
+			table:    trueMatcher{},
+			positive: positive,
+		})
+	}
+	if !positive {
+		schemaRules = append(schemaRules, rule{
+			schema:   trueMatcher{},
+			table:    trueMatcher{},
+			positive: true,
+		})
+	}
+
+	tables := rules.DoTables
+	positive = true
+	rulesLen = len(tables)
+	if len(tables) == 0 {
+		tables = rules.IgnoreTables
+		positive = false
+		rulesLen = len(tables) + 1
+	}
+
+	tableRules := make([]rule, 0, rulesLen)
+	for _, table := range tables {
+		sm, err := matcherFromLegacyPattern(table.Schema)
+		if err != nil {
+			return nil, err
+		}
+		tm, err := matcherFromLegacyPattern(table.Name)
+		if err != nil {
+			return nil, err
+		}
+		tableRules = append(tableRules, rule{
+			schema:   sm,
+			table:    tm,
+			positive: positive,
+		})
+	}
+	if !positive {
+		tableRules = append(tableRules, rule{
+			schema:   trueMatcher{},
+			table:    trueMatcher{},
+			positive: true,
+		})
+	}
+
+	return &bothFilter{a: filter(schemaRules), b: filter(tableRules)}, nil
+}
diff --git a/pkg/table-filter/compat_test.go b/pkg/table-filter/compat_test.go
new file mode 100644
index 000000000..5d0ef4798
--- /dev/null
+++ b/pkg/table-filter/compat_test.go
@@ -0,0 +1,243 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter_test
+
+import (
+	. "github.com/pingcap/check"
+
+	filter "github.com/pingcap/tidb-tools/pkg/table-filter"
+)
+
+type compatSuite struct{}
+
+var _ = Suite(&compatSuite{})
+
+func (s *compatSuite) TestSchemaFilter(c *C) {
+	sf0 := filter.CaseInsensitive(filter.NewSchemasFilter("foo?", "bar"))
+	c.Assert(sf0.MatchTable("foo?", "a"), IsTrue)
+	c.Assert(sf0.MatchTable("food", "a"), IsFalse)
+	c.Assert(sf0.MatchTable("bar", "b"), IsTrue)
+	c.Assert(sf0.MatchTable("BAR", "b"), IsTrue)
+
+	sf1 := filter.NewSchemasFilter(`\baz`)
+	c.Assert(sf1.MatchSchema("baz"), IsFalse)
+	c.Assert(sf1.MatchSchema("Baz"), IsFalse)
+	c.Assert(sf1.MatchSchema(`\baz`), IsTrue)
+	c.Assert(sf1.MatchSchema(`\Baz`), IsFalse)
+
+	sf2 := filter.NewSchemasFilter()
+	c.Assert(sf2.MatchTable("aaa", "bbb"), IsFalse)
+}
+
+func (s *compatSuite) TestTableFilter(c *C) {
+	tf0 := filter.CaseInsensitive(filter.NewTablesFilter(
+		filter.Table{Schema: "foo?", Name: "bar*"},
+		filter.Table{Schema: "BAR?", Name: "FOO*"},
+	))
+	c.Assert(tf0.MatchTable("foo?", "bar*"), IsTrue)
+	c.Assert(tf0.MatchTable("bar?", "foo*"), IsTrue)
+	c.Assert(tf0.MatchTable("FOO?", "BAR*"), IsTrue)
+	c.Assert(tf0.MatchTable("foo?", "bar"), IsFalse)
+	c.Assert(tf0.MatchTable("BARD", "FOO*"), IsFalse)
+
+	tf1 := filter.NewTablesFilter(
+		filter.Table{Schema: `\baz`, Name: `BAR`},
+	)
+	c.Assert(tf1.MatchSchema("baz"), IsFalse)
+	c.Assert(tf1.MatchSchema("Baz"), IsFalse)
+	c.Assert(tf1.MatchSchema(`\baz`), IsTrue)
+	c.Assert(tf1.MatchSchema(`\Baz`), IsFalse)
+
+	tf2 := filter.NewTablesFilter()
+	c.Assert(tf2.MatchTable("aaa", "bbb"), IsFalse)
+}
+
+func (s *compatSuite) TestLegacyFilter(c *C) {
+	cases := []struct {
+		rules    filter.MySQLReplicationRules
+		accepted []filter.Table
+		rejected []filter.Table
+	}{
+		{
+			rules: filter.MySQLReplicationRules{},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+			},
+			rejected: nil,
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				IgnoreDBs: []string{"foo"},
+				DoDBs:     []string{"foo"},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+			},
+			rejected: []filter.Table{
+				{Schema: "foo1", Name: "bar"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				IgnoreDBs: []string{"foo1"},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+			},
+			rejected: []filter.Table{
+				{Schema: "foo1", Name: "bar"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				DoTables: []*filter.Table{{Schema: "foo", Name: "bar1"}},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "bar1"},
+			},
+			rejected: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+				{Schema: "foo1", Name: "bar"},
+				{Schema: "foo1", Name: "bar1"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				IgnoreTables: []*filter.Table{{Schema: "foo", Name: "bar"}},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "bar1"},
+				{Schema: "foo1", Name: "bar"},
+				{Schema: "foo1", Name: "bar1"},
+			},
+			rejected: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				DoDBs:        []string{"~^foo"},
+				IgnoreTables: []*filter.Table{{Schema: "~^foo", Name: `~^sbtest-\d`}},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "sbtest"},
+				{Schema: "foo", Name: `sbtest-\d`},
+			},
+			rejected: []filter.Table{
+				{Schema: "fff", Name: "bar"},
+				{Schema: "foo1", Name: "sbtest-1"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				IgnoreDBs: []string{"foo[bar]", "baz?", `special\`},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo[bar]", Name: "1"},
+				{Schema: "food", Name: "2"},
+				{Schema: "fo", Name: "3"},
+				{Schema: `special\\`, Name: "4"},
+				{Schema: "bazzz", Name: "9"},
+				{Schema: `special\$`, Name: "10"},
+				{Schema: `afooa`, Name: "11"},
+			},
+			rejected: []filter.Table{
+				{Schema: "foor", Name: "5"},
+				{Schema: "baz?", Name: "6"},
+				{Schema: "baza", Name: "7"},
+				{Schema: `special\`, Name: "8"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				DoDBs: []string{`!@#$%^&*\?`},
+			},
+			accepted: []filter.Table{
+				{Schema: `!@#$%^&abcdef\g`, Name: "1"},
+			},
+			rejected: []filter.Table{
+				{Schema: "abcdef", Name: "2"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				DoDBs: []string{"1[!abc]", "2[^abc]", `3[\d]`},
+			},
+			accepted: []filter.Table{
+				{Schema: "1!", Name: "1"},
+				{Schema: "1z", Name: "4"},
+				{Schema: "2^", Name: "3"},
+				{Schema: "2a", Name: "5"},
+				{Schema: "3d", Name: "6"},
+				{Schema: `3\`, Name: "8"},
+			},
+			rejected: []filter.Table{
+				{Schema: "1a", Name: "2"},
+				{Schema: "30", Name: "7"},
+			},
+		},
+		{
+			rules: filter.MySQLReplicationRules{
+				DoDBs:    []string{"foo", "bar"},
+				DoTables: []*filter.Table{{Schema: "*", Name: "a"}, {Schema: "*", Name: "b"}},
+			},
+			accepted: []filter.Table{
+				{Schema: "foo", Name: "a"},
+				{Schema: "foo", Name: "b"},
+				{Schema: "bar", Name: "a"},
+				{Schema: "bar", Name: "b"},
+			},
+			rejected: []filter.Table{
+				{Schema: "foo", Name: "c"},
+				{Schema: "baz", Name: "a"},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		c.Log("test case =", tc.rules)
+		f, err := filter.ParseMySQLReplicationRules(&tc.rules)
+		f = filter.CaseInsensitive(f)
+		c.Assert(err, IsNil)
+		for _, tbl := range tc.accepted {
+			c.Assert(f.MatchTable(tbl.Schema, tbl.Name), IsTrue, Commentf("accept case %v", tbl))
+		}
+		for _, tbl := range tc.rejected {
+			c.Assert(f.MatchTable(tbl.Schema, tbl.Name), IsFalse, Commentf("reject case %v", tbl))
+		}
+	}
+}
+
+func (s *filterSuite) TestParseLegacyFailures(c *C) {
+	cases := []struct {
+		arg string
+		msg string
+	}{
+		{
+			arg: "[a",
+			msg: `error parsing regexp: missing closing \]:.*`,
+		},
+		{
+			arg: "",
+			msg: "pattern cannot be empty",
+		},
+	}
+
+	for _, tc := range cases {
+		_, err := filter.ParseMySQLReplicationRules(&filter.MySQLReplicationRules{
+			DoDBs: []string{tc.arg},
+		})
+		c.Assert(err, ErrorMatches, tc.msg, Commentf("test case = %s", tc.arg))
+	}
+}
diff --git a/pkg/table-filter/filter.go b/pkg/table-filter/filter.go
new file mode 100644
index 000000000..abe64446d
--- /dev/null
+++ b/pkg/table-filter/filter.go
@@ -0,0 +1,109 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+	"strings"
+)
+
+// Filter is a structure to check if a table should be included for processing.
+type Filter interface {
+	// MatchTable checks if a table can be processed after applying the filter.
+	MatchTable(schema string, table string) bool
+	// MatchSchema checks if a schema can be processed after applying the filter.
+	MatchSchema(schema string) bool
+	// toLower changes the filter to compare with case-insensitive strings.
+	toLower() Filter
+}
+
+// filter is a concrete implementation of Filter.
+type filter []rule
+
+// Parse a filter from a list of serialized filter rules. The parsed filter is
+// case-sensitive by default.
+func Parse(args []string) (Filter, error) {
+	p := parser{
+		rules:    make([]rule, 0, len(args)),
+		fileName: "<cmdline>",
+		lineNum:  1,
+	}
+
+	for _, arg := range args {
+		if err := p.parse(arg, true); err != nil {
+			return nil, err
+		}
+	}
+
+	// https://github.com/golang/go/wiki/SliceTricks#reversing.
+	rules := p.rules
+	for i := len(rules)/2 - 1; i >= 0; i-- {
+		opp := len(rules) - 1 - i
+		rules[i], rules[opp] = rules[opp], rules[i]
+	}
+	return filter(rules), nil
+}
+
+// CaseInsensitive returns a new filter which is the case-insensitive version of
+// the input filter.
+func CaseInsensitive(f Filter) Filter {
+	return loweredFilter{wrapped: f.toLower()}
+}
+
+// MatchTable checks if a table can be processed after applying the filter `f`.
+func (f filter) MatchTable(schema string, table string) bool {
+	for _, rule := range f {
+		if rule.schema.matchString(schema) && rule.table.matchString(table) {
+			return rule.positive
+		}
+	}
+	return false
+}
+
+// MatchSchema checks if a schema can be processed after applying the filter `f`.
+func (f filter) MatchSchema(schema string) bool {
+	for _, rule := range f {
+		if rule.schema.matchString(schema) && (rule.positive || rule.table.matchAllStrings()) {
+			return rule.positive
+		}
+	}
+	return false
+}
+
+func (f filter) toLower() Filter {
+	rules := make([]rule, 0, len(f))
+	for _, r := range f {
+		rules = append(rules, rule{
+			schema:   r.schema.toLower(),
+			table:    r.table.toLower(),
+			positive: r.positive,
+		})
+	}
+	return filter(rules)
+}
+
+type loweredFilter struct {
+	wrapped Filter
+}
+
+func (f loweredFilter) MatchTable(schema string, table string) bool {
+	return f.wrapped.MatchTable(strings.ToLower(schema), strings.ToLower(table))
+}
+
+func (f loweredFilter) MatchSchema(schema string) bool {
+	return f.wrapped.MatchSchema(strings.ToLower(schema))
+}
+
+func (f loweredFilter) toLower() Filter {
+	return f
+}
diff --git a/pkg/table-filter/filter_test.go b/pkg/table-filter/filter_test.go
new file mode 100644
index 000000000..3230056ec
--- /dev/null
+++ b/pkg/table-filter/filter_test.go
@@ -0,0 +1,448 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter_test
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"testing"
+
+	. "github.com/pingcap/check"
+
+	filter "github.com/pingcap/tidb-tools/pkg/table-filter"
+)
+
+// Test hooks the gocheck suites in this package into `go test`.
+func Test(t *testing.T) {
+	TestingT(t)
+}
+
+type filterSuite struct{}
+
+var _ = Suite(&filterSuite{})
+
+// TestMatchTables feeds each argument list into filter.Parse and verifies the
+// accept/reject verdict of MatchTable for every probe table, both on the
+// case-sensitive filter (acceptedCS) and on its CaseInsensitive wrapper
+// (acceptedCI).
+func (s *filterSuite) TestMatchTables(c *C) {
+	cases := []struct {
+		args       []string       // filter rules, in command-line order
+		tables     []filter.Table // probe tables
+		acceptedCS []bool         // expected results, case-sensitive
+		acceptedCI []bool         // expected results, case-insensitive
+	}{
+		{
+			args: nil,
+			tables: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+			},
+			acceptedCS: []bool{false},
+			acceptedCI: []bool{false},
+		},
+		{
+			args: []string{"*.*"},
+			tables: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+			},
+			acceptedCS: []bool{true},
+			acceptedCI: []bool{true},
+		},
+		{
+			args: []string{"foo.*"},
+			tables: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+				{Schema: "foo1", Name: "bar"},
+				{Schema: "foo2", Name: "bar"},
+			},
+			acceptedCS: []bool{true, false, false},
+			acceptedCI: []bool{true, false, false},
+		},
+		{
+			args: []string{"*.*", "!foo1.*"},
+			tables: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+				{Schema: "foo1", Name: "bar"},
+				{Schema: "foo2", Name: "bar"},
+			},
+			acceptedCS: []bool{true, false, true},
+			acceptedCI: []bool{true, false, true},
+		},
+		{
+			args: []string{"foo.bar1"},
+			tables: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+				{Schema: "foo", Name: "bar1"},
+				{Schema: "fff", Name: "bar1"},
+			},
+			acceptedCS: []bool{false, true, false},
+			acceptedCI: []bool{false, true, false},
+		},
+		{
+			args: []string{"*.*", "!foo.bar"},
+			tables: []filter.Table{
+				{Schema: "foo", Name: "bar"},
+				{Schema: "foo", Name: "bar1"},
+				{Schema: "fff", Name: "bar1"},
+			},
+			acceptedCS: []bool{false, true, true},
+			acceptedCI: []bool{false, true, true},
+		},
+		{
+			// /regexp/ patterns; unanchored, so `/^foo/` matches "foo1" too.
+			args: []string{"/^foo/.*", `!/^foo/./^sbtest-\d/`},
+			tables: []filter.Table{
+				{Schema: "foo", Name: "sbtest"},
+				{Schema: "foo1", Name: "sbtest-1"},
+				{Schema: "fff", Name: "bar"},
+			},
+			acceptedCS: []bool{true, false, false},
+			acceptedCI: []bool{true, false, false},
+		},
+		{
+			// wildcard metacharacters and backslash escapes in schema names.
+			args: []string{"*.*", "!foo[bar].*", "!bar?.*", `!special\\.*`},
+			tables: []filter.Table{
+				{Schema: "foor", Name: "a"},
+				{Schema: "foo[bar]", Name: "b"},
+				{Schema: "ba", Name: "c"},
+				{Schema: "bar?", Name: "d"},
+				{Schema: `special\`, Name: "e"},
+				{Schema: `special\\`, Name: "f"},
+				{Schema: "bazzz", Name: "g"},
+				{Schema: `special\$`, Name: "h"},
+				{Schema: `afooa`, Name: "i"},
+			},
+			acceptedCS: []bool{false, true, true, false, false, true, true, true, true},
+			acceptedCI: []bool{false, true, true, false, false, true, true, true, true},
+		},
+		{
+			args: []string{"*.*", "!/^FOO/.*", "!*./FoO$/"},
+			tables: []filter.Table{
+				{Schema: "FOO1", Name: "a"},
+				{Schema: "foo2", Name: "b"},
+				{Schema: "BoO3", Name: "cFoO"},
+				{Schema: "Foo4", Name: "dfoo"},
+				{Schema: "5", Name: "5"},
+			},
+			acceptedCS: []bool{false, true, false, true, true},
+			acceptedCI: []bool{false, false, false, false, true},
+		},
+		{
+			args: []string{"*.*", "!a?b?./f[0-9]/"},
+			tables: []filter.Table{
+				{Schema: "abbd", Name: "f1"},
+				{Schema: "aaaa", Name: "f2"},
+				{Schema: "5", Name: "5"},
+				{Schema: "abbc", Name: "fa"},
+			},
+			acceptedCS: []bool{false, true, true, true},
+			acceptedCI: []bool{false, true, true, true},
+		},
+		{
+			args: []string{"*.*", "!/t[0-8]/.a??"},
+			tables: []filter.Table{
+				{Schema: "t1", Name: "a01"},
+				{Schema: "t9", Name: "a02"},
+				{Schema: "5", Name: "5"},
+				{Schema: "t8", Name: "a001"},
+			},
+			acceptedCS: []bool{false, true, true, true},
+			acceptedCI: []bool{false, true, true, true},
+		},
+		{
+			args: []string{"*.*", "!a*.A*"},
+			tables: []filter.Table{
+				{Schema: "aB", Name: "Ab"},
+				{Schema: "AaB", Name: "aab"},
+				{Schema: "acB", Name: "Afb"},
+			},
+			acceptedCS: []bool{false, true, false},
+			acceptedCI: []bool{false, false, false},
+		},
+		{
+			args: []string{"BAR.*"},
+			tables: []filter.Table{
+				{Schema: "bar", Name: "a"},
+				{Schema: "BAR", Name: "a"},
+			},
+			acceptedCS: []bool{false, true},
+			acceptedCI: []bool{true, true},
+		},
+		{
+			// comment lines and blank/whitespace lines are ignored.
+			args: []string{"# comment", "x.y", "   \t"},
+			tables: []filter.Table{
+				{Schema: "x", Name: "y"},
+				{Schema: "y", Name: "y"},
+			},
+			acceptedCS: []bool{true, false},
+			acceptedCI: []bool{true, false},
+		},
+		{
+			// `$` and non-ASCII characters are valid literal identifier bytes.
+			args: []string{"p_123$.45", "中文.表名"},
+			tables: []filter.Table{
+				{Schema: "p_123", Name: "45"},
+				{Schema: "p_123$", Name: "45"},
+				{Schema: "英文", Name: "表名"},
+				{Schema: "中文", Name: "表名"},
+			},
+			acceptedCS: []bool{false, true, false, true},
+			acceptedCI: []bool{false, true, false, true},
+		},
+		{
+			args: []string{`\\\..*`},
+			tables: []filter.Table{
+				{Schema: `\.`, Name: "a"},
+				{Schema: `\\\.`, Name: "b"},
+				{Schema: `\a`, Name: "c"},
+			},
+			acceptedCS: []bool{true, false, false},
+			acceptedCI: []bool{true, false, false},
+		},
+		{
+			// both `[!...]` (glob style) and `[^...]` negate a character class.
+			args: []string{"[!a-z].[^a-z]"},
+			tables: []filter.Table{
+				{Schema: "!", Name: "z"},
+				{Schema: "!", Name: "^"},
+				{Schema: "!", Name: "9"},
+				{Schema: "a", Name: "z"},
+				{Schema: "a", Name: "^"},
+				{Schema: "a", Name: "9"},
+				{Schema: "1", Name: "z"},
+				{Schema: "1", Name: "^"},
+				{Schema: "1", Name: "9"},
+			},
+			acceptedCS: []bool{true, true, false, false, false, false, true, true, false},
+			acceptedCI: []bool{true, true, false, false, false, false, true, true, false},
+		},
+		{
+			// quoted identifiers are exact literals; `""` un-escapes to `"`.
+			args: []string{"\"some \"\"quoted\"\"\".`identifiers?`"},
+			tables: []filter.Table{
+				{Schema: `some "quoted"`, Name: "identifiers?"},
+				{Schema: `some "quoted"`, Name: "identifiers!"},
+				{Schema: `some ""quoted""`, Name: "identifiers?"},
+				{Schema: `SOME "QUOTED"`, Name: "IDENTIFIERS?"},
+				{Schema: "some\t\"quoted\"", Name: "identifiers?"},
+			},
+			acceptedCS: []bool{true, false, false, false, false},
+			acceptedCI: []bool{true, false, false, true, false},
+		},
+		{
+			// a later positive rule re-accepts what an earlier negative denied.
+			args: []string{"db*.*", "!*.cfg*", "*.cfgsample"},
+			tables: []filter.Table{
+				{Schema: "irrelevant", Name: "table"},
+				{Schema: "db1", Name: "tbl1"},
+				{Schema: "db1", Name: "cfg1"},
+				{Schema: "db1", Name: "cfgsample"},
+				{Schema: "else", Name: "cfgsample"},
+			},
+			acceptedCS: []bool{false, true, false, true, true},
+			acceptedCI: []bool{false, true, false, true, true},
+		},
+	}
+
+	for _, tc := range cases {
+		c.Log("test case =", tc.args)
+		fcs, err := filter.Parse(tc.args)
+		c.Assert(err, IsNil)
+		fci := filter.CaseInsensitive(fcs)
+		for i, tbl := range tc.tables {
+			c.Assert(fcs.MatchTable(tbl.Schema, tbl.Name), Equals, tc.acceptedCS[i], Commentf("cs tbl %v", tbl))
+			c.Assert(fci.MatchTable(tbl.Schema, tbl.Name), Equals, tc.acceptedCI[i], Commentf("ci tbl %v", tbl))
+		}
+	}
+}
+
+// TestMatchSchemas feeds each argument list into filter.Parse and checks
+// MatchSchema on every probe schema, for both the case-sensitive filter
+// (acceptedCS) and its CaseInsensitive wrapper (acceptedCI).
+func (s *filterSuite) TestMatchSchemas(c *C) {
+	cases := []struct {
+		args       []string // filter rules, in command-line order
+		schemas    []string // probe schema names
+		acceptedCS []bool   // expected results, case-sensitive
+		acceptedCI []bool   // expected results, case-insensitive
+	}{
+		{
+			args:       nil,
+			schemas:    []string{"foo"},
+			acceptedCS: []bool{false},
+			acceptedCI: []bool{false},
+		},
+		{
+			args:       []string{"*.*"},
+			schemas:    []string{"foo"},
+			acceptedCS: []bool{true},
+			acceptedCI: []bool{true},
+		},
+		{
+			args:       []string{"foo.*"},
+			schemas:    []string{"foo", "foo1"},
+			acceptedCS: []bool{true, false},
+			acceptedCI: []bool{true, false},
+		},
+		{
+			args:       []string{"*.*", "!foo1.*"},
+			schemas:    []string{"foo", "foo1"},
+			acceptedCS: []bool{true, false},
+			acceptedCI: []bool{true, false},
+		},
+		{
+			args:       []string{"foo.bar1"},
+			schemas:    []string{"foo", "foo1"},
+			acceptedCS: []bool{true, false},
+			acceptedCI: []bool{true, false},
+		},
+		{
+			// negative rule covering only one table does not reject the schema.
+			args:       []string{"*.*", "!foo.bar"},
+			schemas:    []string{"foo", "foo1"},
+			acceptedCS: []bool{true, true},
+			acceptedCI: []bool{true, true},
+		},
+		{
+			args:       []string{"/^foo/.*", `!/^foo/./^sbtest-\d/`},
+			schemas:    []string{"foo", "foo2"},
+			acceptedCS: []bool{true, true},
+			acceptedCI: []bool{true, true},
+		},
+		{
+			args:       []string{"*.*", "!FOO*.*", "!*.*FoO"},
+			schemas:    []string{"foo", "FOO", "foobar", "FOOBAR", "bar", "BAR"},
+			acceptedCS: []bool{true, false, true, false, true, true},
+			acceptedCI: []bool{false, false, false, false, true, true},
+		},
+	}
+
+	for _, tc := range cases {
+		c.Log("test case =", tc.args)
+		fcs, err := filter.Parse(tc.args)
+		c.Assert(err, IsNil)
+		fci := filter.CaseInsensitive(fcs)
+		for i, schema := range tc.schemas {
+			c.Assert(fcs.MatchSchema(schema), Equals, tc.acceptedCS[i], Commentf("cs schema %s", schema))
+			c.Assert(fci.MatchSchema(schema), Equals, tc.acceptedCI[i], Commentf("ci schema %s", schema))
+		}
+	}
+}
+
+// TestParseFailures checks that malformed filter rules are rejected by Parse,
+// and that the error messages match the expected patterns (msg is a regexp).
+func (s *filterSuite) TestParseFailures(c *C) {
+	cases := []struct {
+		arg string // the single filter rule to parse
+		msg string // regexp the resulting error message must match
+	}{
+		{
+			// RE2 does not support look-around assertions such as (?! / (?=.
+			arg: "/^t[0-9]+((?!_copy).)*$/.*",
+			msg: ".*: invalid pattern: error parsing regexp:.*",
+		},
+		{
+			arg: "/^t[0-9]+sp(?=copy).*/.*",
+			msg: ".*: invalid pattern: error parsing regexp:.*",
+		},
+		{
+			arg: "a.b.c",
+			msg: ".*: syntax error: stray characters after table pattern",
+		},
+		{
+			arg: "a%b.c",
+			msg: ".*: unexpected special character '%'",
+		},
+		{
+			// escaping letters/digits is reserved for future extension.
+			arg: `a\tb.c`,
+			msg: `.*: cannot escape a letter or number \(\\t\), it is reserved for future extension`,
+		},
+		{
+			arg: "[].*",
+			msg: ".*: syntax error: failed to parse character class",
+		},
+		{
+			arg: "[!].*",
+			msg: `.*: invalid pattern: error parsing regexp: missing closing \]:.*`,
+		},
+		{
+			arg: "[.*",
+			msg: `.*: syntax error: failed to parse character class`,
+		},
+		{
+			arg: `[\d\D].*`,
+			msg: `.*: syntax error: failed to parse character class`,
+		},
+		{
+			arg: "db",
+			msg: `.*: missing table pattern`,
+		},
+		{
+			arg: "db.",
+			msg: `.*: syntax error: missing pattern`,
+		},
+		{
+			arg: "`db`*.*",
+			msg: `.*: syntax error: missing '\.' between schema and table patterns`,
+		},
+		{
+			arg: "/db.*",
+			msg: `.*: syntax error: incomplete regexp`,
+		},
+		{
+			arg: "`db.*",
+			msg: `.*: syntax error: incomplete quoted identifier`,
+		},
+		{
+			arg: `"db.*`,
+			msg: `.*: syntax error: incomplete quoted identifier`,
+		},
+		{
+			arg: `db\`,
+			msg: `.*: syntax error: cannot place \\ at end of line`,
+		},
+		{
+			arg: "db.tbl#not comment",
+			msg: `.*: unexpected special character '#'`,
+		},
+	}
+
+	for _, tc := range cases {
+		_, err := filter.Parse([]string{tc.arg})
+		c.Assert(err, ErrorMatches, tc.msg, Commentf("test case = %s", tc.arg))
+	}
+}
+
+// TestImport checks that `@file` arguments import rules from filter files,
+// and that rules from multiple files plus the command line combine in order.
+func (s *filterSuite) TestImport(c *C) {
+	dir := c.MkDir()
+	path1 := filepath.Join(dir, "1.txt")
+	path2 := filepath.Join(dir, "2.txt")
+	// Assert on the write errors instead of discarding them, so an I/O
+	// failure surfaces here rather than as a confusing Parse error below.
+	err := ioutil.WriteFile(path1, []byte(`
+		db?.tbl?
+		db02.tbl02
+	`), 0644)
+	c.Assert(err, IsNil)
+	err = ioutil.WriteFile(path2, []byte(`
+		db03.tbl03
+		!db4.tbl4
+	`), 0644)
+	c.Assert(err, IsNil)
+
+	f, err := filter.Parse([]string{"@" + path1, "@" + path2, "db04.tbl04"})
+	c.Assert(err, IsNil)
+
+	c.Assert(f.MatchTable("db1", "tbl1"), IsTrue)
+	c.Assert(f.MatchTable("db2", "tbl2"), IsTrue)
+	c.Assert(f.MatchTable("db3", "tbl3"), IsTrue)
+	c.Assert(f.MatchTable("db4", "tbl4"), IsFalse)
+	c.Assert(f.MatchTable("db01", "tbl01"), IsFalse)
+	c.Assert(f.MatchTable("db02", "tbl02"), IsTrue)
+	c.Assert(f.MatchTable("db03", "tbl03"), IsTrue)
+	c.Assert(f.MatchTable("db04", "tbl04"), IsTrue)
+}
+
+// TestRecursiveImport checks that an imported filter file cannot itself
+// import another file, and that a missing file yields a readable error.
+func (s *filterSuite) TestRecursiveImport(c *C) {
+	dir := c.MkDir()
+	path3 := filepath.Join(dir, "3.txt")
+	path4 := filepath.Join(dir, "4.txt")
+	// Assert on the write errors instead of silently discarding them.
+	c.Assert(ioutil.WriteFile(path3, []byte("db1.tbl1"), 0644), IsNil)
+	c.Assert(ioutil.WriteFile(path4, []byte("# comment\n\n@"+path3), 0644), IsNil)
+
+	_, err := filter.Parse([]string{"@" + path4})
+	c.Assert(err, ErrorMatches, `.*4\.txt:3: importing filter files recursively is not allowed`)
+
+	_, err = filter.Parse([]string{"@" + filepath.Join(dir, "5.txt")})
+	c.Assert(err, ErrorMatches, `.*: cannot open filter file: open .*5\.txt: .*`)
+}
diff --git a/pkg/table-filter/matchers.go b/pkg/table-filter/matchers.go
new file mode 100644
index 000000000..f752ebf89
--- /dev/null
+++ b/pkg/table-filter/matchers.go
@@ -0,0 +1,96 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/pingcap/errors"
+)
+
+// rule of a filter, consisting of a schema and table pattern, and may be an
+// accept-list (positive) or deny-list (negative).
+type rule struct {
+	schema   matcher // pattern matched against the schema (database) name
+	table    matcher // pattern matched against the table name
+	positive bool    // true = accept-list rule; false = deny-list rule
+}
+
+// matcher matches a name against a pattern.
+type matcher interface {
+	// matchString reports whether the pattern matches the given name.
+	matchString(name string) bool
+	// matchAllStrings reports whether this pattern matches every possible
+	// string (i.e. it came from the `*` wildcard).
+	matchAllStrings() bool
+	// toLower returns a case-insensitive variant of this matcher.
+	toLower() matcher
+}
+
+// stringMatcher is a matcher holding a literal string; it matches a name only
+// when the name equals the literal exactly.
+type stringMatcher string
+
+// matchString reports whether name equals the literal.
+func (m stringMatcher) matchString(name string) bool {
+	return name == string(m)
+}
+
+// matchAllStrings is always false: a literal never matches everything.
+func (stringMatcher) matchAllStrings() bool {
+	return false
+}
+
+// toLower folds the literal to lower case for case-insensitive matching.
+func (m stringMatcher) toLower() matcher {
+	lowered := strings.ToLower(string(m))
+	return stringMatcher(lowered)
+}
+
+// trueMatcher is a matcher which matches everything. The `*` pattern.
+type trueMatcher struct{}
+
+// matchString always succeeds.
+func (trueMatcher) matchString(string) bool {
+	return true
+}
+
+// matchAllStrings reports that this matcher accepts every string.
+func (trueMatcher) matchAllStrings() bool {
+	return true
+}
+
+// toLower returns the matcher unchanged: `*` is case-independent.
+func (m trueMatcher) toLower() matcher {
+	return m
+}
+
+// regexpMatcher is a matcher based on a regular expression.
+type regexpMatcher struct {
+	pattern *regexp.Regexp
+}
+
+// newRegexpMatcher compiles pat into a matcher. The exact pattern produced by
+// the `*` wildcard is special-cased into a trueMatcher, so matchAllStrings
+// can report it.
+func newRegexpMatcher(pat string) (matcher, error) {
+	if pat == "(?s)^.*$" {
+		// special case for '*'
+		return trueMatcher{}, nil
+	}
+	pattern, err := regexp.Compile(pat)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	return regexpMatcher{pattern: pattern}, nil
+}
+
+// matchString reports whether the regexp finds a match in name. Patterns
+// built from wildcards are anchored with ^...$ by the parser; hand-written
+// /regexp/ rules match any substring unless they anchor themselves.
+func (m regexpMatcher) matchString(name string) bool {
+	return m.pattern.MatchString(name)
+}
+
+// matchAllStrings is false; only `*` (trueMatcher) matches everything.
+func (regexpMatcher) matchAllStrings() bool {
+	return false
+}
+
+// toLower makes the regexp case-insensitive by prefixing the (?i) flag.
+// Prefixing a flag group to an already-compiled (hence valid) pattern keeps
+// it valid, so MustCompile cannot panic here.
+func (m regexpMatcher) toLower() matcher {
+	pattern := regexp.MustCompile("(?i)" + m.pattern.String())
+	return regexpMatcher{pattern: pattern}
+}
diff --git a/pkg/table-filter/parser.go b/pkg/table-filter/parser.go
new file mode 100644
index 000000000..74cba9d44
--- /dev/null
+++ b/pkg/table-filter/parser.go
@@ -0,0 +1,273 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filter
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"regexp"
+	"strings"
+
+	"github.com/pingcap/errors"
+)
+
+// parser accumulates filter rules parsed from command-line arguments and
+// imported filter files, tracking the current input position for error
+// messages.
+type parser struct {
+	rules    []rule // rules parsed so far, in input order
+	fileName string // current source name ("<cmdline>" or an imported file path)
+	lineNum  int64  // 1-based line number within fileName
+}
+
+// wrapErrorFormat prefixes format with the current "file:line" position.
+// Any `%` in the file name is doubled so the result remains a valid format
+// string for the subsequent Errorf/Annotatef call.
+func (p *parser) wrapErrorFormat(format string) string {
+	return fmt.Sprintf("at %s:%d: %s", strings.ReplaceAll(p.fileName, "%", "%%"), p.lineNum, format)
+}
+
+// errorf creates a new error carrying the current parse position.
+func (p *parser) errorf(format string, args ...interface{}) error {
+	return errors.Errorf(p.wrapErrorFormat(format), args...)
+}
+
+// annotatef wraps an existing error with the current parse position.
+func (p *parser) annotatef(err error, format string, args ...interface{}) error {
+	return errors.Annotatef(err, p.wrapErrorFormat(format), args...)
+}
+
+// parse interprets one line of filter input. Blank lines and `#` comments are
+// ignored; a leading `!` marks the rule as negative (deny-list); a leading
+// `@` imports another filter file when canImport is true. Otherwise the line
+// must be a `schema.table` pattern pair, which is appended to p.rules.
+func (p *parser) parse(line string, canImport bool) (err error) {
+	line = strings.Trim(line, " \t")
+	if len(line) == 0 {
+		return nil
+	}
+
+	positive := true
+	switch line[0] {
+	case '#':
+		// comment line.
+		return nil
+	case '!':
+		positive = false
+		line = line[1:]
+	case '@':
+		if !canImport {
+			// FIXME: should we relax this?
+			return p.errorf("importing filter files recursively is not allowed")
+		}
+		// FIXME: can't deal with file names which ends in spaces (perhaps not a big deal)
+		return p.importFile(line[1:])
+	}
+
+	var sm, tm matcher
+
+	// schema pattern, then a mandatory '.', then the table pattern.
+	sm, line, err = p.parsePattern(line)
+	if err != nil {
+		return err
+	}
+	if len(line) == 0 {
+		return p.errorf("missing table pattern")
+	}
+	if line[0] != '.' {
+		return p.errorf("syntax error: missing '.' between schema and table patterns")
+	}
+
+	tm, line, err = p.parsePattern(line[1:])
+	if err != nil {
+		return err
+	}
+	if len(line) != 0 {
+		return p.errorf("syntax error: stray characters after table pattern")
+	}
+
+	p.rules = append(p.rules, rule{
+		schema:   sm,
+		table:    tm,
+		positive: positive,
+	})
+	return nil
+}
+
+// importFile parses every line of the named filter file as filter rules,
+// with nested imports disallowed (canImport=false). The parser's position
+// (fileName/lineNum) is redirected into the imported file for the duration,
+// so error messages point at the right location.
+func (p *parser) importFile(fileName string) error {
+	file, err := os.Open(fileName)
+	if err != nil {
+		return p.annotatef(err, "cannot open filter file")
+	}
+	defer file.Close()
+
+	oldFileName, oldLineNum := p.fileName, p.lineNum
+	p.fileName, p.lineNum = fileName, 1
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		if err := p.parse(scanner.Text(), false); err != nil {
+			return err
+		}
+		p.lineNum++
+	}
+
+	// Check the scanner's error while fileName/lineNum still point into the
+	// imported file, so a read failure is attributed to this file instead of
+	// the importing location.
+	if err := scanner.Err(); err != nil {
+		return p.annotatef(err, "cannot read filter file")
+	}
+
+	p.fileName, p.lineNum = oldFileName, oldLineNum
+	return nil
+}
+
+var (
+	// regexpRegexp recognizes a /regexp/ pattern; escaped characters
+	// (including `\/`) are allowed inside the slashes.
+	regexpRegexp        = regexp.MustCompile(`^/(?:\\.|[^/])+/`)
+	// doubleQuotedRegexp recognizes a "quoted identifier" where a doubled
+	// `""` stands for one literal double quote.
+	doubleQuotedRegexp  = regexp.MustCompile(`^"(?:""|[^"])+"`)
+	// backquotedRegexp recognizes a backquoted identifier where a doubled
+	// backquote stands for one literal backquote.
+	backquotedRegexp    = regexp.MustCompile("^`(?:``|[^`])+`")
+	// wildcardRangeRegexp recognizes a [...] or [!...] character class;
+	// members are either `\` followed by a non-alphanumeric (escapes of
+	// letters/digits are reserved) or any byte other than `\` and `]`.
+	wildcardRangeRegexp = regexp.MustCompile(`^\[!?(?:\\[^0-9a-zA-Z]|[^\\\]])+\]`)
+)
+
+// newRegexpMatcher compiles pat into a matcher, annotating any compilation
+// failure with the current parse position.
+func (p *parser) newRegexpMatcher(pat string) (matcher, error) {
+	m, err := newRegexpMatcher(pat)
+	if err == nil {
+		return m, nil
+	}
+	return nil, p.annotatef(err, "invalid pattern")
+}
+
+// parsePattern parses one schema or table pattern at the start of line and
+// returns the resulting matcher together with the remainder of the line.
+// A pattern is either a /regexp/, a "double-quoted" or `backquoted` literal
+// identifier, or a wildcard/plain pattern handled by parseWildcardPattern.
+func (p *parser) parsePattern(line string) (matcher, string, error) {
+	if len(line) == 0 {
+		return nil, "", p.errorf("syntax error: missing pattern")
+	}
+
+	switch line[0] {
+	case '/':
+		// a regexp pattern
+		loc := regexpRegexp.FindStringIndex(line)
+		if len(loc) < 2 {
+			return nil, "", p.errorf("syntax error: incomplete regexp")
+		}
+		// strip the surrounding slashes before compiling.
+		m, err := p.newRegexpMatcher(line[1 : loc[1]-1])
+		if err != nil {
+			return nil, "", err
+		}
+		return m, line[loc[1]:], nil
+
+	case '"':
+		// a double-quoted pattern; `""` un-escapes to a single `"`.
+		loc := doubleQuotedRegexp.FindStringIndex(line)
+		if len(loc) < 2 {
+			return nil, "", p.errorf("syntax error: incomplete quoted identifier")
+		}
+		name := strings.ReplaceAll(line[1:loc[1]-1], `""`, `"`)
+		return stringMatcher(name), line[loc[1]:], nil
+
+	case '`':
+		// a backquoted pattern; a doubled backquote un-escapes to one.
+		loc := backquotedRegexp.FindStringIndex(line)
+		if len(loc) < 2 {
+			return nil, "", p.errorf("syntax error: incomplete quoted identifier")
+		}
+		name := strings.ReplaceAll(line[1:loc[1]-1], "``", "`")
+		return stringMatcher(name), line[loc[1]:], nil
+
+	default:
+		// wildcard or literal string.
+		return p.parseWildcardPattern(line)
+	}
+}
+
+// isASCIIAlphanumeric reports whether b is an ASCII letter or digit.
+func isASCIIAlphanumeric(b byte) bool {
+	switch {
+	case b >= '0' && b <= '9':
+		return true
+	case b >= 'a' && b <= 'z':
+		return true
+	case b >= 'A' && b <= 'Z':
+		return true
+	default:
+		return false
+	}
+}
+
+// parseWildcardPattern parses a wildcard or literal-string pattern at the
+// start of line, returning the matcher together with the rest of the line
+// after the pattern. Recognized wildcards are `*` (any string), `?` (any one
+// character) and `[...]`/`[!...]` (character class); other characters must be
+// identifier characters ($, _, letters, digits, non-ASCII) or `\`-escaped.
+// A pattern without wildcards becomes an exact-match stringMatcher; otherwise
+// it is translated into an anchored regular expression.
+func (p *parser) parseWildcardPattern(line string) (matcher, string, error) {
+	var (
+		literalStringBuilder   strings.Builder
+		wildcardPatternBuilder strings.Builder
+		isLiteralString        = true
+		i                      = 0
+	)
+	literalStringBuilder.Grow(len(line))
+	wildcardPatternBuilder.Grow(len(line) + 6)
+	// (?s) makes `.` match any character, so `*` and `?` cover everything.
+	wildcardPatternBuilder.WriteString("(?s)^")
+
+parseLoop:
+	for i < len(line) {
+		c := line[i]
+		switch c {
+		case '\\':
+			// escape character
+			if i == len(line)-1 {
+				return nil, "", p.errorf(`syntax error: cannot place \ at end of line`)
+			}
+			esc := line[i+1]
+			if isASCIIAlphanumeric(esc) {
+				return nil, "", p.errorf(`cannot escape a letter or number (\%c), it is reserved for future extension`, esc)
+			}
+			if isLiteralString {
+				literalStringBuilder.WriteByte(esc)
+			}
+			// only regexp-escape ASCII bytes; escaping one byte of a
+			// multi-byte UTF-8 sequence would corrupt the pattern.
+			if esc < 0x80 {
+				wildcardPatternBuilder.WriteByte('\\')
+			}
+			wildcardPatternBuilder.WriteByte(esc)
+
+			i += 2
+
+		case '.':
+			// table separator, end now.
+			break parseLoop
+
+		case '*':
+			// wildcard
+			isLiteralString = false
+			wildcardPatternBuilder.WriteString(".*")
+			i++
+
+		case '?':
+			isLiteralString = false
+			wildcardPatternBuilder.WriteByte('.')
+			i++
+
+		case '[':
+			// range of characters
+			isLiteralString = false
+			rangeLoc := wildcardRangeRegexp.FindStringIndex(line[i:])
+			if len(rangeLoc) < 2 {
+				return nil, "", p.errorf("syntax error: failed to parse character class")
+			}
+			end := i + rangeLoc[1]
+			// Inspect the byte right after *this* class's `[` — i.e.
+			// line[i+1], not line[1], which is only correct when i == 0.
+			switch line[i+1] {
+			case '!':
+				// glob-style negation; translate to regexp `[^...]`.
+				wildcardPatternBuilder.WriteString("[^")
+				wildcardPatternBuilder.WriteString(line[i+2 : end])
+			case '^': // `[^` is not special in a glob pattern. escape it.
+				wildcardPatternBuilder.WriteString(`[\^`)
+				wildcardPatternBuilder.WriteString(line[i+2 : end])
+			default:
+				wildcardPatternBuilder.WriteString(line[i:end])
+			}
+			i = end
+
+		default:
+			if c == '$' || c == '_' || isASCIIAlphanumeric(c) || c >= 0x80 {
+				literalStringBuilder.WriteByte(c)
+				wildcardPatternBuilder.WriteByte(c)
+				i++
+			} else {
+				return nil, "", p.errorf("unexpected special character '%c'", c)
+			}
+		}
+	}
+
+	line = line[i:]
+	if isLiteralString {
+		return stringMatcher(literalStringBuilder.String()), line, nil
+	}
+	wildcardPatternBuilder.WriteByte('$')
+	m, err := p.newRegexpMatcher(wildcardPatternBuilder.String())
+	if err != nil {
+		return nil, "", err
+	}
+	return m, line, nil
+}

From 10542639c5bbc3c45c6220b68d086b55e592d2ab Mon Sep 17 00:00:00 2001
From: kennytm <kennytm@gmail.com>
Date: Fri, 15 May 2020 18:14:48 +0800
Subject: [PATCH 2/2] table-filter: fix build error

---
 pkg/filter/filter.go       | 8 --------
 pkg/table-filter/compat.go | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pkg/filter/filter.go b/pkg/filter/filter.go
index 41c24b588..ecf0e3896 100644
--- a/pkg/filter/filter.go
+++ b/pkg/filter/filter.go
@@ -35,14 +35,6 @@ const (
 // Table represents a table.
 type Table = tfilter.Table
 
-// Clone clones a new filter.Table
-func (t *Table) Clone() *Table {
-	return &Table{
-		Schema: t.Schema,
-		Name:   t.Name,
-	}
-}
-
 type cache struct {
 	sync.RWMutex
 	items map[string]ActionType // `schema`.`table` => do/ignore
diff --git a/pkg/table-filter/compat.go b/pkg/table-filter/compat.go
index a15a4d1a3..c5b2adf11 100644
--- a/pkg/table-filter/compat.go
+++ b/pkg/table-filter/compat.go
@@ -40,6 +40,14 @@ func (t *Table) String() string {
 	return fmt.Sprintf("`%s`", t.Schema)
 }
 
+// Clone returns a copy of the Table.
+func (t *Table) Clone() *Table {
+	return &Table{
+		Schema: t.Schema,
+		Name:   t.Name,
+	}
+}
+
 // MySQLReplicationRules is a set of rules based on MySQL's replication filter.
 type MySQLReplicationRules struct {
 	// DoTables is a whitelist of tables.