Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lexer: replace version comments with feature-ids mechanism (#777) #829

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 52 additions & 8 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ func startWithSlash(s *Scanner) (tok int, pos Pos, lit string) {
case '!': // '/*!' MySQL-specific comments
// See http://dev.mysql.com/doc/refman/5.7/en/comments.html
// in '/*!', which we always recognize regardless of version.
_ = s.scanVersionDigits(5, 5)
s.scanVersionDigits(5, 5)
s.inBangComment = true
return s.scan()

Expand All @@ -358,9 +358,9 @@ func startWithSlash(s *Scanner) (tok int, pos Pos, lit string) {
break
}
s.r.inc()
// in '/*T!', try to consume the 5 to 6 digit version string.
commentVersion := s.scanVersionDigits(5, 6)
if commentVersion <= CommentCodeCurrentVersion {
// in '/*T!', try to match the pattern '/*T![feature1,feature2,...]'.
features := s.scanFeatureIDs()
if SpecialCommentsController.ContainsAll(features) {
s.inBangComment = true
return s.scan()
}
Expand Down Expand Up @@ -746,21 +746,65 @@ func (s *Scanner) scanDigits() string {

// scanVersionDigits scans for `min` to `max` digits (range inclusive) used in
// `/*!12345 ... */` comments.
//
// It consumes at most `max` consecutive digits from the reader. If a non-digit
// is met before `min` digits have been consumed, the reader position is
// restored to where the scan started, so nothing is consumed.
func (s *Scanner) scanVersionDigits(min, max int) (version CommentCodeVersion) {
func (s *Scanner) scanVersionDigits(min, max int) {
// Remember the starting position so a too-short digit run can be undone.
pos := s.r.pos()
for i := 0; i < max; i++ {
ch := s.r.peek()
if isDigit(ch) {
version = version*10 + CommentCodeVersion(ch-'0')
s.r.inc()
} else if i < min {
// Fewer than `min` digits: rewind and give up.
s.r.p = pos
return CommentCodeNoVersion
return
} else {
// At least `min` digits were consumed; stop at the first non-digit.
break
}
}
return
}

// scanFeatureIDs tries to consume a bracketed, comma-separated list of feature
// IDs of the form `[feature1,feature2,...]` starting at the current reader
// position, and returns the IDs in the order they appear.
//
// Every ID must consist of one or more characters accepted by isIdentChar; no
// other characters (such as whitespace) are allowed between the brackets. If
// the input does not match this pattern, or ends before the closing `]`, the
// reader position is restored to where the scan started and nil is returned.
func (s *Scanner) scanFeatureIDs() (featureIDs []string) {
	start := s.r.pos()

	// Phases of the small recognizer below.
	const (
		wantOpen    = iota // nothing read yet; the next character must be '['
		wantIDStart        // just after '[' or ','; an ID character must follow
		inID               // at least one character of the current ID was read
	)
	var (
		phase = wantOpen
		id    strings.Builder
	)

	for !s.r.eof() {
		c := s.r.peek()
		s.r.inc()
		switch {
		case phase == wantOpen && c == '[':
			phase = wantIDStart
		case phase != wantOpen && isIdentChar(c):
			id.WriteRune(c)
			phase = inID
		case phase == inID && c == ',':
			// The current ID is complete; another one must follow.
			featureIDs = append(featureIDs, id.String())
			id.Reset()
			phase = wantIDStart
		case phase == inID && c == ']':
			// End of a well-formed list.
			return append(featureIDs, id.String())
		default:
			// Unexpected character for the current phase: undo everything.
			s.r.p = start
			return nil
		}
	}

	// Input ended before the closing bracket.
	s.r.p = start
	return nil
}

func (s *Scanner) lastErrorAsWarn() {
Expand Down
109 changes: 86 additions & 23 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func runTest(c *C, table []testCaseItem) {
}

func (s *testLexerSuite) TestComment(c *C) {

SpecialCommentsController.Register("test")
table := []testCaseItem{
{"-- select --\n1", intLit},
{"/*!40101 SET character_set_client = utf8 */;", set},
Expand All @@ -178,8 +178,8 @@ SELECT`, selectKwd},

// The odd behavior of '*/' inside conditional comment is the same as
// that of MySQL.
{"/*T!99999 '*/0 -- ' */", intLit}, // equivalent to 0
{"/*T!00000 '*/0 -- ' */", stringLit}, // equivalent to '*/0 -- '
{"/*T![unsupported] '*/0 -- ' */", intLit}, // equivalent to 0
{"/*T![test] '*/0 -- ' */", stringLit}, // equivalent to '*/0 -- '
}
runTest(c, table)
}
Expand Down Expand Up @@ -272,21 +272,22 @@ func (s *testLexerSuite) TestSpecialComment(c *C) {
c.Assert(pos, Equals, Pos{1, 1, 16})
}

// Checks how the scanner tokenizes a `/*T!... */` special comment: when the
// comment is recognized its content is scanned as ordinary tokens (with
// positions inside the comment), otherwise the first scan returns token 0.
func (s *testLexerSuite) TestSpecialCodeComment(c *C) {
l := NewScanner("/*T!40000 auto_random(5) */")
func (s *testLexerSuite) TestFeatureIDsComment(c *C) {
SpecialCommentsController.Register("auto_rand")
l := NewScanner("/*T![auto_rand] auto_random(5) */")
// The comment body is expected to be tokenized: identifier, '(', 5, ')'.
tok, pos, lit := l.scan()
c.Assert(tok, Equals, identifier)
c.Assert(lit, Equals, "auto_random")
c.Assert(pos, Equals, Pos{0, 10, 10})
c.Assert(pos, Equals, Pos{0, 16, 16})
tok, pos, lit = l.scan()
c.Assert(tok, Equals, int('('))
tok, pos, lit = l.scan()
c.Assert(lit, Equals, "5")
c.Assert(pos, Equals, Pos{0, 22, 22})
c.Assert(pos, Equals, Pos{0, 28, 28})
tok, pos, lit = l.scan()
c.Assert(tok, Equals, int(')'))

// A comment that is not recognized here: the first scan must return token 0.
l = NewScanner(WrapStringWithCodeVersion("auto_random(5)", CommentCodeCurrentVersion+1))
l = NewScanner("/*T![unsupported_feature] unsupported(123) */")
tok, pos, lit = l.scan()
c.Assert(tok, Equals, 0)
}
Expand Down Expand Up @@ -316,6 +317,7 @@ func (s *testLexerSuite) TestOptimizerHint(c *C) {
}

func (s *testLexerSuite) TestOptimizerHintAfterCertainKeywordOnly(c *C) {
SpecialCommentsController.Register("test")
tests := []struct {
input string
tokens []int
Expand Down Expand Up @@ -357,11 +359,11 @@ func (s *testLexerSuite) TestOptimizerHintAfterCertainKeywordOnly(c *C) {
tokens: []int{selectKwd, '*', 0},
},
{
input: "SELECT /*T!000000 * */ /*+ hint */",
input: "SELECT /*T![test] * */ /*+ hint */",
tokens: []int{selectKwd, '*', 0},
},
{
input: "SELECT /*T!999999 * */ /*+ hint */",
input: "SELECT /*T![unsupported] * */ /*+ hint */",
tokens: []int{selectKwd, hintComment, 0},
},
{
Expand Down Expand Up @@ -479,77 +481,66 @@ func (s *testLexerSuite) TestVersionDigits(c *C) {
input string
min int
max int
version CommentCodeVersion
nextChar rune
}{
{
input: "12345",
min: 5,
max: 5,
version: 12345,
nextChar: unicode.ReplacementChar,
},
{
input: "12345xyz",
min: 5,
max: 5,
version: 12345,
nextChar: 'x',
},
{
input: "1234xyz",
min: 5,
max: 5,
version: CommentCodeNoVersion,
nextChar: '1',
},
{
input: "123456",
min: 5,
max: 5,
version: 12345,
nextChar: '6',
},
{
input: "1234",
min: 5,
max: 5,
version: CommentCodeNoVersion,
nextChar: '1',
},
{
input: "",
min: 5,
max: 5,
version: CommentCodeNoVersion,
nextChar: unicode.ReplacementChar,
},
{
input: "1234567xyz",
min: 5,
max: 6,
version: 123456,
nextChar: '7',
},
{
input: "12345xyz",
min: 5,
max: 6,
version: 12345,
nextChar: 'x',
},
{
input: "12345",
min: 5,
max: 6,
version: 12345,
nextChar: unicode.ReplacementChar,
},
{
input: "1234xyz",
min: 5,
max: 6,
version: CommentCodeNoVersion,
nextChar: '1',
},
}
Expand All @@ -558,8 +549,80 @@ func (s *testLexerSuite) TestVersionDigits(c *C) {
for _, t := range tests {
comment := Commentf("input = %s", t.input)
scanner.reset(t.input)
version := scanner.scanVersionDigits(t.min, t.max)
c.Assert(version, Equals, t.version, comment)
scanner.scanVersionDigits(t.min, t.max)
nextChar := scanner.r.readByte()
c.Assert(nextChar, Equals, t.nextChar, comment)
}
}

func (s *testLexerSuite) TestFeatureIDs(c *C) {
tests := []struct {
input string
featureIDs []string
nextChar rune
}{
{
input: "[feature]",
featureIDs: []string{"feature"},
nextChar: unicode.ReplacementChar,
},
{
input: "[feature] xx",
featureIDs: []string{"feature"},
nextChar: ' ',
},
{
input: "[feature1,feature2]",
featureIDs: []string{"feature1", "feature2"},
nextChar: unicode.ReplacementChar,
},
{
input: "[feature1,feature2,feature3]",
featureIDs: []string{"feature1", "feature2", "feature3"},
nextChar: unicode.ReplacementChar,
},
{
input: "[id_en_ti_fier]",
featureIDs: []string{"id_en_ti_fier"},
nextChar: unicode.ReplacementChar,
},
{
input: "[invalid, whitespace]",
featureIDs: nil,
nextChar: '[',
},
{
input: "[unclosed_brac",
featureIDs: nil,
nextChar: '[',
},
{
input: "unclosed_brac]",
featureIDs: nil,
nextChar: 'u',
},
{
input: "[invalid_comma,]",
featureIDs: nil,
nextChar: '[',
},
{
input: "[,]",
featureIDs: nil,
nextChar: '[',
},
{
input: "[]",
featureIDs: nil,
nextChar: '[',
},
}
scanner := NewScanner("")
for _, t := range tests {
comment := Commentf("input = %s", t.input)
scanner.reset(t.input)
featureIDs := scanner.scanFeatureIDs()
c.Assert(featureIDs, DeepEquals, t.featureIDs, comment)
nextChar := scanner.r.readByte()
c.Assert(nextChar, Equals, t.nextChar, comment)
}
Expand Down
55 changes: 35 additions & 20 deletions misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,11 @@
package parser

import (
"fmt"
"strings"

"github.com/pingcap/parser/charset"
)

// CommentCodeVersion is used to track the highest version can be parsed in the comment with pattern /*T!00001 xxx */
type CommentCodeVersion int

const (
CommentCodeNoVersion CommentCodeVersion = iota
CommentCodeAutoRandom CommentCodeVersion = 40000

CommentCodeCurrentVersion
)

func (ccv CommentCodeVersion) String() string {
return fmt.Sprintf("%05d", ccv)
}

// WrapStringWithCodeVersion convert a string `str` to `/*T!xxxxx str */`, where `xxxxx` is determined by CommentCodeVersion.
func WrapStringWithCodeVersion(str string, ccv CommentCodeVersion) string {
return fmt.Sprintf("/*T!%05d %s */", ccv, str)
}

// isLetter reports whether ch is an ASCII letter, i.e. in 'a'..'z' or 'A'..'Z'.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
Expand Down Expand Up @@ -830,3 +810,38 @@ func handleIdent(lval *yySymType) int {
lval.ident = cs
return underscoreCS
}

// SpecialCommentsController controls whether special comments like
// `/*T![xxx] yyy */` can be parsed as `yyy`. To add such rules, please use
// SpecialCommentsController.Register(). For example, after
//
//	SpecialCommentsController.Register("30100")
//
// the parser will treat
//
//	select a, /*T![30100] mysterious_keyword */ from t;
//
// and
//
//	select a, mysterious_keyword from t;
//
// equally. Similar special comments without registration are ignored by the
// parser.
//
// The controller does no locking of its own.
var SpecialCommentsController = specialCommentsCtrl{
	supportedFeatures: map[string]struct{}{},
}

// specialCommentsCtrl keeps the set of feature IDs that have been registered
// as supported. The zero value is an empty set that is ready to use.
type specialCommentsCtrl struct {
	// supportedFeatures holds the registered feature IDs as a set.
	supportedFeatures map[string]struct{}
}

// Register marks featureID as supported. Registering the same ID again has no
// further effect.
func (s *specialCommentsCtrl) Register(featureID string) {
	if s.supportedFeatures == nil {
		// Writing to a nil map panics; create the set lazily so that a
		// zero-value controller can be used directly.
		s.supportedFeatures = make(map[string]struct{})
	}
	s.supportedFeatures[featureID] = struct{}{}
}

// Unregister removes featureID from the supported set. It is a no-op when the
// ID has not been registered.
func (s *specialCommentsCtrl) Unregister(featureID string) {
	delete(s.supportedFeatures, featureID)
}

// ContainsAll reports whether every ID in featureIDs has been registered. It
// returns true for an empty or nil list, because there is no unregistered ID
// in it.
func (s *specialCommentsCtrl) ContainsAll(featureIDs []string) bool {
	for _, id := range featureIDs {
		if _, ok := s.supportedFeatures[id]; !ok {
			return false
		}
	}
	return true
}
Loading