-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Lexical colorization/classification for template strings. #2026
Changes from all commits
ea30c68
3429fab
f1f085e
3fea0ae
ab79fae
1838800
15b333e
65e23b1
1dca562
a1b90f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1143,6 +1143,9 @@ module ts { | |
InMultiLineCommentTrivia, | ||
InSingleQuoteStringLiteral, | ||
InDoubleQuoteStringLiteral, | ||
InTemplateHeadOrNoSubstitutionTemplate, | ||
InTemplateMiddleOrTail, | ||
InTemplateSubstitutionPosition, | ||
} | ||
|
||
export enum TokenClass { | ||
|
@@ -1168,7 +1171,26 @@ module ts { | |
} | ||
|
||
export interface Classifier { | ||
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; | ||
/** | ||
* Gives lexical classifications of tokens on a line without any syntactic context. | ||
* For instance, a token consisting of the text 'string' can be either an identifier | ||
* named 'string' or the keyword 'string'; however, because this classifier is not context-aware, | ||
* it relies on certain heuristics to give acceptable results. For classifications where | ||
* speed trumps accuracy, this function is preferable; however, for true accuracy, the | ||
* syntactic classifier is ideal. In fact, in certain editing scenarios, combining the | ||
* lexical, syntactic, and semantic classifiers may yield the best user experience. | ||
* | ||
* @param text The text of a line to classify. | ||
* @param lexState The state of the lexical classifier at the end of the previous line. | ||
* @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. | ||
* If there is no syntactic classifier (syntacticClassifierAbsent=true), | ||
* certain heuristics may be used in its place; however, if there is a | ||
* syntactic classifier (syntacticClassifierAbsent=false), certain | ||
* classifications which may be incorrectly categorized will be given | ||
* back as Identifiers in order to allow the syntactic classifier to | ||
* subsume the classification. | ||
*/ | ||
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; | ||
} | ||
|
||
/** | ||
|
@@ -5617,6 +5639,28 @@ module ts { | |
noRegexTable[SyntaxKind.TrueKeyword] = true; | ||
noRegexTable[SyntaxKind.FalseKeyword] = true; | ||
|
||
// Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact) | ||
// classification on template strings. Because of the context free nature of templates, | ||
// the only precise way to classify a template portion would be by propagating the stack across | ||
// lines, just as we do with the end-of-line state. However, this is a burden for implementers, | ||
// and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead | ||
// flatten any nesting when the template stack is non-empty and encode it in the end-of-line state. | ||
// Situations in which this fails are: | ||
// 1) When template strings are nested across different lines: | ||
// `hello ${ `world | ||
// ` }` | ||
// | ||
// Where on the second line, you will get the closing of a template, | ||
// a closing curly, and a new template. | ||
// | ||
// 2) When substitution expressions have curly braces and the curly brace falls on the next line: | ||
// `hello ${ () => { | ||
// return "world" } } ` | ||
// | ||
// Where on the second line, you will get the 'return' keyword, | ||
// a string literal, and a template end consisting of '} } `'. | ||
var templateStack: SyntaxKind[] = []; | ||
|
||
function isAccessibilityModifier(kind: SyntaxKind) { | ||
switch (kind) { | ||
case SyntaxKind.PublicKeyword: | ||
|
@@ -5650,13 +5694,19 @@ module ts { | |
// if there are more cases we want the classifier to be better at. | ||
return true; | ||
} | ||
|
||
// 'classifyKeywordsInGenerics' should be 'true' when a syntactic classifier is not present. | ||
function getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult { | ||
|
||
// If there is a syntactic classifier ('syntacticClassifierAbsent' is false), | ||
// we will be more conservative in order to avoid conflicting with the syntactic classifier. | ||
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): ClassificationResult { | ||
var offset = 0; | ||
var token = SyntaxKind.Unknown; | ||
var lastNonTriviaToken = SyntaxKind.Unknown; | ||
|
||
// Empty out the template stack for reuse. | ||
while (templateStack.length > 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. templateStack.length = 0 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's actually faster to do it this way. |
||
templateStack.pop(); | ||
} | ||
|
||
// If we're in a string literal, then prepend: "\ | ||
// (and a newline). That way when we lex we'll think we're still in a string literal. | ||
// | ||
|
@@ -5675,6 +5725,17 @@ module ts { | |
text = "/*\n" + text; | ||
offset = 3; | ||
break; | ||
case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate: | ||
text = "`\n" + text; | ||
offset = 2; | ||
break; | ||
case EndOfLineState.InTemplateMiddleOrTail: | ||
text = "}\n" + text; | ||
offset = 2; | ||
// fallthrough | ||
case EndOfLineState.InTemplateSubstitutionPosition: | ||
templateStack.push(SyntaxKind.TemplateHead); | ||
break; | ||
} | ||
|
||
scanner.setText(text); | ||
|
@@ -5739,12 +5800,45 @@ module ts { | |
token === SyntaxKind.StringKeyword || | ||
token === SyntaxKind.NumberKeyword || | ||
token === SyntaxKind.BooleanKeyword) { | ||
if (angleBracketStack > 0 && !classifyKeywordsInGenerics) { | ||
// If it looks like we're could be in something generic, don't classify this | ||
// as a keyword. We may just get overwritten by the syntactic classifier, | ||
// causing a noisy experience for the user. | ||
token = SyntaxKind.Identifier; | ||
} | ||
if (angleBracketStack > 0 && !syntacticClassifierAbsent) { | ||
// If it looks like we could be in something generic, don't classify this | ||
// as a keyword. We may just get overwritten by the syntactic classifier, | ||
// causing a noisy experience for the user. | ||
token = SyntaxKind.Identifier; | ||
} | ||
} | ||
else if (token === SyntaxKind.TemplateHead) { | ||
templateStack.push(token); | ||
} | ||
else if (token === SyntaxKind.OpenBraceToken) { | ||
// If we don't have anything on the template stack, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i don't understand your comment wrapping. |
||
// then we aren't trying to keep track of a previously scanned template head. | ||
if (templateStack.length > 0) { | ||
templateStack.push(token); | ||
} | ||
} | ||
else if (token === SyntaxKind.CloseBraceToken) { | ||
// If we don't have anything on the template stack, | ||
// then we aren't trying to keep track of a previously scanned template head. | ||
if (templateStack.length > 0) { | ||
var lastTemplateStackToken = lastOrUndefined(templateStack); | ||
|
||
if (lastTemplateStackToken === SyntaxKind.TemplateHead) { | ||
token = scanner.reScanTemplateToken(); | ||
|
||
// Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us. | ||
if (token === SyntaxKind.TemplateTail) { | ||
templateStack.pop(); | ||
} | ||
else { | ||
Debug.assert(token === SyntaxKind.TemplateMiddle, "Should have been a template middle. Was " + token); | ||
} | ||
} | ||
else { | ||
Debug.assert(lastTemplateStackToken === SyntaxKind.OpenBraceToken, "Should have been an open brace. Was: " + token); | ||
templateStack.pop(); | ||
} | ||
} | ||
} | ||
|
||
lastNonTriviaToken = token; | ||
|
@@ -5789,6 +5883,22 @@ module ts { | |
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia; | ||
} | ||
} | ||
else if (isTemplateLiteralKind(token)) { | ||
if (scanner.isUnterminated()) { | ||
if (token === SyntaxKind.TemplateTail) { | ||
result.finalLexState = EndOfLineState.InTemplateMiddleOrTail; | ||
} | ||
else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) { | ||
result.finalLexState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate; | ||
} | ||
else { | ||
Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token); | ||
} | ||
} | ||
} | ||
else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) { | ||
result.finalLexState = EndOfLineState.InTemplateSubstitutionPosition; | ||
} | ||
} | ||
} | ||
|
||
|
@@ -5892,6 +6002,9 @@ module ts { | |
return TokenClass.Whitespace; | ||
case SyntaxKind.Identifier: | ||
default: | ||
if (isTemplateLiteralKind(token)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return TokenClass.StringLiteral; | ||
} | ||
return TokenClass.Identifier; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Explain that rudimentary means 'inexact' and give an example of where template classification will be wrong.