Skip to content

Commit

Permalink
[clojure mode] Mark invalid tokens
Browse files Browse the repository at this point in the history
Types of invalid tokens that can be indicated include:
- invalid numbers (e.g., `42a`, `0x0042m`, etc.)
- invalid character literals (e.g., `\ab`, `\a1`, `\newlines`, `\NEWLINE`)
- invalid symbols (e.g., `42foo`, etc.)
  • Loading branch information
finalfantasia authored and marijnh committed Sep 18, 2018
1 parent 72c708d commit 9f00e4d
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 28 deletions.
50 changes: 25 additions & 25 deletions mode/clojure/clojure.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,23 +156,34 @@ CodeMirror.defineMode("clojure", function (options) {
var specialForm = createLookupMap(specialForms);
var coreSymbol = createLookupMap(coreSymbols);
var hasBodyParameter = createLookupMap(haveBodyParameter);
// Number pattern without a trailing-delimiter check: optional sign, decimal
// digits, then an optional suffix (N, exponent, fraction with optional M or
// exponent, ratio denominator, hex digits, or radix digits).
// NOTE(review): numberLiteral is assigned again two declarations below; the
// later assignment is the one in effect once these statements have run. This
// looks like the pre-change line of the diff -- confirm before relying on it.
var numberLiteral = /^[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?/;
// Single character that readSymbol accepts as part of a symbol.
var symbolCharacter = /[!#$&'*+\-.\/:<=>?_|\w\xa1-\uffff]/;
// One delimiter character, or the empty string at end of input.
var delimiter = /^(?:[\\\[\]\s"(),;@^`{}~]|$)/;
// Same number grammar as above, but the match only succeeds when the number is
// immediately followed by a delimiter, `#`, `'`, or end of input, so tokens
// such as `42foo` do not match as numbers.
var numberLiteral = /^(?:[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?(?=[\\\[\]\s"#'(),;@^`{}~]|$))/;
// Backslash followed by an optional named character, octal (o + 3 digits),
// u/x + 4 hex digits, or any single character; must be followed by a
// delimiter or end of input. Because the part after the backslash is optional,
// a lone backslash directly before a delimiter also matches.
var characterLiteral = /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/;

// namespace-segment := /^[^\\\/\[\]\d\s"#'(),;@^`{}~.][^\\\[\]\s"(),;@^`{}~.\/]*/
// simple-symbol := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
// qualified-symbol := (<namespace-segment>(<.><namespace-segment>)*</>)?<simple-symbol>
//
// Namespace segments deliberately exclude `.` and `/`. If a segment could
// contain them, a dotted token without a `/` (e.g. `a.b.c.d...`) could be split
// into segments in exponentially many ways before the optional namespace
// prefix is abandoned, which makes the regexp engine backtrack catastrophically.
// The set of matched tokens is unaffected: any token that starts with a valid
// symbol character is still matched in full by the trailing simple-symbol part.
var qualifiedSymbol = /^(?:(?:[^\\\/\[\]\d\s"#'(),;@^`{}~.][^\\\[\]\s"(),;@^`{}~.\/]*(?:\.[^\\\/\[\]\d\s"#'(),;@^`{}~.][^\\\[\]\s"(),;@^`{}~.\/]*)*\/)?(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*(?=[\\\[\]\s"(),;@^`{}~]|$))/;

function base(stream, state) {
if (stream.eatSpace()) return ["space", null];
if (stream.match(numberLiteral)) return [null, "number"];

var ch = stream.next();

if (ch === "\\") {stream.next(); readSymbol(stream); return [null, "string-2"];}
if (ch === '"') return (state.tokenize = inString)(stream, state);
if (is(ch, /[(\[{]/)) return ["open", "bracket"];
if (is(ch, /[)\]}]/)) return ["close", "bracket"];
if (ch === ";") {stream.skipToEnd(); return ["space", "comment"];}
if (is(ch, /[#'@^`~]/)) return [null, "meta"];

var symbol = readSymbol(stream);
if (stream.match(characterLiteral)) return [null, "string-2"];
if (stream.eat(/^"/)) return (state.tokenize = inString)(stream, state);
if (stream.eat(/^[(\[{]/)) return ["open", "bracket"];
if (stream.eat(/^[)\]}]/)) return ["close", "bracket"];
if (stream.eat(/^;/)) {stream.skipToEnd(); return ["space", "comment"];}
if (stream.eat(/^[#'@^`~]/)) return [null, "meta"];

var matches = stream.match(qualifiedSymbol);
var symbol = matches && matches[0];

if (!symbol) {
// advance stream by at least one character so we don't get stuck.
stream.next();
stream.eatWhile(function (c) {return !is(c, delimiter);});
return [null, "error"];
}

if (symbol === "comment" && state.lastToken === "(")
return (state.tokenize = inComment)(stream, state);
Expand All @@ -187,7 +198,7 @@ CodeMirror.defineMode("clojure", function (options) {
var escaped = false, next;

while (next = stream.next()) {
if (next === '"' && !escaped) {state.tokenize = base; break;}
if (next === "\"" && !escaped) {state.tokenize = base; break;}
escaped = !escaped && next === "\\";
}

Expand All @@ -211,17 +222,6 @@ CodeMirror.defineMode("clojure", function (options) {
return ["space", "comment"];
}

// Consumes characters from `stream` for as long as they belong to a symbol and
// returns whatever `stream.current()` reports afterwards. A backslash is always
// accepted and additionally swallows the character that follows it. The first
// character that is not a symbol character is pushed back onto the stream.
function readSymbol(stream) {
  for (var c = stream.next(); c; c = stream.next()) {
    if (c === "\\") {
      // Escape: skip the escaped character without inspecting it.
      stream.next();
      continue;
    }
    if (!is(c, symbolCharacter)) {
      // Not part of the symbol: un-read it and stop.
      stream.backUp(1);
      break;
    }
  }

  return stream.current();
}

function createLookupMap(words) {
var obj = {};

Expand Down
48 changes: 45 additions & 3 deletions mode/clojure/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,22 @@
"[number 8r52] [number +8r52] [number -8r52]",
"[number 36rhello] [number +36rhello] [number -36rhello]",
"[number 36rz] [number +36rz] [number -36rz]",
"[number 36rZ] [number +36rZ] [number -36rZ]"
"[number 36rZ] [number +36rZ] [number -36rZ]",

// invalid numbers
"[error 42foo]",
"[error 42Nfoo]",
"[error 42Mfoo]",
"[error 42.42Mfoo]",
"[error 42.42M!]",
"[error 42!]",
"[error 0x42afm]"
);

MT("characters",
"[string-2 \\1]",
"[string-2 \\a]",
"[string-2 \\a\\b\\c]",
"[string-2 \\#]",
"[string-2 \\\\]",
"[string-2 \\\"]",
Expand All @@ -63,7 +73,21 @@
"[string-2 \\u1000]",
"[string-2 \\uAaAa]",
"[string-2 \\u9F9F]",
"[string-2 \\o123]"
"[string-2 \\o123]",
"[string-2 \\符]",
"[string-2 \\シ]",
"[string-2 \\ۇ]",
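// FIXME: not highlighted as string-2 yet. The `.` in characterLiteral consumes
// a single UTF-16 code unit, so a character outside the BMP (a surrogate pair,
// like the emoji below) is not matched as a character literal.
// "[string-2 \\🙂]",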

// invalid character literals
"[error \\abc]",
"[error \\a123]",
"[error \\a!]",
"[error \\newlines]",
"[error \\NEWLINE]",
"[error \\u9F9FF]",
"[error \\o1234]"
);

MT("strings",
Expand All @@ -72,6 +96,8 @@
"[string \"I'm]", // this is
"[string a]", // a multi-line
"[string teapot.\"]" // string

// TODO unterminated (multi-line) strings?
);

MT("comments",
Expand Down Expand Up @@ -123,7 +149,23 @@
"[variable 符号]",
"[variable シンボル]",
"[variable ئۇيغۇر]",
"[variable 🙂❤🇺🇸]"
"[variable 🙂❤🇺🇸]",

// invalid symbols
"[error 3foo]",
"[error 3+]",
"[error 3|]",
"[error 3_]"
);

MT("numbers and other forms",
"[number 42][bracket (][builtin foo][bracket )]",
"[number 42][bracket [[][variable foo][bracket ]]]",
"[number 42][meta #][bracket {][variable foo][bracket }]",
"[number 42][bracket {][atom :foo] [variable bar][bracket }]",
"[number 42][meta `][variable foo]",
"[number 42][meta ~][variable foo]",
"[number 42][meta #][variable foo]"
);

var specialForms = [".", "catch", "def", "do", "if", "monitor-enter",
Expand Down

0 comments on commit 9f00e4d

Please sign in to comment.