-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokenizer.js
107 lines (99 loc) · 2.29 KB
/
tokenizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/**
 * Ordered lexer rule table: each entry is `[pattern, tokenType]`.
 *
 * Rules are tried top to bottom and the FIRST pattern that matches wins, so
 * a longer operator must be listed before any operator that is a prefix of
 * it (`==` before `=`, `..` before a leading-dot number, comments before `/`).
 * A `tokenType` of `null` means the matched text is skipped and produces no
 * token. Every pattern is anchored with `^` because it is applied to the
 * not-yet-consumed remainder of the input.
 */
const tokens = [
  // whitespace (skipped)
  [/^\s+/, null],
  // semicolon
  [/^;/, 'SEMI'],
  // single-line comments (skipped)
  [/^\/\/.*/, null],
  // multiline comments (skipped); lazy so the first `*/` closes the comment
  [/^\/\*[\s\S]*?\*\//, null],
  // concatenation operator; must precede NUMBER so `..5` is not read as `.` + `.5`
  [/^\.\./, 'CONCAT'],
  // literals: number — `12`, `1.5`, `1.`, `.5`. The `(?!\.)` lookahead stops
  // the literal before a `..` operator, so `1..2` is NUMBER CONCAT NUMBER,
  // and a lone `.` is no longer accepted as a number.
  [/^(?:\d+(?:\.(?!\.)\d*)?|\.\d+)/, 'NUMBER'],
  // literals: string (single or double quoted, no escape sequences)
  [/^"[^"]*"|^'[^']*'/, 'STRING'],
  // parentheses
  [/^\(/, 'O-PAREN'],
  [/^\)/, 'C-PAREN'],
  // braces and brackets
  [/^\{/, 'O-BRACE'],
  [/^\}/, 'C-BRACE'],
  [/^\[/, 'O-BRACK'],
  [/^\]/, 'C-BRACK'],
  [/^!/, 'EXPI'],
  [/^:/, 'COLON'],
  // `==` must be tried before `=`, otherwise EQUALITY can never match
  [/^==/, 'EQUALITY'],
  [/^=/, 'DECLARATION'],
  [/^,/, 'COMMA'],
  // arithmetic operators
  [/^\+/, 'ADD'],
  [/^-/, 'SUB'],
  [/^\*/, 'MULT'],
  [/^\//, 'DIV'],
  // words/vars: a letter followed by letters or digits
  [/^[a-zA-Z][a-zA-Z\d]*/, 'WORD'],
];
/**
 * Regex-driven lexer: walks an input string left to right and produces
 * `{ type, value }` tokens by trying an ordered rule table against the
 * unconsumed remainder of the input.
 */
class Lexer {
  /**
   * @param {string} i - Source text to tokenize.
   * @param {Array<[RegExp, (string|null)]>} [rules=tokens] - Ordered
   *   `[pattern, tokenType]` pairs; the first matching pattern wins and a
   *   `null` type means "skip the matched text". Defaults to the
   *   module-level `tokens` table.
   */
  constructor(i, rules = tokens) {
    this._string = i;
    this._cursor = 0;
    this._rules = rules;
  }

  /**
   * @returns {number} Offset into the input of the next unconsumed character.
   */
  get_cursor() {
    return this._cursor;
  }

  /**
   * NOTE: despite its name this returns `true` while input REMAINS and
   * `false` once the cursor has reached the end of the string. The inverted
   * meaning is kept as-is because callers rely on it.
   *
   * @returns {boolean} `true` if there are still unconsumed characters.
   */
  isEOF() {
    return this._cursor < this._string.length;
  }

  /**
   * Consumes and returns the next token, silently skipping any text matched
   * by rules whose type is `null`.
   *
   * @returns {{type: string, value: string}} The next token, or
   *   `{ type: 'EOF', value: '' }` once the input is exhausted.
   * @throws {SyntaxError} If no rule matches at the current position.
   */
  getNextToken() {
    // Iterate instead of recursing so a long run of skipped tokens
    // (whitespace, comments) cannot overflow the call stack.
    while (this.isEOF()) {
      const rest = this._string.slice(this._cursor);

      let matchedType = null;
      let matchedValue = null;
      for (const [regex, tokenType] of this._rules) {
        const tokenValue = this._match(regex, rest);
        if (tokenValue != null) {
          matchedType = tokenType;
          matchedValue = tokenValue;
          break;
        }
      }

      if (matchedValue === null) {
        const line = this._string.slice(0, this._cursor).split('\n').length;
        // Report the full code point so astral characters are not shown as
        // half of a surrogate pair.
        const offending = String.fromCodePoint(rest.codePointAt(0));
        // The cursor already points at the offending character, so it is the
        // position to report (0-based offset into the input).
        throw new SyntaxError(`Unexpected token: "${offending}" at line ${line}, position: ${this._cursor}`);
      }

      this._cursor += matchedValue.length;
      if (matchedType != null) {
        return {
          type: matchedType,
          value: matchedValue,
        };
      }
      // Skipped text: loop around and try again from the new cursor.
    }

    return {
      type: 'EOF',
      value: '',
    };
  }

  /**
   * Applies one rule pattern to the unconsumed input.
   *
   * @param {RegExp} regexp - Rule pattern.
   * @param {string} string - Unconsumed remainder of the input.
   * @returns {(string|null)} The matched text, or `null` when the pattern
   *   does not match a non-empty prefix of `string`.
   */
  _match(regexp, string) {
    // A pattern carrying the `g` or `y` flag keeps state in `lastIndex`;
    // reset it so every attempt starts at the beginning of `string`.
    regexp.lastIndex = 0;
    const matched = regexp.exec(string);
    // Reject matches that do not start at the cursor (un-anchored pattern)
    // and empty matches, which would never advance the cursor.
    if (matched === null || matched.index !== 0 || matched[0].length === 0) {
      return null;
    }
    return matched[0];
  }

  /**
   * Tokenizes everything from the current cursor to the end of the input.
   *
   * The result always ends with exactly one `EOF` token, regardless of
   * whether the input ends in skipped text (whitespace or a comment).
   *
   * @returns {Array<{type: string, value: string}>} Tokens in source order.
   * @throws {SyntaxError} If no rule matches at some position.
   */
  Tokenize() {
    const collected = [];
    let token;
    do {
      token = this.getNextToken();
      collected.push(token);
    } while (token.type !== 'EOF');
    return collected;
  }
}
export default Lexer