Skip to content

Commit

Permalink
feat: Improve identifiers, strings and characters.
Browse files: browse the repository at this point in the history.
Add support for escape sequences.

Allow identifiers to start with numbers.

Make strings and chars handle # correctly.
  • Loading branch information
reiniscirpons committed May 26, 2024
1 parent 229bffb commit fc43939
Show file tree
Hide file tree
Showing 6 changed files with 5,306 additions and 5,439 deletions.
55 changes: 30 additions & 25 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ module.exports = grammar({
extras: $ => [
$.comment,
/\s/,
$.line_continuation
],

inline: $ => [
Expand Down Expand Up @@ -112,7 +111,7 @@ module.exports = grammar({

for_statement: $ => seq(
'for',
field('variable', $.identifier),
field('identifier', $.identifier),
'in',
field('values', seq($._expression)),
'do',
Expand Down Expand Up @@ -169,6 +168,8 @@ module.exports = grammar({
)),

// GAP source file location: src/read.c ReadSelector
// TODO: Allow ~ as the variable of the list expression
// (same for other selectors)
list_selector: $ => prec.left(PREC.CALL, seq(
$._variable,
'[',
Expand Down Expand Up @@ -259,12 +260,20 @@ module.exports = grammar({
float: _ => {
const digits = /[0-9]+/;
const exponent = /[edqEDQ][\+-]?[0-9]+/;
const leading_period = token(seq(
optional(digits),

const middle_period = token(seq(
digits,
'.',
digits,
optional(exponent),
));

const leading_period_with_exponent = token(seq(
'.',
digits,
exponent,
));

const trailing_period_with_exponent = token(seq(
digits,
'.',
Expand All @@ -284,8 +293,16 @@ module.exports = grammar({
'.',
));

const leading_period = token(prec(-1,seq(
'.',
digits,
)));


return choice(
leading_period,
//leading_period,
middle_period,
leading_period_with_exponent,
trailing_period_with_exponent,
//trailing_period
);
Expand All @@ -301,12 +318,13 @@ module.exports = grammar({
char: $ => seq(
'\'',
choice(
token.immediate(/[^\n']/),
token.immediate(prec(1, /[^\n']/)),
$.escape_sequence
),
'\''
),


// TODO: support multiline triple strings
// (ruby and python modules use an external scanner written in C++
// for that... there are some nasty edge cases)
Expand All @@ -319,35 +337,26 @@ module.exports = grammar({
seq(
'"""',
optional(repeat1(choice(
token.immediate(/./),
token.immediate(prec(1, /./)),
$.escape_sequence
))),
'"""',
)
),

_literal_contents: $ => repeat1(choice(
token.immediate(/[^\n"\\]/),
token.immediate(prec(1, /[^\n"\\]/)),
$.escape_sequence
)),

// TODO: Properly handle line continuation characters in strings.
// Currently the test
// "abc\
// def"
// Gets parsed as
// (string (escape_sequence))
// instead of
// (string) (line_continuation) (string)
// Likely easiest to fix when implementing proper line continuation logic.
escape_sequence: _ => token(seq(
escape_sequence: _ => token(prec(1, seq(
'\\',
choice(
/[^0-7]/, // single character
/0x[0-9a-fA-F]{2,2}/, // hex code
/[0-7]{3,3}/, // octal
)
)),
))),


function: $ => seq(
Expand Down Expand Up @@ -494,20 +503,16 @@ module.exports = grammar({
')',
),

// TODO: support identifiers starting with numbers e.g. 2n
// TODO: support backslash quotes in identifiers; e.g. these are
// three valid identifiers:
// \[\]
// \+
// multi\ word\ identifier
identifier: _ => /[a-zA-Z_@][a-zA-Z_@0-9]*/,
identifier: _ => /([a-zA-Z_@0-9]|(\\.))*([a-zA-Z_@]|(\\.))[a-zA-Z_@0-9]*/,

comment: _ => token(seq('#', /.*/)),

// TODO: make line continuations seamless, i.e. parse
// 1234\
// 5678
// as just (integer) instead of ((integer) (line_continuation) (integer))
// TODO: implement external scanner for line continuations
line_continuation: _ => token(seq('\\', choice(seq(optional('\r'), '\n'), '\0'))),

}
Expand Down
116 changes: 70 additions & 46 deletions src/grammar.json
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@
},
{
"type": "FIELD",
"name": "variable",
"name": "identifier",
"content": {
"type": "SYMBOL",
"name": "identifier"
Expand Down Expand Up @@ -972,16 +972,8 @@
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[0-9]+"
},
{
"type": "BLANK"
}
]
"type": "PATTERN",
"value": "[0-9]+"
},
{
"type": "STRING",
Expand All @@ -1006,6 +998,26 @@
]
}
},
{
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "."
},
{
"type": "PATTERN",
"value": "[0-9]+"
},
{
"type": "PATTERN",
"value": "[edqEDQ][\\+-]?[0-9]+"
}
]
}
},
{
"type": "TOKEN",
"content": {
Expand Down Expand Up @@ -1053,8 +1065,12 @@
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\\n']"
"type": "PREC",
"value": 1,
"content": {
"type": "PATTERN",
"value": "[^\\n']"
}
}
},
{
Expand Down Expand Up @@ -1115,8 +1131,12 @@
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "."
"type": "PREC",
"value": 1,
"content": {
"type": "PATTERN",
"value": "."
}
}
},
{
Expand Down Expand Up @@ -1147,8 +1167,12 @@
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\\n\"\\\\]"
"type": "PREC",
"value": 1,
"content": {
"type": "PATTERN",
"value": "[^\\n\"\\\\]"
}
}
},
{
Expand All @@ -1161,30 +1185,34 @@
"escape_sequence": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^0-7]"
},
{
"type": "PATTERN",
"value": "0x[0-9a-fA-F]{2,2}"
},
{
"type": "PATTERN",
"value": "[0-7]{3,3}"
}
]
}
]
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^0-7]"
},
{
"type": "PATTERN",
"value": "0x[0-9a-fA-F]{2,2}"
},
{
"type": "PATTERN",
"value": "[0-7]{3,3}"
}
]
}
]
}
}
},
"function": {
Expand Down Expand Up @@ -1940,7 +1968,7 @@
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z_@][a-zA-Z_@0-9]*"
"value": "([a-zA-Z_@0-9]|(\\\\.))*([a-zA-Z_@]|(\\\\.))[a-zA-Z_@0-9]*"
},
"comment": {
"type": "TOKEN",
Expand Down Expand Up @@ -2009,10 +2037,6 @@
{
"type": "PATTERN",
"value": "\\s"
},
{
"type": "SYMBOL",
"name": "line_continuation"
}
],
"conflicts": [
Expand Down
24 changes: 10 additions & 14 deletions src/node-types.json
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,16 @@
"type": "for_statement",
"named": true,
"fields": {
"identifier": {
"multiple": false,
"required": true,
"types": [
{
"type": "identifier",
"named": true
}
]
},
"values": {
"multiple": false,
"required": true,
Expand Down Expand Up @@ -873,16 +883,6 @@
"named": true
}
]
},
"variable": {
"multiple": false,
"required": true,
"types": [
{
"type": "identifier",
"named": true
}
]
}
},
"children": {
Expand Down Expand Up @@ -3238,10 +3238,6 @@
"type": "integer",
"named": true
},
{
"type": "line_continuation",
"named": true
},
{
"type": "local",
"named": false
Expand Down
Loading

0 comments on commit fc43939

Please sign in to comment.