Skip to content

Commit

Permalink
feat: Add support for conversion markers in floats.
Browse files Browse the repository at this point in the history
Add support for conversion markers in floats.

Add support for conversion markers in trailing period floats.

Add more tests.
  • Loading branch information
reiniscirpons committed May 26, 2024
1 parent 9fcb7b4 commit dfb2a20
Show file tree
Hide file tree
Showing 8 changed files with 4,562 additions and 4,501 deletions.
17 changes: 17 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
GAP_DIR=temp_gap_for_tests
EXAMPLES_DIR=./examples
TST_TO_G=./examples/tst_to_g.py

# Recursively get files matching given extension regex from input directory,
# flatten and copy to output directory.
Expand All @@ -24,23 +25,39 @@ create_gap_tests: $(GAP_DIR)
@$(call get_files, ".*\.\(gd\|gi\|g\)", $(GAP_DIR)/grp, $(EXAMPLES_DIR)/temp_gap)
@$(call get_files, ".*\.\(gd\|gi\|g\)", $(GAP_DIR)/lib, $(EXAMPLES_DIR)/temp_gap)
@$(call get_files, ".*\.\(gd\|gi\|g\)", $(GAP_DIR)/tst, $(EXAMPLES_DIR)/temp_gap)
mkdir -p $(EXAMPLES_DIR)/temp_tst
@$(call get_files, ".*\.\(tst\)", $(GAP_DIR)/grp, $(EXAMPLES_DIR)/temp_tst)
@$(call get_files, ".*\.\(tst\)", $(GAP_DIR)/lib, $(EXAMPLES_DIR)/temp_tst)
@$(call get_files, ".*\.\(tst\)", $(GAP_DIR)/tst, $(EXAMPLES_DIR)/temp_tst)
for tst_file in $(EXAMPLES_DIR)/temp_tst/*.tst; do \
python3 $(TST_TO_G) $${tst_file}; \
done

# Gather package example inputs: copy the .gd/.gi/.g files found under
# $(GAP_DIR)/pkg into $(EXAMPLES_DIR)/temp_pkg and the .tst files into
# $(EXAMPLES_DIR)/temp_tst (via get_files), then run $(TST_TO_G) on every
# .tst file currently present in temp_tst.
# NOTE(review): the loop globs the whole temp_tst directory, so .tst files
# placed there by other targets are converted again as well.
create_pkg_tests: $(GAP_DIR)/pkg
mkdir -p $(EXAMPLES_DIR)/temp_pkg
@$(call get_files, ".*\.\(gd\|gi\|g\)", $(GAP_DIR)/pkg, $(EXAMPLES_DIR)/temp_pkg)
mkdir -p $(EXAMPLES_DIR)/temp_tst
@$(call get_files, ".*\.\(tst\)", $(GAP_DIR)/pkg, $(EXAMPLES_DIR)/temp_tst)
for tst_file in $(EXAMPLES_DIR)/temp_tst/*.tst; do \
python3 $(TST_TO_G) $${tst_file}; \
done

# Parse the files collected in temp_gap with tree-sitter.
test_gap: create_gap_tests
tree-sitter parse '$(EXAMPLES_DIR)/temp_gap/*.g*' --quiet --stat

# Parse the files collected in temp_pkg with tree-sitter.
test_pkg: create_pkg_tests
tree-sitter parse '$(EXAMPLES_DIR)/temp_pkg/*.g*' --quiet --stat

# Parse the files in temp_tst matching *.g* with tree-sitter; both
# create_* targets are prerequisites because each of them populates temp_tst.
test_tst: create_gap_tests create_pkg_tests
tree-sitter parse '$(EXAMPLES_DIR)/temp_tst/*.g*' --quiet --stat

# Parse every *.g* file matched by the recursive glob below $(EXAMPLES_DIR).
test_all: create_gap_tests create_pkg_tests
tree-sitter parse '$(EXAMPLES_DIR)/**/*.g*' --quiet --stat

# Remove the generated example directories (temp_gap, temp_pkg, temp_tst).
clean:
rm -rf $(EXAMPLES_DIR)/temp_gap
rm -rf $(EXAMPLES_DIR)/temp_pkg
rm -rf $(EXAMPLES_DIR)/temp_tst

# Run clean, then also remove the $(GAP_DIR) directory.
distclean: clean
rm -rf ./$(GAP_DIR)
57 changes: 57 additions & 0 deletions examples/tst_to_g.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""Quick, naive script to extract GAP code from .tst file."""
import argparse


def extract_gap_code(text):
    """Return the GAP input statements and comments contained in .tst text.

    The text is scanned line by line with a small state machine:

    * a line starting with ``gap> `` begins an input statement; the prompt
      is stripped and the remainder is kept;
    * while reading input, lines starting with ``> `` are continuation
      lines; the prompt is stripped and the remainder is kept;
    * the first other line after an input statement starts the expected
      output, which is dropped;
    * inside output, an empty line immediately followed by a line starting
      with ``#`` begins a comment block; an empty line and the comment line
      are kept and subsequent lines are copied verbatim again;
    * lines seen before any input statement (e.g. a file header) are copied
      verbatim.

    Args:
        text: full contents of a .tst file as a single string.

    Returns:
        The extracted lines joined with ``"\\n"``.
    """
    gap_prompt = "gap> "
    continuation_prompt = "> "

    result_lines = []
    ignore = False  # True while skipping expected-output lines
    reading_input = False  # True while inside a (multi-line) input statement
    empty_line = False  # True if the previous skipped line was empty
    for line in text.split("\n"):
        if line.startswith(gap_prompt):
            ignore = False
            reading_input = True
            # Reset so a blank line seen in an earlier output block cannot
            # turn a later "#..." output line into a comment.
            empty_line = False
            result_lines.append(line[len(gap_prompt):])
        elif reading_input and line.startswith(continuation_prompt):
            result_lines.append(line[len(continuation_prompt):])
        elif reading_input:
            # First line after the input statement: output starts here.
            reading_input = False
            ignore = True
            # A blank line right after the input may introduce a comment
            # block, exactly like a blank line later in the output.
            empty_line = len(line) == 0
        elif ignore and len(line) == 0:
            empty_line = True
        elif ignore and empty_line and line[0] == "#":
            # Blank line followed by "#": a comment block, keep it.
            ignore = False
            empty_line = False
            result_lines.append("")
            result_lines.append(line)
        elif ignore and empty_line:
            empty_line = False
        elif not ignore:
            result_lines.append(line)

    return "\n".join(result_lines)


def main():
    """Parse the command line, read the .tst file and write the GAP code.

    The output file defaults to the input file name with ``.g`` appended.
    """
    parser = argparse.ArgumentParser(description="Extract GAP code from a .tst file.")
    parser.add_argument(
        "in_file",
        type=str,
        help="name of the file to process",
    )
    parser.add_argument(
        "-o",
        "--out_file",
        type=str,
        default=None,
        help="name of the output file, defaults to appending .g if omitted",
        required=False,
    )
    args = parser.parse_args()

    with open(args.in_file, "r") as in_file:
        text = in_file.read()

    out_file_name = args.out_file
    if out_file_name is None:
        out_file_name = args.in_file + ".g"

    with open(out_file_name, "w") as out_file:
        out_file.write(extract_gap_code(text))


if __name__ == "__main__":
    main()
66 changes: 48 additions & 18 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ module.exports = grammar({
$.string_start,
$._string_content,
$.string_end,
$._trailing_period_float,
],

extras: $ => [
Expand Down Expand Up @@ -53,9 +54,6 @@ module.exports = grammar({
word: $ => $.identifier,

rules: {
// TODO: add support for GAP tst file syntax. This probably needs to be
// a separate tree-sitter project which imports the base GAP syntax, similar
// to how the cpp grammar is implemented (it imports the c grammar).
source_file: $ => repeat(
choice(
seq($._expression, ';'),
Expand All @@ -79,12 +77,32 @@ module.exports = grammar({
$.continue_statement,
$.return_statement,
$.call, // procedure call
// TODO: should we handle `Unbind`, `Info`, `Assert`, `TryNextMethod`
// TODO: (fingolfin) should we handle `Unbind`, `Info`, `Assert`, `TryNextMethod`
// statements? For now, we get away with just treating them as
// procedure calls
// NOTE: (reiniscirpons) these are already distinguished as builtin
// functions in ./queries/highlights.scm, so we probably don't need to do
// anything special in the grammar itself


// TODO: (fingolfin) add support for `quit`, `QUIT`, `?`, pragmas ???
// NOTE: (reiniscirpons) some pointers for this:
// As per
// GAP source location src/read.c TryReadStatement
// quit, QUIT and ? cannot be used within statements or expressions, so
// it might be easiest to implement them as a separate construct that can
// appear in the source file.
//
// The help syntax is described in the GAP manual
// https://docs.gap-system.org/doc/ref/chap2.html
// The help scanner function implemented here:
// GAP source file location: src/scanner.c ReadHelp
// The help request string parser implemented here:
// GAP source file location: lib/helpbase.gi HELP
//
// Pragmas are described in the GAP manual
// https://docs.gap-system.org/doc/ref/chap5.html


// TODO: add support for `quit`, `QUIT`, `?`, pragmas ???
),

assignment_statement: $ => seq(
Expand Down Expand Up @@ -236,8 +254,9 @@ module.exports = grammar({
)),

// GAP source file location: src/read.c ReadSelector
// TODO: fix issues with integer record selectors, i.e.
// make sure that a.1 is not parsed as (identifier) (float)
// TODO: (reiniscirpons) fix issues with integer record selectors once
// leading period floats are introduced, i.e. make sure that a.1 is not
// parsed as (identifier) (float)
record_selector: $ => prec.left(PREC.CALL, seq(
field('variable', $._expression),
'.',
Expand Down Expand Up @@ -275,7 +294,7 @@ module.exports = grammar({
[prec.left, '*', PREC.MULTI],
[prec.left, '/', PREC.MULTI],
[prec.left, 'mod', PREC.MULTI],
[prec.right, '^', PREC.POWER], // TODO: actually, ^ is *NOT* associative in GAP at all,
[prec.right, '^', PREC.POWER], // TODO: (fingolfin) actually, ^ is *NOT* associative in GAP at all,
// so an expression like `2^2^2` is a syntax error. Not sure how / whether to express that
].map(([fn, operator, precedence]) => fn(precedence, seq(
$._expression,
Expand All @@ -296,8 +315,8 @@ module.exports = grammar({
),

// GAP source file location: src/scanner.c GetNumber
float: _ => {
// TODO: trailing period floats currently cause issues with ranges e.g.
float: $ => {
// TODO: (reiniscirpons) trailing period floats currently cause issues with ranges e.g.
// [1..10] fails producing the parse (list_expression (float) (Error))
// since it (correctly) tries to parse the prefix [1. as the start of a list
// followed by the float "1.". The issue is that with only a single character of
Expand All @@ -310,26 +329,33 @@ module.exports = grammar({
LITERAL_REGEXP.LINE_CONTINUATION,
);

const trailing_period_with_conversion = lineContinuation(
/[0-9]+\.(_[a-zA-Z]?|[a-cf-pr-zA-CF-PR-Z])/,
LITERAL_REGEXP.LINE_CONTINUATION,
);

const middle_period = lineContinuation(
/[0-9]+\.[0-9]+/,
/[0-9]+\.[0-9]+(_[a-zA-Z]?|[a-cf-pr-zA-CF-PR-Z])?/,
LITERAL_REGEXP.LINE_CONTINUATION,
);

// TODO: Leading periods currently conflict with record selectors
// TODO: (reiniscirpons) Leading periods currently conflict with record selectors
// TODO: (reiniscirpons) add conversion marker support for leading period floats
const leading_period = lineContinuation(
/\.[0-9]+/,
LITERAL_REGEXP.LINE_CONTINUATION,
);

const float_with_exponent = lineContinuation(
/([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)[edqEDQ][\+-]?[0-9]+/,
/([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)[edqEDQ][\+-]?[0-9]+_?[a-zA-Z]?/,
LITERAL_REGEXP.LINE_CONTINUATION,
);

return choice(
//leading_period,
middle_period,
//trailing_period,
$._trailing_period_float,
trailing_period_with_conversion,
float_with_exponent,
);
},
Expand Down Expand Up @@ -363,7 +389,7 @@ module.exports = grammar({
LITERAL_REGEXP.LINE_CONTINUATION,
),

// TODO: restrict where tilde can be used, i.e., only "inside" a list or
// TODO: (fingolfin) restrict where tilde can be used, i.e., only "inside" a list or
// record expression (but at arbitrary depth)
tilde: _ => '~',

Expand Down Expand Up @@ -463,9 +489,12 @@ module.exports = grammar({
$.record_entry
),

// TODO: add special rules for calls to Declare{GlobalFunction,Operation,...},
// TODO: (fingolfin) add special rules for calls to Declare{GlobalFunction,Operation,...},
// BindGlobal, BIND_GLOBAL, Install{Method,GlobalFunction,} ? They are not part of the language per se, but they
// are how we can find out function declarations / definitions
// NOTE: (reiniscirpons) not sure we need to do anything special for these functions in the grammar itself.
// We can maybe distinguish them in ./queries/highlights.scm as builtin functions. When parsing, I think they
// should be treated the same as any other function call.

list_expression: $ => seq(
'[',
Expand Down Expand Up @@ -591,7 +620,8 @@ function lineContinuation(base_regex, line_continuation_regex) {
let square_bracket = false;
let curly_brace = false
for (const c of base_regex.source) {
// TODO: Refactor more
// TODO: (reiniscirpons) Refactor more

// BEFORE
if (!curly_brace && !square_bracket && !escaped &&
(c == '\\' || c == '[' ||
Expand Down
16 changes: 14 additions & 2 deletions src/grammar.json
Original file line number Diff line number Diff line change
Expand Up @@ -1092,11 +1092,19 @@
"members": [
{
"type": "PATTERN",
"value": "([0-9](\\\\\\r?\\n)*)+(\\.(\\\\\\r?\\n)*)([0-9](\\\\\\r?\\n)*)+"
"value": "([0-9](\\\\\\r?\\n)*)+(\\.(\\\\\\r?\\n)*)([0-9](\\\\\\r?\\n)*)+((_(\\\\\\r?\\n)*)([a-zA-Z](\\\\\\r?\\n)*)?|([a-cf-pr-zA-CF-PR-Z](\\\\\\r?\\n)*))?"
},
{
"type": "SYMBOL",
"name": "_trailing_period_float"
},
{
"type": "PATTERN",
"value": "(([0-9](\\\\\\r?\\n)*)+(\\.(\\\\\\r?\\n)*)([0-9](\\\\\\r?\\n)*)*|([0-9](\\\\\\r?\\n)*)*(\\.(\\\\\\r?\\n)*)([0-9](\\\\\\r?\\n)*)+)([edqEDQ](\\\\\\r?\\n)*)([\\+-](\\\\\\r?\\n)*)?([0-9](\\\\\\r?\\n)*)+"
"value": "([0-9](\\\\\\r?\\n)*)+(\\.(\\\\\\r?\\n)*)((_(\\\\\\r?\\n)*)([a-zA-Z](\\\\\\r?\\n)*)?|([a-cf-pr-zA-CF-PR-Z](\\\\\\r?\\n)*))"
},
{
"type": "PATTERN",
"value": "(([0-9](\\\\\\r?\\n)*)+(\\.(\\\\\\r?\\n)*)([0-9](\\\\\\r?\\n)*)*|([0-9](\\\\\\r?\\n)*)*(\\.(\\\\\\r?\\n)*)([0-9](\\\\\\r?\\n)*)+)([edqEDQ](\\\\\\r?\\n)*)([\\+-](\\\\\\r?\\n)*)?([0-9](\\\\\\r?\\n)*)+(_(\\\\\\r?\\n)*)?([a-zA-Z](\\\\\\r?\\n)*)?"
}
]
},
Expand Down Expand Up @@ -2163,6 +2171,10 @@
{
"type": "SYMBOL",
"name": "string_end"
},
{
"type": "SYMBOL",
"name": "_trailing_period_float"
}
],
"inline": [
Expand Down
Loading

0 comments on commit dfb2a20

Please sign in to comment.