Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the generalized Pratt parsing example #277

Open
wants to merge 47 commits into
base: hkmc2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
e330f06
Implement the lexer
chengluyu Jan 26, 2025
63a77b2
WIP Add the parser
chengluyu Feb 6, 2025
0ab1110
WIP
chengluyu Feb 7, 2025
04a770e
Support Unicode escapes in string literals
chengluyu Feb 7, 2025
a58aeb7
Refactor `TreeTracer` and move functions to libraries
chengluyu Feb 7, 2025
361b265
Selectively open `Token` from `Lexer`
chengluyu Feb 7, 2025
d7d6cdd
A bunch of improvements to the parser
chengluyu Feb 9, 2025
79e6dfa
Add magic constants and use them in traces
chengluyu Feb 11, 2025
685b5b2
Move the parser to the library
chengluyu Feb 11, 2025
0ca0927
Use separator comments
chengluyu Feb 12, 2025
70b3951
Improve an error message
chengluyu Feb 12, 2025
2505a78
Clean up and add new tests
chengluyu Feb 12, 2025
9b77505
Amend error message changes
chengluyu Feb 12, 2025
bfb8766
Amend changes caused by merging match arms
chengluyu Feb 12, 2025
60fe72d
Support parsing empty tuples
chengluyu Feb 12, 2025
685baaa
Support `rec` modifier
chengluyu Feb 12, 2025
a5fb56c
Support empty sequences
chengluyu Feb 12, 2025
c7d8996
Support identifiers ending with primes
chengluyu Feb 12, 2025
b40fd37
Support multiple let bindings using `and`
chengluyu Feb 12, 2025
f65a889
Group `set`
chengluyu Feb 12, 2025
64fb5c2
Merge branch 'hkmc2' into generalized-pratt-parsing
LPTK Feb 13, 2025
80114b8
Ensure the token stream is drained after parse
chengluyu Feb 12, 2025
73899fb
Fix a `:todo` in the test
chengluyu Feb 13, 2025
44a883e
Render parse rules
chengluyu Feb 17, 2025
5fbabca
Remove the space before the first argument list of `fold`
chengluyu Feb 18, 2025
e1b85a3
Resolve PR comments
chengluyu Feb 18, 2025
b8f0e3f
Add and use `XML` and `Iter` libraries
chengluyu Feb 18, 2025
e7285db
Fix issues mentioned in the discussion
chengluyu Feb 18, 2025
7182fe3
Add `Choice.Optional`
chengluyu Feb 19, 2025
d1abaad
Add `@` application operator
LPTK Feb 19, 2025
7c205d3
Tokenize strings with escape sequence support
chengluyu Feb 19, 2025
d142519
Split the parser into smaller modules
chengluyu Feb 19, 2025
82d0d56
Add small `mkStr` utility
LPTK Feb 20, 2025
0e0659d
Add example of class symbolic import error to triage
chengluyu Feb 20, 2025
2b46cbb
Add `RecursiveDescent` and extract `Token` into a module
chengluyu Feb 20, 2025
133e511
Add Pratt parsing
chengluyu Feb 20, 2025
741b7d6
Support splitting function applications via `of`
chengluyu Feb 20, 2025
624f0fe
Prepare to parse type categories
chengluyu Feb 21, 2025
a2478d2
Refactor the rule visualizer
chengluyu Feb 21, 2025
050e119
Use rule names in railroad diagrams
chengluyu Feb 21, 2025
8bd2497
Make `rec` optional instead of a separate phraseme
chengluyu Feb 21, 2025
b873961
Merge branch hkmc2 into generalized-pratt-parsing
chengluyu Feb 22, 2025
8be3383
Improve an error message
chengluyu Feb 22, 2025
6e6c860
Reduce one dynamic access
chengluyu Feb 22, 2025
625503f
Move `Option.get` to `unsafe`
chengluyu Feb 22, 2025
ce057c6
Amend test changes
chengluyu Feb 22, 2025
bc78a6b
Visualize rules for module items
chengluyu Feb 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions hkmc2/shared/src/main/scala/hkmc2/semantics/Elaborator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,15 @@ object Elaborator:
val untyped = assumeBuiltinTpe("untyped")
// println(s"Builtins: $Int, $Num, $Str, $untyped")
val Predef = assumeBuiltinMod("Predef")
/** Symbols of the members `line`, `name` and `file` of the builtin `source` module.
  *
  * Each member is looked up eagerly among the symbols defined by the module's tree,
  * so a missing member is reported as soon as this object is initialized.
  */
object source:
  private val module = assumeBuiltinMod("source")
  /** Returns the symbol named `nme` defined in the builtin `source` module.
    *
    * @param nme the member name to look up
    * @throws NoSuchElementException if the module does not define `nme`
    */
  private def assumeObject(nme: Str): BlockMemberSymbol =
    module.tree.definedSymbols.get(nme).getOrElse:
      throw new NoSuchElementException:
        s"Cannot find builtin module symbol source.$nme"
  val line = assumeObject("line")
  val name = assumeObject("name")
  val file = assumeObject("file")
/** Looks up `op` in `builtinBinOps`, but only when `getBuiltin(op)` is defined;
  * yields `N` otherwise.
  */
def getBuiltinOp(op: Str): Opt[Str] =
  val isKnownBuiltin = getBuiltin(op).isDefined
  if !isKnownBuiltin then N
  else builtinBinOps.get(op)
/** Classes that do not use `instanceof` in pattern matching. */
Expand Down Expand Up @@ -491,8 +500,18 @@ extends Importer:
ErrorReport(
msg"[debinding error] Method '${nme.name}' cannot be accessed without being called." -> nme.toLoc :: Nil)
case S(_) | N => ()
maybeApp:
Term.Sel(preTrm, nme)(sym)
// Magic constants: when the selection resolves to one of the builtin `source`
// symbols (`line`, `name`, `file`), a literal term is produced in place of the
// usual `Term.Sel`.
if sym.contains(ctx.builtins.source.line) then
// NOTE(review): `???` throws `NotImplementedError` when `tree` has no location —
// consider raising a proper error report instead.
val loc = tree.toLoc.getOrElse(???)
val (line, _, _) = loc.origin.fph.getLineColAt(loc.spanStart)
// Presumably shifts the line within the origin by the origin's starting line
// (assumes `line` is 1-based) — TODO confirm.
Term.Lit(Tree.IntLit(loc.origin.startLineNum + line - 1))
else if sym.contains(ctx.builtins.source.name) then
// Uses the `nme` of `ctx.getOuter`, or the empty string when there is no outer.
Term.Lit(Tree.StrLit(ctx.getOuter.map(_.nme).getOrElse("")))
else if sym.contains(ctx.builtins.source.file) then
// NOTE(review): same `???` fallback as for `source.line` above.
val loc = tree.toLoc.getOrElse(???)
Term.Lit(Tree.StrLit(loc.origin.fileName))
else
// Any other selection keeps the regular elaboration path.
maybeApp:
Term.Sel(preTrm, nme)(sym)
case MemberProj(ct, nme) =>
val c = cls(ct, inAppPrefix = false)
val f = c.symbol.flatMap(_.asCls) match
Expand Down
63 changes: 63 additions & 0 deletions hkmc2/shared/src/main/scala/hkmc2/syntax/Lexer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,69 @@ class Lexer(origin: Origin, dbg: Bool)(using raise: Raise):
case 'r' => str(i + 1, false, '\r' :: cur)
case 'b' => str(i + 1, false, '\b' :: cur)
case 'f' => str(i + 1, false, '\f' :: cur)
case 'u' =>
  /**
    * This code handles two types of Unicode escape sequences:
    *
    *  + Traditional Unicode escape: "\uXXXX"
    *    - Consists of the characters '\' and 'u' followed by exactly
    *      four hexadecimal digits. Hexadecimal digits after the fourth
    *      one are ordinary string content, e.g. "\u0041BC" is "ABC".
    *    - Example: "\u0041" represents the character 'A'.
    *  + Unicode code point escape: "\u{XXXXXX}"
    *    - Starts with "\u{" and ends with "}", allowing between 1 and
    *      6 hexadecimal digits in between.
    *    - Example: "\u{1F600}" represents the grinning face emoji.
    *
    * In the braced form, the scanned code point is validated to ensure
    * that it falls within the allowed Unicode range (0x0 to 0x10FFFF).
    * If any errors occur during scanning or conversion, such as
    * invalid characters, missing digits, or code points out of range,
    * a warning is raised with a precise location.
    */

  /**
    * Scans consecutive hexadecimal digits starting at `idx`.
    *
    * Only the first `maxDigits` digits contribute to the value. When
    * `greedy` is true, scanning continues over any further hexadecimal
    * digits (they are counted but ignored) so that the caller can report
    * "too many digits"; otherwise scanning stops after `maxDigits` digits.
    *
    * @return the index after the last scanned digit, the accumulated
    *         value, and the number of digits scanned
    */
  @tailrec
  def scanHexDigits(idx: Int, maxDigits: Int, value: Int, count: Int, greedy: Boolean): (Int, Int, Int) =
    if idx < length && isHexDigit(bytes(idx)) && (greedy || count < maxDigits) then
      if count < maxDigits then
        scanHexDigits(idx + 1, maxDigits, (value << 4) + Character.digit(bytes(idx), 16), count + 1, greedy)
      else
        scanHexDigits(idx + 1, maxDigits, value, count + 1, greedy)
    else
      (idx, value, count)

  if i + 1 < length && bytes(i + 1) == '{' then
    // Scan all hex digits after the opening brace; only the first 6 are accumulated.
    val (nextIdx, acc, count) = scanHexDigits(i + 2, 6, 0, 0, greedy = true)
    val result = if count == 0 then
        raise(WarningReport(msg"Expected at least one hexadecimal digit in Unicode escape sequence" -> S(loc(i + 1, nextIdx)) :: Nil,
          source = Lexing))
        cur
      else if count > 6 then
        raise(WarningReport(msg"Too many hexadecimal digits in Unicode escape sequence" -> S(loc(nextIdx - (count - 6), nextIdx)) :: Nil,
          source = Lexing))
        cur
      else if acc > 0x10FFFF then
        raise(WarningReport(msg"Unicode code point out of range: 0x${acc.toHexString}" -> S(loc(i + 2, nextIdx)) :: Nil,
          source = Lexing))
        cur
      else
        // `cur` is accumulated in reverse, so the UTF-16 units are prepended reversed.
        Character.toChars(acc).reverseIterator.toList ::: cur
    // Close the brace.
    val finalIdx = if nextIdx >= length || bytes(nextIdx) != '}' then
        raise(WarningReport(msg"Unterminated Unicode escape sequence: missing '}'" -> S(loc(nextIdx, nextIdx)) :: Nil,
          source = Lexing))
        nextIdx
      else
        nextIdx + 1
    str(finalIdx, false, result)
  else
    // Process the traditional 4-digit Unicode escape (\uXXXX).
    // The scan is not greedy: it must stop after four digits so that any
    // following hexadecimal characters remain part of the string.
    val (nextIdx, acc, count) = scanHexDigits(i + 1, 4, 0, 0, greedy = false)
    if count != 4 then
      raise(WarningReport(msg"Invalid Unicode escape sequence: expected 4 hexadecimal digits but got ${count.toString}" -> S(loc(i + 1, nextIdx)) :: Nil,
        source = Lexing))
      str(nextIdx, false, cur)
    else
      str(nextIdx, false, acc.toChar :: cur)
case ch =>
raise(WarningReport(msg"Found invalid escape character" -> S(loc(i, i + 1)) :: Nil,
source = Lexing))
Expand Down
2 changes: 1 addition & 1 deletion hkmc2/shared/src/main/scala/hkmc2/syntax/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,7 @@ abstract class Parser(
yeetSpaces match
case (NEWLINE, l0) :: _ =>
consume
???
lastWords(s"infix on newline at ${l0.showStart}:${l0.showEnd}")
case _ =>
if verbose then printDbg("$ parsing the right-hand side")
val rhs = expr(kw.rightPrecOrMin)
Expand Down
207 changes: 207 additions & 0 deletions hkmc2/shared/src/test/mlscript-compile/Iter.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import runtime from "./Runtime.mjs";
import Predef from "./Predef.mjs";
let Iterable1, Iterator1, Iter1, Result1;
/**
 * Factory for `Iterable` instances; `Iterable1(mk)` is equivalent to
 * `new Iterable1.class(mk)`.
 */
Iterable1 = function Iterable(mk1) {
  return new Iterable.class(mk1);
};
/**
 * Wraps the function `mk` and installs it as the instance's
 * `Symbol.iterator` method, so the instance can be used wherever the
 * JavaScript iteration protocol is expected.
 */
Iterable1.class = class Iterable {
  constructor(mk) {
    this.mk = mk;
    this[globalThis.Symbol.iterator] = this.mk;
    runtime.Unit
  }

  toString() {
    const rendered = globalThis.Predef.render(this.mk);
    return "Iterable(" + rendered + ")";
  }
};
/**
 * Factory for `Iterator` instances; `Iterator1(next)` is equivalent to
 * `new Iterator1.class(next)`.
 */
Iterator1 = function Iterator(next1) {
  return new Iterator.class(next1);
};
/**
 * Holds the function `next` as the instance's `next` property, which is the
 * member the JavaScript iterator protocol calls to obtain each result.
 */
Iterator1.class = class Iterator {
  constructor(next) {
    this.next = next;
  }

  toString() {
    const rendered = globalThis.Predef.render(this.next);
    return "Iterator(" + rendered + ")";
  }
};
/**
 * Namespace of iterator results:
 *  - `Result.Next(value)` creates an object with `value` set and `done: false`;
 *  - `Result.Done` is a single shared object with `done: true`.
 */
Result1 = class Result {
  static {
    // `Next`: a factory function whose `class` property is the actual class.
    const nextFactory = function Next(value1) {
      return new Next.class(value1);
    };
    nextFactory.class = class Next {
      constructor(value) {
        this.value = value;
        this.done = false;
      }

      toString() {
        const rendered = globalThis.Predef.render(this.value);
        return "Next(" + rendered + ")";
      }
    };
    this.Next = nextFactory;

    // `Done`: a singleton instance that also exposes its class as `class`.
    const DoneClass = class Done {
      constructor() {
        this.done = true;
      }

      toString() {
        return "Done";
      }
    };
    const doneInstance = new DoneClass();
    doneInstance.class = DoneClass;
    this.Done = doneInstance;
  }

  static toString() {
    return "Result";
  }
};
/**
 * Combinators over objects implementing the JavaScript iteration protocol.
 *
 * The `-ing` methods (`mapping`, `filtering`, `taking`, `zippingWithIndex`)
 * return a new `Iterable` built with `derive`; the others (`reduced`,
 * `folded`, `rightFolded`, `joined`, `toArray`) consume their input right away.
 */
Iter1 = class Iter {
  /**
   * Builds an `Iterable` on top of `iterable`.
   *
   * Every time the result is iterated, a fresh iterator is requested from
   * `iterable` and passed to `makeNext`, which must return the `next`
   * function of the derived iterator.
   */
  static derive(iterable, makeNext) {
    return Iterable1(() => {
      const iterator = runtime.safeCall(iterable[globalThis.Symbol.iterator]());
      const next = runtime.safeCall(makeNext(iterator));
      return Iterator1(next);
    });
  }

  /** Returns an iterable that yields `op(x)` for every element `x` of `xs`. */
  static mapping(xs, op) {
    return Iter.derive(xs, (iterator) => () => {
      const next = runtime.safeCall(iterator.next());
      if (next.done === true) {
        return Result1.Done;
      }
      return Result1.Next(runtime.safeCall(op(next.value)));
    });
  }

  /**
   * Returns an iterable that skips the elements of `xs1` for which `op1`
   * returns exactly `false`.
   */
  static filtering(xs1, op1) {
    return Iter.derive(xs1, (iterator) => () => {
      let next = runtime.safeCall(iterator.next());
      // Advance past rejected elements until one is kept or the source ends.
      while (next.done === false && runtime.safeCall(op1(next.value)) === false) {
        next = runtime.safeCall(iterator.next());
      }
      return next.done === true ? Result1.Done : Result1.Next(next.value);
    });
  }

  /**
   * Returns an iterable that yields at most the first `n` elements of `xs2`.
   *
   * The count is kept per traversal, and the source is not advanced once
   * `n` elements have been produced, so this also works on infinite sources.
   */
  static taking(xs2, n) {
    return Iter.derive(xs2, (iterator) => {
      let taken = 0;
      return () => {
        const upcoming = taken + 1;
        // Written as a negated `<=` so that a non-numeric `n` yields nothing.
        if (!(upcoming <= n)) {
          return Result1.Done;
        }
        taken = upcoming;
        const next = runtime.safeCall(iterator.next());
        return next.done === true ? Result1.Done : Result1.Next(next.value);
      };
    });
  }

  /**
   * Returns an iterable that yields `[x, index]` pairs for the elements of
   * `xs3`; the index starts at 0 on every traversal.
   */
  static zippingWithIndex(xs3) {
    return Iter.derive(xs3, (iterator) => {
      let index = 0;
      return () => {
        const next = runtime.safeCall(iterator.next());
        if (next.done === true) {
          return Result1.Done;
        }
        const pair = [next.value, index];
        index = index + 1;
        return Result1.Next(pair);
      };
    });
  }

  /**
   * Left fold over the remaining elements of `iterator`, starting from `acc`
   * and combining with `op2(acc, element)`.
   */
  static foldingImpl(iterator, acc, op2) {
    let next = runtime.safeCall(iterator.next());
    while (next.done === false) {
      acc = runtime.safeCall(op2(acc, next.value));
      next = runtime.safeCall(iterator.next());
    }
    return acc;
  }

  /**
   * Left fold of `xs4` that uses its first element as the initial value.
   *
   * @throws {Error} "Empty iterator" when `xs4` yields no element.
   */
  static reduced(xs4, op3) {
    const iterator = runtime.safeCall(xs4[globalThis.Symbol.iterator]());
    const first = runtime.safeCall(iterator.next());
    if (first.done === true) {
      // `Error` is a plain constructor and has no `class` property.
      throw new globalThis.Error("Empty iterator");
    }
    return Iter.foldingImpl(iterator, first.value, op3);
  }

  /** Left fold of `xs5` starting from `z`, combining with `op4(acc, element)`. */
  static folded(xs5, z, op4) {
    const iterator = runtime.safeCall(xs5[globalThis.Symbol.iterator]());
    return Iter.foldingImpl(iterator, z, op4);
  }

  /**
   * Right fold of `xs6` starting from `z1`, combining with
   * `op5(element, acc)`.
   *
   * All elements are read first, then combined from the last to the first.
   * This is done with loops rather than recursion so that long inputs do not
   * exhaust the call stack.
   */
  static rightFolded(xs6, z1, op5) {
    const iterator = runtime.safeCall(xs6[globalThis.Symbol.iterator]());
    const values = [];
    let next = runtime.safeCall(iterator.next());
    while (next.done !== true) {
      values.push(next.value);
      next = runtime.safeCall(iterator.next());
    }
    let acc = z1;
    for (let k = values.length - 1; k >= 0; k -= 1) {
      acc = runtime.safeCall(op5(values[k], acc));
    }
    return acc;
  }

  /**
   * Concatenates the string forms of the elements of `xs7`, separated by the
   * string form of `sep`. Returns `""` when `xs7` yields no element.
   */
  static joined(xs7, sep) {
    const iterator = runtime.safeCall(xs7[globalThis.Symbol.iterator]());
    const first = runtime.safeCall(iterator.next());
    if (first.done === true) {
      return "";
    }
    // Convert the separator once, up front, and use the converted value.
    const separator = globalThis.String(sep);
    const head = globalThis.String(first.value);
    return Iter.foldingImpl(iterator, head, (acc1, x) => {
      return acc1 + separator + globalThis.String(x);
    });
  }

  /** Collects the elements of `view` into an array using `Array.from`. */
  static toArray(view) {
    return runtime.safeCall(globalThis.Array.from(view));
  }

  static toString() {
    return "Iter";
  }
};
let Iter = Iter1; export default Iter;
Loading
Loading