Skip to content

Commit aeb05d0

Browse files
committed
refactor: improve parser
1 parent 59991c3 commit aeb05d0

File tree

5 files changed

+129
-189
lines changed

5 files changed

+129
-189
lines changed

pomsky-syntax/src/exprs/alternation.rs

+1-23
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
use crate::Span;
55

6-
use super::{Literal, Rule};
6+
use super::Rule;
77

88
/// An [alternation](https://www.regular-expressions.info/alternation.html).
99
/// This is a list of alternatives. Each alternative is a [`Rule`].
@@ -20,28 +20,6 @@ pub struct Alternation {
2020
}
2121

2222
impl Alternation {
23-
pub(crate) fn new_expr(rules: Vec<Rule>) -> Rule {
24-
rules
25-
.into_iter()
26-
.reduce(|a, b| match (a, b) {
27-
(Rule::Alternation(mut a), Rule::Alternation(b)) => {
28-
a.span = a.span.join(b.span);
29-
a.rules.extend(b.rules);
30-
Rule::Alternation(a)
31-
}
32-
(Rule::Alternation(mut a), b) => {
33-
a.span = a.span.join(b.span());
34-
a.rules.push(b);
35-
Rule::Alternation(a)
36-
}
37-
(a, b) => {
38-
let span = a.span().join(b.span());
39-
Rule::Alternation(Alternation { rules: vec![a, b], span })
40-
}
41-
})
42-
.unwrap_or_else(|| Rule::Literal(Literal::new("".to_string(), Span::default())))
43-
}
44-
4523
#[cfg(feature = "dbg")]
4624
pub(super) fn pretty_print(&self, buf: &mut crate::PrettyPrinter, needs_parens: bool) {
4725
if needs_parens {

pomsky-syntax/src/exprs/intersection.rs

-27
Original file line numberDiff line numberDiff line change
@@ -20,33 +20,6 @@ pub struct Intersection {
2020
}
2121

2222
impl Intersection {
23-
pub(crate) fn new_expr(rules: Vec<Rule>, start_span: Span) -> Option<Rule> {
24-
rules
25-
.into_iter()
26-
.reduce(|a, b| match (a, b) {
27-
(Rule::Intersection(mut a), Rule::Intersection(b)) => {
28-
a.span = a.span.join(b.span);
29-
a.rules.extend(b.rules);
30-
Rule::Intersection(a)
31-
}
32-
(Rule::Intersection(mut a), b) => {
33-
a.span = a.span.join(b.span());
34-
a.rules.push(b);
35-
Rule::Intersection(a)
36-
}
37-
(a, b) => {
38-
let span = a.span().join(b.span());
39-
Rule::Intersection(Intersection { rules: vec![a, b], span })
40-
}
41-
})
42-
.map(|mut rule| {
43-
if let Rule::Intersection(i) = &mut rule {
44-
i.span = i.span.join(start_span)
45-
}
46-
rule
47-
})
48-
}
49-
5023
#[cfg(feature = "dbg")]
5124
pub(super) fn pretty_print(&self, buf: &mut crate::PrettyPrinter, needs_parens: bool) {
5225
if needs_parens {

pomsky-syntax/src/lexer/token.rs

+8-13
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ pub enum Token {
99
/// `$` (end boundary)
1010
Dollar,
1111
/// `%` (`\b` boundary)
12-
BWord,
12+
Percent,
13+
/// `<` (word start)
14+
AngleLeft,
15+
/// `>` (word end)
16+
AngleRight,
1317

1418
/// `*` (`*?` repetition)
1519
Star,
@@ -42,28 +46,19 @@ pub enum Token {
4246

4347
/// `[` (open character class)
4448
OpenBracket,
45-
46-
/// `-` (unicode range)
47-
Dash,
48-
4949
/// `]` (close character class)
5050
CloseBracket,
5151

52+
/// `-` (unicode range)
53+
Dash,
5254
/// `.` (any code point except newline)
5355
Dot,
5456

5557
/// `>>` (positive lookahead)
5658
LookAhead,
57-
5859
/// `<<` (positive lookbehind)
5960
LookBehind,
6061

61-
/// `<` (word start)
62-
AngleLeft,
63-
64-
/// `>` (word end)
65-
AngleRight,
66-
6762
/// `::` (back reference)
6863
DoubleColon,
6964

@@ -100,7 +95,7 @@ impl core::fmt::Display for Token {
10095
f.write_str(match self {
10196
Token::Caret => "`^`",
10297
Token::Dollar => "`$`",
103-
Token::BWord => "`%`",
98+
Token::Percent => "`%`",
10499
Token::Star => "`*`",
105100
Token::Plus => "`+`",
106101
Token::QuestionMark => "`?`",

pomsky-syntax/src/lexer/tokenize.rs

+37-22
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,42 @@ macro_rules! reserved_word_pattern {
4242
);
4343
}
4444

45+
static SINGLE_TOKEN_LOOKUP: [Option<Token>; 127] = const {
46+
let mut table = [const { None }; 127];
47+
table[b'^' as usize] = Some(Token::Caret);
48+
table[b'$' as usize] = Some(Token::Dollar);
49+
table[b'%' as usize] = Some(Token::Percent);
50+
table[b'<' as usize] = Some(Token::AngleLeft);
51+
table[b'>' as usize] = Some(Token::AngleRight);
52+
table[b'*' as usize] = Some(Token::Star);
53+
table[b'+' as usize] = Some(Token::Plus);
54+
table[b'?' as usize] = Some(Token::QuestionMark);
55+
table[b'|' as usize] = Some(Token::Pipe);
56+
table[b'&' as usize] = Some(Token::Ampersand);
57+
table[b':' as usize] = Some(Token::Colon);
58+
table[b')' as usize] = Some(Token::CloseParen);
59+
table[b'{' as usize] = Some(Token::OpenBrace);
60+
table[b'}' as usize] = Some(Token::CloseBrace);
61+
table[b',' as usize] = Some(Token::Comma);
62+
table[b'!' as usize] = Some(Token::Not);
63+
table[b'[' as usize] = Some(Token::OpenBracket);
64+
table[b']' as usize] = Some(Token::CloseBracket);
65+
table[b'-' as usize] = Some(Token::Dash);
66+
table[b'.' as usize] = Some(Token::Dot);
67+
table[b';' as usize] = Some(Token::Semicolon);
68+
table[b'=' as usize] = Some(Token::Equals);
69+
table
70+
};
71+
72+
fn lookup_single(c: char) -> Option<Token> {
73+
let c = c as u32;
74+
if c < 128 {
75+
SINGLE_TOKEN_LOOKUP[c as usize]
76+
} else {
77+
None
78+
}
79+
}
80+
4581
pub(crate) fn tokenize(mut input: &str) -> Vec<(Token, Span)> {
4682
let mut result = vec![];
4783
let mut offset = 0;
@@ -64,28 +100,7 @@ pub(crate) fn tokenize(mut input: &str) -> Vec<(Token, Span)> {
64100
if input.starts_with("<<") => (2, Token::LookBehind);
65101
if input.starts_with("::") => (2, Token::DoubleColon);
66102

67-
if c == '^' => (1, Token::Caret);
68-
if c == '$' => (1, Token::Dollar);
69-
if c == '<' => (1, Token::AngleLeft);
70-
if c == '>' => (1, Token::AngleRight);
71-
if c == '%' => (1, Token::BWord);
72-
if c == '*' => (1, Token::Star);
73-
if c == '+' => (1, Token::Plus);
74-
if c == '?' => (1, Token::QuestionMark);
75-
if c == '|' => (1, Token::Pipe);
76-
if c == '&' => (1, Token::Ampersand);
77-
if c == ':' => (1, Token::Colon);
78-
if c == ')' => (1, Token::CloseParen);
79-
if c == '{' => (1, Token::OpenBrace);
80-
if c == '}' => (1, Token::CloseBrace);
81-
if c == ',' => (1, Token::Comma);
82-
if c == '!' => (1, Token::Not);
83-
if c == '[' => (1, Token::OpenBracket);
84-
if c == '-' => (1, Token::Dash);
85-
if c == ']' => (1, Token::CloseBracket);
86-
if c == '.' => (1, Token::Dot);
87-
if c == ';' => (1, Token::Semicolon);
88-
if c == '=' => (1, Token::Equals);
103+
if let Some(token) = lookup_single(c) => (1, token);
89104

90105
if c == '\'' => match input[1..].find('\'') {
91106
Some(len_inner) => (len_inner + 2, Token::String),

0 commit comments

Comments
 (0)