feat(ungrammar, codegen, css_parser): support "dynamic" unordered node fields (#1438)
faultyserver authored Jan 5, 2024
1 parent a1bceee commit 4607357
Showing 14 changed files with 542 additions and 80 deletions.
1 change: 1 addition & 0 deletions .github/labeler.yml
@@ -48,6 +48,7 @@ A-Diagnostic:

A-Tooling:
- xtask/**
- crates/biome_ungrammar/**

A-Website:
- website/**
4 changes: 4 additions & 0 deletions crates/biome_css_syntax/src/generated/nodes.rs

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions crates/biome_js_syntax/src/generated/nodes.rs

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions crates/biome_json_syntax/src/generated/nodes.rs

Some generated files are not rendered by default.

10 changes: 9 additions & 1 deletion crates/biome_rowan/src/ast/mod.rs
@@ -105,10 +105,18 @@ where
}
}

/// The main trait to go from untyped `SyntaxNode` to a typed ast. The
/// conversion itself has zero runtime cost: ast and syntax nodes have exactly
/// the same representation: a pointer to the tree root and a pointer to the
/// node itself.
///
/// The only exception is Dynamic nodes, which allow the fields of the
/// AstNode to be mapped to any slot of the SyntaxNode using an additional
/// `slot_map`. This map must be built every time the untyped syntax node is
/// converted into the typed ast node, and is determined by the order of
/// fields in the original grammar. Even so, this cost is relatively low and
/// should not be considered prohibitive, as the only work done is checking
/// [AstNode::can_cast] for each child against its respective slot.
pub trait AstNode: Clone {
type Language: Language;

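To make the `slot_map` described in the `AstNode` docs above concrete, here is a minimal, self-contained Rust sketch of the idea. It is an editor's illustration only, not code generated by this commit: `Kind`, `Child`, and `build_slot_map` are hypothetical stand-ins, and a real implementation would consult `AstNode::can_cast` rather than comparing raw kinds.

```rust
// Illustrative sketch only; these types are stand-ins, not biome_rowan's API.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind {
    Length,
    Color,
    Direction,
}

struct Child {
    kind: Kind,
}

/// For each declared field kind, find the slot index of the first child that
/// matches it (`None` if that optional field is absent). The real codegen
/// checks `AstNode::can_cast` instead of comparing plain kinds.
fn build_slot_map(children: &[Child], field_kinds: &[Kind]) -> Vec<Option<usize>> {
    field_kinds
        .iter()
        .map(|field| children.iter().position(|child| child.kind == *field))
        .collect()
}

fn main() {
    // Source order: Direction then Length; declared field order: Length, Color, Direction.
    let children = [Child { kind: Kind::Direction }, Child { kind: Kind::Length }];
    let slot_map = build_slot_map(&children, &[Kind::Length, Kind::Color, Kind::Direction]);
    assert_eq!(slot_map, vec![Some(1), None, Some(0)]);
}
```

The point is that the per-cast cost is a linear scan over the node's children, which matches the "relatively low" cost the doc comment claims.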
38 changes: 37 additions & 1 deletion crates/biome_ungrammar/src/lexer.rs
@@ -12,6 +12,29 @@ pub(crate) enum TokenKind {
Colon,
LParen,
RParen,
DoublePipe,
DoubleAmpersand,
}

/// Utility type for quickly comparing simple tokens without having to
/// worry about cloning, equality, or instantiation of the Node and Token types.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub(crate) enum CombinatorKind {
Pipe,
DoublePipe,
DoubleAmpersand,
NonCombinator,
}

impl CombinatorKind {
pub fn new(value: &TokenKind) -> Self {
match value {
TokenKind::Pipe => CombinatorKind::Pipe,
TokenKind::DoublePipe => CombinatorKind::DoublePipe,
TokenKind::DoubleAmpersand => CombinatorKind::DoubleAmpersand,
_ => CombinatorKind::NonCombinator,
}
}
}

#[derive(Debug)]
@@ -79,7 +102,20 @@ fn advance(input: &mut &str) -> Result<TokenKind> {
'?' => TokenKind::QMark,
'(' => TokenKind::LParen,
')' => TokenKind::RParen,
'|' => TokenKind::Pipe,
'|' => match chars.clone().next() {
Some('|') => {
chars.next();
TokenKind::DoublePipe
}
_ => TokenKind::Pipe,
},
'&' => match chars.clone().next() {
Some('&') => {
chars.next();
TokenKind::DoubleAmpersand
}
_ => bail!("unexpected `&`, did you mean to write `&&`?"),
},
':' => TokenKind::Colon,
'\'' => {
let mut buf = String::new();
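The `chars.clone().next()` calls above are a cheap one-character lookahead: cloning a `Chars` iterator just copies a cursor, so the lexer can peek before deciding between `|` and `||`, or rejecting a lone `&`. Below is a self-contained sketch of the same pattern, with a hypothetical `Tok` enum and `lex_combinator` helper standing in for the crate's lexer.

```rust
// Editor's illustration of the lookahead trick, not part of this commit.
#[derive(Debug, PartialEq)]
enum Tok {
    Pipe,
    DoublePipe,
    DoubleAmpersand,
}

fn lex_combinator(input: &str) -> Result<Tok, String> {
    let mut chars = input.chars();
    match chars.next() {
        // Peek via a cloned iterator; only consume the second `|` if it is there.
        Some('|') => match chars.clone().next() {
            Some('|') => {
                chars.next();
                Ok(Tok::DoublePipe)
            }
            _ => Ok(Tok::Pipe),
        },
        Some('&') => match chars.clone().next() {
            Some('&') => {
                chars.next();
                Ok(Tok::DoubleAmpersand)
            }
            // A single `&` has no meaning in ungrammar, so reject it eagerly.
            _ => Err("unexpected `&`, did you mean to write `&&`?".to_string()),
        },
        other => Err(format!("unexpected start of combinator: {other:?}")),
    }
}

fn main() {
    assert_eq!(lex_combinator("||"), Ok(Tok::DoublePipe));
    assert_eq!(lex_combinator("|"), Ok(Tok::Pipe));
    assert_eq!(lex_combinator("&&"), Ok(Tok::DoubleAmpersand));
    assert!(lex_combinator("&").is_err());
}
```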
6 changes: 6 additions & 0 deletions crates/biome_ungrammar/src/lib.rs
@@ -112,6 +112,12 @@ pub enum Rule {
Seq(Vec<Rule>),
/// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
Alt(Vec<Rule>),
/// An unordered, alternative rule, like `A || B || C`, meaning A, B, and C
/// can all appear 0 or 1 times, in any order.
UnorderedSome(Vec<Rule>),
/// An unordered, required rule, like `A && B && C`, meaning A, B, and C
/// _must_ all appear exactly 1 time, but can be in any order.
UnorderedAll(Vec<Rule>),
/// An optional rule, like `A?`.
Opt(Box<Rule>),
/// A repeated rule, like `A*`.
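As a rough illustration of how grammar text maps onto the two new variants, consider `length || color || direction` and `vertical && horizontal`. The sketch below uses a trimmed-down local `Rule` enum rather than the crate's full type, so it is indicative only.

```rust
// Trimmed-down stand-in for the crate's `Rule` enum, for illustration only.
#[derive(Debug)]
enum Rule {
    Node(&'static str),
    UnorderedSome(Vec<Rule>),
    UnorderedAll(Vec<Rule>),
}

fn main() {
    // `length || color || direction`: each rule may appear 0 or 1 times, in any order.
    let some = Rule::UnorderedSome(vec![
        Rule::Node("length"),
        Rule::Node("color"),
        Rule::Node("direction"),
    ]);

    // `vertical && horizontal`: both rules must appear exactly once, in any order.
    let all = Rule::UnorderedAll(vec![Rule::Node("vertical"), Rule::Node("horizontal")]);

    println!("{some:?}");
    println!("{all:?}");
}
```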
56 changes: 47 additions & 9 deletions crates/biome_ungrammar/src/parser.rs
@@ -3,7 +3,7 @@ use std::collections::HashMap;

use crate::{
error::{bail, format_err, Result},
lexer::{self, TokenKind},
lexer::{self, CombinatorKind, TokenKind},
Grammar, Node, NodeData, Rule, Token, TokenData,
};

@@ -92,6 +92,8 @@ impl Parser {
}
}

/// Parse a full Node: the entire production of
/// name '=' Rule
fn node(p: &mut Parser) -> Result<()> {
let token = p.bump()?;
let node = match token.kind {
@@ -108,37 +110,63 @@ fn node(p: &mut Parser) -> Result<()> {
Ok(())
}

/// Parse any Rule, the right-hand side of a production. This handles
/// all of the combinators other than juxtaposition:
/// 'auto' | Expr | Value
/// length || color || direction
/// vertical && horizontal
fn rule(p: &mut Parser) -> Result<Rule> {
if let Some(lexer::Token {
kind: TokenKind::Pipe,
kind: TokenKind::Pipe | TokenKind::DoubleAmpersand | TokenKind::DoublePipe,
loc,
}) = p.peek()
{
bail!(
*loc,
"The first element in a sequence of productions or alternatives \
must not have a leading pipe (`|`)"
must not be a combinator (`|`, `||`, or `&&`)"
);
}

let lhs = seq_rule(p)?;
let mut alt = vec![lhs];
let mut rules = vec![lhs];
let mut combinator_kind: Option<CombinatorKind> = None;
while let Some(token) = p.peek() {
if token.kind != TokenKind::Pipe {
let token_combinator = CombinatorKind::new(&token.kind);

if matches!(token_combinator, CombinatorKind::NonCombinator) {
break;
}

match combinator_kind {
Some(kind) if kind != token_combinator => {
bail!(token.loc, "Cannot mix combinators at the same level in a Rule. Use parentheses to specify precedence");
}
None => combinator_kind = Some(token_combinator),
_ => (),
}

p.bump()?;
let rule = seq_rule(p)?;
alt.push(rule)
rules.push(rule)
}
let res = if alt.len() == 1 {
alt.pop().unwrap()
let res = if rules.len() == 1 {
rules.pop().unwrap()
} else {
Rule::Alt(alt)
match combinator_kind {
Some(CombinatorKind::DoubleAmpersand) => Rule::UnorderedAll(rules),
Some(CombinatorKind::DoublePipe) => Rule::UnorderedSome(rules),
Some(CombinatorKind::Pipe) => Rule::Alt(rules),
None | Some(CombinatorKind::NonCombinator) => {
unreachable!("Matched more than one rule but didn't determine a combinator")
}
}
};
Ok(res)
}

/// Parse a multi-element sequence as a single Rule:
/// 'while' '(' Expr ')'
fn seq_rule(p: &mut Parser) -> Result<Rule> {
let lhs = atom_rule(p)?;

@@ -154,6 +182,11 @@ fn seq_rule(p: &mut Parser) -> Result<Rule> {
Ok(res)
}

/// Parse any single-element Rule, returning an Error if no rule is parsed.
/// Rule
/// Rule*
/// Rule?
/// ( Rule )
fn atom_rule(p: &mut Parser) -> Result<Rule> {
match opt_atom_rule(p)? {
Some(it) => Ok(it),
@@ -164,6 +197,11 @@ fn atom_rule(p: &mut Parser) -> Result<Rule> {
}
}

/// Parse any single-element Rule. Returns None if no rule is parsed.
/// Rule
/// Rule*
/// Rule?
/// ( Rule )
fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
let token = match p.peek() {
Some(it) => it,
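The combinator handling added to `rule` above comes down to remembering the first combinator seen at a level and rejecting any different one, so `A | B && C` is an error while `A | (B && C)` moves the `&&` into its own level. Below is a self-contained sketch of just that check; the `Combinator` enum and `check_level` function are illustrative stand-ins, not the parser's actual API.

```rust
// Editor's sketch of the "no mixed combinators at one level" rule.
#[derive(PartialEq, Clone, Copy, Debug)]
enum Combinator {
    Pipe,            // `|`
    DoublePipe,      // `||`
    DoubleAmpersand, // `&&`
}

/// Reject a sequence of combinators that mixes kinds at one nesting level,
/// mirroring the error raised by `rule` above.
fn check_level(combinators: &[Combinator]) -> Result<(), String> {
    let mut level: Option<Combinator> = None;
    for &c in combinators {
        match level {
            Some(kind) if kind != c => {
                return Err(
                    "Cannot mix combinators at the same level in a Rule. \
                     Use parentheses to specify precedence"
                        .to_string(),
                );
            }
            None => level = Some(c),
            _ => {}
        }
    }
    Ok(())
}

fn main() {
    // `A || B || C` stays on one combinator, so it is accepted ...
    assert!(check_level(&[Combinator::DoublePipe, Combinator::DoublePipe]).is_ok());
    // ... while `A | B && C` mixes `|` and `&&` and must be parenthesized.
    assert!(check_level(&[Combinator::Pipe, Combinator::DoubleAmpersand]).is_err());
}
```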
2 changes: 2 additions & 0 deletions crates/biome_ungrammar/ungrammar.ungram
@@ -10,6 +10,8 @@ Rule =
| 'token_ident'
| Rule *
| Rule ( '|' Rule) *
| Rule ( '||' Rule) *
| Rule ( '&&' Rule) *
| Rule '?'
| Rule '*'
| '(' Rule ')'