feat(ungrammar, codegen, css_parser): support "dynamic" unordered node fields (#1438)
faultyserver authored Jan 5, 2024
1 parent a1bceee commit 4607357
Showing 14 changed files with 542 additions and 80 deletions.
1 change: 1 addition & 0 deletions .github/labeler.yml
@@ -48,6 +48,7 @@ A-Diagnostic:

A-Tooling:
- xtask/**
- crates/biome_ungrammar/**

A-Website:
- website/**
4 changes: 4 additions & 0 deletions crates/biome_css_syntax/src/generated/nodes.rs

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions crates/biome_js_syntax/src/generated/nodes.rs

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions crates/biome_json_syntax/src/generated/nodes.rs

Some generated files are not rendered by default.

10 changes: 9 additions & 1 deletion crates/biome_rowan/src/ast/mod.rs
@@ -105,10 +105,18 @@ where
}
}

/// The main trait to go from untyped `SyntaxNode` to a typed ast. The
/// conversion itself has zero runtime cost: ast and syntax nodes have exactly
/// the same representation: a pointer to the tree root and a pointer to the
/// node itself.
///
/// The only exception is Dynamic nodes, which allow the fields of the
/// AstNode to be mapped to any slot of the SyntaxNode using an additional
/// `slot_map`. This map must be built every time the untyped syntax node is
/// converted into the typed ast node, and is determined by the order of
/// fields in the original grammar. Even so, this cost is relatively low and
/// should not be considered prohibitive, as the only work done is checking
/// [AstNode::can_cast] for each child against its respective slot.
pub trait AstNode: Clone {
type Language: Language;

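To make the `slot_map` described in the `AstNode` docs above concrete, here is a minimal, self-contained Rust sketch of the idea. It is an editor's illustration only, not code generated by this commit: `Kind`, `Child`, and `build_slot_map` are hypothetical stand-ins, and a real implementation would consult `AstNode::can_cast` rather than comparing raw kinds.

```rust
// Illustrative sketch only; these types are stand-ins, not biome_rowan's API.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind {
    Length,
    Color,
    Direction,
}

struct Child {
    kind: Kind,
}

/// For each declared field kind, find the slot index of the first child that
/// matches it (`None` if that optional field is absent). The real codegen
/// checks `AstNode::can_cast` instead of comparing plain kinds.
fn build_slot_map(children: &[Child], field_kinds: &[Kind]) -> Vec<Option<usize>> {
    field_kinds
        .iter()
        .map(|field| children.iter().position(|child| child.kind == *field))
        .collect()
}

fn main() {
    // Source order: Direction then Length; declared field order: Length, Color, Direction.
    let children = [Child { kind: Kind::Direction }, Child { kind: Kind::Length }];
    let slot_map = build_slot_map(&children, &[Kind::Length, Kind::Color, Kind::Direction]);
    assert_eq!(slot_map, vec![Some(1), None, Some(0)]);
}
```

The point is that the per-cast cost is a linear scan over the node's children, which matches the "relatively low" cost the doc comment claims.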
38 changes: 37 additions & 1 deletion crates/biome_ungrammar/src/lexer.rs
@@ -12,6 +12,29 @@ pub(crate) enum TokenKind {
Colon,
LParen,
RParen,
DoublePipe,
DoubleAmpersand,
}

/// Utility type for quickly comparing simple tokens without having to
/// worry about cloning, equality, or instantiation of the Node and Token types.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub(crate) enum CombinatorKind {
Pipe,
DoublePipe,
DoubleAmpersand,
NonCombinator,
}

impl CombinatorKind {
pub fn new(value: &TokenKind) -> Self {
match value {
TokenKind::Pipe => CombinatorKind::Pipe,
TokenKind::DoublePipe => CombinatorKind::DoublePipe,
TokenKind::DoubleAmpersand => CombinatorKind::DoubleAmpersand,
_ => CombinatorKind::NonCombinator,
}
}
}

#[derive(Debug)]
@@ -79,7 +102,20 @@ fn advance(input: &mut &str) -> Result<TokenKind> {
'?' => TokenKind::QMark,
'(' => TokenKind::LParen,
')' => TokenKind::RParen,
'|' => TokenKind::Pipe,
'|' => match chars.clone().next() {
Some('|') => {
chars.next();
TokenKind::DoublePipe
}
_ => TokenKind::Pipe,
},
'&' => match chars.clone().next() {
Some('&') => {
chars.next();
TokenKind::DoubleAmpersand
}
_ => bail!("unexpected `&`, did you mean to write `&&`?"),
},
':' => TokenKind::Colon,
'\'' => {
let mut buf = String::new();
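The `chars.clone().next()` calls above are a cheap one-character lookahead: cloning a `Chars` iterator just copies a cursor, so the lexer can peek before deciding between `|` and `||`, or rejecting a lone `&`. Below is a self-contained sketch of the same pattern, with a hypothetical `Tok` enum and `lex_combinator` helper standing in for the crate's lexer.

```rust
// Editor's illustration of the lookahead trick, not part of this commit.
#[derive(Debug, PartialEq)]
enum Tok {
    Pipe,
    DoublePipe,
    DoubleAmpersand,
}

fn lex_combinator(input: &str) -> Result<Tok, String> {
    let mut chars = input.chars();
    match chars.next() {
        // Peek via a cloned iterator; only consume the second `|` if it is there.
        Some('|') => match chars.clone().next() {
            Some('|') => {
                chars.next();
                Ok(Tok::DoublePipe)
            }
            _ => Ok(Tok::Pipe),
        },
        Some('&') => match chars.clone().next() {
            Some('&') => {
                chars.next();
                Ok(Tok::DoubleAmpersand)
            }
            // A single `&` has no meaning in ungrammar, so reject it eagerly.
            _ => Err("unexpected `&`, did you mean to write `&&`?".to_string()),
        },
        other => Err(format!("unexpected start of combinator: {other:?}")),
    }
}

fn main() {
    assert_eq!(lex_combinator("||"), Ok(Tok::DoublePipe));
    assert_eq!(lex_combinator("|"), Ok(Tok::Pipe));
    assert_eq!(lex_combinator("&&"), Ok(Tok::DoubleAmpersand));
    assert!(lex_combinator("&").is_err());
}
```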
6 changes: 6 additions & 0 deletions crates/biome_ungrammar/src/lib.rs
@@ -112,6 +112,12 @@ pub enum Rule {
Seq(Vec<Rule>),
/// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
Alt(Vec<Rule>),
/// An unordered, alternative rule, like `A || B || C`, meaning A, B, and C
/// can all appear 0 or 1 times, in any order.
UnorderedSome(Vec<Rule>),
/// An unordered, required rule, like `A && B && C`, meaning A, B, and C
/// _must_ all appear exactly 1 time, but can be in any order.
UnorderedAll(Vec<Rule>),
/// An optional rule, like `A?`.
Opt(Box<Rule>),
/// A repeated rule, like `A*`.
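As a rough illustration of how grammar text maps onto the two new variants, consider `length || color || direction` and `vertical && horizontal`. The sketch below uses a trimmed-down local `Rule` enum rather than the crate's full type, so it is indicative only.

```rust
// Trimmed-down stand-in for the crate's `Rule` enum, for illustration only.
#[derive(Debug)]
enum Rule {
    Node(&'static str),
    UnorderedSome(Vec<Rule>),
    UnorderedAll(Vec<Rule>),
}

fn main() {
    // `length || color || direction`: each rule may appear 0 or 1 times, in any order.
    let some = Rule::UnorderedSome(vec![
        Rule::Node("length"),
        Rule::Node("color"),
        Rule::Node("direction"),
    ]);

    // `vertical && horizontal`: both rules must appear exactly once, in any order.
    let all = Rule::UnorderedAll(vec![Rule::Node("vertical"), Rule::Node("horizontal")]);

    println!("{some:?}");
    println!("{all:?}");
}
```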
56 changes: 47 additions & 9 deletions crates/biome_ungrammar/src/parser.rs
@@ -3,7 +3,7 @@ use std::collections::HashMap;

use crate::{
error::{bail, format_err, Result},
lexer::{self, TokenKind},
lexer::{self, CombinatorKind, TokenKind},
Grammar, Node, NodeData, Rule, Token, TokenData,
};

@@ -92,6 +92,8 @@ impl Parser {
}
}

/// Parse a full Node: the entire production of
/// name '=' Rule
fn node(p: &mut Parser) -> Result<()> {
let token = p.bump()?;
let node = match token.kind {
@@ -108,37 +110,63 @@ fn node(p: &mut Parser) -> Result<()> {
Ok(())
}

/// Parse any Rule, the right-hand side of a production. This handles
/// all of the combinators other than juxtaposition:
/// 'auto' | Expr | Value
/// length || color || direction
/// vertical && horizontal
fn rule(p: &mut Parser) -> Result<Rule> {
if let Some(lexer::Token {
kind: TokenKind::Pipe,
kind: TokenKind::Pipe | TokenKind::DoubleAmpersand | TokenKind::DoublePipe,
loc,
}) = p.peek()
{
bail!(
*loc,
"The first element in a sequence of productions or alternatives \
must not have a leading pipe (`|`)"
must not be a combinator (`|`, `||`, or `&&`)"
);
}

let lhs = seq_rule(p)?;
let mut alt = vec![lhs];
let mut rules = vec![lhs];
let mut combinator_kind: Option<CombinatorKind> = None;
while let Some(token) = p.peek() {
if token.kind != TokenKind::Pipe {
let token_combinator = CombinatorKind::new(&token.kind);

if matches!(token_combinator, CombinatorKind::NonCombinator) {
break;
}

match combinator_kind {
Some(kind) if kind != token_combinator => {
bail!(token.loc, "Cannot mix combinators at the same level in a Rule. Use parentheses to specify precedence");
}
None => combinator_kind = Some(token_combinator),
_ => (),
}

p.bump()?;
let rule = seq_rule(p)?;
alt.push(rule)
rules.push(rule)
}
let res = if alt.len() == 1 {
alt.pop().unwrap()
let res = if rules.len() == 1 {
rules.pop().unwrap()
} else {
Rule::Alt(alt)
match combinator_kind {
Some(CombinatorKind::DoubleAmpersand) => Rule::UnorderedAll(rules),
Some(CombinatorKind::DoublePipe) => Rule::UnorderedSome(rules),
Some(CombinatorKind::Pipe) => Rule::Alt(rules),
None | Some(CombinatorKind::NonCombinator) => {
unreachable!("Matched more than one rule but didn't determine a combinator")
}
}
};
Ok(res)
}

/// Parse a multi-element sequence as a single Rule:
/// 'while' '(' Expr ')'
fn seq_rule(p: &mut Parser) -> Result<Rule> {
let lhs = atom_rule(p)?;

@@ -154,6 +182,11 @@ fn seq_rule(p: &mut Parser) -> Result<Rule> {
Ok(res)
}

/// Parse any single-element Rule, returning an Error if no rule is parsed.
/// Rule
/// Rule*
/// Rule?
/// ( Rule )
fn atom_rule(p: &mut Parser) -> Result<Rule> {
match opt_atom_rule(p)? {
Some(it) => Ok(it),
@@ -164,6 +197,11 @@ fn atom_rule(p: &mut Parser) -> Result<Rule> {
}
}

/// Parse any single-element Rule. Returns None if no rule is parsed.
/// Rule
/// Rule*
/// Rule?
/// ( Rule )
fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
let token = match p.peek() {
Some(it) => it,
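The combinator handling added to `rule` above comes down to remembering the first combinator seen at a level and rejecting any different one, so `A | B && C` is an error while `A | (B && C)` moves the `&&` into its own level. Below is a self-contained sketch of just that check; the `Combinator` enum and `check_level` function are illustrative stand-ins, not the parser's actual API.

```rust
// Editor's sketch of the "no mixed combinators at one level" rule.
#[derive(PartialEq, Clone, Copy, Debug)]
enum Combinator {
    Pipe,            // `|`
    DoublePipe,      // `||`
    DoubleAmpersand, // `&&`
}

/// Reject a sequence of combinators that mixes kinds at one nesting level,
/// mirroring the error raised by `rule` above.
fn check_level(combinators: &[Combinator]) -> Result<(), String> {
    let mut level: Option<Combinator> = None;
    for &c in combinators {
        match level {
            Some(kind) if kind != c => {
                return Err(
                    "Cannot mix combinators at the same level in a Rule. \
                     Use parentheses to specify precedence"
                        .to_string(),
                );
            }
            None => level = Some(c),
            _ => {}
        }
    }
    Ok(())
}

fn main() {
    // `A || B || C` stays on one combinator, so it is accepted ...
    assert!(check_level(&[Combinator::DoublePipe, Combinator::DoublePipe]).is_ok());
    // ... while `A | B && C` mixes `|` and `&&` and must be parenthesized.
    assert!(check_level(&[Combinator::Pipe, Combinator::DoubleAmpersand]).is_err());
}
```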
2 changes: 2 additions & 0 deletions crates/biome_ungrammar/ungrammar.ungram
@@ -10,6 +10,8 @@ Rule =
| 'token_ident'
| Rule *
| Rule ( '|' Rule) *
| Rule ( '||' Rule) *
| Rule ( '&&' Rule) *
| Rule '?'
| Rule '*'
| '(' Rule ')'