From 242dab12430e6730b50a959a995fee1a1324b653 Mon Sep 17 00:00:00 2001 From: Randolf Jung Date: Tue, 1 Aug 2023 22:53:39 +0200 Subject: [PATCH 1/2] Implement basic deferred parsing - Part 2 --- src/ast/mod.rs | 11 ------ src/ast/parent.rs | 3 -- src/parser.rs | 88 +++++++++++++++++++++++++++----------------- src/validator/mod.rs | 2 - src/visitor.rs | 1 - 5 files changed, 54 insertions(+), 51 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6d01af7c..dd5dc269 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -329,13 +329,6 @@ pub enum Rule<'a> { #[doc(hidden)] comments_after_rule: Option>, }, - /// Unknown expression - Unknown { - #[doc(hidden)] - rule: Box>, - #[doc(hidden)] - range: (usize, usize), - }, } impl<'a> Rule<'a> { @@ -345,7 +338,6 @@ impl<'a> Rule<'a> { match self { Rule::Type { span, .. } => *span, Rule::Group { span, .. } => *span, - Rule::Unknown { rule, .. } => rule.span(), } } @@ -441,7 +433,6 @@ impl<'a> fmt::Display for Rule<'a> { write!(f, "{}", rule_str) } - Rule::Unknown { rule, .. } => rule.fmt(f), } } } @@ -452,7 +443,6 @@ impl<'a> Rule<'a> { match self { Rule::Type { rule, .. } => rule.name.to_string(), Rule::Group { rule, .. } => rule.name.to_string(), - Rule::Unknown { rule, .. } => rule.name(), } } @@ -462,7 +452,6 @@ impl<'a> Rule<'a> { match self { Rule::Type { rule, .. } => rule.is_type_choice_alternate, Rule::Group { rule, .. } => rule.is_group_choice_alternate, - Rule::Unknown { rule, .. } => rule.is_choice_alternate(), } } } diff --git a/src/ast/parent.rs b/src/ast/parent.rs index c319e873..a75b73e7 100644 --- a/src/ast/parent.rs +++ b/src/ast/parent.rs @@ -235,9 +235,6 @@ impl<'a, 'b: 'a> Visitor<'a, 'b, Error> for ParentVisitor<'a, 'b> { self.visit_type_rule(rule)?; } - Rule::Unknown { rule, .. } => { - self.visit_rule(rule)?; - } } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index cc635a1c..b3bbadc0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -57,9 +57,9 @@ pub struct Parser<'a> { typenames: Rc>, groupnames: Rc>, #[cfg(feature = "ast-span")] - unknown_rules: Vec<(&'a str, Span)>, + unknown_rule_idents: Vec<(&'a str, Span)>, #[cfg(not(feature = "ast-span"))] - unknown_rules: Vec<&'a str>, + unknown_rule_idents: Vec<&'a str>, is_guaranteed: bool, } @@ -170,7 +170,7 @@ impl<'a> Parser<'a> { "undefined", ])), groupnames: Rc::new(BTreeSet::default()), - unknown_rules: Vec::default(), + unknown_rule_idents: Vec::default(), is_guaranteed: false, }; @@ -389,7 +389,6 @@ impl<'a> Parser<'a> { match &rule { Rule::Type { rule, .. } => Rc::make_mut(&mut self.typenames).insert(rule.name.ident), Rule::Group { rule, .. } => Rc::make_mut(&mut self.groupnames).insert(rule.name.ident), - _ => unreachable!(), }; } @@ -404,6 +403,14 @@ impl<'a> Parser<'a> { ..Default::default() }; + struct UnknownRule<'a> { + rule: Rule<'a>, + index: usize, + range: (usize, usize), + } + + let mut unknown_rules = Vec::default(); + while self.cur_token != Token::EOF { let begin_rule_range = self.lexer_position.range.0; match self.parse_rule(false) { @@ -427,15 +434,16 @@ impl<'a> Parser<'a> { continue; } - if !self.unknown_rules.is_empty() { + if !self.unknown_rule_idents.is_empty() { if self.is_guaranteed { self.register_rule(&r); } - c.rules.push(Rule::Unknown { - rule: Box::new(r), + unknown_rules.push(UnknownRule { + rule: r, + index: c.rules.len(), range: (begin_rule_range, self.lexer_position.range.1), }); - self.unknown_rules = Vec::default(); + self.unknown_rule_idents = Vec::default(); } else { self.register_rule(&r); c.rules.push(r); @@ -451,32 +459,45 @@ impl<'a> Parser<'a> { } } + // In practice unknown rules usually are declared backwards, so we reverse + // it here. + unknown_rules.reverse(); + // Try to specialize unknown rules until the set of them stabilizes. { let mut errors; - let mut rules; + let mut known_rules = Vec::default(); loop { + let mut resolved_rules = Vec::default(); + let mut unresolved_rules = Vec::default(); + errors = Vec::default(); - rules = Vec::default(); - for (index, rule) in c.rules.iter().enumerate() { - if let Rule::Unknown { range, .. } = rule { - match self.resolve_rule(*range, false) { - Ok(rule) => rules.push((index, rule)), - Err(_) => match self.resolve_rule(*range, true) { - Ok(rule) => rules.push((index, rule)), - Err(mut error) => errors.append(&mut error), - }, - } + for unknown_rule in unknown_rules { + match self.resolve_rule(unknown_rule.range, false) { + Ok(rule) => resolved_rules.push((unknown_rule.index, rule)), + Err(_) => match self.resolve_rule(unknown_rule.range, true) { + Ok(rule) => resolved_rules.push((unknown_rule.index, rule)), + Err(mut error) => { + errors.append(&mut error); + unresolved_rules.push(unknown_rule); + } + }, } } - if rules.is_empty() { + if resolved_rules.is_empty() { break; } - for (index, rule) in rules { - c.rules[index] = rule; + for (_, rule) in &resolved_rules { + self.register_rule(&rule); } + known_rules.append(&mut resolved_rules); + unknown_rules = unresolved_rules; } self.errors.append(&mut errors); + known_rules.sort_by(|(a, _), (b, _)| b.partial_cmp(a).unwrap()); + for (index, rule) in known_rules { + c.rules.insert(index, rule); + } } if !self.errors.is_empty() { @@ -508,11 +529,11 @@ impl<'a> Parser<'a> { let rule = parser .parse_rule(parse_group_rule) .map_err(|err| vec![err])?; - if !parser.unknown_rules.is_empty() { + if !parser.unknown_rule_idents.is_empty() { Err( #[cfg(feature = "ast-span")] parser - .unknown_rules + .unknown_rule_idents .into_iter() .map(|(ident, span)| Error::PARSER { position: Position { @@ -529,7 +550,7 @@ impl<'a> Parser<'a> { .collect(), #[cfg(not(feature = "ast-span"))] parser - .unknown_rules + .unknown_rule_idents .into_iter() .map(|ident| Error::PARSER { msg: ErrorMsg { @@ -540,7 +561,6 @@ impl<'a> Parser<'a> { .collect(), ) } else { - self.register_rule(&rule); Ok(rule) } } @@ -1455,12 +1475,12 @@ impl<'a> Parser<'a> { #[cfg(feature = "ast-span")] if !is_generic_param && !self.typenames.contains(ident.ident) { - self.unknown_rules.push((ident.ident, ident.span)); + self.unknown_rule_idents.push((ident.ident, ident.span)); } #[cfg(not(feature = "ast-span"))] if !is_generic_param && !self.typenames.contains(ident.ident) { - self.unknown_rules.push(ident.ident); + self.unknown_rule_idents.push(ident.ident); } } @@ -1488,12 +1508,12 @@ impl<'a> Parser<'a> { #[cfg(feature = "ast-span")] if !is_generic_param && !self.typenames.contains(ident.ident) { - self.unknown_rules.push((ident.ident, ident.span)); + self.unknown_rule_idents.push((ident.ident, ident.span)); } #[cfg(not(feature = "ast-span"))] if !is_generic_param && !self.typenames.contains(ident.ident) { - self.unknown_rules.push(ident.ident); + self.unknown_rule_idents.push(ident.ident); } } @@ -2253,7 +2273,7 @@ impl<'a> Parser<'a> { if self.groupnames.contains(name.ident) || matches!(name.socket, Some(SocketPlug::GROUP)) { if name.socket == None { - self.unknown_rules.pop(); + self.unknown_rule_idents.pop(); } return Ok(GroupEntry::TypeGroupname { ge: TypeGroupnameEntry { @@ -2275,7 +2295,7 @@ impl<'a> Parser<'a> { if self.groupnames.contains(name.ident) || matches!(name.socket, Some(SocketPlug::GROUP)) { if name.socket == None { - self.unknown_rules.pop(); + self.unknown_rule_idents.pop(); } return Ok(GroupEntry::TypeGroupname { ge: TypeGroupnameEntry { @@ -2413,7 +2433,7 @@ impl<'a> Parser<'a> { } if name.socket == None { - self.unknown_rules.pop(); + self.unknown_rule_idents.pop(); } return Ok(GroupEntry::TypeGroupname { ge: TypeGroupnameEntry { @@ -2443,7 +2463,7 @@ impl<'a> Parser<'a> { } if name.socket == None { - self.unknown_rules.pop(); + self.unknown_rule_idents.pop(); } return Ok(GroupEntry::TypeGroupname { ge: TypeGroupnameEntry { diff --git a/src/validator/mod.rs b/src/validator/mod.rs index b81a9b78..45ca8f79 100644 --- a/src/validator/mod.rs +++ b/src/validator/mod.rs @@ -462,7 +462,6 @@ pub fn generic_params_from_rule<'a>(rule: &Rule<'a>) -> Option> { .generic_params .as_ref() .map(|gp| gp.params.iter().map(|gp| gp.param.ident).collect()), - Rule::Unknown { rule, .. } => generic_params_from_rule(rule), } } @@ -520,7 +519,6 @@ pub fn type_choices_from_group_choice<'a>( cddl, &GroupChoice::new(vec![rule.entry.clone()]), )), - _ => {} } } } diff --git a/src/visitor.rs b/src/visitor.rs index 85a5e7c9..4bd485ae 100644 --- a/src/visitor.rs +++ b/src/visitor.rs @@ -188,7 +188,6 @@ where match rule { Rule::Type { rule, .. } => visitor.visit_type_rule(rule), Rule::Group { rule, .. } => visitor.visit_group_rule(rule), - Rule::Unknown { rule, .. } => walk_rule(visitor, rule), } } From 7b6370d46fe8628704150949edb67b755f03a599 Mon Sep 17 00:00:00 2001 From: anweiss <2326106+anweiss@users.noreply.github.com> Date: Wed, 2 Aug 2023 08:52:13 -0400 Subject: [PATCH 2/2] Apply suggestions from code review --- src/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index b3bbadc0..d1e306fb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -488,7 +488,7 @@ impl<'a> Parser<'a> { break; } for (_, rule) in &resolved_rules { - self.register_rule(&rule); + self.register_rule(rule); } known_rules.append(&mut resolved_rules); unknown_rules = unresolved_rules; @@ -2272,7 +2272,7 @@ impl<'a> Parser<'a> { if let Some((name, generic_args, _)) = entry_type.groupname_entry() { if self.groupnames.contains(name.ident) || matches!(name.socket, Some(SocketPlug::GROUP)) { - if name.socket == None { + if name.socket.is_none() { self.unknown_rule_idents.pop(); } return Ok(GroupEntry::TypeGroupname { @@ -2432,7 +2432,7 @@ impl<'a> Parser<'a> { self.next_token()?; } - if name.socket == None { + if name.socket.is_none() { self.unknown_rule_idents.pop(); } return Ok(GroupEntry::TypeGroupname {