Skip to content

Commit

Permalink
feat(parser,css_parser): implement checkpoint rewinding (#1417)
Browse files Browse the repository at this point in the history
  • Loading branch information
faultyserver authored Jan 3, 2024
1 parent d869a33 commit 9f46988
Show file tree
Hide file tree
Showing 11 changed files with 254 additions and 80 deletions.
1 change: 1 addition & 0 deletions crates/biome_css_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub use parser::CssParserOptions;
mod lexer;
mod parser;
mod prelude;
mod state;
mod syntax;
mod token_source;

Expand Down
48 changes: 46 additions & 2 deletions crates/biome_css_parser/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use crate::lexer::CssReLexContext;
use crate::token_source::CssTokenSource;
use crate::state::CssParserState;
use crate::token_source::{CssTokenSource, CssTokenSourceCheckpoint};
use biome_css_syntax::CssSyntaxKind;
use biome_parser::diagnostic::merge_diagnostics;
use biome_parser::event::Event;
use biome_parser::prelude::*;
use biome_parser::token_source::Trivia;
use biome_parser::ParserContext;
use biome_parser::{prelude::*, ParserContextCheckpoint};

/// CSS parser: bundles the shared parser context, the CSS token source and
/// the CSS-specific parser state.
pub(crate) struct CssParser<'source> {
// Shared parser context; snapshotted by `checkpoint`, restored by `rewind`
// and consumed by `finish`.
context: ParserContext<CssSyntaxKind>,
// Token source the parser reads from; also snapshotted by `checkpoint`.
source: CssTokenSource<'source>,
// Parser state flags (e.g. `speculative_parsing`). Not part of checkpoints.
state: CssParserState,
}

#[derive(Default, Debug, Clone, Copy)]
Expand All @@ -29,6 +31,7 @@ impl<'source> CssParser<'source> {
Self {
context: ParserContext::default(),
source: CssTokenSource::from_str(source, config),
state: CssParserState::new(),
}
}

Expand All @@ -39,6 +42,35 @@ impl<'source> CssParser<'source> {
self.source_mut().re_lex(context)
}

/// Returns a shared reference to the CSS parser state.
#[allow(dead_code)] //TODO remove this allow once we actually use it
pub(crate) fn state(&self) -> &CssParserState {
&self.state
}

/// Returns a mutable reference to the CSS parser state, e.g. to toggle
/// `speculative_parsing` for the duration of a parse attempt.
pub(crate) fn state_mut(&mut self) -> &mut CssParserState {
&mut self.state
}

/// Captures the current parser position so that it can later be restored
/// with `rewind`.
///
/// Only the parser context and the token source are snapshotted.
pub fn checkpoint(&self) -> CssParserCheckpoint {
    let context = self.context.checkpoint();
    let source = self.source.checkpoint();

    // `state` is intentionally left out of the checkpoint: it (currently)
    // only holds scoped properties that do not depend solely on checkpoints
    // and must be reset manually when the scope using them is exited.
    CssParserCheckpoint { context, source }
}

/// Restores the parser to the position captured by `checkpoint`.
///
/// The parser context is rewound first, then the token source.
pub fn rewind(&mut self, checkpoint: CssParserCheckpoint) {
    // Exhaustive destructuring: adding a field to the checkpoint forces this
    // function to be revisited.
    let CssParserCheckpoint {
        context: context_checkpoint,
        source: source_checkpoint,
    } = checkpoint;

    self.context.rewind(context_checkpoint);
    self.source.rewind(source_checkpoint);
    // `state` is not restored here: it (currently) only holds scoped
    // properties that do not depend solely on checkpoints and must be reset
    // manually when the scope using them is exited.
}

pub fn finish(self) -> (Vec<Event<CssSyntaxKind>>, Vec<ParseDiagnostic>, Vec<Trivia>) {
let (trivia, lexer_diagnostics) = self.source.finish();
let (events, parse_diagnostics) = self.context.finish();
Expand Down Expand Up @@ -68,4 +100,16 @@ impl<'source> Parser for CssParser<'source> {
/// Returns mutable access to the underlying CSS token source.
fn source_mut(&mut self) -> &mut Self::Source {
&mut self.source
}

/// Reports whether the parser is currently in a speculative parse, as
/// recorded by the `speculative_parsing` flag of the parser state.
fn is_speculative_parsing(&self) -> bool {
self.state.speculative_parsing
}
}

/// Snapshot of a `CssParser` position, created by `CssParser::checkpoint` and
/// consumed by `CssParser::rewind`.
pub struct CssParserCheckpoint {
// Snapshot of the shared parser context.
pub(super) context: ParserContextCheckpoint,
// Snapshot of the CSS token source.
pub(super) source: CssTokenSourceCheckpoint,
// `state` is not checkpointed because it (currently) only contains
// scoped properties that do not depend solely on checkpoints and
// should be reset manually when the scope of their use is exited.
}
22 changes: 22 additions & 0 deletions crates/biome_css_parser/src/state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/// Mutable state tracked by the CSS parser while parsing.
///
/// The state is not part of parser checkpoints; flags stored here are expected
/// to be reset manually by whoever set them.
#[derive(Debug)]
pub(crate) struct CssParserState {
    /// Indicates that the parser is speculatively parsing a syntax. Speculative parsing means that the
    /// parser tries to parse a syntax as one kind and determines at the end if the assumption was right
    /// by testing if the parser is at a specific token (or has no errors). For this approach to work,
    /// the parser isn't allowed to skip any tokens while doing error recovery because it may then successfully
    /// skip over all invalid tokens, so that it appears as if it was able to parse the syntax correctly.
    ///
    /// Speculative parsing is useful if a syntax is ambiguous and no amount of lookahead (except parsing the whole syntax)
    /// is sufficient to determine what syntax it is. For example, the syntax `(a, b) ...`
    /// in JavaScript is either a parenthesized expression or an arrow expression if `...` is a `=>`.
    /// The challenge is that it isn't possible to tell which of the two kinds it is until the parser
    /// has processed all of `(a, b)`.
    pub(crate) speculative_parsing: bool,
}

impl CssParserState {
    /// Creates the initial parser state, with speculative parsing disabled.
    pub fn new() -> Self {
        Self {
            speculative_parsing: false,
        }
    }
}

impl Default for CssParserState {
    /// Equivalent to [`CssParserState::new`].
    fn default() -> Self {
        Self::new()
    }
}
83 changes: 83 additions & 0 deletions crates/biome_css_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,3 +601,86 @@ pub(crate) fn parse_string(p: &mut CssParser) -> ParsedSyntax {
fn is_at_string(p: &mut CssParser) -> bool {
p.at(CSS_STRING_LITERAL)
}

/// Speculatively runs `func` against the parser.
///
/// When `func` returns `Ok`, the value is passed through and the parser keeps
/// whatever progress `func` made. When `func` returns `Err`, the parser is
/// rewound to the position it had before the attempt and the error is
/// returned unchanged.
///
/// While `func` runs, the parser's `speculative_parsing` flag is set; its
/// previous value is restored afterwards, before any rewinding happens.
#[allow(dead_code)] // TODO: Remove this allow once it's actually used
pub(crate) fn try_parse<T, E>(
    p: &mut CssParser,
    func: impl FnOnce(&mut CssParser) -> Result<T, E>,
) -> Result<T, E> {
    let start = p.checkpoint();

    // Remember the previous flag value so that nested attempts restore the
    // value of their enclosing attempt rather than unconditionally clearing it.
    let state = p.state_mut();
    let was_speculative = state.speculative_parsing;
    state.speculative_parsing = true;

    let outcome = func(p);
    p.state_mut().speculative_parsing = was_speculative;

    outcome.map_err(|err| {
        p.rewind(start);
        err
    })
}

// Unit tests for `try_parse`: one failing attempt that must rewind and one
// successful attempt that must keep its progress.
#[cfg(test)]
mod tests {
use crate::{parser::CssParser, CssParserOptions};
use biome_css_syntax::{CssSyntaxKind, T};
use biome_parser::prelude::ParsedSyntax::{Absent, Present};
use biome_parser::Parser;

use super::{parse_regular_identifier, parse_regular_number, try_parse};

/// A failed attempt must leave the parser exactly where it was before
/// `try_parse` was called.
#[test]
fn try_parse_rewinds_to_checkpoint() {
let mut p = CssParser::new("width: blue;", CssParserOptions::default());

let pre_try_range = p.cur_range();
let result = try_parse(&mut p, |p| {
// advance the parser within the attempt
// parse `width`
parse_regular_identifier(p).ok();
// parse `:`
p.expect(T![:]);

// attempt to parse a number, but fail because the input has `blue`.
match parse_regular_number(p) {
Present(marker) => Ok(Present(marker)),
Absent => Err(()),
}
});

assert!(result.is_err());
// The parser should've rewound back to the start.
assert_eq!(p.cur_range(), pre_try_range);
assert_eq!(p.cur_text(), "width");
}

/// A successful attempt must keep the progress made inside the closure.
#[test]
fn try_parse_preserves_position_on_success() {
let mut p = CssParser::new("width: 100;", CssParserOptions::default());

let pre_try_range = p.cur_range();
let result = try_parse(&mut p, |p| {
// advance the parser within the attempt
// parse `width`
parse_regular_identifier(p).ok();
// parse `:`
p.expect(T![:]);

// attempt to parse a number, and succeed because the input has `100`.
match parse_regular_number(p) {
Present(marker) => Ok(Present(marker)),
Absent => Err(()),
}
});

assert!(result.is_ok());
assert_eq!(result.unwrap().kind(&p), Some(CssSyntaxKind::CSS_NUMBER));
// The parser should not have rewound and is now at the semicolon
assert_ne!(p.cur_range(), pre_try_range);
assert_eq!(p.cur_text(), ";");
}
}
21 changes: 20 additions & 1 deletion crates/biome_css_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use biome_css_syntax::{CssSyntaxKind, TextRange};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{BufferedLexer, LexContext};
use biome_parser::prelude::{BumpWithContext, NthToken, TokenSource};
use biome_parser::token_source::Trivia;
use biome_parser::token_source::{TokenSourceCheckpoint, Trivia};
use biome_rowan::TriviaPieceKind;
use std::collections::VecDeque;

Expand Down Expand Up @@ -34,6 +34,8 @@ struct Lookahead {
after_newline: bool,
}

/// Checkpoint type for the CSS token source: the generic
/// `TokenSourceCheckpoint` specialised to `CssSyntaxKind`.
pub(crate) type CssTokenSourceCheckpoint = TokenSourceCheckpoint<CssSyntaxKind>;

impl<'src> CssTokenSource<'src> {
/// Creates a new token source.
pub(crate) fn new(lexer: BufferedLexer<'src, CssLexer<'src>>) -> CssTokenSource<'src> {
Expand Down Expand Up @@ -139,6 +141,23 @@ impl<'src> CssTokenSource<'src> {

None
}

/// Captures the current state of the token source so that it can be
/// restored later with [Self::rewind].
///
/// The checkpoint records the current number of trivia entries together
/// with a checkpoint of the underlying lexer.
pub fn checkpoint(&self) -> CssTokenSourceCheckpoint {
    // NOTE: the checkpoint stores the length as `u32`, so this cast would
    // truncate for lists longer than `u32::MAX` entries.
    let trivia_len = self.trivia_list.len() as u32;
    let lexer_checkpoint = self.lexer.checkpoint();

    CssTokenSourceCheckpoint {
        trivia_len,
        lexer_checkpoint,
    }
}

/// Rolls the token source back to the state captured in `checkpoint`.
///
/// # Panics
///
/// Panics if the trivia list is currently shorter than it was when the
/// checkpoint was taken.
pub fn rewind(&mut self, checkpoint: CssTokenSourceCheckpoint) {
    let trivia_len = checkpoint.trivia_len as usize;
    assert!(self.trivia_list.len() >= trivia_len);

    // Drop the trivia recorded after the checkpoint, then move the lexer back.
    self.trivia_list.truncate(trivia_len);
    self.lexer.rewind(checkpoint.lexer_checkpoint);

    // Discard any buffered lookahead; presumably it refers to the position
    // being abandoned and is no longer valid after the lexer is rewound.
    self.non_trivia_lookahead.clear();
    self.lookahead_offset = 0;
}
}

impl<'source> TokenSource for CssTokenSource<'source> {
Expand Down
2 changes: 1 addition & 1 deletion crates/biome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ use biome_js_factory::JsSyntaxFactory;
use biome_js_syntax::{JsLanguage, JsSyntaxKind, LanguageVariant};
use biome_parser::tree_sink::LosslessTreeSink;
pub(crate) use parser::{JsParser, ParseRecoveryTokenSet};
pub(crate) use state::{ParserState, StrictMode};
pub(crate) use state::{JsParserState, StrictMode};
use std::fmt::Debug;

pub enum JsSyntaxFeature {
Expand Down
18 changes: 8 additions & 10 deletions crates/biome_js_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ pub(crate) use crate::parser::parse_recovery::{
};
use crate::prelude::*;
use crate::state::{ChangeParserState, ParserStateGuard};
use crate::token_source::JsTokenSourceCheckpoint;
use crate::*;
use crate::{
state::ParserStateCheckpoint,
token_source::{JsTokenSource, TokenSourceCheckpoint},
};
use crate::{state::JsParserStateCheckpoint, token_source::JsTokenSource};
use biome_js_syntax::{
JsFileSource,
JsSyntaxKind::{self},
Expand All @@ -33,7 +31,7 @@ pub(crate) use parsed_syntax::ParsedSyntax;
/// The Parser yields lower level events instead of nodes.
/// These events are then processed into a syntax tree through a [`TreeSink`] implementation.
pub struct JsParser<'source> {
pub(super) state: ParserState,
pub(super) state: JsParserState,
pub source_type: JsFileSource,
context: ParserContext<JsSyntaxKind>,
source: JsTokenSource<'source>,
Expand All @@ -46,23 +44,23 @@ impl<'source> JsParser<'source> {
let source = JsTokenSource::from_str(source);

JsParser {
state: ParserState::new(&source_type),
state: JsParserState::new(&source_type),
source_type,
context: ParserContext::default(),
source,
options,
}
}

pub(crate) fn state(&self) -> &ParserState {
pub(crate) fn state(&self) -> &JsParserState {
&self.state
}

pub(crate) fn options(&self) -> &JsParserOptions {
&self.options
}

pub(crate) fn state_mut(&mut self) -> &mut ParserState {
pub(crate) fn state_mut(&mut self) -> &mut JsParserState {
&mut self.state
}

Expand Down Expand Up @@ -213,8 +211,8 @@ impl<'source> Parser for JsParser<'source> {

pub struct JsParserCheckpoint {
pub(super) context: ParserContextCheckpoint,
pub(super) source: TokenSourceCheckpoint,
state: ParserStateCheckpoint,
pub(super) source: JsTokenSourceCheckpoint,
state: JsParserStateCheckpoint,
}

#[cfg(test)]
Expand Down
5 changes: 2 additions & 3 deletions crates/biome_js_parser/src/parser/rewrite_parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::parser::JsParser;
use crate::token_source::TokenSourceCheckpoint;
use crate::{parser::JsParser, token_source::JsTokenSourceCheckpoint};

use crate::prelude::*;
use biome_console::fmt::Display;
Expand Down Expand Up @@ -34,7 +33,7 @@ pub(crate) struct RewriteParser<'parser, 'source> {
}

impl<'parser, 'source> RewriteParser<'parser, 'source> {
pub fn new(p: &'parser mut JsParser<'source>, checkpoint: TokenSourceCheckpoint) -> Self {
pub fn new(p: &'parser mut JsParser<'source>, checkpoint: JsTokenSourceCheckpoint) -> Self {
Self {
inner: p,
offset: checkpoint.current_start(),
Expand Down
Loading

0 comments on commit 9f46988

Please sign in to comment.