Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(parser,css_parser): implement checkpoint rewinding #1417

Merged
merged 3 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/biome_css_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub use parser::CssParserOptions;
mod lexer;
mod parser;
mod prelude;
mod state;
mod syntax;
mod token_source;

Expand Down
48 changes: 46 additions & 2 deletions crates/biome_css_parser/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use crate::lexer::CssReLexContext;
use crate::token_source::CssTokenSource;
use crate::state::CssParserState;
use crate::token_source::{CssTokenSource, CssTokenSourceCheckpoint};
use biome_css_syntax::CssSyntaxKind;
use biome_parser::diagnostic::merge_diagnostics;
use biome_parser::event::Event;
use biome_parser::prelude::*;
use biome_parser::token_source::Trivia;
use biome_parser::ParserContext;
use biome_parser::{prelude::*, ParserContextCheckpoint};

/// The CSS parser: bundles the parser context, the CSS token source and the
/// CSS-specific parser state.
pub(crate) struct CssParser<'source> {
// Parser context; `finish` takes the events and parse diagnostics from it.
context: ParserContext<CssSyntaxKind>,
// Token source; `finish` takes the trivia and lexer diagnostics from it.
source: CssTokenSource<'source>,
// CSS-specific state (currently only the speculative-parsing flag).
state: CssParserState,
}

#[derive(Default, Debug, Clone, Copy)]
Expand All @@ -29,6 +31,7 @@ impl<'source> CssParser<'source> {
Self {
context: ParserContext::default(),
source: CssTokenSource::from_str(source, config),
state: CssParserState::new(),
}
}

Expand All @@ -39,6 +42,35 @@ impl<'source> CssParser<'source> {
self.source_mut().re_lex(context)
}

/// Returns a shared reference to the parser's CSS-specific state.
#[allow(dead_code)] // TODO: remove this allow once we actually use it
pub(crate) fn state(&self) -> &CssParserState {
&self.state
}

/// Returns a mutable reference to the parser's CSS-specific state, e.g. to
/// toggle the `speculative_parsing` flag.
pub(crate) fn state_mut(&mut self) -> &mut CssParserState {
&mut self.state
}

/// Captures the current parser context and token source positions so that
/// the parser can later be restored with `rewind`.
pub fn checkpoint(&self) -> CssParserCheckpoint {
    // `state` is intentionally left out of the checkpoint: it (currently)
    // only holds scoped properties, and those are expected to be reset
    // manually when the scope that set them is exited.
    let context = self.context.checkpoint();
    let source = self.source.checkpoint();

    CssParserCheckpoint { context, source }
}

/// Restores the parser context and token source to the positions recorded
/// in `checkpoint`.
pub fn rewind(&mut self, checkpoint: CssParserCheckpoint) {
    // Destructure exhaustively (no `..`) so that adding a field to the
    // checkpoint fails to compile here until it is handled.
    let CssParserCheckpoint {
        context: context_checkpoint,
        source: source_checkpoint,
    } = checkpoint;

    self.context.rewind(context_checkpoint);
    self.source.rewind(source_checkpoint);
    // `state` is not restored: it was never checkpointed, because it
    // (currently) only holds scoped properties that are expected to be
    // reset manually when the scope that set them is exited.
}

pub fn finish(self) -> (Vec<Event<CssSyntaxKind>>, Vec<ParseDiagnostic>, Vec<Trivia>) {
let (trivia, lexer_diagnostics) = self.source.finish();
let (events, parse_diagnostics) = self.context.finish();
Expand Down Expand Up @@ -68,4 +100,16 @@ impl<'source> Parser for CssParser<'source> {
/// Returns a mutable reference to the CSS token source.
fn source_mut(&mut self) -> &mut Self::Source {
&mut self.source
}

/// Reports whether the parser is currently parsing speculatively, as
/// recorded in the CSS parser state's `speculative_parsing` flag.
fn is_speculative_parsing(&self) -> bool {
self.state.speculative_parsing
}
}

/// A snapshot of a `CssParser`, produced by `CssParser::checkpoint` and
/// consumed by `CssParser::rewind`.
pub struct CssParserCheckpoint {
// Snapshot of the parser context.
pub(super) context: ParserContextCheckpoint,
// Snapshot of the token source.
pub(super) source: CssTokenSourceCheckpoint,
// `state` is not checkpointed because it (currently) only contains
// scoped properties, which are expected to be reset manually when the
// scope of their use is exited.
}
22 changes: 22 additions & 0 deletions crates/biome_css_parser/src/state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/// Mutable state of the CSS parser that is not part of the parser context or
/// the token source.
///
/// The default state (also returned by [`CssParserState::new`]) has every flag
/// switched off.
#[derive(Debug, Default)]
pub(crate) struct CssParserState {
    /// Indicates that the parser is speculatively parsing a syntax. Speculative parsing means that the
    /// parser tries to parse a syntax as one kind and determines at the end if the assumption was right
    /// by testing if the parser is at a specific token (or has no errors). For this approach to work,
    /// the parser isn't allowed to skip any tokens while doing error recovery because it may then successfully
    /// skip over all invalid tokens, so that it appears as if it was able to parse the syntax correctly.
    ///
    /// Speculative parsing is useful if a syntax is ambiguous and no amount of lookahead (except parsing the whole syntax)
    /// is sufficient to determine what syntax it is. For example, the syntax `(a, b) ...`
    /// in JavaScript is either a parenthesized expression or an arrow expression if `...` is a `=>`.
    /// The challenge is that it isn't possible to tell which of the two kinds it is until the parser
    /// has processed all of `(a, b)`.
    pub(crate) speculative_parsing: bool,
}

impl CssParserState {
    /// Creates the initial parser state, in which the parser is not parsing
    /// speculatively. Equivalent to `CssParserState::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
83 changes: 83 additions & 0 deletions crates/biome_css_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,3 +601,86 @@ pub(crate) fn parse_string(p: &mut CssParser) -> ParsedSyntax {
fn is_at_string(p: &mut CssParser) -> bool {
p.at(CSS_STRING_LITERAL)
}

/// Attempt to parse some input with the given parsing function. If parsing
/// succeeds, `Ok` is returned with the result of the parse and the state is
/// preserved. If parsing fails, this function rewinds the parser back to
/// where it was before attempting the parse and the `Err` value is returned.
///
/// While `func` runs, the parser's `speculative_parsing` flag is set to
/// `true`. The previous value of the flag is restored once `func` returns,
/// whether it returned `Ok` or `Err`.
#[allow(dead_code)] // TODO: Remove this allow once it's actually used
pub(crate) fn try_parse<T, E>(
p: &mut CssParser,
func: impl FnOnce(&mut CssParser) -> Result<T, E>,
) -> Result<T, E> {
let checkpoint = p.checkpoint();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to implement speculative_parsing?
It blocks a parse recovery since we're going to rewind anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose it's probably best that we do just for safety. I don't think we need it strictly for how it'll be used here, but definitely no harm in including it. I'll add it in now.

// Switch speculative parsing on for the duration of `func`, remembering the
// previous value so it can be put back afterwards.
let old_speculative_parsing = std::mem::replace(&mut p.state_mut().speculative_parsing, true);

let res = func(p);
// The parser state is not part of the checkpoint, so the flag has to be
// restored by hand.
p.state_mut().speculative_parsing = old_speculative_parsing;

// Only a failed attempt is undone; on success the parser keeps its progress.
if res.is_err() {
p.rewind(checkpoint);
}

res
}

// Tests for `try_parse`: a failed attempt must rewind the parser, a successful
// attempt must leave the parser where the attempt ended.
#[cfg(test)]
mod tests {
use crate::{parser::CssParser, CssParserOptions};
use biome_css_syntax::{CssSyntaxKind, T};
use biome_parser::prelude::ParsedSyntax::{Absent, Present};
use biome_parser::Parser;

use super::{parse_regular_identifier, parse_regular_number, try_parse};

/// A failing attempt (`Err`) restores the parser to where it was before the
/// attempt started, even though tokens were consumed inside the attempt.
#[test]
fn try_parse_rewinds_to_checkpoint() {
let mut p = CssParser::new("width: blue;", CssParserOptions::default());

let pre_try_range = p.cur_range();
let result = try_parse(&mut p, |p| {
// advance the parser within the attempt
// parse `width`
parse_regular_identifier(p).ok();
// parse `:`
p.expect(T![:]);

// attempt to parse a number, but fail because the input has `blue`.
match parse_regular_number(p) {
Present(marker) => Ok(Present(marker)),
Absent => Err(()),
}
});

assert!(result.is_err());
// The parser should've rewound back to the start.
assert_eq!(p.cur_range(), pre_try_range);
assert_eq!(p.cur_text(), "width");
}

/// A successful attempt (`Ok`) keeps everything that was parsed inside the
/// attempt; the parser is not rewound.
#[test]
fn try_parse_preserves_position_on_success() {
let mut p = CssParser::new("width: 100;", CssParserOptions::default());

let pre_try_range = p.cur_range();
let result = try_parse(&mut p, |p| {
// advance the parser within the attempt
// parse `width`
parse_regular_identifier(p).ok();
// parse `:`
p.expect(T![:]);

// attempt to parse a number, and succeed because the input has `100`.
match parse_regular_number(p) {
Present(marker) => Ok(Present(marker)),
Absent => Err(()),
}
});

assert!(result.is_ok());
assert_eq!(result.unwrap().kind(&p), Some(CssSyntaxKind::CSS_NUMBER));
// The parser should not have rewound and is now at the semicolon
assert_ne!(p.cur_range(), pre_try_range);
assert_eq!(p.cur_text(), ";");
}
}
21 changes: 20 additions & 1 deletion crates/biome_css_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use biome_css_syntax::{CssSyntaxKind, TextRange};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{BufferedLexer, LexContext};
use biome_parser::prelude::{BumpWithContext, NthToken, TokenSource};
use biome_parser::token_source::Trivia;
use biome_parser::token_source::{TokenSourceCheckpoint, Trivia};
use biome_rowan::TriviaPieceKind;
use std::collections::VecDeque;

Expand Down Expand Up @@ -34,6 +34,8 @@ struct Lookahead {
after_newline: bool,
}

pub(crate) type CssTokenSourceCheckpoint = TokenSourceCheckpoint<CssSyntaxKind>;

impl<'src> CssTokenSource<'src> {
/// Creates a new token source.
pub(crate) fn new(lexer: BufferedLexer<'src, CssLexer<'src>>) -> CssTokenSource<'src> {
Expand Down Expand Up @@ -139,6 +141,23 @@ impl<'src> CssTokenSource<'src> {

None
}

/// Records the current length of the trivia list together with a lexer
/// checkpoint, so that the token source can later be restored with
/// [Self::rewind].
pub fn checkpoint(&self) -> CssTokenSourceCheckpoint {
    let trivia_len = self.trivia_list.len() as u32;
    let lexer_checkpoint = self.lexer.checkpoint();

    CssTokenSourceCheckpoint {
        trivia_len,
        lexer_checkpoint,
    }
}

/// Restores the token source to a previous state
///
/// # Panics
///
/// Panics if the trivia list is currently shorter than it was when
/// `checkpoint` was created.
pub fn rewind(&mut self, checkpoint: CssTokenSourceCheckpoint) {
// The trivia list must be at least as long as it was at the checkpoint.
assert!(self.trivia_list.len() >= checkpoint.trivia_len as usize);
// Drop the trivia that was recorded after the checkpoint was taken.
self.trivia_list.truncate(checkpoint.trivia_len as usize);
self.lexer.rewind(checkpoint.lexer_checkpoint);
// Reset the lookahead bookkeeping.
// NOTE(review): presumably so that lookahead computed past the checkpoint
// is not reused after the rewind — confirm.
self.non_trivia_lookahead.clear();
self.lookahead_offset = 0;
}
}

impl<'source> TokenSource for CssTokenSource<'source> {
Expand Down
2 changes: 1 addition & 1 deletion crates/biome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ use biome_js_factory::JsSyntaxFactory;
use biome_js_syntax::{JsLanguage, JsSyntaxKind, LanguageVariant};
use biome_parser::tree_sink::LosslessTreeSink;
pub(crate) use parser::{JsParser, ParseRecoveryTokenSet};
pub(crate) use state::{ParserState, StrictMode};
pub(crate) use state::{JsParserState, StrictMode};
use std::fmt::Debug;

pub enum JsSyntaxFeature {
Expand Down
18 changes: 8 additions & 10 deletions crates/biome_js_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ pub(crate) use crate::parser::parse_recovery::{
};
use crate::prelude::*;
use crate::state::{ChangeParserState, ParserStateGuard};
use crate::token_source::JsTokenSourceCheckpoint;
use crate::*;
use crate::{
state::ParserStateCheckpoint,
token_source::{JsTokenSource, TokenSourceCheckpoint},
};
use crate::{state::JsParserStateCheckpoint, token_source::JsTokenSource};
use biome_js_syntax::{
JsFileSource,
JsSyntaxKind::{self},
Expand All @@ -33,7 +31,7 @@ pub(crate) use parsed_syntax::ParsedSyntax;
/// The Parser yields lower level events instead of nodes.
/// These events are then processed into a syntax tree through a [`TreeSink`] implementation.
pub struct JsParser<'source> {
pub(super) state: ParserState,
pub(super) state: JsParserState,
pub source_type: JsFileSource,
context: ParserContext<JsSyntaxKind>,
source: JsTokenSource<'source>,
Expand All @@ -46,23 +44,23 @@ impl<'source> JsParser<'source> {
let source = JsTokenSource::from_str(source);

JsParser {
state: ParserState::new(&source_type),
state: JsParserState::new(&source_type),
source_type,
context: ParserContext::default(),
source,
options,
}
}

pub(crate) fn state(&self) -> &ParserState {
pub(crate) fn state(&self) -> &JsParserState {
&self.state
}

pub(crate) fn options(&self) -> &JsParserOptions {
&self.options
}

pub(crate) fn state_mut(&mut self) -> &mut ParserState {
pub(crate) fn state_mut(&mut self) -> &mut JsParserState {
&mut self.state
}

Expand Down Expand Up @@ -213,8 +211,8 @@ impl<'source> Parser for JsParser<'source> {

pub struct JsParserCheckpoint {
pub(super) context: ParserContextCheckpoint,
pub(super) source: TokenSourceCheckpoint,
state: ParserStateCheckpoint,
pub(super) source: JsTokenSourceCheckpoint,
state: JsParserStateCheckpoint,
}

#[cfg(test)]
Expand Down
5 changes: 2 additions & 3 deletions crates/biome_js_parser/src/parser/rewrite_parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::parser::JsParser;
use crate::token_source::TokenSourceCheckpoint;
use crate::{parser::JsParser, token_source::JsTokenSourceCheckpoint};

use crate::prelude::*;
use biome_console::fmt::Display;
Expand Down Expand Up @@ -34,7 +33,7 @@ pub(crate) struct RewriteParser<'parser, 'source> {
}

impl<'parser, 'source> RewriteParser<'parser, 'source> {
pub fn new(p: &'parser mut JsParser<'source>, checkpoint: TokenSourceCheckpoint) -> Self {
pub fn new(p: &'parser mut JsParser<'source>, checkpoint: JsTokenSourceCheckpoint) -> Self {
Self {
inner: p,
offset: checkpoint.current_start(),
Expand Down
Loading
Loading