Skip to content

Commit

Permalink
refactor: change the Parser.into_cst() to Parser.try_into_cst().
Browse files Browse the repository at this point in the history
The new function is fallible and fails with `Utf8Error` instead of panicking.
  • Loading branch information
plusvic committed Jan 17, 2025
1 parent 17a0dc5 commit c352f24
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 27 deletions.
2 changes: 1 addition & 1 deletion cli/src/commands/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ pub fn exec_cst(args: &ArgMatches) -> anyhow::Result<()> {
.with_context(|| format!("can not read `{}`", rules_path.display()))?;

let parser = Parser::new(src.as_slice());
let cst = parser.into_cst();
let cst = parser.try_into_cst()?;

println!("{cst:?}");
Ok(())
Expand Down
10 changes: 5 additions & 5 deletions fmt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use thiserror::Error;
use tokens::Token::*;
use tokens::TokenStream;
use yara_x_parser::cst::SyntaxKind;
use yara_x_parser::Parser;
use yara_x_parser::{Parser, Span};

use crate::align::Align;
use crate::format_hex_patterns::FormatHexPatterns;
Expand Down Expand Up @@ -55,10 +55,10 @@ pub enum Error {
/// Error while writing to output.
#[error("Write error")]
WriteError(io::Error),
// TODO
// Error while parsing the input.
//#[error("Parse error")]
//ParseError(#[from] yara_x_parser::Error),

/// Error while parsing the input.
#[error("Parse error")]
ParseError { message: String, span: Span },
}

/// Formats YARA source code automatically.
Expand Down
4 changes: 2 additions & 2 deletions fmt/src/tokens/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,8 +481,8 @@ pub(crate) trait TokenStream<'a>: Iterator<Item = Token<'a>> {
// implements the TokenStream trait.
impl<'a, T> TokenStream<'a> for T where T: Iterator<Item = Token<'a>> {}

/// An iterator that takes a parse tree generated by the parser and produces a
/// sequence of tokens.
/// An iterator that takes a [`CSTStream`] generated by the parser and produces
/// a sequence of tokens.
pub(crate) struct Tokens<'src> {
events: CSTStream<'src>,
buffer: VecDeque<Token<'src>>,
Expand Down
23 changes: 14 additions & 9 deletions parser/src/cst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ that the CST does not account for operator associativity or precedence rules.
Expressions are represented in the CST exactly as they appear in the source
code, without any grouping based on operator precedence.
*/
use rowan::{GreenNodeBuilder, GreenToken, SyntaxNode};

use std::fmt::{Debug, Display, Formatter};
use std::iter;
use std::marker::PhantomData;
use std::str::from_utf8;
use std::str::{from_utf8, Utf8Error};

use rowan::{GreenNodeBuilder, GreenToken, SyntaxNode};

use crate::cst::SyntaxKind::{COMMENT, NEWLINE, WHITESPACE};
use crate::{Parser, Span};
Expand Down Expand Up @@ -209,9 +211,11 @@ impl CST {
}
}

impl From<Parser<'_>> for CST {
impl TryFrom<Parser<'_>> for CST {
type Error = Utf8Error;

/// Creates a [`CST`] from the given parser.
fn from(parser: Parser) -> Self {
fn try_from(parser: Parser) -> Result<Self, Utf8Error> {
let source = parser.source();
let mut builder = GreenNodeBuilder::new();
let mut prev_token_span: Option<Span> = None;
Expand All @@ -236,18 +240,19 @@ impl From<Parser<'_>> for CST {
// The span must be within the source code, so this unwrap
// can't fail.
let token = source.get(span.range()).unwrap();
// Tokens are always valid UTF-8, this unwrap can't
// fail.
// TODO: use from_utf8_unchecked?
let token = from_utf8(token).unwrap();
let token = from_utf8(token)?;

builder.token(kind.into(), token);
prev_token_span = Some(span);
}
Event::Error { message, span } => errors.push((span, message)),
}
}

Self { tree: rowan::SyntaxNode::new_root(builder.finish()), errors }
Ok(Self {
tree: rowan::SyntaxNode::new_root(builder.finish()),
errors,
})
}
}

Expand Down
12 changes: 8 additions & 4 deletions parser/src/cst/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use crate::{Parser, Span};

#[test]
fn cst_1() {
let cst = Parser::new(b"rule test { condition: true }").into_cst();
let cst =
Parser::new(b"rule test { condition: true }").try_into_cst().unwrap();

let source_file = cst.root();

Expand Down Expand Up @@ -79,7 +80,8 @@ fn cst_1() {

#[test]
fn cst_2() {
let cst = Parser::new(b"rule test { condition: true }").into_cst();
let cst =
Parser::new(b"rule test { condition: true }").try_into_cst().unwrap();

let mut c = cst.root().first_child().unwrap().children_with_tokens();

Expand Down Expand Up @@ -119,7 +121,8 @@ fn cst_2() {

#[test]
fn cst_3() {
let cst = Parser::new(b"rule test { condition: true }").into_cst();
let cst =
Parser::new(b"rule test { condition: true }").try_into_cst().unwrap();

let condition_blk =
cst.root().first_child().unwrap().first_child().unwrap();
Expand Down Expand Up @@ -159,7 +162,8 @@ fn cst_3() {

#[test]
fn cst_4() {
let cst = Parser::new(b"rule test { condition: true }").into_cst();
let cst =
Parser::new(b"rule test { condition: true }").try_into_cst().unwrap();
let source_file = cst.root().into_mut();

// Detach the first token, which is the `rule` keyword.
Expand Down
9 changes: 4 additions & 5 deletions parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ error nodes is valid YARA code.
*/

use indexmap::IndexSet;
use rustc_hash::{FxHashMap, FxHashSet};
use std::str::from_utf8;

#[cfg(feature = "logging")]
use log::*;
use rustc_hash::{FxHashMap, FxHashSet};
use std::str::{from_utf8, Utf8Error};

use crate::ast::AST;
use crate::cst::syntax_stream::SyntaxStream;
Expand Down Expand Up @@ -65,8 +64,8 @@ impl<'src> Parser<'src> {
/// third-party code.
#[inline]
#[doc(hidden)]
pub fn into_cst(self) -> CST {
CST::from(self)
pub fn try_into_cst(self) -> Result<CST, Utf8Error> {
CST::try_from(self)
}

/// Consumes the parser and returns a Concrete Syntax Tree (CST) as
Expand Down
2 changes: 1 addition & 1 deletion parser/src/parser/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fn cst() {
let output_file = mint.new_goldenfile(output_path).unwrap();

let source = fs::read_to_string(path).unwrap();
let cst = CST::from(Parser::new(source.as_bytes()));
let cst = CST::try_from(Parser::new(source.as_bytes())).unwrap();
let mut w = BufWriter::new(output_file);
write!(&mut w, "{:?}", cst).unwrap();
});
Expand Down

0 comments on commit c352f24

Please sign in to comment.