Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to Winnow 0.7 #1822

Merged
merged 4 commits into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion gix-actor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ bstr = { version = "1.3.0", default-features = false, features = [
"std",
"unicode",
] }
winnow = { version = "0.6", features = ["simd"] }
winnow = { version = "0.7.0", features = ["simd"] }
itoa = "1.0.1"
serde = { version = "1.0.114", optional = true, default-features = false, features = [
"derive",
Expand Down
42 changes: 20 additions & 22 deletions gix-actor/src/signature/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ pub(crate) mod function {
use bstr::ByteSlice;
use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch, Time};
use gix_utils::btoi::to_signed;
use winnow::error::{ErrMode, ErrorKind};
use winnow::error::ErrMode;
use winnow::stream::Stream;
use winnow::{
combinator::{alt, opt, separated_pair, terminated},
Expand All @@ -18,7 +18,7 @@ pub(crate) mod function {
/// Parse a signature from the bytes input `i` using `nom`.
pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
i: &mut &'a [u8],
) -> PResult<SignatureRef<'a>, E> {
) -> ModalResult<SignatureRef<'a>, E> {
separated_pair(
identity,
opt(b" "),
Expand Down Expand Up @@ -68,31 +68,29 @@ pub(crate) mod function {
/// Parse an identity from the bytes input `i` (like `name <email>`) using `nom`.
pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
i: &mut &'a [u8],
) -> PResult<IdentityRef<'a>, E> {
) -> ModalResult<IdentityRef<'a>, E> {
let start = i.checkpoint();
let eol_idx = i.find_byte(b'\n').unwrap_or(i.len());
let right_delim_idx =
i[..eol_idx]
.rfind_byte(b'>')
.ok_or(ErrMode::Cut(E::from_error_kind(i, ErrorKind::Eof).add_context(
i,
&start,
StrContext::Label("Closing '>' not found"),
)))?;
let right_delim_idx = i[..eol_idx]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was introduced in #1439 and I wonder if this there isn't an 'packagiomatic' way of writing this. This code implements the parser by hand, but that was done merely due to lack of knowledge on how to use winnow primitives.

Is there anything that could be done to make it nicer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main challenge is the rfind, right? I've not thought of a good way to handle cases like that.

If its parsing within the \n delimited range, there is and_then, like take_until(0.., b'\n').and_then(parser).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's the rfind() to assure it finds the right-most > as delimiter, within the line boundary.

.rfind_byte(b'>')
.ok_or(ErrMode::Cut(E::from_input(i).add_context(
i,
&start,
StrContext::Label("Closing '>' not found"),
)))?;
let i_name_and_email = &i[..right_delim_idx];
let skip_from_right = i_name_and_email
.iter()
.rev()
.take_while(|b| b.is_ascii_whitespace() || **b == b'>')
.count();
let left_delim_idx =
i_name_and_email
.find_byte(b'<')
.ok_or(ErrMode::Cut(E::from_error_kind(i, ErrorKind::Eof).add_context(
&i_name_and_email,
&start,
StrContext::Label("Opening '<' not found"),
)))?;
let left_delim_idx = i_name_and_email
.find_byte(b'<')
.ok_or(ErrMode::Cut(E::from_input(i).add_context(
&i_name_and_email,
&start,
StrContext::Label("Opening '<' not found"),
)))?;
let skip_from_left = i[left_delim_idx..]
.iter()
.take_while(|b| b.is_ascii_whitespace() || **b == b'<')
Expand All @@ -102,7 +100,7 @@ pub(crate) mod function {

let email = i
.get(left_delim_idx + skip_from_left..right_delim_idx - skip_from_right)
.ok_or(ErrMode::Cut(E::from_error_kind(i, ErrorKind::Eof).add_context(
.ok_or(ErrMode::Cut(E::from_input(i).add_context(
&i_name_and_email,
&start,
StrContext::Label("Skipped parts run into each other"),
Expand All @@ -126,7 +124,7 @@ mod tests {

fn decode<'i>(
i: &mut &'i [u8],
) -> PResult<SignatureRef<'i>, winnow::error::TreeError<&'i [u8], winnow::error::StrContext>> {
) -> ModalResult<SignatureRef<'i>, winnow::error::TreeError<&'i [u8], winnow::error::StrContext>> {
signature::decode.parse_next(i)
}

Expand Down Expand Up @@ -203,7 +201,7 @@ mod tests {
.map_err(to_bstr_err)
.expect_err("parse fails as > is missing")
.to_string(),
"in end of file at 'hello < 12345 -1215'\n 0: invalid Closing '>' not found at 'hello < 12345 -1215'\n 1: expected `<name> <<email>> <timestamp> <+|-><HHMM>` at 'hello < 12345 -1215'\n"
" at 'hello < 12345 -1215'\n 0: invalid Closing '>' not found at 'hello < 12345 -1215'\n 1: expected `<name> <<email>> <timestamp> <+|-><HHMM>` at 'hello < 12345 -1215'\n"
);
}

Expand Down
2 changes: 1 addition & 1 deletion gix-config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ gix-sec = { version = "^0.10.11", path = "../gix-sec" }
gix-ref = { version = "^0.50.0", path = "../gix-ref" }
gix-glob = { version = "^0.18.0", path = "../gix-glob" }

winnow = { version = "0.6", features = ["simd"] }
winnow = { version = "0.7.0", features = ["simd"] }
memchr = "2"
thiserror = "2.0.0"
unicode-bom = { version = "2.0.3" }
Expand Down
47 changes: 25 additions & 22 deletions gix-config/src/parse/nom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ use std::borrow::Cow;
use bstr::{BStr, ByteSlice};
use winnow::{
combinator::{alt, delimited, opt, preceded, repeat},
error::{ErrorKind, InputError as NomError, ParserError as _},
error::{ErrMode, InputError as NomError, ParserError as _},
prelude::*,
stream::{Offset as _, Stream as _},
stream::Offset as _,
token::{one_of, take_till, take_while},
};

Expand Down Expand Up @@ -78,7 +78,7 @@ fn newlines_from(input: &[u8], start: winnow::stream::Checkpoint<&[u8], &[u8]>)
start_input.next_slice(offset).iter().filter(|c| **c == b'\n').count()
}

fn comment<'i>(i: &mut &'i [u8]) -> PResult<Comment<'i>, NomError<&'i [u8]>> {
fn comment<'i>(i: &mut &'i [u8]) -> ModalResult<Comment<'i>, NomError<&'i [u8]>> {
(
one_of([';', '#']),
take_till(0.., |c| c == b'\n').map(|text: &[u8]| Cow::Borrowed(text.as_bstr())),
Expand All @@ -94,7 +94,7 @@ fn section<'i>(
i: &mut &'i [u8],
node: &mut ParseNode,
dispatch: &mut dyn FnMut(Event<'i>),
) -> PResult<(), NomError<&'i [u8]>> {
) -> ModalResult<(), NomError<&'i [u8]>> {
let start = i.checkpoint();
let header = section_header(i).map_err(|e| {
i.reset(&start);
Expand Down Expand Up @@ -129,11 +129,14 @@ fn section<'i>(
Ok(())
}

fn section_header<'i>(i: &mut &'i [u8]) -> PResult<section::Header<'i>, NomError<&'i [u8]>> {
fn section_header<'i>(i: &mut &'i [u8]) -> ModalResult<section::Header<'i>, NomError<&'i [u8]>> {
// No spaces must be between section name and section start
let name = preceded('[', take_while(1.., is_section_char).map(bstr::ByteSlice::as_bstr)).parse_next(i)?;

if opt(one_of::<_, _, NomError<&[u8]>>(']')).parse_next(i)?.is_some() {
if opt(one_of::<_, _, ErrMode<NomError<&[u8]>>>(']'))
.parse_next(i)?
.is_some()
{
// Either section does not have a subsection or using deprecated
// subsection syntax at this point.
let header = match memchr::memrchr(b'.', name.as_bytes()) {
Expand All @@ -150,7 +153,7 @@ fn section_header<'i>(i: &mut &'i [u8]) -> PResult<section::Header<'i>, NomError
};

if header.name.is_empty() {
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail));
return Err(winnow::error::ErrMode::from_input(i));
}
return Ok(header);
}
Expand All @@ -169,7 +172,7 @@ fn is_section_char(c: u8) -> bool {
c.is_ascii_alphanumeric() || c == b'-' || c == b'.'
}

fn sub_section<'i>(i: &mut &'i [u8]) -> PResult<Cow<'i, BStr>, NomError<&'i [u8]>> {
fn sub_section<'i>(i: &mut &'i [u8]) -> ModalResult<Cow<'i, BStr>, NomError<&'i [u8]>> {
let mut output = Cow::Borrowed(Default::default());
if let Some(sub) = opt(subsection_subset).parse_next(i)? {
output = Cow::Borrowed(sub.as_bstr());
Expand All @@ -181,15 +184,15 @@ fn sub_section<'i>(i: &mut &'i [u8]) -> PResult<Cow<'i, BStr>, NomError<&'i [u8]
Ok(output)
}

fn subsection_subset<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
fn subsection_subset<'i>(i: &mut &'i [u8]) -> ModalResult<&'i [u8], NomError<&'i [u8]>> {
alt((subsection_unescaped, subsection_escaped_char)).parse_next(i)
}

fn subsection_unescaped<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
fn subsection_unescaped<'i>(i: &mut &'i [u8]) -> ModalResult<&'i [u8], NomError<&'i [u8]>> {
take_while(1.., is_subsection_unescaped_char).parse_next(i)
}

fn subsection_escaped_char<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
fn subsection_escaped_char<'i>(i: &mut &'i [u8]) -> ModalResult<&'i [u8], NomError<&'i [u8]>> {
preceded('\\', one_of(is_subsection_escapable_char).take()).parse_next(i)
}

Expand All @@ -205,7 +208,7 @@ fn key_value_pair<'i>(
i: &mut &'i [u8],
node: &mut ParseNode,
dispatch: &mut dyn FnMut(Event<'i>),
) -> PResult<(), NomError<&'i [u8]>> {
) -> ModalResult<(), NomError<&'i [u8]>> {
*node = ParseNode::Name;
if let Some(name) = opt(config_name).parse_next(i)? {
dispatch(Event::SectionValueName(section::ValueName(Cow::Borrowed(name))));
Expand All @@ -223,7 +226,7 @@ fn key_value_pair<'i>(

/// Parses the config name of a config pair. Assumes the input has already been
/// trimmed of any leading whitespace.
fn config_name<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
fn config_name<'i>(i: &mut &'i [u8]) -> ModalResult<&'i BStr, NomError<&'i [u8]>> {
(
one_of(|c: u8| c.is_ascii_alphabetic()),
take_while(0.., |c: u8| c.is_ascii_alphanumeric() || c == b'-'),
Expand All @@ -233,7 +236,7 @@ fn config_name<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
.parse_next(i)
}

fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> {
fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> ModalResult<(), NomError<&'i [u8]>> {
if opt('=').parse_next(i)?.is_some() {
dispatch(Event::KeyValueSeparator);
if let Some(whitespace) = opt(take_spaces1).parse_next(i)? {
Expand All @@ -251,7 +254,7 @@ fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PR

/// Handles parsing of known-to-be values. This function handles both single
/// line values as well as values that are continuations.
fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> {
fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> ModalResult<(), NomError<&'i [u8]>> {
let start_checkpoint = i.checkpoint();
let mut value_start_checkpoint = i.checkpoint();
let mut value_end = None;
Expand All @@ -278,17 +281,17 @@ fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PRes
let escape_index = escaped_index - 1;
let Some(mut c) = i.next_token() else {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token));
return Err(winnow::error::ErrMode::from_input(i));
};
let mut consumed = 1;
if c == b'\r' {
c = i.next_token().ok_or_else(|| {
i.reset(&start_checkpoint);
winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token)
winnow::error::ErrMode::from_input(i)
})?;
if c != b'\n' {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice));
return Err(winnow::error::ErrMode::from_input(i));
}
consumed += 1;
}
Expand All @@ -313,7 +316,7 @@ fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PRes
b'n' | b't' | b'\\' | b'b' | b'"' => {}
_ => {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token));
return Err(winnow::error::ErrMode::from_input(i));
}
}
}
Expand All @@ -326,7 +329,7 @@ fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PRes
}
if is_in_quotes {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice));
return Err(winnow::error::ErrMode::from_input(i));
}

let value_end = match value_end {
Expand Down Expand Up @@ -360,13 +363,13 @@ fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PRes
Ok(())
}

fn take_spaces1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
fn take_spaces1<'i>(i: &mut &'i [u8]) -> ModalResult<&'i BStr, NomError<&'i [u8]>> {
take_while(1.., winnow::stream::AsChar::is_space)
.map(bstr::ByteSlice::as_bstr)
.parse_next(i)
}

fn take_newlines1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
fn take_newlines1<'i>(i: &mut &'i [u8]) -> ModalResult<&'i BStr, NomError<&'i [u8]>> {
repeat(1..1024, alt(("\r\n", "\n")))
.map(|()| ())
.take()
Expand Down
Loading
Loading