Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support hyphen for unquoted string #15

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub(crate) use self::scalar_str::{
read_double_quoted_str, read_single_quoted_str, read_unquoted_str,
to_scalar_string,
};
pub(crate) use self::token::{YamlToken, YamlTokenData, YAML_CHAR_INDICATORS};
pub(crate) use self::token::{YamlToken, YamlTokenData};
pub(crate) use self::token_iter::TokensIter;
pub(crate) use self::variant::{get_tag, YamlValueEnumAccess};

Expand Down
69 changes: 56 additions & 13 deletions src/scalar_str.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0

use crate::{CharsIter, RmsdError, RmsdPosition, YAML_CHAR_INDICATORS};
use crate::{CharsIter, RmsdError, RmsdPosition};

const YAML_CHAR_ESCAPE: char = '\\';

Expand Down Expand Up @@ -78,6 +78,7 @@ pub(crate) fn read_unquoted_str(
indent: usize,
iter: &mut CharsIter,
skip_line_folding: bool,
in_flow: bool,
) -> Result<(String, RmsdPosition), RmsdError> {
let mut ret = String::new();
let mut droped_first_newline = false;
Expand All @@ -102,17 +103,49 @@ pub(crate) fn read_unquoted_str(
}

while let Some(c) = iter.peek() {
if YAML_CHAR_INDICATORS.contains(&c) {
if iter.as_str().starts_with(":\n")
|| iter.as_str().starts_with(": ")
|| iter.as_str().starts_with("- ")
{
return Ok((ret, pos));
}

// YAML 1.2.2:
// In addition, inside flow collections, or when used as implicit
// keys, plain scalars must not contain the “[”, “]”, “{”, “}” and
// “,” characters.
if in_flow && ['[', ']', '{', '}', ','].contains(&c) {
return Ok((ret, pos));
} else if c == '\n'
&& (skip_line_folding
}

// When a new line is found, we finish reading if the next line is not indented at the beginning
if c == '\n' {
if skip_line_folding
|| !iter
.as_str()
.starts_with(&format!("\n{}", " ".repeat(indent))))
{
iter.next();
return Ok((ret, pos));
} else if let Some(p) = process_with_line_folding(
.starts_with(&format!("\n{}", " ".repeat(indent)))
{
iter.next();
return Ok((ret, pos));
}
if let Some(next_line) =
iter.as_str().lines().nth(1).map(|s| s.trim_start())
{
println!("HAHA {:?}", next_line);
if next_line.contains(": ")
|| next_line.starts_with("- ")
|| next_line.starts_with("---\n")
|| next_line.starts_with("---\t")
|| next_line.starts_with("--- ")
|| next_line == "---"
{
iter.next();
return Ok((ret, pos));
}
}
}

if let Some(p) = process_with_line_folding(
&mut ret,
iter,
&mut pending_whitespace,
Expand Down Expand Up @@ -361,7 +394,7 @@ mod tests {
#[test]
fn test_unquoted_string() -> Result<(), RmsdError> {
let mut iter = CharsIter::new("abc d");
let ret = read_unquoted_str(0, &mut iter, false)?;
let ret = read_unquoted_str(0, &mut iter, false, false)?;
assert_eq!(ret.0, "abc d");
assert_eq!(ret.1.line, 1);
assert_eq!(ret.1.column, 5);
Expand All @@ -371,7 +404,7 @@ mod tests {
#[test]
fn test_unquoted_string_with_folding() -> Result<(), RmsdError> {
let mut iter = CharsIter::new("abc\n\n\n \nabc\nd\n");
let ret = read_unquoted_str(0, &mut iter, false)?;
let ret = read_unquoted_str(0, &mut iter, false, false)?;
assert_eq!(ret.0, "abc\n\n\nabc d");
assert_eq!(ret.1.line, 6);
assert_eq!(ret.1.column, 1);
Expand All @@ -381,7 +414,7 @@ mod tests {
#[test]
fn test_unquoted_string_with_leading_new_line() -> Result<(), RmsdError> {
let mut iter = CharsIter::new("\nabc");
let ret = read_unquoted_str(0, &mut iter, false)?;
let ret = read_unquoted_str(0, &mut iter, false, false)?;
assert_eq!(ret.0, "abc");
assert_eq!(ret.1.line, 2);
assert_eq!(ret.1.column, 3);
Expand All @@ -391,10 +424,20 @@ mod tests {
// Checks that with `skip_line_folding` set to true, reading "abc\n d" stops
// at the first newline: the returned string is "abc" and the returned
// position is line 1, column 3.
#[test]
fn test_unquoted_string_skip_line_folding() -> Result<(), RmsdError> {
let mut iter = CharsIter::new("abc\n d");
// NOTE(review): two calls appear back to back here; the three-argument form
// lacks the `in_flow` argument and looks like the pre-change side of a diff
// hunk -- confirm only the four-argument call belongs in the real file.
let ret = read_unquoted_str(0, &mut iter, true)?;
let ret = read_unquoted_str(0, &mut iter, true, false)?;
assert_eq!(ret.0, "abc");
assert_eq!(ret.1.line, 1);
assert_eq!(ret.1.column, 3);
Ok(())
}

// A hyphen that is not followed by a space is an ordinary character of an
// unquoted string: "abc-d" must be read in full, with the returned position
// at line 1, column 5.
#[test]
fn test_unquoted_string_allow_hyphen() -> Result<(), RmsdError> {
    let mut chars = CharsIter::new("abc-d");
    let (text, end) = read_unquoted_str(0, &mut chars, true, false)?;
    assert_eq!(text, "abc-d");
    assert_eq!(end.line, 1);
    assert_eq!(end.column, 5);
    Ok(())
}
}
42 changes: 19 additions & 23 deletions src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,6 @@ const YAML_CHAR_DIRECTIVE: char = '%';
const YAML_CHAR_RESERVED: char = '@';
const YAML_CHAR_RESERVED2: char = '`';

/// The `YAML_CHAR_*` indicator characters collected into a single array
/// (19 entries), so a character can be tested for membership with
/// `contains`.
pub(crate) const YAML_CHAR_INDICATORS: [char; 19] = [
YAML_CHAR_SEQUENCE_ENTRY,
YAML_CHAR_MAPPING_KEY,
YAML_CHAR_MAPPING_VALUE,
YAML_CHAR_COLLECT_ENTRY,
YAML_CHAR_SEQUENCE_START,
YAML_CHAR_SEQUENCE_END,
YAML_CHAR_MAPPING_START,
YAML_CHAR_MAPPING_END,
YAML_CHAR_COMMENT,
YAML_CHAR_ANCHOR,
YAML_CHAR_ALIAS,
YAML_CHAR_TAG,
YAML_CHAR_LITERAL,
YAML_CHAR_FOLDED,
YAML_CHAR_SINGLE_QUOTE,
YAML_CHAR_DOUBLE_QUOTE,
YAML_CHAR_DIRECTIVE,
YAML_CHAR_RESERVED,
YAML_CHAR_RESERVED2,
];

/// YAML Token Data
/// Tokenization input data with white spaces and comments removed.
#[derive(Debug, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -124,6 +102,8 @@ impl YamlToken {
let mut iter = CharsIter::new(input);
let mut ret: Vec<Self> = Vec::new();
let mut indent = 0usize;
let mut flow_map_count = 0;
let mut flow_seq_count = 0;

while let Some(mut c) = iter.peek() {
if iter.next_pos().column == 1 {
Expand All @@ -149,11 +129,13 @@ impl YamlToken {
ret.is_empty(),
&mut indent,
is_after_map_indicator(&ret),
flow_map_count > 0 || flow_seq_count > 0,
)? {
ret.push(t);
}
}
YAML_CHAR_SEQUENCE_START => {
flow_seq_count += 1;
iter.next();
ret.push(YamlToken {
indent,
Expand All @@ -173,6 +155,9 @@ impl YamlToken {
// no special action required for `,`.
}
YAML_CHAR_SEQUENCE_END => {
if flow_seq_count > 0 {
flow_seq_count -= 1;
}
iter.next();
ret.push(YamlToken {
indent,
Expand All @@ -182,6 +167,7 @@ impl YamlToken {
})
}
YAML_CHAR_MAPPING_START => {
flow_map_count += 1;
iter.next();
ret.push(YamlToken {
indent,
Expand All @@ -191,6 +177,9 @@ impl YamlToken {
})
}
YAML_CHAR_MAPPING_END => {
if flow_map_count > 0 {
flow_map_count -= 1;
}
iter.next();
ret.push(YamlToken {
indent,
Expand Down Expand Up @@ -294,10 +283,14 @@ impl YamlToken {
&mut iter,
indent,
is_after_map_indicator(&ret),
flow_map_count > 0 || flow_seq_count > 0,
)?);
}
}
}
for token in &ret {
println!("HAHA {:?}", token.data);
}
Ok(ret)
}
}
Expand All @@ -307,6 +300,7 @@ fn process_map_seq_indicator(
is_begining: bool,
indent: &mut usize,
is_after_map_indicator: bool,
in_flow: bool,
) -> Result<Option<YamlToken>, RmsdError> {
// We might have got `---` as the document start marker, which we
// should ignore
Expand Down Expand Up @@ -360,6 +354,7 @@ fn process_map_seq_indicator(
iter,
*indent,
is_after_map_indicator,
in_flow,
)?))
}
} else {
Expand All @@ -384,10 +379,11 @@ fn read_unquoted_str_token(
iter: &mut CharsIter,
indent: usize,
skip_line_folding: bool,
in_flow: bool,
) -> Result<YamlToken, RmsdError> {
let start = iter.next_pos();
let (unquoted_string, end) =
read_unquoted_str(indent, iter, skip_line_folding)?;
read_unquoted_str(indent, iter, skip_line_folding, in_flow)?;
Ok(YamlToken {
indent,
start,
Expand Down
3 changes: 2 additions & 1 deletion tests/from_str_flow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,10 @@ fn test_de_yaml_flow_array_of_struct() -> Result<(), RmsdError> {
fn test_de_yaml_flow_struct_of_array() -> Result<(), RmsdError> {
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
struct FooTest {
#[serde(rename = "uint-a")]
uint_a: Vec<u32>,
}
let yaml_str = r#"{uint_a: [1, 2, 3, 4]}"#;
let yaml_str = r#"{uint-a: [1, 2, 3, 4]}"#;

let foo_test: FooTest = rmsd_yaml::from_str(yaml_str)?;

Expand Down
Loading