From 1e15a0a45c669236f6bd3a00fb7e5f5c539d79c9 Mon Sep 17 00:00:00 2001 From: Gris Ge Date: Sat, 4 Jan 2025 01:38:22 +0800 Subject: [PATCH] Support hyphen for unquoted string For YAML like: `abc-d: 123` It should be treated as Map with `abc-d` as key and `123` as value. Test cases included. Signed-off-by: Gris Ge --- src/lib.rs | 2 +- src/scalar_str.rs | 69 ++++++++++++++++++++++++++++++++++-------- src/token.rs | 42 ++++++++++++------------- tests/from_str_flow.rs | 3 +- 4 files changed, 78 insertions(+), 38 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f3be274..26b7b9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,7 @@ pub(crate) use self::scalar_str::{ read_double_quoted_str, read_single_quoted_str, read_unquoted_str, to_scalar_string, }; -pub(crate) use self::token::{YamlToken, YamlTokenData, YAML_CHAR_INDICATORS}; +pub(crate) use self::token::{YamlToken, YamlTokenData}; pub(crate) use self::token_iter::TokensIter; pub(crate) use self::variant::{get_tag, YamlValueEnumAccess}; diff --git a/src/scalar_str.rs b/src/scalar_str.rs index b53a6d5..6392653 100644 --- a/src/scalar_str.rs +++ b/src/scalar_str.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 -use crate::{CharsIter, RmsdError, RmsdPosition, YAML_CHAR_INDICATORS}; +use crate::{CharsIter, RmsdError, RmsdPosition}; const YAML_CHAR_ESCAPE: char = '\\'; @@ -78,6 +78,7 @@ pub(crate) fn read_unquoted_str( indent: usize, iter: &mut CharsIter, skip_line_folding: bool, + in_flow: bool, ) -> Result<(String, RmsdPosition), RmsdError> { let mut ret = String::new(); let mut droped_first_newline = false; @@ -102,17 +103,49 @@ pub(crate) fn read_unquoted_str( } while let Some(c) = iter.peek() { - if YAML_CHAR_INDICATORS.contains(&c) { + if iter.as_str().starts_with(":\n") + || iter.as_str().starts_with(": ") + || iter.as_str().starts_with("- ") + { + return Ok((ret, pos)); + } + + // YAML 1.2.2: + // In addition, inside flow collections, or when used as implicit + // keys, plain scalars must not 
contain the “[”, “]”, “{”, “}” and + // “,” characters. + if in_flow && ['[', ']', '{', '}', ','].contains(&c) { return Ok((ret, pos)); - } else if c == '\n' - && (skip_line_folding + } + + // When new line found, we finish reading when not indented as beginning + if c == '\n' { + if skip_line_folding || !iter .as_str() - .starts_with(&format!("\n{}", " ".repeat(indent)))) - { - iter.next(); - return Ok((ret, pos)); - } else if let Some(p) = process_with_line_folding( + .starts_with(&format!("\n{}", " ".repeat(indent))) + { + iter.next(); + return Ok((ret, pos)); + } + if let Some(next_line) = + iter.as_str().lines().nth(1).map(|s| s.trim_start()) + { + println!("HAHA {:?}", next_line); + if next_line.contains(": ") + || next_line.starts_with("- ") + || next_line.starts_with("---\n") + || next_line.starts_with("---\t") + || next_line.starts_with("--- ") + || next_line == "---" + { + iter.next(); + return Ok((ret, pos)); + } + } + } + + if let Some(p) = process_with_line_folding( &mut ret, iter, &mut pending_whitespace, @@ -361,7 +394,7 @@ mod tests { #[test] fn test_unquoted_string() -> Result<(), RmsdError> { let mut iter = CharsIter::new("abc d"); - let ret = read_unquoted_str(0, &mut iter, false)?; + let ret = read_unquoted_str(0, &mut iter, false, false)?; assert_eq!(ret.0, "abc d"); assert_eq!(ret.1.line, 1); assert_eq!(ret.1.column, 5); @@ -371,7 +404,7 @@ mod tests { #[test] fn test_unquoted_string_with_folding() -> Result<(), RmsdError> { let mut iter = CharsIter::new("abc\n\n\n \nabc\nd\n"); - let ret = read_unquoted_str(0, &mut iter, false)?; + let ret = read_unquoted_str(0, &mut iter, false, false)?; assert_eq!(ret.0, "abc\n\n\nabc d"); assert_eq!(ret.1.line, 6); assert_eq!(ret.1.column, 1); @@ -381,7 +414,7 @@ mod tests { #[test] fn test_unquoted_string_with_leading_new_line() -> Result<(), RmsdError> { let mut iter = CharsIter::new("\nabc"); - let ret = read_unquoted_str(0, &mut iter, false)?; + let ret = read_unquoted_str(0, &mut iter, false, false)?; 
assert_eq!(ret.0, "abc"); assert_eq!(ret.1.line, 2); assert_eq!(ret.1.column, 3); @@ -391,10 +424,20 @@ mod tests { #[test] fn test_unquoted_string_skip_line_folding() -> Result<(), RmsdError> { let mut iter = CharsIter::new("abc\n d"); - let ret = read_unquoted_str(0, &mut iter, true)?; + let ret = read_unquoted_str(0, &mut iter, true, false)?; assert_eq!(ret.0, "abc"); assert_eq!(ret.1.line, 1); assert_eq!(ret.1.column, 3); Ok(()) } + + #[test] + fn test_unquoted_string_allow_hyphen() -> Result<(), RmsdError> { + let mut iter = CharsIter::new("abc-d"); + let ret = read_unquoted_str(0, &mut iter, true, false)?; + assert_eq!(ret.0, "abc-d"); + assert_eq!(ret.1.line, 1); + assert_eq!(ret.1.column, 5); + Ok(()) + } } diff --git a/src/token.rs b/src/token.rs index f27d366..e412bd7 100644 --- a/src/token.rs +++ b/src/token.rs @@ -25,28 +25,6 @@ const YAML_CHAR_DIRECTIVE: char = '%'; const YAML_CHAR_RESERVED: char = '@'; const YAML_CHAR_RESERVED2: char = '`'; -pub(crate) const YAML_CHAR_INDICATORS: [char; 19] = [ - YAML_CHAR_SEQUENCE_ENTRY, - YAML_CHAR_MAPPING_KEY, - YAML_CHAR_MAPPING_VALUE, - YAML_CHAR_COLLECT_ENTRY, - YAML_CHAR_SEQUENCE_START, - YAML_CHAR_SEQUENCE_END, - YAML_CHAR_MAPPING_START, - YAML_CHAR_MAPPING_END, - YAML_CHAR_COMMENT, - YAML_CHAR_ANCHOR, - YAML_CHAR_ALIAS, - YAML_CHAR_TAG, - YAML_CHAR_LITERAL, - YAML_CHAR_FOLDED, - YAML_CHAR_SINGLE_QUOTE, - YAML_CHAR_DOUBLE_QUOTE, - YAML_CHAR_DIRECTIVE, - YAML_CHAR_RESERVED, - YAML_CHAR_RESERVED2, -]; - /// YAML Token Data /// Tokenization input data with white spaces and comments removed. 
#[derive(Debug, Clone, PartialEq, Eq)] @@ -124,6 +102,8 @@ impl YamlToken { let mut iter = CharsIter::new(input); let mut ret: Vec = Vec::new(); let mut indent = 0usize; + let mut flow_map_count = 0; + let mut flow_seq_count = 0; while let Some(mut c) = iter.peek() { if iter.next_pos().column == 1 { @@ -149,11 +129,13 @@ impl YamlToken { ret.is_empty(), &mut indent, is_after_map_indicator(&ret), + flow_map_count > 0 || flow_seq_count > 0, )? { ret.push(t); } } YAML_CHAR_SEQUENCE_START => { + flow_seq_count += 1; iter.next(); ret.push(YamlToken { indent, @@ -173,6 +155,9 @@ impl YamlToken { // no special action required for `,`. } YAML_CHAR_SEQUENCE_END => { + if flow_seq_count > 0 { + flow_seq_count -= 1; + } iter.next(); ret.push(YamlToken { indent, @@ -182,6 +167,7 @@ impl YamlToken { }) } YAML_CHAR_MAPPING_START => { + flow_map_count += 1; iter.next(); ret.push(YamlToken { indent, @@ -191,6 +177,9 @@ impl YamlToken { }) } YAML_CHAR_MAPPING_END => { + if flow_map_count > 0 { + flow_map_count -= 1; + } iter.next(); ret.push(YamlToken { indent, @@ -294,10 +283,14 @@ impl YamlToken { &mut iter, indent, is_after_map_indicator(&ret), + flow_map_count > 0 || flow_seq_count > 0, )?); } } } + for token in &ret { + println!("HAHA {:?}", token.data); + } Ok(ret) } } @@ -307,6 +300,7 @@ fn process_map_seq_indicator( is_begining: bool, indent: &mut usize, is_after_map_indicator: bool, + in_flow: bool, ) -> Result, RmsdError> { // We might be got `---` as document begin which we should // ignore @@ -360,6 +354,7 @@ fn process_map_seq_indicator( iter, *indent, is_after_map_indicator, + in_flow, )?)) } } else { @@ -384,10 +379,11 @@ fn read_unquoted_str_token( iter: &mut CharsIter, indent: usize, skip_line_folding: bool, + in_flow: bool, ) -> Result { let start = iter.next_pos(); let (unquoted_string, end) = - read_unquoted_str(indent, iter, skip_line_folding)?; + read_unquoted_str(indent, iter, skip_line_folding, in_flow)?; Ok(YamlToken { indent, start, diff --git 
a/tests/from_str_flow.rs b/tests/from_str_flow.rs index 6286b24..c731d5a 100644 --- a/tests/from_str_flow.rs +++ b/tests/from_str_flow.rs @@ -165,9 +165,10 @@ fn test_de_yaml_flow_array_of_struct() -> Result<(), RmsdError> { fn test_de_yaml_flow_struct_of_array() -> Result<(), RmsdError> { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] struct FooTest { + #[serde(rename = "uint-a")] uint_a: Vec, } - let yaml_str = r#"{uint_a: [1, 2, 3, 4]}"#; + let yaml_str = r#"{uint-a: [1, 2, 3, 4]}"#; let foo_test: FooTest = rmsd_yaml::from_str(yaml_str)?;