From 4e634bd64d96717a1c7c2012cca0d7cac7bb9e03 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 19 Mar 2023 20:32:46 +0100 Subject: [PATCH 1/5] do not fail snippet parsing when control chars are not explicitly escaped --- helix-lsp/src/snippet.rs | 149 +++++++++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 38 deletions(-) diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs index 4706a98435ca..aa3496ed1316 100644 --- a/helix-lsp/src/snippet.rs +++ b/helix-lsp/src/snippet.rs @@ -11,17 +11,17 @@ pub enum CaseChange { } #[derive(Debug, PartialEq, Eq)] -pub enum FormatItem<'a> { +pub enum FormatItem { Text(Tendril), Capture(usize), CaseChange(usize, CaseChange), - Conditional(usize, Option<&'a str>, Option<&'a str>), + Conditional(usize, Option, Option), } #[derive(Debug, PartialEq, Eq)] -pub struct Regex<'a> { +pub struct Regex { value: Tendril, - replacement: Vec>, + replacement: Vec, options: Tendril, } @@ -40,8 +40,8 @@ pub enum SnippetElement<'a> { }, Variable { name: &'a str, - default: Option<&'a str>, - regex: Option>, + default: Option>>, + regex: Option, }, Text(Tendril), } @@ -77,15 +77,20 @@ fn render_elements( *offset += text.chars().count(); insert.push_str(&text); } - &Variable { + Variable { name: _, regex: _, r#default, } => { // TODO: variables. For now, fall back to the default, which defaults to "". - let text = r#default.unwrap_or_default(); - *offset += text.chars().count(); - insert.push_str(text); + render_elements( + r#default.as_deref().unwrap_or_default(), + insert, + offset, + tabstops, + newline_with_offset, + include_placeholer, + ); } &Tabstop { tabstop } => { tabstops.push((tabstop, (*offset, *offset))); @@ -212,25 +217,28 @@ mod parser { } const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$']; - const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/']; - const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ',']; + const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '|', ',']; - fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> { + fn text<'a>( + escape_chars: &'static [char], + term_chars: &'static [char], + ) -> impl Parser<'a, Output = Tendril> { move |input: &'a str| { - let mut chars = input.char_indices(); + let mut chars = input.char_indices().peekable(); let mut res = Tendril::new(); while let Some((i, c)) = chars.next() { match c { '\\' => { - if let Some((_, c)) = chars.next() { + if let Some(&(_, c)) = chars.peek() { if escape_chars.contains(&c) { + chars.next(); res.push(c); continue; } } - return Ok((&input[i..], res)); + res.push('\\'); } - c if escape_chars.contains(&c) => return Ok((&input[i..], res)), + c if term_chars.contains(&c) => return Ok((&input[i..], res)), c => res.push(c), } } @@ -253,7 +261,7 @@ mod parser { ) } - fn format<'a>() -> impl Parser<'a, Output = FormatItem<'a>> { + fn format<'a>() -> impl Parser<'a, Output = FormatItem> { use FormatItem::*; choice!( @@ -267,7 +275,7 @@ mod parser { }), // '${' int ':+' if '}' map( - seq!("${", digit(), ":+", take_until(|c| c == '}'), "}"), + seq!("${", digit(), ":+", text(TEXT_ESCAPE_CHARS, &['}']), "}"), |seq| { Conditional(seq.1, Some(seq.3), None) } ), // '${' int ':?' if ':' else '}' @@ -276,9 +284,9 @@ mod parser { "${", digit(), ":?", - take_until(|c| c == ':'), + text(TEXT_ESCAPE_CHARS, &[':']), ":", - take_until(|c| c == '}'), + text(TEXT_ESCAPE_CHARS, &['}']), "}" ), |seq| { Conditional(seq.1, Some(seq.3), Some(seq.5)) } @@ -290,7 +298,7 @@ mod parser { digit(), ":", optional("-"), - take_until(|c| c == '}'), + text(TEXT_ESCAPE_CHARS, &['}']), "}" ), |seq| { Conditional(seq.1, None, Some(seq.4)) } @@ -298,19 +306,24 @@ mod parser { ) } - fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> { + fn regex<'a>() -> impl Parser<'a, Output = Regex> { map( seq!( "/", // TODO parse as ECMAScript and convert to rust regex - non_empty(text(&['/', '\\'])), + non_empty(text(&['/'], &['/'])), "/", one_or_more(choice!( format(), - map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text) + // text doesn't parse $, if format fails we just accept the $ as text + map("$", |_| FormatItem::Text("$".into())), + map(text(&['\\', '/'], &['/', '$']), FormatItem::Text), )), "/", - text(&['}', '\\',]), + // vscode really doesn't allow escaping } here + // so it's impossible to write a regex escape containing a } + // we can consider deviating here and allowing the escape + text(&[], &['}']), ), |(_, value, _, replacement, _, options)| Regex { value, @@ -341,7 +354,7 @@ mod parser { // The example there contains both a placeholder text and a nested placeholder // which indicates a list. Looking at the VSCode sourcecode, the placeholder // is indeed parsed as zero_or_more so the grammar is simply incorrect here - zero_or_more(anything(TEXT_ESCAPE_CHARS)), + zero_or_more(anything(TEXT_ESCAPE_CHARS, true)), "}" ), |seq| SnippetElement::Placeholder { @@ -357,7 +370,7 @@ mod parser { "${", digit(), "|", - sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","), + sep(text(CHOICE_TEXT_ESCAPE_CHARS, &['|', ',']), ","), "|}", ), |seq| SnippetElement::Choice { @@ -377,7 +390,13 @@ mod parser { }), // ${var:default} map( - seq!("${", var(), ":", take_until(|c| c == '}'), "}",), + seq!( + "${", + var(), + ":", + zero_or_more(anything(TEXT_ESCAPE_CHARS, true)), + "}", + ), |values| SnippetElement::Variable { name: values.1, default: Some(values.3), @@ -395,22 +414,27 @@ mod parser { ) } - fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> { + fn anything<'a>( + escape_chars: &'static [char], + end_at_brace: bool, + ) -> impl Parser<'a, Output = SnippetElement<'a>> { + let term_chars: &[_] = if end_at_brace { &['$', '}'] } else { &['$'] }; move |input: &'a str| { let parser = choice!( tabstop(), placeholder(), choice(), variable(), - map(text(escape_chars), SnippetElement::Text) + map("$", |_| SnippetElement::Text("$".into())), + map(text(escape_chars, term_chars), SnippetElement::Text), ); parser.parse(input) } } fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> { - map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet { - elements: parts, + map(one_or_more(anything(TEXT_ESCAPE_CHARS, false)), |parts| { + Snippet { elements: parts } }) } @@ -452,8 +476,13 @@ mod parser { } #[test] - fn parse_unterminated_placeholder_error() { - assert_eq!(Err("${1:)"), parse("match(${1:)")) + fn unterminated_placeholder() { + assert_eq!( + Ok(Snippet { + elements: vec![Text("match(".into()), Text("$".into()), Text("{1:)".into())] + }), + parse("match(${1:)") + ) } #[test] @@ -542,7 +571,7 @@ mod parser { Text(" ".into()), Variable { name: "name", - default: Some("foo"), + default: Some(vec![Text("foo".into())]), regex: None }, Text(" ".into()), @@ -572,12 +601,56 @@ mod parser { default: None, regex: Some(Regex { value: "(.*).+$".into(), - replacement: vec![FormatItem::Capture(1)], + replacement: vec![FormatItem::Capture(1), FormatItem::Text("$".into())], options: Tendril::new(), }), }] }), - parse("${TM_FILENAME/(.*).+$/$1/}") + parse("${TM_FILENAME/(.*).+$/$1$/}") + ); + } + + #[test] + fn rust_macro() { + assert_eq!( + Ok(Snippet { + elements: vec![ + Text("macro_rules! ".into()), + Tabstop { tabstop: 1 }, + Text(" {\n (".into()), + Tabstop { tabstop: 2 }, + Text(") => {\n ".into()), + Tabstop { tabstop: 0 }, + Text("\n };\n}".into()) + ] + }), + parse("macro_rules! $1 {\n ($2) => {\n $0\n };\n}") + ); + } + #[test] + fn robust_parsing() { + assert_eq!( + Ok(Snippet { + elements: vec![ + Text("$".into()), + Text("{}".into()), + Text("$".into()), + Text("\\a$}\\".into()), + ] + }), + parse("${}$\\a\\$\\}\\\\") + ); + assert_eq!( + Ok(Snippet { + elements: vec![ + Placeholder { + tabstop: 1, + value: vec![Text("$".into()), Text("{".into())] + }, + Text("}".into()) + ] + }), + parse("${1:${}}") ); } } From eff1f345302f6dbec25c3b6ac49c2219641929b2 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 19 Mar 2023 20:40:33 +0100 Subject: [PATCH 2/5] fix single-char variable names --- helix-lsp/src/snippet.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs index aa3496ed1316..e59351a4d4ff 100644 --- a/helix-lsp/src/snippet.rs +++ b/helix-lsp/src/snippet.rs @@ -196,23 +196,23 @@ mod parser { fn var<'a>() -> impl Parser<'a, Output = &'a str> { // var = [_a-zA-Z][_a-zA-Z0-9]* - move |input: &'a str| match input - .char_indices() - .take_while(|(p, c)| { - *c == '_' - || if *p == 0 { - c.is_ascii_alphabetic() - } else { - c.is_ascii_alphanumeric() - } - }) - .last() - { - Some((index, c)) if index >= 1 => { - let index = index + c.len_utf8(); - Ok((&input[index..], &input[0..index])) - } - _ => Err(input), + move |input: &'a str| { + input + .char_indices() + .take_while(|(p, c)| { + *c == '_' + || if *p == 0 { + c.is_ascii_alphabetic() + } else { + c.is_ascii_alphanumeric() + } + }) + .last() + .map(|(index, c)| { + let index = index + c.len_utf8(); + (&input[index..], &input[0..index]) + }) + .ok_or(input) } } From a38064709e6ab306bec1d2952a42d7c3c728d457 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 19 Mar 2023 22:05:53 +0100 Subject: [PATCH 3/5] accept empty snippet regex replace elements --- helix-lsp/src/snippet.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs index e59351a4d4ff..5bfe96570fe2 100644 --- a/helix-lsp/src/snippet.rs +++ b/helix-lsp/src/snippet.rs @@ -311,9 +311,9 @@ mod parser { seq!( "/", // TODO parse as ECMAScript and convert to rust regex - non_empty(text(&['/'], &['/'])), + text(&['/'], &['/']), "/", - one_or_more(choice!( + zero_or_more(choice!( format(), // text doesn't parse $, if format fails we just accept the $ as text map("$", |_| FormatItem::Text("$".into())), From a5c8c51f88efc1b448876624a7f93d1f711714ac Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 19 Mar 2023 22:06:29 +0100 Subject: [PATCH 4/5] accept bracket snippet variables without default --- helix-lsp/src/snippet.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs index 5bfe96570fe2..a11ff60f2220 100644 --- a/helix-lsp/src/snippet.rs +++ b/helix-lsp/src/snippet.rs @@ -388,6 +388,12 @@ mod parser { default: None, regex: None, }), + // ${var} + map(seq!("${", var(), "}",), |values| SnippetElement::Variable { + name: values.1, + default: None, + regex: None, + }), // ${var:default} map( seq!( From 4dab5319a69e5066da45d489fe4087783853fdb8 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 19 Mar 2023 22:06:44 +0100 Subject: [PATCH 5/5] detailed snippet tests --- helix-lsp/src/snippet.rs | 389 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 368 insertions(+), 21 deletions(-) diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs index a11ff60f2220..a4f049e8349f 100644 --- a/helix-lsp/src/snippet.rs +++ b/helix-lsp/src/snippet.rs @@ -633,31 +633,378 @@ mod parser { parse("macro_rules! $1 {\n ($2) => {\n $0\n };\n}") ); } + + fn assert_text(snippet: &str, parsed_text: &str) { + let res = parse(snippet).unwrap(); + let text = crate::snippet::render(&res, "\n", true).0; + assert_eq!(text, parsed_text) + } + #[test] fn robust_parsing() { - assert_eq!( - Ok(Snippet { - elements: vec![ - Text("$".into()), - Text("{}".into()), - Text("$".into()), - Text("\\a$}\\".into()), - ] - }), - parse("${}$\\a\\$\\}\\\\") + assert_text("$", "$"); + assert_text("\\\\$", "\\$"); + assert_text("{", "{"); + assert_text("\\}", "}"); + assert_text("\\abc", "\\abc"); + assert_text("foo${f:\\}}bar", "foo}bar"); + assert_text("\\{", "\\{"); + assert_text("I need \\\\\\$", "I need \\$"); + assert_text("\\", "\\"); + assert_text("\\{{", "\\{{"); + assert_text("{{", "{{"); + assert_text("{{dd", "{{dd"); + assert_text("}}", "}}"); + assert_text("ff}}", "ff}}"); + assert_text("farboo", "farboo"); + assert_text("far{{}}boo", "far{{}}boo"); + assert_text("far{{123}}boo", "far{{123}}boo"); + assert_text("far\\{{123}}boo", "far\\{{123}}boo"); + assert_text("far{{id:bern}}boo", "far{{id:bern}}boo"); + assert_text("far{{id:bern {{basel}}}}boo", "far{{id:bern {{basel}}}}boo"); + assert_text( + "far{{id:bern {{id:basel}}}}boo", + "far{{id:bern {{id:basel}}}}boo", ); - assert_eq!( - Ok(Snippet { - elements: vec![ - Placeholder { - tabstop: 1, - value: vec![Text("$".into()), Text("{".into())] - }, - Text("}".into()) - ] - }), - parse("${1:${}}") + assert_text( + "far{{id:bern {{id2:basel}}}}boo", + "far{{id:bern {{id2:basel}}}}boo", + ); + assert_text("${}$\\a\\$\\}\\\\", "${}$\\a$}\\"); + assert_text("farboo", "farboo"); + assert_text("far{{}}boo", "far{{}}boo"); + assert_text("far{{123}}boo", "far{{123}}boo"); + assert_text("far\\{{123}}boo", "far\\{{123}}boo"); + assert_text("far`123`boo", "far`123`boo"); + assert_text("far\\`123\\`boo", "far\\`123\\`boo"); + assert_text("\\$far-boo", "$far-boo"); + } + + fn assert_snippet(snippet: &str, expect: &[SnippetElement]) { + let parsed_snippet = parse(snippet).unwrap(); + assert_eq!(parsed_snippet.elements, expect.to_owned()) + } + + #[test] + fn parse_variable() { + use SnippetElement::*; + assert_snippet( + "$far-boo", + &[ + Variable { + name: "far", + default: None, + regex: None, + }, + Text("-boo".into()), + ], + ); + assert_snippet( + "far$farboo", + &[ + Text("far".into()), + Variable { + name: "farboo", + regex: None, + default: None, + }, + ], + ); + assert_snippet( + "far${farboo}", + &[ + Text("far".into()), + Variable { + name: "farboo", + regex: None, + default: None, + }, + ], + ); + assert_snippet("$123", &[Tabstop { tabstop: 123 }]); + assert_snippet( + "$farboo", + &[Variable { + name: "farboo", + regex: None, + default: None, + }], + ); + assert_snippet( + "$far12boo", + &[Variable { + name: "far12boo", + regex: None, + default: None, + }], + ); + assert_snippet( + "000_${far}_000", + &[ + Text("000_".into()), + Variable { + name: "far", + regex: None, + default: None, + }, + Text("_000".into()), + ], + ); + } + + #[test] + fn parse_variable_transform() { + assert_snippet( + "${foo///}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: Tendril::new(), + replacement: Vec::new(), + options: Tendril::new(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/regex/format/gmi}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: "regex".into(), + replacement: vec![FormatItem::Text("format".into())], + options: "gmi".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/([A-Z][a-z])/format/}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: "([A-Z][a-z])".into(), + replacement: vec![FormatItem::Text("format".into())], + options: Tendril::new(), + }), + default: None, + }], + ); + + // invalid regex TODO: reneable tests once we actually parse this regex flavour + // assert_text( + // "${foo/([A-Z][a-z])/format/GMI}", + // "${foo/([A-Z][a-z])/format/GMI}", + // ); + // assert_text( + // "${foo/([A-Z][a-z])/format/funky}", + // "${foo/([A-Z][a-z])/format/funky}", + // ); + // assert_text("${foo/([A-Z][a-z]/format/}", "${foo/([A-Z][a-z]/format/}"); + assert_text( + "${foo/regex\\/format/options}", + "${foo/regex\\/format/options}", + ); + + // tricky regex + assert_snippet( + "${foo/m\\/atch/$1/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: "m/atch".into(), + replacement: vec![FormatItem::Capture(1)], + options: "i".into(), + }), + default: None, + }], + ); + + // incomplete + assert_text("${foo///", "${foo///"); + assert_text("${foo/regex/format/options", "${foo/regex/format/options"); + + // format string + assert_snippet( + "${foo/.*/${0:fooo}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![FormatItem::Conditional(0, None, Some("fooo".into()))], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/${1}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![FormatItem::Capture(1)], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/$1/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![FormatItem::Capture(1)], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/This-$1-encloses/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![ + FormatItem::Text("This-".into()), + FormatItem::Capture(1), + FormatItem::Text("-encloses".into()), + ], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/complex${1:else}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![ + FormatItem::Text("complex".into()), + FormatItem::Conditional(1, None, Some("else".into())), + ], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/complex${1:-else}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![ + FormatItem::Text("complex".into()), + FormatItem::Conditional(1, None, Some("else".into())), + ], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/complex${1:+if}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![ + FormatItem::Text("complex".into()), + FormatItem::Conditional(1, Some("if".into()), None), + ], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/complex${1:?if:else}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![ + FormatItem::Text("complex".into()), + FormatItem::Conditional(1, Some("if".into()), Some("else".into())), + ], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${foo/.*/complex${1:/upcase}/i}", + &[Variable { + name: "foo", + regex: Some(Regex { + value: ".*".into(), + replacement: vec![ + FormatItem::Text("complex".into()), + FormatItem::CaseChange(1, CaseChange::Upcase), + ], + options: "i".into(), + }), + default: None, + }], + ); + assert_snippet( + "${TM_DIRECTORY/src\\//$1/}", + &[Variable { + name: "TM_DIRECTORY", + regex: Some(Regex { + value: "src/".into(), + replacement: vec![FormatItem::Capture(1)], + options: Tendril::new(), + }), + default: None, + }], + ); + assert_snippet( + "${TM_SELECTED_TEXT/a/\\/$1/g}", + &[Variable { + name: "TM_SELECTED_TEXT", + regex: Some(Regex { + value: "a".into(), + replacement: vec![FormatItem::Text("/".into()), FormatItem::Capture(1)], + options: "g".into(), + }), + default: None, + }], + ); + assert_snippet( + "${TM_SELECTED_TEXT/a/in\\/$1ner/g}", + &[Variable { + name: "TM_SELECTED_TEXT", + regex: Some(Regex { + value: "a".into(), + replacement: vec![ + FormatItem::Text("in/".into()), + FormatItem::Capture(1), + FormatItem::Text("ner".into()), + ], + options: "g".into(), + }), + default: None, + }], + ); + assert_snippet( + "${TM_SELECTED_TEXT/a/end\\//g}", + &[Variable { + name: "TM_SELECTED_TEXT", + regex: Some(Regex { + value: "a".into(), + replacement: vec![FormatItem::Text("end/".into())], + options: "g".into(), + }), + default: None, + }], ); } + // TODO port more tests from https://github.com/microsoft/vscode/blob/dce493cb6e36346ef2714e82c42ce14fc461b15c/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts } }