diff --git a/Cargo.toml b/Cargo.toml index 95be757d..ff4a1c29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ serde_json = "1.0" rayon = "0.8.0" regex = "0.2.1" getopts = "0.2" +pretty_assertions = "0.4.0" [features] diff --git a/assets/default_newlines.packdump b/assets/default_newlines.packdump index 650fc7b8..86e30eb0 100644 Binary files a/assets/default_newlines.packdump and b/assets/default_newlines.packdump differ diff --git a/assets/default_nonewlines.packdump b/assets/default_nonewlines.packdump index 1f96921d..4a62aee1 100644 Binary files a/assets/default_nonewlines.packdump and b/assets/default_nonewlines.packdump differ diff --git a/examples/syntest.rs b/examples/syntest.rs index 713cf7ae..2a941d0a 100644 --- a/examples/syntest.rs +++ b/examples/syntest.rs @@ -1,6 +1,12 @@ //! An example of using syntect for testing syntax definitions. //! Basically exactly the same as what Sublime Text can do, //! but without needing ST installed +// To run tests only for a particular package, while showing the operations, you could use: +// cargo run --example syntest -- --debug testdata/Packages/Makefile/ +// to specify that the syntax definitions should be parsed instead of loaded from the dump file, +// you can tell it where to parse them from - the following will execute only 1 syntax test after +// parsing the sublime-syntax files in the JavaScript folder: +// cargo run --example syntest testdata/Packages/JavaScript/syntax_test_json.json testdata/Packages/JavaScript/ extern crate syntect; extern crate walkdir; #[macro_use] @@ -123,7 +129,8 @@ fn process_assertions(assertion: &AssertionRange, test_against_line_scopes: &Vec } /// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed -fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool) -> Result { +fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool, debug: bool) -> Result { + use syntect::util::debug_print_ops; let f = File::open(path).unwrap(); let mut reader = BufReader::new(f); let mut line = String::new(); @@ -189,7 +196,17 @@ fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool) -> Result Result = std::env::args().collect(); + let mut args: Vec = std::env::args().collect(); + let debug_arg = args.iter().position(|s| s == "--debug"); + if debug_arg.is_some() { + args.remove(debug_arg.unwrap()); + } + let tests_path = if args.len() < 2 { "." } else { @@ -254,21 +276,21 @@ fn main() { ss.link_syntaxes(); } - let exit_code = recursive_walk(&ss, &tests_path); + let exit_code = recursive_walk(&ss, &tests_path, debug_arg.is_some()); println!("exiting with code {}", exit_code); std::process::exit(exit_code); } -fn recursive_walk(ss: &SyntaxSet, path: &str) -> i32 { +fn recursive_walk(ss: &SyntaxSet, path: &str, debug: bool) -> i32 { let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass let walker = WalkDir::new(path).into_iter(); for entry in walker.filter_entry(|e|e.file_type().is_dir() || is_a_syntax_test_file(e)) { let entry = entry.unwrap(); if entry.file_type().is_file() { println!("Testing file {}", entry.path().display()); - let result = test_file(&ss, entry.path(), true); + let result = test_file(&ss, entry.path(), true, debug); println!("{:?}", result); if exit_code != 2 { // leave exit code 2 if there was an error if let Err(_) = result { // set exit code 2 if there was an error diff --git a/src/lib.rs b/src/lib.rs index 4e8b7a5e..dabf72de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,9 @@ extern crate serde; #[macro_use] extern crate serde_derive; extern crate serde_json; +#[cfg(test)] +#[macro_use] +extern crate pretty_assertions; pub mod highlighting; pub mod parsing; pub mod util; diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 5243f2c4..82175328 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -159,9 +159,9 @@ impl ParseState { let context_chain = { let proto_start = self.proto_starts.last().cloned().unwrap_or(0); // Sublime applies with_prototypes from bottom to top - let with_prototypes = self.stack[proto_start..].iter().filter_map(|lvl| lvl.prototype.as_ref().cloned()).map(|ctx| (true, ctx)); - let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx)); - let cur_context = Some((false, cur_level.context.clone())).into_iter(); + let with_prototypes = self.stack[proto_start..].iter().filter_map(|lvl| lvl.prototype.as_ref().map(|ctx| (true, ctx.clone(), lvl.captures.as_ref()))); + let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None)); + let cur_context = Some((false, cur_level.context.clone(), cur_level.captures.as_ref())).into_iter(); with_prototypes.chain(cur_prototype).chain(cur_context) }; @@ -172,7 +172,7 @@ impl ParseState { let mut match_from_with_proto = false; let mut cur_match: Option = None; - for (from_with_proto, ctx) in context_chain { + for (from_with_proto, ctx, captures) in context_chain { for (pat_context_ptr, pat_index) in context_iter(ctx) { let mut pat_context = pat_context_ptr.borrow_mut(); let match_pat = pat_context.match_at_mut(pat_index); @@ -208,8 +208,8 @@ impl ParseState { } match_pat.ensure_compiled_if_possible(); - let refs_regex = if match_pat.has_captures && cur_level.captures.is_some() { - let &(ref region, ref s) = cur_level.captures.as_ref().unwrap(); + let refs_regex = if match_pat.has_captures && captures.is_some() { + let &(ref region, ref s) = captures.unwrap(); Some(match_pat.compile_with_refs(region, s)) } else { None @@ -451,15 +451,23 @@ impl ParseState { MatchOperation::None => return false, }; for (i, r) in ctx_refs.iter().enumerate() { - let proto = if i == 0 { + // if a with_prototype was specified, and multiple contexts were pushed, + // then the with_prototype applies only to the last context pushed, i.e. + // top most on the stack after all the contexts are pushed - this is also + // referred to as the "target" of the push by sublimehq - see + // https://forum.sublimetext.com/t/dev-build-3111/19240/17 for more info + let proto = if i == ctx_refs.len() - 1 { pat.with_prototype.clone() } else { None }; let ctx_ptr = r.resolve(); let captures = { - let ctx = ctx_ptr.borrow(); - if ctx.uses_backrefs { + let mut uses_backrefs = ctx_ptr.borrow().uses_backrefs; + if let Some(ref proto) = proto { + uses_backrefs = uses_backrefs || proto.borrow().uses_backrefs; + } + if uses_backrefs { Some((regions.clone(), line.to_owned())) } else { None @@ -535,6 +543,7 @@ mod tests { test_stack.push(Scope::new("text.html.ruby").unwrap()); test_stack.push(Scope::new("text.html.basic").unwrap()); test_stack.push(Scope::new("source.js.embedded.html").unwrap()); + test_stack.push(Scope::new("source.js").unwrap()); test_stack.push(Scope::new("string.quoted.single.js").unwrap()); test_stack.push(Scope::new("source.ruby.rails.embedded.html").unwrap()); test_stack.push(Scope::new("meta.function.parameters.ruby").unwrap()); @@ -719,4 +728,28 @@ contexts: debug_print_ops(line, &ops); ops } + + #[test] + fn can_parse_issue120() { + let ps = SyntaxSet::load_from_folder("testdata").unwrap(); + let syntax = ps.find_syntax_by_name("Embed_Escape Used by tests in src/parsing/parser.rs").unwrap(); + + let line1 = "\"abctest\" foobar"; + let expect1 = [ + ", , ", + ", ", + ", , ", + ", , ", + "", + ]; + expect_scope_stacks_with_syntax(&line1, &expect1, syntax.to_owned()); + + let line2 = ">abctestfoobar"; + let expect2 = [ + ", ", + ", ", + "", + ]; + expect_scope_stacks_with_syntax(&line2, &expect2, syntax.to_owned()); + } } diff --git a/src/parsing/syntax_definition.rs b/src/parsing/syntax_definition.rs index f06bbab4..1106e24d 100644 --- a/src/parsing/syntax_definition.rs +++ b/src/parsing/syntax_definition.rs @@ -57,6 +57,20 @@ pub struct Context { pub patterns: Vec, } +impl Context { + pub fn new(meta_include_prototype: bool) -> Context { + Context { + meta_scope: Vec::new(), + meta_content_scope: Vec::new(), + meta_include_prototype: meta_include_prototype, + clear_scopes: None, + uses_backrefs: false, + patterns: Vec::new(), + prototype: None, + } + } +} + #[derive(Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum Pattern { Match(MatchPattern), diff --git a/src/parsing/syntax_set.rs b/src/parsing/syntax_set.rs index bf191861..2530883d 100644 --- a/src/parsing/syntax_set.rs +++ b/src/parsing/syntax_set.rs @@ -95,7 +95,6 @@ impl SyntaxSet { for entry in WalkDir::new(folder).sort_by(|a, b| a.cmp(b)) { let entry = entry.map_err(LoadingError::WalkDir)?; if entry.path().extension().map_or(false, |e| e == "sublime-syntax") { - // println!("{}", entry.path().display()); let syntax = load_syntax_file(entry.path(), lines_include_newline)?; if let Some(path_str) = entry.path().to_str() { self.path_syntaxes.push((path_str.to_string(), self.syntaxes.len())); diff --git a/src/parsing/yaml_load.rs b/src/parsing/yaml_load.rs index 0a02b238..064abcf4 100644 --- a/src/parsing/yaml_load.rs +++ b/src/parsing/yaml_load.rs @@ -13,7 +13,7 @@ use std::ops::DerefMut; pub enum ParseSyntaxError { /// Invalid YAML file syntax, or at least something yaml_rust can't handle InvalidYaml(ScanError), - /// The file must contain at least on YAML document + /// The file must contain at least one YAML document EmptyFile, /// Some keys are required for something to be a valid `.sublime-syntax` MissingMandatoryKey(&'static str), @@ -152,15 +152,8 @@ impl SyntaxDefinition { state: &mut ParserState, is_prototype: bool) -> Result { - let mut context = Context { - meta_scope: Vec::new(), - meta_content_scope: Vec::new(), - meta_include_prototype: !is_prototype, - clear_scopes: None, - uses_backrefs: false, - patterns: Vec::new(), - prototype: None, - }; + let mut context = Context::new(!is_prototype); + for y in vec.iter() { let map = y.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?; @@ -265,12 +258,13 @@ impl SyntaxDefinition { .map(|s| str_to_scopes(s, state.scope_repo)) .unwrap_or_else(|| Ok(vec![]))?; + let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) { let mut res_map = Vec::new(); for (key, value) in map.iter() { if let (Some(key_int), Some(val_str)) = (key.as_i64(), value.as_str()) { res_map.push((key_int as usize, - str_to_scopes(val_str, state.scope_repo)?)); + str_to_scopes(val_str, state.scope_repo)?)); } } Some(res_map) @@ -287,13 +281,51 @@ impl SyntaxDefinition { MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state)?) } else if let Ok(y) = get_key(map, "set", Some) { MatchOperation::Set(SyntaxDefinition::parse_pushargs(y, state)?) + } else if let Ok(y) = get_key(map, "embed", Some) { + // Same as push so we translate it to what it would be + let mut embed_escape_context_yaml = vec!(); + if let Ok(s) = get_key(map, "embed_scope", Some) { + let mut commands = Hash::new(); + commands.insert(Yaml::String("meta_content_scope".to_string()), s.clone()); + embed_escape_context_yaml.push(Yaml::Hash(commands)); + } + if let Ok(v) = get_key(map, "escape", Some) { + let mut match_map = Hash::new(); + match_map.insert(Yaml::String("match".to_string()), v.clone()); + match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true)); + if let Ok(y) = get_key(map, "escape_captures", Some) { + match_map.insert(Yaml::String("captures".to_string()), y.clone()); + } + embed_escape_context_yaml.push(Yaml::Hash(match_map)); + let escape_context = SyntaxDefinition::parse_context( + &embed_escape_context_yaml, + state, + false + )?; + MatchOperation::Push(vec![ContextReference::Inline(escape_context), SyntaxDefinition::parse_reference(y, state)?]) + } else { + return Err(ParseSyntaxError::MissingMandatoryKey("escape")); + } + } else { MatchOperation::None }; let with_prototype = if let Ok(v) = get_key(map, "with_prototype", |x| x.as_vec()) { // should a with_prototype include the prototype? I don't think so. - Some(SyntaxDefinition::parse_context(v, state, true)?) + Some(Self::parse_context(v, state, true)?) + } else if let Ok(v) = get_key(map, "escape", Some) { + let mut context = Context::new(false); + let mut match_map = Hash::new(); + match_map.insert(Yaml::String("match".to_string()), Yaml::String(format!("(?={})", v.as_str().unwrap()))); + match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true)); + let pattern = SyntaxDefinition::parse_match_pattern(&match_map, state)?; + if pattern.has_captures { + context.uses_backrefs = true; + } + context.patterns.push(Pattern::Match(pattern)); + + Some(Rc::new(RefCell::new(context))) } else { None }; @@ -307,6 +339,7 @@ impl SyntaxDefinition { operation: operation, with_prototype: with_prototype, }; + Ok(pattern) } @@ -314,7 +347,7 @@ impl SyntaxDefinition { state: &mut ParserState) -> Result, ParseSyntaxError> { // check for a push of multiple items - if y.as_vec().map_or(false, |v| !v.is_empty() && v[0].as_str().is_some()) { + if y.as_vec().map_or(false, |v| !v.is_empty() && (v[0].as_str().is_some() || (v[0].as_vec().is_some() && v[0].as_vec().unwrap()[0].as_hash().is_some()))) { // this works because Result implements FromIterator to handle the errors y.as_vec() .unwrap() @@ -621,6 +654,117 @@ mod tests { } } + #[test] + fn can_parse_embed_as_with_prototypes() { + let old_def = SyntaxDefinition::load_from_str(r#" + name: C + scope: source.c + file_extensions: [c, h] + variables: + ident: '[QY]+' + contexts: + main: + - match: '(>)\s*' + captures: + 1: meta.tag.style.begin.html punctuation.definition.tag.end.html + push: + - [{ meta_content_scope: 'source.css.embedded.html'}, { match: '(?i)(?=)\s*' + captures: + 1: meta.tag.style.begin.html punctuation.definition.tag.end.html + embed: scope:source.css + embed_scope: source.css.embedded.html + escape: (?i)(?=)\s*' + captures: + 1: meta.tag.style.begin.html punctuation.definition.tag.end.html + embed: scope:source.css + embed_scope: source.css.embedded.html + "#,false); + assert!(def.is_err()); + match def.unwrap_err() { + ParseSyntaxError::MissingMandatoryKey(key) => assert_eq!(key, "escape"), + _ => assert!(false, "Got unexpected ParseSyntaxError"), + } + } + + #[test] + fn can_parse_ugly_yaml() { + let defn: SyntaxDefinition = + SyntaxDefinition::load_from_str(" + name: LaTeX + scope: text.tex.latex + contexts: + main: + - match: '((\\\\)(?:framebox|makebox))\\b' + captures: + 1: support.function.box.latex + 2: punctuation.definition.backslash.latex + push: + - [{meta_scope: meta.function.box.latex}, {match: '', pop: true}] + - argument + - optional-arguments + argument: + - match: '\\{' + scope: punctuation.definition.group.brace.begin.latex + - match: '(?=\\S)' + pop: true + optional-arguments: + - match: '(?=\\S)' + pop: true + ", + false) + .unwrap(); + assert_eq!(defn.name, "LaTeX"); + let top_level_scope = Scope::new("text.tex.latex").unwrap(); + assert_eq!(defn.scope, top_level_scope); + + let first_pattern: &Pattern = &defn.contexts["main"].borrow().patterns[0]; + match first_pattern { + &Pattern::Match(ref match_pat) => { + let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed"); + assert_eq!(&m[0], &(1,vec![Scope::new("support.function.box.latex").unwrap()])); + + //use parsing::syntax_definition::ContextReference::*; + // TODO: check the first pushed reference is Inline(...) and has a meta_scope of meta.function.box.latex + // TODO: check the second pushed reference is Named("argument".to_owned()) + // TODO: check the third pushed reference is Named("optional-arguments".to_owned()) + + assert!(match_pat.with_prototype.is_none()); + } + _ => assert!(false), + } + } + #[test] fn can_rewrite_regex() { fn rewrite(s: &str) -> String { diff --git a/testdata/Packages b/testdata/Packages index 1b206784..fa6b8629 160000 --- a/testdata/Packages +++ b/testdata/Packages @@ -1 +1 @@ -Subproject commit 1b2067847ab557d74f41685eace343354dc2c198 +Subproject commit fa6b8629c95041bf262d4c1dab95c456a0530122 diff --git a/testdata/embed_escape_test.sublime-syntax b/testdata/embed_escape_test.sublime-syntax new file mode 100644 index 00000000..a9a498de --- /dev/null +++ b/testdata/embed_escape_test.sublime-syntax @@ -0,0 +1,33 @@ +%YAML 1.2 +--- +name: Embed_Escape Used by tests in src/parsing/parser.rs +scope: source.embed-test +contexts: + main: + - match: (") + scope: meta.attribute-with-value.style.html string.quoted.double punctuation.definition.string.begin.html + embed: embedded_context + embed_scope: meta.attribute-with-value.style.html source.css + escape: '\1' + escape_captures: + 0: meta.attribute-with-value.style.html string.quoted.double punctuation.definition.string.end.html + - match: '(>)\s*' + captures: + 1: meta.tag.style.begin.html punctuation.definition.tag.end.html + embed: embedded_context + embed_scope: source.css.embedded.html + escape: (?i)(?=' + - match: foobar + scope: top-level.test + + embedded_context: + - match: a + scope: a + push: # prove that multiple context levels can be "escape"d + - match: b + push: + - match: c + push: + - match: 'test' + scope: test.embedded diff --git a/testdata/minimized_tests/syntax_test_aspmini.asp b/testdata/minimized_tests/syntax_test_aspmini.asp index 94bf4929..0d3c8c15 100644 --- a/testdata/minimized_tests/syntax_test_aspmini.asp +++ b/testdata/minimized_tests/syntax_test_aspmini.asp @@ -2,7 +2,7 @@ <% Class TestClass2 Public Sub TestSub () Response.Write("wow") End Sub End Class '^^^^^ meta.class.asp meta.class.identifier.asp storage.type.asp -' ^ meta.class.asp meta.class.body.asp meta.class.asp meta.class.identifier.asp +' ^ meta.class.asp meta.class.identifier.asp ' ^ meta.class.asp meta.class.body.asp %>

foobar