diff --git a/README.md b/README.md index b5497466..cbd153e9 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,46 @@ $ cat birds.py | srgn --python 'class' 'def .+:\n\s+[^"\s]{3}' # do not try this Note how this does not surface either `from_egg` (has a docstring) or `register_bird` (not a method, *`def` outside `class`*). +#### Multiple language scopes + +Language scopes themselves can be specified multiple times as well. For example, in the +Rust snippet + +```rust file=music.rs +pub enum Genre { + Rock, + Jazz, +} + +struct Instrument { + name: String, +} + +pub struct Musician { + name: String, + primary_instrument: Instrument, + genres: Vec, +} +``` + +we can query multiple items of interest at once as: + +```console +$ cat music.rs | srgn --rust 'pub-enum' --rust 'pub-struct' # OR'd together +1:pub enum Genre { +2: Rock, +3: Jazz, +4:} +10:pub struct Musician { +11: name: String, +12: primary_instrument: Instrument, +13: genres: Vec, +14:} + +``` + +where both `pub enum` and `pub struct`, but not the private `struct` were found. + #### Working recursively If standard input is not given, `srgn` knows how to find relevant source files diff --git a/src/main.rs b/src/main.rs index a5f04c05..57cf50e9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ use colored::Colorize; use colored::Styles; use ignore::WalkBuilder; use ignore::WalkState; +use itertools::Itertools; use log::error; use log::trace; use log::{debug, info, LevelFilter}; @@ -78,13 +79,11 @@ fn main() -> Result<()> { info!("Launching app with args: {:?}", args); debug!("Assembling scopers."); - let (language_scoper, language_scoper_as_scoper) = get_language_scoper(&args).unzip(); let general_scoper = get_general_scoper(&args)?; - let all_scopers = if let Some(ls) = language_scoper_as_scoper { - vec![ls, general_scoper] - } else { - vec![general_scoper] - }; + // Will be sent across threads and might (the borrow checker is convinced at least) + // outlive the main one. Scoped threads would work here, `ignore` uses them + // internally even, but we have no access here. + let language_scopers = Arc::new(get_language_scopers(&args)); debug!("Done assembling scopers."); debug!("Assembling actions."); @@ -94,7 +93,7 @@ fn main() -> Result<()> { // Only have this kick in if a language scoper is in play; otherwise, we'd just be a // poor imitation of ripgrep itself. Plus, this retains the `tr`-like behavior, // setting it apart from other utilities. - let search_mode = actions.is_empty() && language_scoper.is_some(); + let search_mode = actions.is_empty() && !language_scopers.is_empty(); let is_readable_stdin = grep_cli::is_readable_stdin(); info!("Detected stdin as readable: {is_readable_stdin}."); @@ -103,12 +102,12 @@ fn main() -> Result<()> { let input = match ( args.options.stdin_override_to.unwrap_or(is_readable_stdin), args.options.glob.clone(), - language_scoper, + &language_scopers.is_empty(), ) { // stdin considered viable: always use it. (true, None, _) // Nothing explicitly available: this should open an interactive stdin prompt. - | (false, None, None) => Input::Stdin, + | (false, None, true) => Input::Stdin, (true, Some(..), _) => { // Usage error... warn loudly, the user is likely interested. error!("Detected stdin, and request for files: will use stdin and ignore files."); @@ -124,15 +123,25 @@ fn main() -> Result<()> { // If pattern wasn't manually overridden, consult the language scoper itself, if // any. - (false, None, Some(language_scoper)) => Input::WalkOn(Box::new(move |path| { - let res = language_scoper.is_valid_path(path); - trace!( - "Language scoper considers path '{}' valid: {}", - path.display(), + (false, None, false) => { + let language_scopers = Arc::clone(&language_scopers); + Input::WalkOn(Box::new(move |path| { + // TODO: perform this work only once (it's super fast but in the hot + // path). + let res = language_scopers + .iter() + .map(|s| s.is_valid_path(path)) + .all_equal_value() + .expect("all language scopers to agree on path validity"); + + trace!( + "Language scoper considers path '{}' valid: {}", + path.display(), + res + ); res - ); - res - })), + })) + }, }; if search_mode { @@ -161,13 +170,14 @@ fn main() -> Result<()> { match (input, args.options.sorted) { (Input::Stdin, _ /* no effect */) => { info!("Will read from stdin and write to stdout, applying actions."); - handle_actions_on_stdin(&all_scopers, &actions, &args)?; + handle_actions_on_stdin(&general_scoper, &language_scopers, &actions, &args)?; } (Input::WalkOn(validator), false) => { info!("Will walk file tree, applying actions."); handle_actions_on_many_files_threaded( &validator, - &all_scopers, + &general_scoper, + &language_scopers, &actions, &args, search_mode, @@ -181,7 +191,8 @@ fn main() -> Result<()> { info!("Will walk file tree, applying actions."); handle_actions_on_many_files_sorted( &validator, - &all_scopers, + &general_scoper, + &language_scopers, &actions, &args, search_mode, @@ -208,8 +219,10 @@ enum Input { } /// Main entrypoint for simple `stdin` -> `stdout` processing. +#[allow(clippy::borrowed_box)] // Used throughout, not much of a pain fn handle_actions_on_stdin( - scopers: &[Box], + general_scoper: &Box, + language_scopers: &[Box], actions: &[Box], args: &cli::Cli, ) -> Result<(), ProgramError> { @@ -221,7 +234,8 @@ fn handle_actions_on_stdin( apply( &source, &mut destination, - scopers, + general_scoper, + language_scopers, actions, args.options.fail_none, args.options.fail_any, @@ -243,9 +257,11 @@ fn handle_actions_on_stdin( /// /// [ripgrep]: /// https://github.com/BurntSushi/ripgrep/blob/71d71d2d98964653cdfcfa315802f518664759d7/GUIDE.md#L1016-L1017 +#[allow(clippy::borrowed_box)] // Used throughout, not much of a pain fn handle_actions_on_many_files_sorted( validator: &Validator, - scopers: &[Box], + general_scoper: &Box, + language_scopers: &[Box], actions: &[Box], args: &cli::Cli, search_mode: bool, @@ -267,7 +283,16 @@ fn handle_actions_on_many_files_sorted( match entry { Ok(entry) => { let path = entry.path(); - let res = process_path(path, &root, validator, scopers, actions, args, search_mode); + let res = process_path( + path, + &root, + validator, + general_scoper, + language_scopers, + actions, + args, + search_mode, + ); n_files_seen += match res { Err(PathProcessingError::NotAFile | PathProcessingError::InvalidFile) => 0, @@ -329,9 +354,11 @@ fn handle_actions_on_many_files_sorted( } /// Main entrypoint for processing using at least 1 thread. +#[allow(clippy::borrowed_box)] // Used throughout, not much of a pain fn handle_actions_on_many_files_threaded( validator: &Validator, - scopers: &[Box], + general_scoper: &Box, + language_scopers: &[Box], actions: &[Box], args: &cli::Cli, search_mode: bool, @@ -360,8 +387,16 @@ fn handle_actions_on_many_files_threaded( Box::new(|entry| match entry { Ok(entry) => { let path = entry.path(); - let res = - process_path(path, &root, validator, scopers, actions, args, search_mode); + let res = process_path( + path, + &root, + validator, + general_scoper, + language_scopers, + actions, + args, + search_mode, + ); match res { Err(PathProcessingError::NotAFile | PathProcessingError::InvalidFile) => (), @@ -438,11 +473,14 @@ fn handle_actions_on_many_files_threaded( } } +#[allow(clippy::too_many_arguments)] +#[allow(clippy::borrowed_box)] // Used throughout, not much of a pain fn process_path( path: &Path, root: &Path, validator: &Validator, - scopers: &[Box], + general_scoper: &Box, + language_scopers: &[Box], actions: &[Box], args: &cli::Cli, search_mode: bool, @@ -473,7 +511,8 @@ fn process_path( let changed = apply( &source, &mut destination, - scopers, + general_scoper, + language_scopers, actions, args.options.fail_none, args.options.fail_any, @@ -540,12 +579,14 @@ fn process_path( /// compared to the input. #[allow(clippy::too_many_arguments)] // Our de-facto filthy main function which does too much. Sue me #[allow(clippy::fn_params_excessive_bools)] // TODO: use an options struct +#[allow(clippy::borrowed_box)] // Used throughout, not much of a pain fn apply( source: &str, // Use a string to avoid repeated and unnecessary bytes -> utf8 conversions and // corresponding checks. destination: &mut String, - scopers: &[Box], + general_scoper: &Box, + language_scopers: &[Box], actions: &[Box], fail_none: bool, fail_any: bool, @@ -555,9 +596,8 @@ fn apply( ) -> std::result::Result { debug!("Building view."); let mut builder = ScopedViewBuilder::new(source); - for scoper in scopers { - builder.explode(scoper); - } + builder.explode(&language_scopers); + builder.explode(general_scoper); let mut view = builder.build(); debug!("Done building view: {view:?}"); @@ -765,30 +805,37 @@ impl fmt::Display for ScoperBuildError { impl Error for ScoperBuildError {} -fn get_language_scoper(args: &cli::Cli) -> Option<(Box, Box)> { +#[allow(clippy::cognitive_complexity)] // 🤷‍♀️ macros +fn get_language_scopers(args: &cli::Cli) -> Vec> { // We have `LanguageScoper: Scoper`, but we cannot upcast // (https://github.com/rust-lang/rust/issues/65991), so hack around the limitation // by providing both. - let mut scopers: Vec<(Box, Box)> = Vec::new(); + let mut scopers: Vec> = Vec::new(); macro_rules! handle_language_scope { ($lang:ident, $lang_query:ident, $query_type:ident, $lang_type:ident) => { if let Some(lang_scope) = &args.languages_scopes.$lang { - if let Some(prepared) = lang_scope.$lang { - let query = $query_type::Prepared(prepared); - scopers.push(( - Box::new($lang_type::new(query.clone())), - Box::new($lang_type::new(query)), - )); - } else if let Some(custom) = &lang_scope.$lang_query { - let query = $query_type::Custom(custom.clone()); - scopers.push(( - Box::new($lang_type::new(query.clone())), - Box::new($lang_type::new(query)), - )); - } else { - unreachable!("Language specified, but no scope."); - }; + if !scopers.is_empty() { + let mut cmd = cli::Cli::command(); + cmd.error( + clap::error::ErrorKind::ArgumentConflict, + "Can only use one language at a time.", + ) + .exit(); + } + assert!(scopers.is_empty()); + + for query in &lang_scope.$lang { + let query = $query_type::Prepared(query.clone()); + scopers.push(Box::new($lang_type::new(query.clone()))); + } + + for query in &lang_scope.$lang_query { + let query = $query_type::Custom(query.clone()); + scopers.push(Box::new($lang_type::new(query.clone()))); + } + + assert!(!scopers.is_empty(), "Language specified, but no scope."); // Internal bug }; }; } @@ -800,14 +847,7 @@ fn get_language_scoper(args: &cli::Cli) -> Option<(Box, Box< handle_language_scope!(rust, rust_query, RustQuery, Rust); handle_language_scope!(typescript, typescript_query, TypeScriptQuery, TypeScript); - // We could just `return` after the first found, but then we wouldn't know whether - // we had a bug. So collect, then assert we only found one max. - assert!( - scopers.len() <= 1, - "clap limits to single value (`multiple = false`)" - ); - - scopers.into_iter().next() + scopers } fn get_general_scoper(args: &cli::Cli) -> Result> { @@ -1189,11 +1229,11 @@ mod cli { pub struct CSharpScope { /// Scope C# code using a prepared query. #[arg(long, env, verbatim_doc_comment)] - pub csharp: Option, + pub csharp: Vec, /// Scope C# code using a custom tree-sitter query. #[arg(long, env, verbatim_doc_comment, value_name = TREE_SITTER_QUERY_VALUE_NAME)] - pub csharp_query: Option, + pub csharp_query: Vec, } #[derive(Parser, Debug, Clone)] @@ -1202,12 +1242,12 @@ mod cli { #[allow(clippy::doc_markdown)] // CamelCase detected as 'needs backticks' /// Scope HashiCorp Configuration Language code using a prepared query. #[arg(long, env, verbatim_doc_comment)] - pub hcl: Option, + pub hcl: Vec, #[allow(clippy::doc_markdown)] // CamelCase detected as 'needs backticks' /// Scope HashiCorp Configuration Language code using a custom tree-sitter query. #[arg(long, env, verbatim_doc_comment, value_name = TREE_SITTER_QUERY_VALUE_NAME)] - pub hcl_query: Option, + pub hcl_query: Vec, } #[derive(Parser, Debug, Clone)] @@ -1215,11 +1255,11 @@ mod cli { pub struct GoScope { /// Scope Go code using a prepared query. #[arg(long, env, verbatim_doc_comment)] - pub go: Option, + pub go: Vec, /// Scope Go code using a custom tree-sitter query. #[arg(long, env, verbatim_doc_comment, value_name = TREE_SITTER_QUERY_VALUE_NAME)] - pub go_query: Option, + pub go_query: Vec, } #[derive(Parser, Debug, Clone)] @@ -1227,11 +1267,11 @@ mod cli { pub struct PythonScope { /// Scope Python code using a prepared query. #[arg(long, env, verbatim_doc_comment)] - pub python: Option, + pub python: Vec, /// Scope Python code using a custom tree-sitter query. #[arg(long, env, verbatim_doc_comment, value_name = TREE_SITTER_QUERY_VALUE_NAME)] - pub python_query: Option, + pub python_query: Vec, } #[derive(Parser, Debug, Clone)] @@ -1239,11 +1279,11 @@ mod cli { pub struct RustScope { /// Scope Rust code using a prepared query. #[arg(long, env, verbatim_doc_comment)] - pub rust: Option, + pub rust: Vec, /// Scope Rust code using a custom tree-sitter query. #[arg(long, env, verbatim_doc_comment, value_name = TREE_SITTER_QUERY_VALUE_NAME)] - pub rust_query: Option, + pub rust_query: Vec, } #[derive(Parser, Debug, Clone)] @@ -1251,11 +1291,11 @@ mod cli { pub struct TypeScriptScope { /// Scope TypeScript code using a prepared query. #[arg(long, env, verbatim_doc_comment)] - pub typescript: Option, + pub typescript: Vec, /// Scope TypeScript code using a custom tree-sitter query. #[arg(long, env, verbatim_doc_comment, value_name = TREE_SITTER_QUERY_VALUE_NAME)] - pub typescript_query: Option, + pub typescript_query: Vec, } #[cfg(feature = "german")] diff --git a/src/scoping/langs/mod.rs b/src/scoping/langs/mod.rs index ba0c54c6..f53708ac 100644 --- a/src/scoping/langs/mod.rs +++ b/src/scoping/langs/mod.rs @@ -1,8 +1,11 @@ -use super::Scoper; +use super::{scope::RangesWithContext, Scoper}; #[cfg(doc)] -use crate::scoping::scope::Scope::{In, Out}; +use crate::scoping::{ + scope::Scope::{In, Out}, + view::ScopedViewBuilder, +}; use crate::{find::Find, ranges::Ranges}; -use log::{debug, trace}; +use log::{debug, info, trace}; use std::{marker::PhantomData, str::FromStr}; pub use tree_sitter::{ Language as TSLanguage, Parser as TSParser, Query as TSQuery, QueryCursor as TSQueryCursor, @@ -120,7 +123,7 @@ pub(super) const IGNORE: &str = "_SRGN_IGNORE"; /// A scoper for a language. /// /// Functions much the same, but provides specific language-related functionality. -pub trait LanguageScoper: Find + Send + Sync { +pub trait LanguageScoper: Scoper + Find + Send + Sync { /// The language's tree-sitter language. fn lang() -> TSLanguage where @@ -209,7 +212,49 @@ impl Scoper for T where T: LanguageScoper, { - fn scope_raw<'viewee>(&self, input: &'viewee str) -> super::scope::RangesWithContext<'viewee> { + fn scope_raw<'viewee>(&self, input: &'viewee str) -> RangesWithContext<'viewee> { self.scope_via_query(input).into() } } + +impl Scoper for Box { + fn scope_raw<'viewee>(&self, input: &'viewee str) -> RangesWithContext<'viewee> { + self.as_ref().scope_raw(input) + } +} + +impl Scoper for &[Box] { + /// Allows *multiple* scopers to be applied all at once. + /// + /// They are OR'd together in the sense that if *any* of the scopers hit, a + /// position/range is considered in scope. In some sense, this is the opposite of + /// [`ScopedViewBuilder::explode`], which is subtractive. + fn scope_raw<'viewee>(&self, input: &'viewee str) -> RangesWithContext<'viewee> { + trace!("Scoping many scopes: {:?}", input); + + if self.is_empty() { + trace!("Short-circuiting: self is empty, nothing to scope."); + return vec![(0..input.len(), None)].into_iter().collect(); + } + + // This is slightly leaky in that it drops down to a more 'primitive' layer and + // uses `Ranges`. + let mut ranges: Ranges = self + .iter() + .flat_map(|s| s.scope_raw(input)) + .map(|(range, ctx)| { + assert!( + ctx.is_none(), + "When language scoping runs, no contexts exist yet." + ); + range + }) + .collect(); + ranges.merge(); + info!("New ranges after scoping many: {ranges:?}"); + + let ranges: RangesWithContext<'_> = ranges.into_iter().map(|r| (r, None)).collect(); + + ranges + } +} diff --git a/src/scoping/view.rs b/src/scoping/view.rs index 1c52ab67..0db3a9ea 100644 --- a/src/scoping/view.rs +++ b/src/scoping/view.rs @@ -347,7 +347,7 @@ impl<'viewee> ScopedViewBuilder<'viewee> { /// /// Panics if the [`Scoper`] scopes such that the view is no longer consistent, i.e. /// gaps were created and the original input can no longer be reconstructed from the - /// new view. + /// new view. This would be an internal bug. pub fn explode(&mut self, scoper: &impl Scoper) -> &mut Self { trace!("Exploding scopes: {:?}", self.scopes); let mut new = Vec::with_capacity(self.scopes.0.len()); diff --git a/tests/cli.rs b/tests/cli.rs index b7224aca..04f1e2ef 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -213,6 +213,18 @@ Heizoelrueckstossabdaempfung. ], Some(include_str!("langs/python/base.py")), )] + #[case( + "python-multiple-scopes", + false, + &[ + "--python", + "comments", + "--python", + "strings", + "A", + ], + Some("# A comment\nx = \"A string\"\ndef A(): pass\nclass A: pass"), + )] fn test_cli( #[case] mut snapshot_name: String, #[case] os_dependent: bool, @@ -609,6 +621,20 @@ Heizoelrueckstossabdaempfung. ], None, )] + // + // + #[case( + "fail-multiple-languages", + None, + &[ + // This should be stopped very early on, in CLI entry + "--python", + "strings", + "--go", + "strings", + ], + None, + )] fn test_cli_failure_modes( #[case] snapshot_name: String, #[case] stdin: Option<&str>, diff --git a/tests/snapshots/cli__tests__fail-multiple-languages.snap b/tests/snapshots/cli__tests__fail-multiple-languages.snap new file mode 100644 index 00000000..77827355 --- /dev/null +++ b/tests/snapshots/cli__tests__fail-multiple-languages.snap @@ -0,0 +1,19 @@ +--- +source: tests/cli.rs +expression: "CommandSnap {\n args,\n stdin: None,\n stdout: stdout.split_inclusive('\\n').map(ToOwned::to_owned).collect_vec(),\n exit_code,\n}" +info: + stderr: + - "error: Can only use one language at a time." + - "" + - "Usage: srgn [OPTIONS] [SCOPE] [REPLACEMENT]" + - "" + - "For more information, try '--help'." +--- +args: + - "--python" + - strings + - "--go" + - strings +stdin: ~ +stdout: [] +exit_code: 2 diff --git a/tests/snapshots/cli__tests__python-multiple-scopes.snap b/tests/snapshots/cli__tests__python-multiple-scopes.snap new file mode 100644 index 00000000..ae122c68 --- /dev/null +++ b/tests/snapshots/cli__tests__python-multiple-scopes.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli.rs +expression: "CommandSnap {\n args,\n stdin: stdin.map(|s|\n s.split_inclusive('\\n').map(ToOwned::to_owned).collect_vec()),\n stdout: stdout.split_inclusive('\\n').map(ToOwned::to_owned).collect_vec(),\n exit_code,\n}" +info: + stderr: [] +--- +args: + - "--python" + - comments + - "--python" + - strings + - A +stdin: + - "# A comment\n" + - "x = \"A string\"\n" + - "def A(): pass\n" + - "class A: pass" +stdout: + - "1:# A comment\n" + - "2:x = \"A string\"\n" +exit_code: 0