From 00520b30f5f38e543e17b1a4cc5e8417bc488ea4 Mon Sep 17 00:00:00 2001 From: Balaji Sivaraman Date: Mon, 12 Feb 2018 22:47:22 +0530 Subject: [PATCH] output: add --stats flag This commit provides basic support for a --stats flag, which will print various aggregate statistics about a search after all of the results have been printed. This is mostly intended to support a similar feature found in the Silver Searcher. Note though that we don't emit the total bytes searched; this is a first pass at an implementation and we can improve upon it later. Closes #411, Closes #799 --- complete/_rg | 1 + src/app.rs | 20 +++++++++++++++++++ src/args.rs | 25 ++++++++++++++++++++++-- src/main.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++-- tests/tests.rs | 44 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 4 deletions(-) diff --git a/complete/_rg b/complete/_rg index a0572f94e..6002beb14 100644 --- a/complete/_rg +++ b/complete/_rg @@ -91,6 +91,7 @@ _rg() { '(-e -f --file --files --regexp --type-list)1: :_rg_pattern' '(--type-list)*:file:_files' '(-z --search-zip)'{-z,--search-zip}'[search in compressed files]' + "(--stats)--stats[print stats about this search]" ) [[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && { diff --git a/src/app.rs b/src/app.rs index bfa2c7088..54ae1ac3b 100644 --- a/src/app.rs +++ b/src/app.rs @@ -559,6 +559,7 @@ pub fn all_args_and_flags() -> Vec { flag_search_zip(&mut args); flag_smart_case(&mut args); flag_sort_files(&mut args); + flag_stats(&mut args); flag_text(&mut args); flag_threads(&mut args); flag_type(&mut args); @@ -1488,6 +1489,25 @@ This flag can be disabled with --no-sort-files. args.push(arg); } +fn flag_stats(args: &mut Vec) { + const SHORT: &str = "Print statistics about this ripgrep search."; + const LONG: &str = long!("\ +Print aggregate statistics about this ripgrep search. 
When this flag is +present, ripgrep will print the following stats to stdout at the end of the +search: number of matched lines, number of files with matches, number of files +searched, and the time taken for the entire search to complete. + +This set of aggregate statistics may expand over time. + +Note that this flag has no effect if --files, --files-with-matches or +--files-without-match is passed."); + + let arg = RGArg::switch("stats") + .help(SHORT).long_help(LONG); + + args.push(arg); +} + fn flag_text(args: &mut Vec) { const SHORT: &str = "Search binary files as if they were text."; const LONG: &str = long!("\ diff --git a/src/args.rs b/src/args.rs index a5e134157..d2077d678 100644 --- a/src/args.rs +++ b/src/args.rs @@ -79,7 +79,8 @@ pub struct Args { type_list: bool, types: Types, with_filename: bool, - search_zip_files: bool + search_zip_files: bool, + stats: bool } impl Args { @@ -221,6 +222,12 @@ impl Args { self.max_count == Some(0) } + + /// Returns whether ripgrep should track stats for this run + pub fn stats(&self) -> bool { + self.stats + } + /// Create a new writer for single-threaded searching with color support. pub fn stdout(&self) -> termcolor::StandardStream { termcolor::StandardStream::stdout(self.color_choice) @@ -411,7 +418,8 @@ impl<'a> ArgMatches<'a> { type_list: self.is_present("type-list"), types: self.types()?, with_filename: with_filename, - search_zip_files: self.is_present("search-zip") + search_zip_files: self.is_present("search-zip"), + stats: self.stats() }; if args.mmap { debug!("will try to use memory maps"); @@ -825,6 +833,19 @@ impl<'a> ArgMatches<'a> { } } + /// Returns whether stats should be tracked for this run of ripgrep + + /// This is automatically disabled if we're asked to only list the + /// files that will be searched, files with matches or files + /// without matches. 
+ fn stats(&self) -> bool { + if self.is_present("files-with-matches") || + self.is_present("files-without-match") { + return false; + } + self.is_present("stats") + } + /// Returns the approximate number of threads that ripgrep should use. fn threads(&self) -> Result { if self.is_present("sort-files") { diff --git a/src/main.rs b/src/main.rs index bc0648160..f0be94c0c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,6 +27,7 @@ use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::mpsc; use std::thread; +use std::time::{Duration, Instant}; use args::Args; use worker::Work; @@ -85,16 +86,19 @@ fn run(args: Arc) -> Result { } fn run_parallel(args: &Arc) -> Result { + let start_time = Instant::now(); let bufwtr = Arc::new(args.buffer_writer()); let quiet_matched = args.quiet_matched(); let paths_searched = Arc::new(AtomicUsize::new(0)); let match_line_count = Arc::new(AtomicUsize::new(0)); + let paths_matched = Arc::new(AtomicUsize::new(0)); args.walker_parallel().run(|| { let args = Arc::clone(args); let quiet_matched = quiet_matched.clone(); let paths_searched = paths_searched.clone(); let match_line_count = match_line_count.clone(); + let paths_matched = paths_matched.clone(); let bufwtr = Arc::clone(&bufwtr); let mut buf = bufwtr.buffer(); let mut worker = args.worker(); @@ -129,6 +133,9 @@ fn run_parallel(args: &Arc) -> Result { if quiet_matched.set_match(count > 0) { return Quit; } + if args.stats() && count > 0 { + paths_matched.fetch_add(1, Ordering::SeqCst); + } } // BUG(burntsushi): We should handle this error instead of ignoring // it. 
See: https://github.com/BurntSushi/ripgrep/issues/200 @@ -141,15 +148,28 @@ fn run_parallel(args: &Arc) -> Result { eprint_nothing_searched(); } } - Ok(match_line_count.load(Ordering::SeqCst) as u64) + let match_line_count = match_line_count.load(Ordering::SeqCst) as u64; + let paths_searched = paths_searched.load(Ordering::SeqCst) as u64; + let paths_matched = paths_matched.load(Ordering::SeqCst) as u64; + if args.stats() { + print_stats( + match_line_count, + paths_searched, + paths_matched, + start_time.elapsed(), + ); + } + Ok(match_line_count) } fn run_one_thread(args: &Arc) -> Result { + let start_time = Instant::now(); let stdout = args.stdout(); let mut stdout = stdout.lock(); let mut worker = args.worker(); let mut paths_searched: u64 = 0; let mut match_line_count = 0; + let mut paths_matched: u64 = 0; for result in args.walker() { let dent = match get_or_log_dir_entry( result, @@ -170,18 +190,30 @@ fn run_one_thread(args: &Arc) -> Result { } } paths_searched += 1; - match_line_count += + let count = if dent.is_stdin() { worker.run(&mut printer, Work::Stdin) } else { worker.run(&mut printer, Work::DirEntry(dent)) }; + match_line_count += count; + if args.stats() && count > 0 { + paths_matched += 1; + } } if !args.paths().is_empty() && paths_searched == 0 { if !args.no_messages() { eprint_nothing_searched(); } } + if args.stats() { + print_stats( + match_line_count, + paths_searched, + paths_matched, + start_time.elapsed(), + ); + } Ok(match_line_count) } @@ -373,6 +405,22 @@ fn eprint_nothing_searched() { Try running again with --debug."); } +fn print_stats( + match_count: u64, + paths_searched: u64, + paths_matched: u64, + time_elapsed: Duration, +) { + let time_elapsed = + time_elapsed.as_secs() as f64 + + (time_elapsed.subsec_nanos() as f64 * 1e-9); + println!("\n{} matched lines\n\ + {} files contained matches\n\ + {} files searched\n\ + {:.3} seconds", match_count, paths_matched, + paths_searched, time_elapsed); +} + // The Rust standard library 
suppresses the default SIGPIPE behavior, so that // writing to a closed pipe doesn't kill the process. The goal is to instead // handle errors through the normal result mechanism. Ripgrep needs some diff --git a/tests/tests.rs b/tests/tests.rs index 4ffb6d6dd..8cb871843 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1811,6 +1811,50 @@ be, to a very large extent, the result of luck. Sherlock Holmes assert_eq!(lines, expected); }); +sherlock!(feature_411_single_threaded_search_stats, +|wd: WorkDir, mut cmd: Command| { + cmd.arg("--stats"); + + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines.contains("2 matched lines"), true); + assert_eq!(lines.contains("1 files contained matches"), true); + assert_eq!(lines.contains("1 files searched"), true); + assert_eq!(lines.contains("seconds"), true); +}); + +#[test] +fn feature_411_parallel_search_stats() { + let wd = WorkDir::new("feature_411"); + wd.create("sherlock_1", hay::SHERLOCK); + wd.create("sherlock_2", hay::SHERLOCK); + + let mut cmd = wd.command(); + cmd.arg("--stats"); + cmd.arg("Sherlock"); + + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines.contains("4 matched lines"), true); + assert_eq!(lines.contains("2 files contained matches"), true); + assert_eq!(lines.contains("2 files searched"), true); + assert_eq!(lines.contains("seconds"), true); +} + +sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| { + cmd.arg("--files-with-matches"); + cmd.arg("--stats"); + + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines.contains("seconds"), false); +}); + +sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| { + cmd.arg("--files-without-match"); + cmd.arg("--stats"); + + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines.contains("seconds"), false); +}); + #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740");