From 7976b188af9c89ced3e5d8064fa7255d7a2dc089 Mon Sep 17 00:00:00 2001 From: reuben olinsky Date: Tue, 15 Oct 2024 08:41:18 -0700 Subject: [PATCH] feat: implement command hashing (#206) * Implement hash builtin * Update simple command resolution to go through hashed paths (when appropriate) * Simple optimization for executable lookup in completion code path --- brush-core/src/builtins.rs | 1 + brush-core/src/builtins/command.rs | 5 +- brush-core/src/builtins/factory.rs | 2 +- brush-core/src/builtins/hash.rs | 99 +++++++++++++++++++ brush-core/src/builtins/printf.rs | 3 +- brush-core/src/builtins/type_.rs | 36 +++++-- brush-core/src/commands.rs | 38 +++++-- brush-core/src/error.rs | 4 + brush-core/src/escape.rs | 22 +++-- brush-core/src/lib.rs | 1 + brush-core/src/pathcache.rs | 43 ++++++++ brush-core/src/shell.rs | 48 ++++++++- brush-interactive/src/reedline/highlighter.rs | 2 +- brush-shell/tests/cases/builtins/hash.yaml | 63 ++++++++++++ brush-shell/tests/cases/builtins/type.yaml | 25 +++++ 15 files changed, 363 insertions(+), 29 deletions(-) create mode 100644 brush-core/src/builtins/hash.rs create mode 100644 brush-core/src/pathcache.rs create mode 100644 brush-shell/tests/cases/builtins/hash.yaml diff --git a/brush-core/src/builtins.rs b/brush-core/src/builtins.rs index 7ba09126..bd1340df 100644 --- a/brush-core/src/builtins.rs +++ b/brush-core/src/builtins.rs @@ -32,6 +32,7 @@ mod factory; mod false_; mod fg; mod getopts; +mod hash; mod help; mod jobs; #[cfg(unix)] diff --git a/brush-core/src/builtins/command.rs b/brush-core/src/builtins/command.rs index eb6ed8b6..e4d1c1fe 100644 --- a/brush-core/src/builtins/command.rs +++ b/brush-core/src/builtins/command.rs @@ -82,7 +82,7 @@ impl Display for FoundCommand { impl CommandCommand { #[allow(clippy::unwrap_in_result)] - fn try_find_command(&self, shell: &shell::Shell) -> Option { + fn try_find_command(&self, shell: &mut shell::Shell) -> Option { // Look in path. if self.command_name.contains(std::path::MAIN_SEPARATOR) { let candidate_path = shell.get_absolute_path(Path::new(&self.command_name)); @@ -105,8 +105,7 @@ impl CommandCommand { } shell - .find_executables_in_path(self.command_name.as_str()) - .first() + .find_first_executable_in_path_using_cache(&self.command_name) .map(|path| FoundCommand::External(path.to_string_lossy().to_string())) } } diff --git a/brush-core/src/builtins/factory.rs b/brush-core/src/builtins/factory.rs index be8462db..3454a483 100644 --- a/brush-core/src/builtins/factory.rs +++ b/brush-core/src/builtins/factory.rs @@ -235,6 +235,7 @@ pub(crate) fn get_default_builtins( m.insert("false".into(), builtin::()); m.insert("fg".into(), builtin::()); m.insert("getopts".into(), builtin::()); + m.insert("hash".into(), builtin::()); m.insert("help".into(), builtin::()); m.insert("jobs".into(), builtin::()); #[cfg(unix)] @@ -251,7 +252,6 @@ pub(crate) fn get_default_builtins( // TODO: Unimplemented non-special builtins m.insert("fc".into(), builtin::()); - m.insert("hash".into(), builtin::()); m.insert("ulimit".into(), builtin::()); if !options.sh_mode { diff --git a/brush-core/src/builtins/hash.rs b/brush-core/src/builtins/hash.rs new file mode 100644 index 00000000..70772e13 --- /dev/null +++ b/brush-core/src/builtins/hash.rs @@ -0,0 +1,99 @@ +use clap::Parser; +use std::{io::Write, path::PathBuf}; + +use crate::{builtins, commands}; + +#[derive(Parser)] +pub(crate) struct HashCommand { + /// Remove entries associated with the given names. + #[arg(short = 'd')] + remove: bool, + + /// Display paths in a format usable for input. + #[arg(short = 'l')] + display_as_usable_input: bool, + + /// The path to associate with the names. + #[arg(short = 'p')] + path_to_use: Option, + + /// Remove all entries. + #[arg(short = 'r')] + remove_all: bool, + + /// Display the paths associated with the names. + #[arg(short = 't')] + display_paths: bool, + + /// Names to process. + names: Vec, +} + +impl builtins::Command for HashCommand { + async fn execute( + &self, + context: commands::ExecutionContext<'_>, + ) -> Result { + let mut result = builtins::ExitCode::Success; + + if self.remove_all { + context.shell.program_location_cache.reset(); + } else if self.remove { + for name in &self.names { + if !context.shell.program_location_cache.unset(name) { + writeln!(context.stderr(), "{name}: not found")?; + result = builtins::ExitCode::Custom(1); + } + } + } else if self.display_paths { + for name in &self.names { + if let Some(path) = context.shell.program_location_cache.get(name) { + if self.display_as_usable_input { + writeln!( + context.stdout(), + "builtin hash -p {} {name}", + path.to_string_lossy() + )?; + } else { + let mut prefix = String::new(); + + if self.names.len() > 1 { + prefix.push_str(name.as_str()); + prefix.push('\t'); + } + + writeln!( + context.stdout(), + "{prefix}{}", + path.to_string_lossy().as_ref() + )?; + } + } else { + writeln!(context.stderr(), "{name}: not found")?; + result = builtins::ExitCode::Custom(1); + } + } + } else if let Some(path) = &self.path_to_use { + for name in &self.names { + context.shell.program_location_cache.set(name, path.clone()); + } + } else { + for name in &self.names { + // Remove from the cache if already hashed. + let _ = context.shell.program_location_cache.unset(name); + + // Hash the path. + if context + .shell + .find_first_executable_in_path_using_cache(name) + .is_none() + { + writeln!(context.stderr(), "{name}: not found")?; + result = builtins::ExitCode::Custom(1); + } + } + } + + Ok(result) + } +} diff --git a/brush-core/src/builtins/printf.rs b/brush-core/src/builtins/printf.rs index 248d24ac..5e3f4fd8 100644 --- a/brush-core/src/builtins/printf.rs +++ b/brush-core/src/builtins/printf.rs @@ -54,7 +54,8 @@ impl PrintfCommand { } fn evaluate_format_with_percent_q(prefix: Option<&str>, arg: &str) -> String { - let mut result = escape::quote_if_needed(arg, escape::QuoteMode::BackslashEscape); + let mut result = + escape::quote_if_needed(arg, escape::QuoteMode::BackslashEscape).to_string(); if let Some(prefix) = prefix { result.insert_str(0, prefix); diff --git a/brush-core/src/builtins/type_.rs b/brush-core/src/builtins/type_.rs index 87239811..1aad6588 100644 --- a/brush-core/src/builtins/type_.rs +++ b/brush-core/src/builtins/type_.rs @@ -41,7 +41,7 @@ enum ResolvedType { Keyword, Function(Arc), Builtin, - File(PathBuf), + File { path: PathBuf, hashed: bool }, } impl builtins::Command for TypeCommand { @@ -64,7 +64,7 @@ impl builtins::Command for TypeCommand { } for resolved_type in resolved_types { - if self.show_path_only && !matches!(resolved_type, ResolvedType::File(_)) { + if self.show_path_only && !matches!(resolved_type, ResolvedType::File { .. }) { // Do nothing. } else if self.type_only { match resolved_type { @@ -80,7 +80,7 @@ impl builtins::Command for TypeCommand { ResolvedType::Builtin => { writeln!(context.stdout(), "builtin")?; } - ResolvedType::File(path) => { + ResolvedType::File { path, .. } => { if self.show_path_only || self.force_path_search { writeln!(context.stdout(), "{}", path.to_string_lossy())?; } else { @@ -103,9 +103,21 @@ impl builtins::Command for TypeCommand { ResolvedType::Builtin => { writeln!(context.stdout(), "{name} is a shell builtin")?; } - ResolvedType::File(path) => { - if self.show_path_only || self.force_path_search { + ResolvedType::File { path, hashed } => { + if hashed && self.all_locations && !self.force_path_search { + // Do nothing. + } else if self.show_path_only || self.force_path_search { writeln!(context.stdout(), "{}", path.to_string_lossy())?; + if hashed { + break; + } + } else if hashed { + writeln!( + context.stdout(), + "{name} is hashed ({path})", + name = name, + path = path.to_string_lossy() + )?; } else { writeln!( context.stdout(), @@ -160,11 +172,21 @@ impl TypeCommand { // Look in path. if name.contains(std::path::MAIN_SEPARATOR) { if shell.get_absolute_path(Path::new(name)).executable() { - types.push(ResolvedType::File(PathBuf::from(name))); + types.push(ResolvedType::File { + path: PathBuf::from(name), + hashed: false, + }); } } else { + if let Some(path) = shell.program_location_cache.get(name) { + types.push(ResolvedType::File { path, hashed: true }); + } + for item in shell.find_executables_in_path(name) { - types.push(ResolvedType::File(item)); + types.push(ResolvedType::File { + path: item, + hashed: false, + }); } } diff --git a/brush-core/src/commands.rs b/brush-core/src/commands.rs index dc39a072..fbdb1c6b 100644 --- a/brush-core/src/commands.rs +++ b/brush-core/src/commands.rs @@ -1,6 +1,6 @@ #[cfg(unix)] use std::os::unix::process::CommandExt; -use std::{ffi::OsStr, fmt::Display, process::Stdio, sync::Arc}; +use std::{borrow::Cow, ffi::OsStr, fmt::Display, process::Stdio, sync::Arc}; use brush_parser::ast; #[cfg(unix)] @@ -178,7 +178,7 @@ impl From<&String> for CommandArg { } impl CommandArg { - pub fn quote_for_tracing(&self) -> String { + pub fn quote_for_tracing(&self) -> Cow<'_, str> { match self { CommandArg::String(s) => escape::quote_if_needed(s, escape::QuoteMode::Quote), CommandArg::Assignment(a) => { @@ -189,7 +189,7 @@ impl CommandArg { a.value.to_string().as_str(), escape::QuoteMode::Quote, )); - s + s.into() } } } @@ -320,15 +320,39 @@ pub(crate) async fn execute( return execute_builtin_command(&builtin, cmd_context, args).await; } } - } - // Strip the command name off args. - execute_external_command(cmd_context, process_group_id, &args[1..]) + if let Some(path) = cmd_context + .shell + .find_first_executable_in_path_using_cache(&cmd_context.command_name) + { + let resolved_path = path.to_string_lossy(); + execute_external_command( + cmd_context, + resolved_path.as_ref(), + process_group_id, + &args[1..], + ) + } else { + tracing::error!("{}: command not found", cmd_context.command_name); + Ok(CommandSpawnResult::ImmediateExit(127)) + } + } else { + let resolved_path = cmd_context.command_name.clone(); + + // Strip the command name off args. + execute_external_command( + cmd_context, + resolved_path.as_str(), + process_group_id, + &args[1..], + ) + } } #[allow(clippy::too_many_lines)] pub(crate) fn execute_external_command( context: ExecutionContext<'_>, + executable_path: &str, process_group_id: &mut Option, args: &[CommandArg], ) -> Result { @@ -355,7 +379,7 @@ pub(crate) fn execute_external_command( #[allow(unused_mut)] let mut cmd = compose_std_command( context.shell, - context.command_name.as_str(), + executable_path, context.command_name.as_str(), cmd_args.as_slice(), context.params.open_files, diff --git a/brush-core/src/error.rs b/brush-core/src/error.rs index a99c25d3..d2e890dc 100644 --- a/brush-core/src/error.rs +++ b/brush-core/src/error.rs @@ -43,6 +43,10 @@ pub enum Error { #[error("function not found: {0}")] FunctionNotFound(String), + /// Command was not found. + #[error("command not found: {0}")] + CommandNotFound(String), + /// The requested functionality has not yet been implemented in this shell. #[error("UNIMPLEMENTED: {0}")] Unimplemented(&'static str), diff --git a/brush-core/src/escape.rs b/brush-core/src/escape.rs index 8d6d09e8..6ebea540 100644 --- a/brush-core/src/escape.rs +++ b/brush-core/src/escape.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use itertools::Itertools; use crate::error; @@ -171,14 +173,18 @@ pub(crate) enum QuoteMode { Quote, } -pub(crate) fn quote_if_needed>(s: S, mode: QuoteMode) -> String { +pub(crate) fn quote_if_needed(s: &str, mode: QuoteMode) -> Cow<'_, str> { match mode { - QuoteMode::BackslashEscape => escape_with_backslash(s.as_ref()), - QuoteMode::Quote => escape_with_quoting(s.as_ref()), + QuoteMode::BackslashEscape => escape_with_backslash(s), + QuoteMode::Quote => escape_with_quoting(s), } } -fn escape_with_backslash(s: &str) -> String { +fn escape_with_backslash(s: &str) -> Cow<'_, str> { + if !s.chars().any(needs_escaping) { + return s.into(); + } + let mut output = String::new(); // TODO: Handle other interesting sequences. @@ -192,15 +198,15 @@ fn escape_with_backslash(s: &str) -> String { } } - output + output.into() } -fn escape_with_quoting(s: &str) -> String { +fn escape_with_quoting(s: &str) -> Cow<'_, str> { // TODO: Handle single-quote! if s.is_empty() || s.chars().any(needs_escaping) { - std::format!("'{s}'") + std::format!("'{s}'").into() } else { - s.to_owned() + s.into() } } diff --git a/brush-core/src/lib.rs b/brush-core/src/lib.rs index 3aa3e08f..481619f9 100644 --- a/brush-core/src/lib.rs +++ b/brush-core/src/lib.rs @@ -20,6 +20,7 @@ mod keywords; mod namedoptions; mod openfiles; mod options; +mod pathcache; mod patterns; mod processes; mod prompt; diff --git a/brush-core/src/pathcache.rs b/brush-core/src/pathcache.rs new file mode 100644 index 00000000..b36b357a --- /dev/null +++ b/brush-core/src/pathcache.rs @@ -0,0 +1,43 @@ +use std::path::PathBuf; + +/// A cache of paths associated with names. +#[derive(Clone, Default)] +pub struct PathCache { + /// The cache itself. + cache: std::collections::HashMap, +} + +impl PathCache { + /// Clears all elements from the cache. + pub fn reset(&mut self) { + self.cache.clear(); + } + + /// Returns the path associated with the given name. + /// + /// # Arguments + /// + /// * `name` - The name to lookup. + pub fn get>(&self, name: S) -> Option { + self.cache.get(name.as_ref()).cloned() + } + + /// Sets the path associated with the given name. + /// + /// # Arguments + /// + /// * `name` - The name to set. + pub fn set>(&mut self, name: S, path: PathBuf) { + self.cache.insert(name.as_ref().to_string(), path); + } + + /// Removes the path associated with the given name, if there is one. + /// Returns whether or not an entry was removed. + /// + /// # Arguments + /// + /// * `name` - The name to remove. + pub fn unset>(&mut self, name: S) -> bool { + self.cache.remove(name.as_ref()).is_some() + } +} diff --git a/brush-core/src/shell.rs b/brush-core/src/shell.rs index 72ab6ca5..b6bf2e80 100644 --- a/brush-core/src/shell.rs +++ b/brush-core/src/shell.rs @@ -10,12 +10,12 @@ use crate::env::{EnvironmentLookup, EnvironmentScope, ShellEnvironment}; use crate::interp::{self, Execute, ExecutionParameters, ExecutionResult}; use crate::options::RuntimeOptions; use crate::sys::fs::PathExt; -use crate::trace_categories; use crate::variables::{self, ShellValue, ShellVariable}; use crate::{ builtins, commands, completion, env, error, expansion, functions, jobs, keywords, openfiles, patterns, prompt, sys::users, traps, }; +use crate::{pathcache, trace_categories}; /// Represents an instance of a shell. pub struct Shell { @@ -72,6 +72,9 @@ pub struct Shell { /// Shell built-in commands. pub builtins: HashMap, + + /// Shell program location cache. + pub program_location_cache: pathcache::PathCache, } impl Clone for Shell { @@ -95,6 +98,7 @@ impl Clone for Shell { current_line_number: self.current_line_number, completion_config: self.completion_config.clone(), builtins: self.builtins.clone(), + program_location_cache: self.program_location_cache.clone(), depth: self.depth + 1, } } @@ -189,6 +193,7 @@ impl Shell { current_line_number: 0, completion_config: completion::Config::default(), builtins: builtins::get_default_builtins(options), + program_location_cache: pathcache::PathCache::default(), depth: 0, }; @@ -901,6 +906,47 @@ impl Shell { executables } + /// Determines whether the given filename is the name of an executable in one of the + /// directories in the shell's current PATH. If found, returns the path. + /// + /// # Arguments + /// + /// * `candidate_name` - The name of the file to look for. + pub fn find_first_executable_in_path>( + &self, + candidate_name: S, + ) -> Option { + for dir_str in self.env.get_str("PATH").unwrap_or_default().split(':') { + let candidate_path = Path::new(dir_str).join(candidate_name.as_ref()); + if candidate_path.executable() { + return Some(candidate_path); + } + } + None + } + + /// Uses the shell's hash-based path cache to check whether the given filename is the name + /// of an executable in one of the directories in the shell's current PATH. If found, + /// ensures the path is in the cache and returns it. + /// + /// # Arguments + /// + /// * `candidate_name` - The name of the file to look for. + pub fn find_first_executable_in_path_using_cache>( + &mut self, + candidate_name: S, + ) -> Option { + if let Some(cached_path) = self.program_location_cache.get(&candidate_name) { + Some(cached_path) + } else if let Some(found_path) = self.find_first_executable_in_path(&candidate_name) { + self.program_location_cache + .set(&candidate_name, found_path.clone()); + Some(found_path) + } else { + None + } + } + /// Gets the absolute form of the given path. /// /// # Arguments diff --git a/brush-interactive/src/reedline/highlighter.rs b/brush-interactive/src/reedline/highlighter.rs index 6b2f04d9..85ef58dc 100644 --- a/brush-interactive/src/reedline/highlighter.rs +++ b/brush-interactive/src/reedline/highlighter.rs @@ -317,7 +317,7 @@ impl<'a> StyledInputLine<'a> { CommandType::NotFound } } else { - if !self.shell.find_executables_in_path(name).is_empty() { + if self.shell.find_first_executable_in_path(name).is_some() { CommandType::External } else { CommandType::NotFound diff --git a/brush-shell/tests/cases/builtins/hash.yaml b/brush-shell/tests/cases/builtins/hash.yaml new file mode 100644 index 00000000..fef5bdd5 --- /dev/null +++ b/brush-shell/tests/cases/builtins/hash.yaml @@ -0,0 +1,63 @@ +name: "Builtins: hash" +cases: + - name: "Non-existent program" + ignore_stderr: true + stdin: | + hash non-existent-program + echo "Result: $?" + + - name: "Re-hash non-existent program" + ignore_stderr: true + stdin: | + hash -p /some/path non-existent-program + hash -t non-existent-program && echo "1. Result: $?" + hash non-existent-program && echo "2. Result: $?" + hash -t non-existent-program && echo "3. Result: $?" + + - name: "Existent program" + stdin: | + hash ls + echo "Result: $?" + + - name: "Display not-yet-hashed program" + ignore_stderr: true + stdin: | + hash -t ls + echo "1. Result: $?" + ls >/dev/null 2>&1 + hash -t ls + echo "2. Result: $?" + + - name: "Display hashed program" + stdin: | + hash -p /some/path somecmd && echo "1. Result: $?" + hash -t somecmd && echo "2. Result: $?" + + - name: "Display multiple hashed programs" + stdin: | + hash -p /some/path somecmd1 && echo "1. Result: $?" + hash -p /some/path somecmd2 && echo "2. Result: $?" + hash -t somecmd1 somecmd2 && echo "3. Result: $?" + + - name: "Display -l path" + stdin: | + hash -p "/some/spaces path" somecmd && echo "1. Result: $?" + hash -t somecmd && echo "2. Result: $?" + hash -t -l somecmd && echo "3. Result: $?" + + - name: "Remove" + ignore_stderr: true + stdin: | + hash -p /some/path somecmd && echo "1. Result: $?" + hash -t somecmd && echo "2. Result: $?" + hash -d somecmd && echo "3. Result: $?" + hash -t somecmd && echo "4. Result: $?" + + - name: "Remove all" + ignore_stderr: true + stdin: | + hash -p /some/path somecmd1 && echo "1. Result: $?" + hash -p /some/path somecmd2 && echo "2. Result: $?" + hash -r && echo "3. Result: $?" + hash -t somecmd1 && echo "4. Result: $?" + hash -t somecmd2 && echo "5. Result: $?" diff --git a/brush-shell/tests/cases/builtins/type.yaml b/brush-shell/tests/cases/builtins/type.yaml index 63fb3418..2ff9ee1f 100644 --- a/brush-shell/tests/cases/builtins/type.yaml +++ b/brush-shell/tests/cases/builtins/type.yaml @@ -48,3 +48,28 @@ cases: stdin: | function myfunc() { echo "Hello, world!"; } type -a myfunc + + - name: Test type with hashed path + stdin: | + hash -p /some/ls ls + type ls + + - name: Test type -a with hashed path + stdin: | + hash -p /some/ls ls + type -a ls + + - name: Test type -P with hashed path + stdin: | + hash -p /some/ls ls + type -P ls + + - name: Test type -P -a with hashed path + stdin: | + hash -p /some/ls ls + type -P -a ls + + - name: Test type -p -a with hashed path + stdin: | + hash -p /some/ls ls + type -p -a ls