From 9e64e7cdb48e9381eb017ebf3c31f606c7c4048e Mon Sep 17 00:00:00 2001 From: Tom Solberg Date: Sat, 21 Oct 2023 11:49:00 +0200 Subject: [PATCH 1/3] Remove rustfmt.toml --- src/rust/engine/rustfmt.toml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 src/rust/engine/rustfmt.toml diff --git a/src/rust/engine/rustfmt.toml b/src/rust/engine/rustfmt.toml deleted file mode 100644 index 8a3dc1612e9..00000000000 --- a/src/rust/engine/rustfmt.toml +++ /dev/null @@ -1,5 +0,0 @@ -tab_spaces = 2 - -# As-of 1.25.0: -# Warning: can't set `fn_args_density = Tall`, unstable features are only available in nightly channel. -#fn_args_density = "Tall" From 6ee88868f94f241810790059747ab3d233f2b22b Mon Sep 17 00:00:00 2001 From: Tom Solberg Date: Sat, 21 Oct 2023 11:49:02 +0200 Subject: [PATCH 2/3] Format Rust code --- src/rust/engine/address/src/lib.rs | 38 +- src/rust/engine/async_latch/src/lib.rs | 84 +- src/rust/engine/async_latch/src/tests.rs | 28 +- src/rust/engine/async_value/src/lib.rs | 156 +- src/rust/engine/async_value/src/tests.rs | 78 +- src/rust/engine/build.rs | 34 +- src/rust/engine/cache/src/lib.rs | 85 +- src/rust/engine/client/src/client.rs | 145 +- src/rust/engine/client/src/client_tests.rs | 64 +- src/rust/engine/client/src/lib.rs | 20 +- src/rust/engine/client/src/lib_tests.rs | 20 +- src/rust/engine/client/src/main.rs | 218 +- src/rust/engine/concrete_time/src/lib.rs | 161 +- src/rust/engine/concrete_time/src/tests.rs | 84 +- src/rust/engine/dep_inference/build.rs | 255 +- .../src/javascript/import_pattern.rs | 162 +- .../dep_inference/src/javascript/mod.rs | 267 +- .../dep_inference/src/javascript/tests.rs | 452 +- .../dep_inference/src/javascript/util.rs | 46 +- src/rust/engine/dep_inference/src/lib.rs | 20 +- .../engine/dep_inference/src/python/mod.rs | 652 ++- .../engine/dep_inference/src/python/tests.rs | 982 ++-- src/rust/engine/fs/brfs/src/main.rs | 1381 ++--- src/rust/engine/fs/brfs/src/syscall_tests.rs | 54 +- src/rust/engine/fs/brfs/src/tests.rs | 380 +- src/rust/engine/fs/fs_util/src/main.rs | 948 ++-- src/rust/engine/fs/src/directory.rs | 2067 +++---- src/rust/engine/fs/src/directory_tests.rs | 618 +-- src/rust/engine/fs/src/gitignore.rs | 436 +- src/rust/engine/fs/src/glob_matching.rs | 1356 ++--- src/rust/engine/fs/src/glob_matching_tests.rs | 44 +- src/rust/engine/fs/src/lib.rs | 1065 ++-- src/rust/engine/fs/src/posixfs_tests.rs | 603 ++- src/rust/engine/fs/src/tests.rs | 20 +- src/rust/engine/fs/store/benches/store.rs | 608 +-- .../engine/fs/store/src/immutable_inputs.rs | 219 +- src/rust/engine/fs/store/src/lib.rs | 2773 +++++----- src/rust/engine/fs/store/src/local.rs | 1744 +++--- src/rust/engine/fs/store/src/local_tests.rs | 1280 ++--- src/rust/engine/fs/store/src/remote.rs | 290 +- src/rust/engine/fs/store/src/remote_tests.rs | 563 +- src/rust/engine/fs/store/src/snapshot.rs | 395 +- src/rust/engine/fs/store/src/snapshot_ops.rs | 383 +- .../engine/fs/store/src/snapshot_ops_tests.rs | 274 +- .../engine/fs/store/src/snapshot_tests.rs | 916 ++-- src/rust/engine/fs/store/src/tests.rs | 2854 +++++----- src/rust/engine/graph/src/context.rs | 245 +- src/rust/engine/graph/src/entry.rs | 1484 +++--- src/rust/engine/graph/src/lib.rs | 1215 ++--- src/rust/engine/graph/src/node.rs | 110 +- src/rust/engine/graph/src/tests.rs | 1703 +++--- src/rust/engine/grpc_util/build.rs | 34 +- src/rust/engine/grpc_util/src/channel.rs | 489 +- src/rust/engine/grpc_util/src/headers.rs | 78 +- src/rust/engine/grpc_util/src/hyper_util.rs | 16 +- src/rust/engine/grpc_util/src/lib.rs | 
318 +- src/rust/engine/grpc_util/src/metrics.rs | 240 +- src/rust/engine/grpc_util/src/prost.rs | 43 +- src/rust/engine/grpc_util/src/retry.rs | 246 +- src/rust/engine/grpc_util/src/tls.rs | 322 +- src/rust/engine/hashing/src/digest_tests.rs | 42 +- .../engine/hashing/src/fingerprint_tests.rs | 95 +- src/rust/engine/hashing/src/hasher_tests.rs | 84 +- src/rust/engine/hashing/src/lib.rs | 553 +- src/rust/engine/logging/build.rs | 66 +- src/rust/engine/logging/src/lib.rs | 102 +- src/rust/engine/logging/src/logger.rs | 398 +- src/rust/engine/nailgun/src/client.rs | 180 +- src/rust/engine/nailgun/src/lib.rs | 20 +- src/rust/engine/nailgun/src/server.rs | 637 +-- src/rust/engine/nailgun/src/tests.rs | 126 +- src/rust/engine/options/src/args.rs | 173 +- src/rust/engine/options/src/args_tests.rs | 230 +- src/rust/engine/options/src/build_root.rs | 72 +- .../engine/options/src/build_root_tests.rs | 78 +- src/rust/engine/options/src/config.rs | 361 +- src/rust/engine/options/src/config_tests.rs | 80 +- src/rust/engine/options/src/env.rs | 149 +- src/rust/engine/options/src/env_tests.rs | 305 +- src/rust/engine/options/src/id.rs | 123 +- src/rust/engine/options/src/id_tests.rs | 68 +- src/rust/engine/options/src/lib.rs | 440 +- src/rust/engine/options/src/parse.rs | 128 +- src/rust/engine/options/src/parse_tests.rs | 270 +- src/rust/engine/options/src/types.rs | 48 +- src/rust/engine/pantsd/src/lib.rs | 535 +- src/rust/engine/pantsd/src/pantsd_testing.rs | 77 +- src/rust/engine/pantsd/src/pantsd_tests.rs | 26 +- .../process_execution/docker/src/docker.rs | 1564 +++--- .../docker/src/docker_tests.rs | 1271 ++--- .../process_execution/docker/src/lib.rs | 20 +- .../process_execution/pe_nailgun/src/lib.rs | 414 +- .../pe_nailgun/src/nailgun_pool.rs | 880 +-- .../src/parsed_jvm_command_lines.rs | 186 +- .../src/parsed_jvm_command_lines_tests.rs | 299 +- .../process_execution/pe_nailgun/src/tests.rs | 78 +- .../process_execution/remote/src/lib.rs | 20 +- .../process_execution/remote/src/remote.rs | 1878 +++---- .../remote/src/remote_cache.rs | 998 ++-- .../remote/src/remote_cache_tests.rs | 1414 ++--- .../remote/src/remote_tests.rs | 4740 +++++++++-------- .../engine/process_execution/src/bounded.rs | 680 ++- .../process_execution/src/bounded_tests.rs | 684 +-- .../engine/process_execution/src/cache.rs | 442 +- .../process_execution/src/cache_tests.rs | 286 +- .../engine/process_execution/src/children.rs | 277 +- src/rust/engine/process_execution/src/lib.rs | 2394 ++++----- .../engine/process_execution/src/local.rs | 1390 ++--- .../process_execution/src/local_tests.rs | 1323 ++--- .../process_execution/src/named_caches.rs | 202 +- .../src/named_caches_tests.rs | 14 +- .../engine/process_execution/src/switched.rs | 177 +- .../engine/process_execution/src/tests.rs | 256 +- src/rust/engine/process_executor/src/main.rs | 1082 ++-- src/rust/engine/protos/build.rs | 20 +- src/rust/engine/protos/src/conversions.rs | 68 +- .../engine/protos/src/conversions_tests.rs | 72 +- src/rust/engine/protos/src/hashing.rs | 20 +- src/rust/engine/protos/src/lib.rs | 70 +- src/rust/engine/protos/src/verification.rs | 100 +- .../engine/protos/src/verification_tests.rs | 304 +- .../src/action_cache_tests.rs | 134 +- .../src/byte_store_tests.rs | 388 +- .../remote_provider_opendal/src/lib.rs | 530 +- .../remote_provider_reapi/src/action_cache.rs | 162 +- .../src/action_cache_tests.rs | 163 +- .../remote_provider_reapi/src/byte_store.rs | 681 +-- .../src/byte_store_tests.rs | 658 ++- .../remote_provider_reapi/src/lib.rs | 48 +- 
.../remote_provider_traits/src/lib.rs | 138 +- src/rust/engine/remote_provider/src/lib.rs | 170 +- src/rust/engine/rule_graph/src/builder.rs | 2859 +++++----- src/rust/engine/rule_graph/src/lib.rs | 790 +-- src/rust/engine/rule_graph/src/rules.rs | 375 +- src/rust/engine/rule_graph/src/tests.rs | 1944 +++---- src/rust/engine/sharded_lmdb/src/lib.rs | 1359 ++--- src/rust/engine/sharded_lmdb/src/tests.rs | 114 +- src/rust/engine/src/context.rs | 1575 +++--- src/rust/engine/src/downloads.rs | 318 +- src/rust/engine/src/externs/address.rs | 1386 ++--- src/rust/engine/src/externs/dep_inference.rs | 126 +- src/rust/engine/src/externs/engine_aware.rs | 162 +- src/rust/engine/src/externs/fs.rs | 748 ++- src/rust/engine/src/externs/interface.rs | 2748 +++++----- .../engine/src/externs/interface_tests.rs | 26 +- src/rust/engine/src/externs/mod.rs | 876 ++- src/rust/engine/src/externs/nailgun.rs | 87 +- src/rust/engine/src/externs/pantsd.rs | 36 +- src/rust/engine/src/externs/process.rs | 182 +- src/rust/engine/src/externs/scheduler.rs | 75 +- src/rust/engine/src/externs/stdio.rs | 122 +- src/rust/engine/src/externs/target.rs | 429 +- src/rust/engine/src/externs/testutil.rs | 92 +- src/rust/engine/src/externs/workunits.rs | 6 +- src/rust/engine/src/interning.rs | 46 +- src/rust/engine/src/intrinsics.rs | 1493 +++--- src/rust/engine/src/lib.rs | 22 +- src/rust/engine/src/nodes.rs | 2782 +++++----- src/rust/engine/src/python.rs | 757 ++- src/rust/engine/src/scheduler.rs | 558 +- src/rust/engine/src/session.rs | 783 ++- src/rust/engine/src/tasks.rs | 451 +- src/rust/engine/src/types.rs | 66 +- src/rust/engine/stdio/src/lib.rs | 618 +-- src/rust/engine/stdio/src/term.rs | 185 +- src/rust/engine/task_executor/src/lib.rs | 586 +- .../engine/testutil/local_cas/src/main.rs | 92 +- .../local_execution_server/src/main.rs | 102 +- .../testutil/mock/src/action_cache_service.rs | 213 +- src/rust/engine/testutil/mock/src/cas.rs | 393 +- .../engine/testutil/mock/src/cas_service.rs | 1167 ++-- .../testutil/mock/src/execution_server.rs | 768 +-- src/rust/engine/testutil/mock/src/lib.rs | 20 +- src/rust/engine/testutil/src/data.rs | 756 +-- src/rust/engine/testutil/src/file.rs | 60 +- src/rust/engine/testutil/src/lib.rs | 42 +- src/rust/engine/testutil/src/path.rs | 26 +- src/rust/engine/tryfuture/src/lib.rs | 102 +- src/rust/engine/ui/src/instance.rs | 127 +- src/rust/engine/ui/src/instance/indicatif.rs | 216 +- src/rust/engine/ui/src/instance/prodash.rs | 262 +- src/rust/engine/ui/src/lib.rs | 166 +- src/rust/engine/watch/src/lib.rs | 559 +- src/rust/engine/watch/src/tests.rs | 232 +- src/rust/engine/workunit_store/src/lib.rs | 1349 ++--- src/rust/engine/workunit_store/src/metrics.rs | 148 +- src/rust/engine/workunit_store/src/tests.rs | 314 +- 187 files changed, 48089 insertions(+), 47833 deletions(-) diff --git a/src/rust/engine/address/src/lib.rs b/src/rust/engine/address/src/lib.rs index fa40337c91a..89e3b778217 100644 --- a/src/rust/engine/address/src/lib.rs +++ b/src/rust/engine/address/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. 
#![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -26,17 +26,17 @@ #![allow(clippy::mutex_atomic)] pub struct AddressInput<'a> { - pub path: &'a str, - pub target: Option<&'a str>, - pub generated: Option<&'a str>, - pub parameters: Vec<(&'a str, &'a str)>, + pub path: &'a str, + pub target: Option<&'a str>, + pub generated: Option<&'a str>, + pub parameters: Vec<(&'a str, &'a str)>, } pub struct SpecInput<'a> { - /// The address (or literal, if no target/generated/parameters were specified) portion. - pub address: AddressInput<'a>, - /// If a spec wildcard was specified (`:` or `::`), its value. - pub wildcard: Option<&'a str>, + /// The address (or literal, if no target/generated/parameters were specified) portion. + pub address: AddressInput<'a>, + /// If a spec wildcard was specified (`:` or `::`), its value. + pub wildcard: Option<&'a str>, } peg::parser! { @@ -90,5 +90,5 @@ peg::parser! { } pub fn parse_address_spec(value: &str) -> Result<SpecInput, String> { - parsers::spec(value).map_err(|e| format!("Failed to parse address spec `{value}`: {e}")) + parsers::spec(value).map_err(|e| format!("Failed to parse address spec `{value}`: {e}")) } diff --git a/src/rust/engine/async_latch/src/lib.rs b/src/rust/engine/async_latch/src/lib.rs index 25c5b1c8e26..21a4741c6a6 100644 --- a/src/rust/engine/async_latch/src/lib.rs +++ b/src/rust/engine/async_latch/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is.
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -40,45 +40,45 @@ use tokio::sync::watch; /// #[derive(Clone)] pub struct AsyncLatch { - sender: Arc<Mutex<Option<watch::Sender<()>>>>, - receiver: watch::Receiver<()>, + sender: Arc<Mutex<Option<watch::Sender<()>>>>, + receiver: watch::Receiver<()>, } impl AsyncLatch { - pub fn new() -> AsyncLatch { - let (sender, receiver) = watch::channel(()); - AsyncLatch { - sender: Arc::new(Mutex::new(Some(sender))), - receiver, + pub fn new() -> AsyncLatch { + let (sender, receiver) = watch::channel(()); + AsyncLatch { + sender: Arc::new(Mutex::new(Some(sender))), + receiver, + } } - } - /// - /// Mark this latch triggered, releasing all threads that are waiting for it to trigger. - /// - /// All calls to trigger after the first one are noops. - /// - pub fn trigger(&self) { - // To trigger the latch, we drop the Sender. - self.sender.lock().take(); - } + /// + /// Mark this latch triggered, releasing all threads that are waiting for it to trigger. + /// + /// All calls to trigger after the first one are noops. + /// + pub fn trigger(&self) { + // To trigger the latch, we drop the Sender. + self.sender.lock().take(); + } - /// - /// Wait for another thread to trigger this latch. - /// - pub async fn triggered(&self) { - // To see whether the latch is triggered, we clone the receiver, and then wait for our clone to - // return an Err, indicating that the Sender has been dropped. - let mut receiver = self.receiver.clone(); - while receiver.changed().await.is_ok() {} - } + /// + /// Wait for another thread to trigger this latch. + /// + pub async fn triggered(&self) { + // To see whether the latch is triggered, we clone the receiver, and then wait for our clone to + // return an Err, indicating that the Sender has been dropped. + let mut receiver = self.receiver.clone(); + while receiver.changed().await.is_ok() {} + } - /// - /// Return true if the latch has been triggered. - /// - pub fn poll_triggered(&self) -> bool { - self.sender.lock().is_none() - } + /// + /// Return true if the latch has been triggered. + /// + pub fn poll_triggered(&self) -> bool { + self.sender.lock().is_none() + } } #[cfg(test)] diff --git a/src/rust/engine/async_latch/src/tests.rs b/src/rust/engine/async_latch/src/tests.rs index bfe70527f2e..c084bd511e8 100644 --- a/src/rust/engine/async_latch/src/tests.rs +++ b/src/rust/engine/async_latch/src/tests.rs @@ -8,21 +8,21 @@ use crate::AsyncLatch; #[tokio::test] async fn basic() { - let latch = AsyncLatch::new(); + let latch = AsyncLatch::new(); - let mut join = tokio::spawn({ - let latch = latch.clone(); - async move { latch.triggered().await } - }); + let mut join = tokio::spawn({ + let latch = latch.clone(); + async move { latch.triggered().await } + }); - // Ensure that `triggered` doesn't return until `trigger` has been called. - tokio::select! { - _ = sleep(Duration::from_secs(1)) => {}, - _ = &mut join => { panic!("Background task should have continued to wait.") } - } - latch.trigger(); - join.await.unwrap(); + // Ensure that `triggered` doesn't return until `trigger` has been called. + tokio::select! { + _ = sleep(Duration::from_secs(1)) => {}, + _ = &mut join => { panic!("Background task should have continued to wait.") } + } + latch.trigger(); + join.await.unwrap(); - // And that calling `trigger` again is harmless.
+ latch.trigger(); } diff --git a/src/rust/engine/async_value/src/lib.rs b/src/rust/engine/async_value/src/lib.rs index 4f6dad329f4..e31d45e798c 100644 --- a/src/rust/engine/async_value/src/lib.rs +++ b/src/rust/engine/async_value/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -43,96 +43,94 @@ use tokio::sync::{mpsc, watch}; /// #[derive(Debug)] pub struct AsyncValue { - item_receiver: Weak>>, - interrupt_sender: mpsc::UnboundedSender, + item_receiver: Weak>>, + interrupt_sender: mpsc::UnboundedSender, } impl AsyncValue { - pub fn new() -> ( - AsyncValue, - AsyncValueSender, - AsyncValueReceiver, - ) { - let (interrupt_sender, interrupt_receiver) = mpsc::unbounded_channel(); - let (item_sender, item_receiver) = watch::channel(None); - let item_receiver = Arc::new(item_receiver); - ( - AsyncValue { - item_receiver: Arc::downgrade(&item_receiver), - interrupt_sender, - }, - AsyncValueSender { - item_sender, - interrupt_receiver, - }, - AsyncValueReceiver { item_receiver }, - ) - } + pub fn new() -> ( + AsyncValue, + AsyncValueSender, + AsyncValueReceiver, + ) { + let (interrupt_sender, interrupt_receiver) = mpsc::unbounded_channel(); + let (item_sender, item_receiver) = watch::channel(None); + let item_receiver = Arc::new(item_receiver); + ( + AsyncValue { + item_receiver: Arc::downgrade(&item_receiver), + interrupt_sender, + }, + AsyncValueSender { + item_sender, + interrupt_receiver, + }, + AsyncValueReceiver { item_receiver }, + ) + } - /// - /// Returns an AsyncValueReceiver for this value if the associated work has not already been - /// canceled. - /// - pub fn receiver(&self) -> Option> { - self - .item_receiver - .upgrade() - .map(|item_receiver| AsyncValueReceiver { item_receiver }) - } + /// + /// Returns an AsyncValueReceiver for this value if the associated work has not already been + /// canceled. 
+ /// + pub fn receiver(&self) -> Option> { + self.item_receiver + .upgrade() + .map(|item_receiver| AsyncValueReceiver { item_receiver }) + } - pub fn try_interrupt(&mut self, i: I) -> Result<(), I> { - self - .interrupt_sender - .send(i) - .map_err(|send_error| send_error.0) - } + pub fn try_interrupt(&mut self, i: I) -> Result<(), I> { + self.interrupt_sender + .send(i) + .map_err(|send_error| send_error.0) + } } pub struct AsyncValueReceiver { - item_receiver: Arc>>, + item_receiver: Arc>>, } impl AsyncValueReceiver { - /// - /// Returns a Future that will wait for the result of this value, or None if the work was - /// canceled. - /// - pub async fn recv(&self) -> Option { - let mut item_receiver = (*self.item_receiver).clone(); - loop { - if let Some(ref value) = *item_receiver.borrow() { - return Some(value.clone()); - } + /// + /// Returns a Future that will wait for the result of this value, or None if the work was + /// canceled. + /// + pub async fn recv(&self) -> Option { + let mut item_receiver = (*self.item_receiver).clone(); + loop { + if let Some(ref value) = *item_receiver.borrow() { + return Some(value.clone()); + } - // TODO: Remove the `allow` once https://github.com/rust-lang/rust-clippy/issues/8281 - // is fixed upstream. - #[allow(clippy::question_mark)] - if item_receiver.changed().await.is_err() { - return None; - } + // TODO: Remove the `allow` once https://github.com/rust-lang/rust-clippy/issues/8281 + // is fixed upstream. + #[allow(clippy::question_mark)] + if item_receiver.changed().await.is_err() { + return None; + } + } } - } } pub struct AsyncValueSender { - item_sender: watch::Sender>, - interrupt_receiver: mpsc::UnboundedReceiver, + item_sender: watch::Sender>, + interrupt_receiver: mpsc::UnboundedReceiver, } impl AsyncValueSender { - pub fn send(self, item: T) { - let _ = self.item_sender.send(Some(item)); - } + pub fn send(self, item: T) { + let _ = self.item_sender.send(Some(item)); + } - pub async fn interrupted(&mut self) -> Option { - let mut recv = pin!(self.interrupt_receiver.recv()); - tokio::select! { - res = &mut recv => { - res - } - _ = self.item_sender.closed() => { None } + pub async fn interrupted(&mut self) -> Option { + let mut recv = pin!(self.interrupt_receiver.recv()); + tokio::select! { + res = &mut recv => { + res + } + _ = self.item_sender.closed() => { None } + } } - } } #[cfg(test)] diff --git a/src/rust/engine/async_value/src/tests.rs b/src/rust/engine/async_value/src/tests.rs index 84c2d4e6677..e8bfe870195 100644 --- a/src/rust/engine/async_value/src/tests.rs +++ b/src/rust/engine/async_value/src/tests.rs @@ -8,64 +8,64 @@ use tokio::time::sleep; #[tokio::test] async fn send() { - let (_value, sender, receiver) = AsyncValue::<_, ()>::new(); - let _send_task = tokio::spawn(async move { sender.send(42) }); - assert_eq!(Some(42), receiver.recv().await); + let (_value, sender, receiver) = AsyncValue::<_, ()>::new(); + let _send_task = tokio::spawn(async move { sender.send(42) }); + assert_eq!(Some(42), receiver.recv().await); } #[tokio::test] async fn cancel_explicit() { - let (value, mut sender, receiver) = AsyncValue::<(), ()>::new(); + let (value, mut sender, receiver) = AsyncValue::<(), ()>::new(); - // A task that will never do any meaningful work, and just wait to be canceled. - let _send_task = tokio::spawn(async move { sender.interrupted().await }); + // A task that will never do any meaningful work, and just wait to be canceled. 
+ let _send_task = tokio::spawn(async move { sender.interrupted().await }); - // Ensure that a value is not received. - tokio::select! { - _ = sleep(Duration::from_secs(1)) => {}, - _ = receiver.recv() => { panic!("Should have continued to wait.") } - } + // Ensure that a value is not received. + tokio::select! { + _ = sleep(Duration::from_secs(1)) => {}, + _ = receiver.recv() => { panic!("Should have continued to wait.") } + } - // Then drop the AsyncValue and confirm that the background task returns. - std::mem::drop(value); - assert_eq!(None, receiver.recv().await); + // Then drop the AsyncValue and confirm that the background task returns. + std::mem::drop(value); + assert_eq!(None, receiver.recv().await); } #[tokio::test] async fn cancel_implicit() { - let (value, mut sender, receiver) = AsyncValue::<(), ()>::new(); + let (value, mut sender, receiver) = AsyncValue::<(), ()>::new(); - // A task that will never do any meaningful work, and just wait to be canceled. - let send_task = tokio::spawn(async move { sender.interrupted().await }); + // A task that will never do any meaningful work, and just wait to be canceled. + let send_task = tokio::spawn(async move { sender.interrupted().await }); - // Ensure that a value is not received. - tokio::select! { - _ = sleep(Duration::from_secs(1)) => {}, - _ = receiver.recv() => { panic!("Should have continued to wait.") } - } + // Ensure that a value is not received. + tokio::select! { + _ = sleep(Duration::from_secs(1)) => {}, + _ = receiver.recv() => { panic!("Should have continued to wait.") } + } - // Then drop the only receiver and confirm that the background task returns, and that new - // receivers cannot be created. - std::mem::drop(receiver); - assert_eq!(None, send_task.await.unwrap()); - assert!(value.receiver().is_none()); + // Then drop the only receiver and confirm that the background task returns, and that new + // receivers cannot be created. + std::mem::drop(receiver); + assert_eq!(None, send_task.await.unwrap()); + assert!(value.receiver().is_none()); } #[tokio::test] async fn interrupt_explicit() { - let (mut value, mut sender, receiver) = AsyncValue::<(), ()>::new(); + let (mut value, mut sender, receiver) = AsyncValue::<(), ()>::new(); - // A task that will never do any meaningful work, and just wait to be canceled. - let send_task = tokio::spawn(async move { sender.interrupted().await }); + // A task that will never do any meaningful work, and just wait to be canceled. + let send_task = tokio::spawn(async move { sender.interrupted().await }); - // Ensure that a value is not received. - tokio::select! { - _ = sleep(Duration::from_secs(1)) => {}, - _ = receiver.recv() => { panic!("Should have continued to wait.") } - } + // Ensure that a value is not received. + tokio::select! { + _ = sleep(Duration::from_secs(1)) => {}, + _ = receiver.recv() => { panic!("Should have continued to wait.") } + } - // Explicitly interrupt the task, and confirm that it exits and cancels the work. - value.try_interrupt(()).unwrap(); - assert_eq!(Some(()), send_task.await.unwrap()); - assert_eq!(None, receiver.recv().await); + // Explicitly interrupt the task, and confirm that it exits and cancels the work. 
+ value.try_interrupt(()).unwrap(); + assert_eq!(Some(()), send_task.await.unwrap()); + assert_eq!(None, receiver.recv().await); } diff --git a/src/rust/engine/build.rs b/src/rust/engine/build.rs index 5e4021a2a10..bec53019420 100644 --- a/src/rust/engine/build.rs +++ b/src/rust/engine/build.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -26,12 +26,12 @@ #![allow(clippy::mutex_atomic)] fn main() { - pyo3_build_config::add_extension_module_link_args(); + pyo3_build_config::add_extension_module_link_args(); - // NB: The native extension only works with the Python interpreter version it was built with - // (e.g. Python 3.7 vs 3.8). - println!("cargo:rerun-if-env-changed=PY"); - if let Ok(py_var) = std::env::var("PY") { - println!("cargo:rerun-if-changed={py_var}"); - } + // NB: The native extension only works with the Python interpreter version it was built with + // (e.g. Python 3.7 vs 3.8). + println!("cargo:rerun-if-env-changed=PY"); + if let Ok(py_var) = std::env::var("PY") { + println!("cargo:rerun-if-changed={py_var}"); + } } diff --git a/src/rust/engine/cache/src/lib.rs b/src/rust/engine/cache/src/lib.rs index dabe6adb110..c72c0fa196a 100644 --- a/src/rust/engine/cache/src/lib.rs +++ b/src/rust/engine/cache/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -47,43 +47,42 @@ use task_executor::Executor; /// #[derive(Clone)] pub struct PersistentCache { - store: ShardedLmdb, + store: ShardedLmdb, } impl PersistentCache { - pub fn new( - store_dir: &Path, - max_size_bytes: usize, - executor: Executor, - lease_time: Duration, - shard_count: u8, - ) -> Result { - let store = ShardedLmdb::new( - store_dir.join("cache"), - max_size_bytes, - executor, - lease_time, - shard_count, - ) - .map_err(|err| format!("Could not initialize store for cache: {err:?}"))?; + pub fn new( + store_dir: &Path, + max_size_bytes: usize, + executor: Executor, + lease_time: Duration, + shard_count: u8, + ) -> Result { + let store = ShardedLmdb::new( + store_dir.join("cache"), + max_size_bytes, + executor, + lease_time, + shard_count, + ) + .map_err(|err| format!("Could not initialize store for cache: {err:?}"))?; - Ok(Self { store }) - } + Ok(Self { store }) + } - pub async fn store(&self, key: &CacheKey, value: Bytes) -> Result<(), String> { - // NB: This is an unusual usage of the ShardedLmdb interface. In order for this to be a cache, - // rather than storing the value under its _own_ Fingerprint, the value is stored under the - // Fingerprint of the CacheKey. - let fingerprint = Digest::of_bytes(&key.to_bytes()).hash; - self.store.store_bytes(fingerprint, value, false).await?; - Ok(()) - } + pub async fn store(&self, key: &CacheKey, value: Bytes) -> Result<(), String> { + // NB: This is an unusual usage of the ShardedLmdb interface. In order for this to be a cache, + // rather than storing the value under its _own_ Fingerprint, the value is stored under the + // Fingerprint of the CacheKey. + let fingerprint = Digest::of_bytes(&key.to_bytes()).hash; + self.store.store_bytes(fingerprint, value, false).await?; + Ok(()) + } - pub async fn load(&self, key: &CacheKey) -> Result, String> { - let fingerprint = Digest::of_bytes(&key.to_bytes()).hash; - self - .store - .load_bytes_with(fingerprint, move |bytes| Ok(Bytes::copy_from_slice(bytes))) - .await - } + pub async fn load(&self, key: &CacheKey) -> Result, String> { + let fingerprint = Digest::of_bytes(&key.to_bytes()).hash; + self.store + .load_bytes_with(fingerprint, move |bytes| Ok(Bytes::copy_from_slice(bytes))) + .await + } } diff --git a/src/rust/engine/client/src/client.rs b/src/rust/engine/client/src/client.rs index 46e046a2bb1..b15ac591af2 100644 --- a/src/rust/engine/client/src/client.rs +++ b/src/rust/engine/client/src/client.rs @@ -10,84 +10,85 @@ use nailgun::NailgunClientError; use pantsd::ConnectionSettings; pub async fn execute_command( - start: SystemTime, - connection_settings: ConnectionSettings, - mut env: Vec<(String, String)>, - argv: Vec, + start: SystemTime, + connection_settings: ConnectionSettings, + mut env: Vec<(String, String)>, + argv: Vec, ) -> Result { - env.push(( - "PANTSD_RUNTRACKER_CLIENT_START_TIME".to_owned(), - start - .duration_since(SystemTime::UNIX_EPOCH) - .map_err(|e| format!("Failed to determine current time: {e}"))? - .as_secs_f64() - .to_string(), - )); + env.push(( + "PANTSD_RUNTRACKER_CLIENT_START_TIME".to_owned(), + start + .duration_since(SystemTime::UNIX_EPOCH) + .map_err(|e| format!("Failed to determine current time: {e}"))? 
+ .as_secs_f64() + .to_string(), + )); - env.push(( - "PANTSD_REQUEST_TIMEOUT_LIMIT".to_owned(), - connection_settings.timeout_limit.to_string(), - )); + env.push(( + "PANTSD_REQUEST_TIMEOUT_LIMIT".to_owned(), + connection_settings.timeout_limit.to_string(), + )); - let raw_io_fds = [ - std::io::stdin().as_raw_fd(), - std::io::stdout().as_raw_fd(), - std::io::stderr().as_raw_fd(), - ]; - let mut tty_settings = Vec::with_capacity(raw_io_fds.len()); - for raw_fd in &raw_io_fds { - match nix::sys::termios::tcgetattr(*raw_fd) { - Ok(termios) => tty_settings.push((raw_fd, termios)), - Err(err) => debug!( - "Failed to save terminal attributes for file descriptor {fd}: {err}", - fd = raw_fd, - err = err - ), + let raw_io_fds = [ + std::io::stdin().as_raw_fd(), + std::io::stdout().as_raw_fd(), + std::io::stderr().as_raw_fd(), + ]; + let mut tty_settings = Vec::with_capacity(raw_io_fds.len()); + for raw_fd in &raw_io_fds { + match nix::sys::termios::tcgetattr(*raw_fd) { + Ok(termios) => tty_settings.push((raw_fd, termios)), + Err(err) => debug!( + "Failed to save terminal attributes for file descriptor {fd}: {err}", + fd = raw_fd, + err = err + ), + } + if connection_settings.dynamic_ui { + if let Ok(path) = nix::unistd::ttyname(*raw_fd) { + env.push(( + format!("NAILGUN_TTY_PATH_{raw_fd}"), + path.display().to_string(), + )); + } + } } - if connection_settings.dynamic_ui { - if let Ok(path) = nix::unistd::ttyname(*raw_fd) { - env.push(( - format!("NAILGUN_TTY_PATH_{raw_fd}"), - path.display().to_string(), - )); - } - } - } - let command = argv - .get(0) - .ok_or_else(|| "Failed to determine current process argv0".to_owned())? - .clone(); + let command = argv + .get(0) + .ok_or_else(|| "Failed to determine current process argv0".to_owned())? + .clone(); - let args = argv.iter().skip(1).cloned().collect(); + let args = argv.iter().skip(1).cloned().collect(); - let nailgun_result = nailgun::client_execute(connection_settings.port, command, args, env).await; - for (raw_fd, termios) in tty_settings { - if let Err(err) = - nix::sys::termios::tcsetattr(*raw_fd, nix::sys::termios::SetArg::TCSADRAIN, &termios) - { - debug!( - "Failed to restore terminal attributes for file descriptor {fd}: {err}", - fd = raw_fd, - err = err - ); + let nailgun_result = + nailgun::client_execute(connection_settings.port, command, args, env).await; + for (raw_fd, termios) in tty_settings { + if let Err(err) = + nix::sys::termios::tcsetattr(*raw_fd, nix::sys::termios::SetArg::TCSADRAIN, &termios) + { + debug!( + "Failed to restore terminal attributes for file descriptor {fd}: {err}", + fd = raw_fd, + err = err + ); + } } - } - nailgun_result.map_err(|error| match error { - NailgunClientError::PreConnect(err) => format!( - "Problem connecting to pantsd at {port}: {err}", - port = connection_settings.port, - err = err - ), - NailgunClientError::PostConnect(err) => format!( - "Problem communicating with pantsd at {port}: {err}", - port = connection_settings.port, - err = err - ), - NailgunClientError::BrokenPipe => format!( - "Broken pipe communicating with pantsd at {port}.", - port = connection_settings.port - ), - NailgunClientError::KeyboardInterrupt => "User interrupt.".to_owned(), - }) + nailgun_result.map_err(|error| match error { + NailgunClientError::PreConnect(err) => format!( + "Problem connecting to pantsd at {port}: {err}", + port = connection_settings.port, + err = err + ), + NailgunClientError::PostConnect(err) => format!( + "Problem communicating with pantsd at {port}: {err}", + port = connection_settings.port, + 
err = err + ), + NailgunClientError::BrokenPipe => format!( + "Broken pipe communicating with pantsd at {port}.", + port = connection_settings.port + ), + NailgunClientError::KeyboardInterrupt => "User interrupt.".to_owned(), + }) } diff --git a/src/rust/engine/client/src/client_tests.rs b/src/rust/engine/client/src/client_tests.rs index 825f2bf3e75..519e3ba7cb4 100644 --- a/src/rust/engine/client/src/client_tests.rs +++ b/src/rust/engine/client/src/client_tests.rs @@ -11,41 +11,41 @@ use crate::execute_command; #[tokio::test] async fn test_client() { - let (build_root, options_parser, _tmpdir) = launch_pantsd(); - - let connection_settings = pantsd::find_pantsd(&build_root, &options_parser).unwrap(); - let exit_code = execute_command( - SystemTime::now(), - connection_settings, - std::env::vars().collect(), - ["pants", "-V"].iter().map(ToString::to_string).collect(), - ) - .await - .unwrap(); - assert_eq!(0, exit_code) + let (build_root, options_parser, _tmpdir) = launch_pantsd(); + + let connection_settings = pantsd::find_pantsd(&build_root, &options_parser).unwrap(); + let exit_code = execute_command( + SystemTime::now(), + connection_settings, + std::env::vars().collect(), + ["pants", "-V"].iter().map(ToString::to_string).collect(), + ) + .await + .unwrap(); + assert_eq!(0, exit_code) } #[tokio::test] async fn test_client_fingerprint_mismatch() { - // Launch `pantsd` with one fingerprint. - let (build_root, _options_parser, tmpdir) = launch_pantsd(); - - // Then connect with a different set of options (but with a matching `pants_subprocessdir`, so - // that we find the relevant `.pants.d/pids` directory). - let options_parser = OptionParser::new( - Env::new(HashMap::new()), - Args::new(vec![format!( - "--pants-subprocessdir={}", - tmpdir.path().display() - )]), - ) - .unwrap(); - let error = pantsd::find_pantsd(&build_root, &options_parser) - .err() + // Launch `pantsd` with one fingerprint. + let (build_root, _options_parser, tmpdir) = launch_pantsd(); + + // Then connect with a different set of options (but with a matching `pants_subprocessdir`, so + // that we find the relevant `.pants.d/pids` directory). + let options_parser = OptionParser::new( + Env::new(HashMap::new()), + Args::new(vec![format!( + "--pants-subprocessdir={}", + tmpdir.path().display() + )]), + ) .unwrap(); - - assert!( - error.contains("Fingerprint mismatched:"), - "Error was: {error}" - ) + let error = pantsd::find_pantsd(&build_root, &options_parser) + .err() + .unwrap(); + + assert!( + error.contains("Fingerprint mismatched:"), + "Error was: {error}" + ) } diff --git a/src/rust/engine/client/src/lib.rs b/src/rust/engine/client/src/lib.rs index 689cbf1f516..463d575f8be 100644 --- a/src/rust/engine/client/src/lib.rs +++ b/src/rust/engine/client/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. 
#![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] diff --git a/src/rust/engine/client/src/lib_tests.rs b/src/rust/engine/client/src/lib_tests.rs index ca963b5c878..5a3d2f86efc 100644 --- a/src/rust/engine/client/src/lib_tests.rs +++ b/src/rust/engine/client/src/lib_tests.rs @@ -5,14 +5,14 @@ use options::render_choice; #[test] fn test() { - assert!(render_choice(&[]).is_none()); - assert_eq!("One".to_owned(), render_choice(&["One"]).unwrap()); - assert_eq!( - "One or Two".to_owned(), - render_choice(&["One", "Two"]).unwrap() - ); - assert_eq!( - "One, Two or Three".to_owned(), - render_choice(&["One", "Two", "Three"]).unwrap() - ); + assert!(render_choice(&[]).is_none()); + assert_eq!("One".to_owned(), render_choice(&["One"]).unwrap()); + assert_eq!( + "One or Two".to_owned(), + render_choice(&["One", "Two"]).unwrap() + ); + assert_eq!( + "One, Two or Three".to_owned(), + render_choice(&["One", "Two", "Three"]).unwrap() + ); } diff --git a/src/rust/engine/client/src/main.rs b/src/rust/engine/client/src/main.rs index ddbdb812dc7..b4506cbd64a 100644 --- a/src/rust/engine/client/src/main.rs +++ b/src/rust/engine/client/src/main.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -44,88 +44,86 @@ use pantsd::find_pantsd; #[derive(AsRefStr, EnumString, EnumVariantNames)] #[strum(serialize_all = "snake_case")] enum PythonLogLevel { - Trace, - Debug, - Info, - Warn, - Error, + Trace, + Debug, + Info, + Warn, + Error, } async fn execute(start: SystemTime) -> Result { - let build_root = BuildRoot::find()?; - let (env, dropped) = Env::capture_lossy(); - let env_items = (&env).into(); - let argv = env::args().collect::>(); - let options_parser = OptionParser::new(env, Args::argv())?; - - let use_pantsd = options_parser.parse_bool(&option_id!("pantsd"), true)?; - if !use_pantsd.value { - return Err(format!( - "Pantsd has been turned off via {option_source:?}.", - option_source = use_pantsd.source - )); - } - - let concurrent = options_parser.parse_bool(&option_id!("concurrent"), false)?; - if concurrent.value { - return Err("Pantsd is being turned off since --concurrent is true.".to_owned()); - } - - let level_option = option_id!(-'l', "level"); - let log_level_option_value = - options_parser.parse_string(&level_option, PythonLogLevel::Info.as_ref())?; - let level = PythonLogLevel::from_str(&log_level_option_value.value).map_err(|_| { - format!( - "Not a valid log level {level} from {option_source:?}. Should be one of {levels}.", - level = log_level_option_value.value, - option_source = log_level_option_value.source, - levels = render_choice(PythonLogLevel::VARIANTS) - .expect("We know there is at least one PythonLogLevel enum variant."), - ) - })?; - env_logger::init_from_env(env_logger::Env::new().filter_or("__PANTS_LEVEL__", level.as_ref())); - - // Now that the logger has been set up, we can retroactively log any dropped env vars. - let mut keys_with_non_utf8_values = dropped.keys_with_non_utf8_values; - keys_with_non_utf8_values.sort(); - for name in keys_with_non_utf8_values { - log::warn!("Environment variable with non-UTF-8 value ignored: {name}"); - } - let mut non_utf8_keys = dropped.non_utf8_keys; - non_utf8_keys.sort(); - for name in non_utf8_keys { - log::warn!( - "Environment variable with non-UTF-8 name ignored: {}", - name.to_string_lossy() - ); - } - let pantsd_settings = find_pantsd(&build_root, &options_parser)?; - client::execute_command(start, pantsd_settings, env_items, argv).await + let build_root = BuildRoot::find()?; + let (env, dropped) = Env::capture_lossy(); + let env_items = (&env).into(); + let argv = env::args().collect::>(); + let options_parser = OptionParser::new(env, Args::argv())?; + + let use_pantsd = options_parser.parse_bool(&option_id!("pantsd"), true)?; + if !use_pantsd.value { + return Err(format!( + "Pantsd has been turned off via {option_source:?}.", + option_source = use_pantsd.source + )); + } + + let concurrent = options_parser.parse_bool(&option_id!("concurrent"), false)?; + if concurrent.value { + return Err("Pantsd is being turned off since --concurrent is true.".to_owned()); + } + + let level_option = option_id!(-'l', "level"); + let log_level_option_value = + options_parser.parse_string(&level_option, PythonLogLevel::Info.as_ref())?; + let level = PythonLogLevel::from_str(&log_level_option_value.value).map_err(|_| { + format!( + "Not a valid log level {level} from {option_source:?}. 
Should be one of {levels}.", + level = log_level_option_value.value, + option_source = log_level_option_value.source, + levels = render_choice(PythonLogLevel::VARIANTS) + .expect("We know there is at least one PythonLogLevel enum variant."), + ) + })?; + env_logger::init_from_env(env_logger::Env::new().filter_or("__PANTS_LEVEL__", level.as_ref())); + + // Now that the logger has been set up, we can retroactively log any dropped env vars. + let mut keys_with_non_utf8_values = dropped.keys_with_non_utf8_values; + keys_with_non_utf8_values.sort(); + for name in keys_with_non_utf8_values { + log::warn!("Environment variable with non-UTF-8 value ignored: {name}"); + } + let mut non_utf8_keys = dropped.non_utf8_keys; + non_utf8_keys.sort(); + for name in non_utf8_keys { + log::warn!( + "Environment variable with non-UTF-8 name ignored: {}", + name.to_string_lossy() + ); + } + let pantsd_settings = find_pantsd(&build_root, &options_parser)?; + client::execute_command(start, pantsd_settings, env_items, argv).await } fn try_execv_fallback_client(pants_server: OsString) -> Result { - let exe = PathBuf::from(pants_server.clone()); - let c_exe = CString::new(exe.into_os_string().into_vec()) - .expect("Failed to convert executable to a C string."); - - let mut c_args = vec![c_exe.clone()]; - c_args.extend( - env::args_os() - .skip(1) - .map(|arg| CString::new(arg.into_vec()).expect("Failed to convert argument to a C string.")), - ); - - execv(&c_exe, &c_args).map_err(|errno| { - eprintln!("Failed to exec pants at {pants_server:?}: {}", errno.desc()); - 1 - }) + let exe = PathBuf::from(pants_server.clone()); + let c_exe = CString::new(exe.into_os_string().into_vec()) + .expect("Failed to convert executable to a C string."); + + let mut c_args = vec![c_exe.clone()]; + c_args.extend(env::args_os().skip(1).map(|arg| { + CString::new(arg.into_vec()).expect("Failed to convert argument to a C string.") + })); + + execv(&c_exe, &c_args).map_err(|errno| { + eprintln!("Failed to exec pants at {pants_server:?}: {}", errno.desc()); + 1 + }) } fn execv_fallback_client(pants_server: OsString) -> Infallible { - if let Err(exit_code) = try_execv_fallback_client(pants_server) { - std::process::exit(exit_code); - } - unreachable!() + if let Err(exit_code) = try_execv_fallback_client(pants_server) { + std::process::exit(exit_code); + } + unreachable!() } // The value is taken from this C precedent: @@ -148,33 +146,33 @@ const PANTS_NO_NATIVE_CLIENT: &str = "PANTS_NO_NATIVE_CLIENT"; #[tokio::main] async fn main() { - let start = SystemTime::now(); - let no_native_client = - matches!(env::var_os(PANTS_NO_NATIVE_CLIENT), Some(value) if !value.is_empty()); - let pants_server = env::var_os(PANTS_SERVER_EXE); - - match &pants_server { - Some(pants_server) if no_native_client => { - // The user requested that the native client not be used. Immediately fall back to the legacy - // client. - execv_fallback_client(pants_server.clone()); - return; + let start = SystemTime::now(); + let no_native_client = + matches!(env::var_os(PANTS_NO_NATIVE_CLIENT), Some(value) if !value.is_empty()); + let pants_server = env::var_os(PANTS_SERVER_EXE); + + match &pants_server { + Some(pants_server) if no_native_client => { + // The user requested that the native client not be used. Immediately fall back to the legacy + // client. 
+ execv_fallback_client(pants_server.clone()); + return; + } + _ => {} } - _ => {} - } - - match (execute(start).await, pants_server) { - (Err(_), Some(pants_server)) => { - // We failed to connect to `pantsd`, but a server variable was provided. Fall back - // to `execv`'ing the legacy Python client, which will handle spawning `pantsd`. - execv_fallback_client(pants_server); - } - (Err(err), None) => { - eprintln!("{err}"); - // We use this exit code to indicate an error running pants via the nailgun protocol to - // differentiate from a successful nailgun protocol session. - std::process::exit(EX_TEMPFAIL); + + match (execute(start).await, pants_server) { + (Err(_), Some(pants_server)) => { + // We failed to connect to `pantsd`, but a server variable was provided. Fall back + // to `execv`'ing the legacy Python client, which will handle spawning `pantsd`. + execv_fallback_client(pants_server); + } + (Err(err), None) => { + eprintln!("{err}"); + // We use this exit code to indicate an error running pants via the nailgun protocol to + // differentiate from a successful nailgun protocol session. + std::process::exit(EX_TEMPFAIL); + } + (Ok(exit_code), _) => std::process::exit(exit_code), } - (Ok(exit_code), _) => std::process::exit(exit_code), - } } diff --git a/src/rust/engine/concrete_time/src/lib.rs b/src/rust/engine/concrete_time/src/lib.rs index 9f9bd4bd98f..937c6bc4bfa 100644 --- a/src/rust/engine/concrete_time/src/lib.rs +++ b/src/rust/engine/concrete_time/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -41,102 +41,101 @@ use serde_derive::Serialize; /// duration between two arbitrary timestamps. #[derive(Debug, DeepSizeOf, Clone, Copy, PartialEq, Eq, Hash, Serialize)] pub struct Duration { - /// How many seconds did this `Duration` last? - pub secs: u64, - /// How many sub-second nanoseconds did this `Duration` last? - pub nanos: u32, + /// How many seconds did this `Duration` last? + pub secs: u64, + /// How many sub-second nanoseconds did this `Duration` last? 
+ pub nanos: u32, } impl Duration { - /// Construct a new duration with `secs` seconds and `nanos` nanoseconds - pub fn new(secs: u64, nanos: u32) -> Self { - Self { secs, nanos } - } + /// Construct a new duration with `secs` seconds and `nanos` nanoseconds + pub fn new(secs: u64, nanos: u32) -> Self { + Self { secs, nanos } + } } impl From<std::time::Duration> for Duration { - fn from(duration: std::time::Duration) -> Self { - Self { - secs: duration.as_secs(), - nanos: duration.subsec_nanos(), + fn from(duration: std::time::Duration) -> Self { + Self { + secs: duration.as_secs(), + nanos: duration.subsec_nanos(), + } } - } } impl From<Duration> for std::time::Duration { - fn from(duration: Duration) -> std::time::Duration { - std::time::Duration::new(duration.secs, duration.nanos) - } + fn from(duration: Duration) -> std::time::Duration { + std::time::Duration::new(duration.secs, duration.nanos) + } } /// A timespan #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] pub struct TimeSpan { - /// Duration since the UNIX_EPOCH - pub start: Duration, - /// Duration since `start` - pub duration: Duration, + /// Duration since the UNIX_EPOCH + pub start: Duration, + /// Duration since `start` + pub duration: Duration, } impl TimeSpan { - fn since_epoch(time: &std::time::SystemTime) -> std::time::Duration { - time - .duration_since(std::time::UNIX_EPOCH) - .expect("Surely you're not before the unix epoch?") - } + fn since_epoch(time: &std::time::SystemTime) -> std::time::Duration { + time.duration_since(std::time::UNIX_EPOCH) + .expect("Surely you're not before the unix epoch?") + } - /// Construct a TimeSpan that started at `start` and ends now. - pub fn since(start: &std::time::SystemTime) -> TimeSpan { - let start = Self::since_epoch(start); - let duration = Self::since_epoch(&std::time::SystemTime::now()) - start; - TimeSpan { - start: start.into(), - duration: duration.into(), + /// Construct a TimeSpan that started at `start` and ends now. + pub fn since(start: &std::time::SystemTime) -> TimeSpan { + let start = Self::since_epoch(start); + let duration = Self::since_epoch(&std::time::SystemTime::now()) - start; + TimeSpan { + start: start.into(), + duration: duration.into(), + } } - } - /// Construct a TimeSpan that started at `start` and ends at `end`.
+ pub fn from_start_and_end_systemtime( + start: &std::time::SystemTime, + end: &std::time::SystemTime, + ) -> TimeSpan { + let start = Self::since_epoch(start); + let end = Self::since_epoch(end); + let duration = match end.checked_sub(start) { + Some(d) => d, + None => { + log::debug!("Invalid TimeSpan - start: {:?}, end: {:?}", start, end); + std::time::Duration::new(0, 0) + } + }; + TimeSpan { + start: start.into(), + duration: duration.into(), + } } - } - fn std_duration_from_timestamp(t: &prost_types::Timestamp) -> std::time::Duration { - std::time::Duration::new(t.seconds as u64, t.nanos as u32) - } + fn std_duration_from_timestamp(t: &prost_types::Timestamp) -> std::time::Duration { + std::time::Duration::new(t.seconds as u64, t.nanos as u32) + } - /// Construct a `TimeSpan` given a start and an end `Timestamp` from protobuf timestamp. - pub fn from_start_and_end( - start: &prost_types::Timestamp, - end: &prost_types::Timestamp, - time_span_description: &str, - ) -> Result { - let start = Self::std_duration_from_timestamp(start); - let end = Self::std_duration_from_timestamp(end); - match end.checked_sub(start) { - Some(duration) => Ok(TimeSpan { - start: start.into(), - duration: duration.into(), - }), - None => Err(format!( - "Got negative {time_span_description} time: {end:?} - {start:?}" - )), + /// Construct a `TimeSpan` given a start and an end `Timestamp` from protobuf timestamp. + pub fn from_start_and_end( + start: &prost_types::Timestamp, + end: &prost_types::Timestamp, + time_span_description: &str, + ) -> Result { + let start = Self::std_duration_from_timestamp(start); + let end = Self::std_duration_from_timestamp(end); + match end.checked_sub(start) { + Some(duration) => Ok(TimeSpan { + start: start.into(), + duration: duration.into(), + }), + None => Err(format!( + "Got negative {time_span_description} time: {end:?} - {start:?}" + )), + } } - } } #[cfg(test)] diff --git a/src/rust/engine/concrete_time/src/tests.rs b/src/rust/engine/concrete_time/src/tests.rs index d742d2c0792..ac5e3d32266 100644 --- a/src/rust/engine/concrete_time/src/tests.rs +++ b/src/rust/engine/concrete_time/src/tests.rs @@ -4,66 +4,66 @@ use crate::{Duration, TimeSpan}; #[test] fn convert_from_std_duration() { - let std = std::time::Duration::new(3, 141_592_653); - let concrete: Duration = std.into(); - assert_eq!(std.as_secs(), concrete.secs); - assert_eq!(std.subsec_nanos(), concrete.nanos); + let std = std::time::Duration::new(3, 141_592_653); + let concrete: Duration = std.into(); + assert_eq!(std.as_secs(), concrete.secs); + assert_eq!(std.subsec_nanos(), concrete.nanos); } #[test] fn convert_into_std_duration() { - let concrete = Duration::new(3, 141_592_653); - let std: std::time::Duration = concrete.into(); - assert_eq!(concrete.secs, std.as_secs()); - assert_eq!(concrete.nanos, std.subsec_nanos()); + let concrete = Duration::new(3, 141_592_653); + let std: std::time::Duration = concrete.into(); + assert_eq!(concrete.secs, std.as_secs()); + assert_eq!(concrete.nanos, std.subsec_nanos()); } #[test] fn time_span_since() { - let start = std::time::SystemTime::now(); - let sleep_duration = std::time::Duration::from_millis(1); - std::thread::sleep(sleep_duration); - let span = TimeSpan::since(&start); - assert!(std::convert::Into::::into(span.duration) >= sleep_duration); - assert_eq!( - start - .duration_since(std::time::SystemTime::UNIX_EPOCH) - .unwrap(), - span.start.into() - ); + let start = std::time::SystemTime::now(); + let sleep_duration = std::time::Duration::from_millis(1); + 
std::thread::sleep(sleep_duration); + let span = TimeSpan::since(&start); + assert!(std::convert::Into::::into(span.duration) >= sleep_duration); + assert_eq!( + start + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap(), + span.start.into() + ); } fn time_span_from_start_and_duration_in_seconds( - start: i64, - duration: i64, + start: i64, + duration: i64, ) -> Result { - use prost_types::Timestamp; + use prost_types::Timestamp; - let start_timestamp = Timestamp { - seconds: start, - nanos: 0, - }; + let start_timestamp = Timestamp { + seconds: start, + nanos: 0, + }; - let end_timestamp = Timestamp { - seconds: start + duration, - nanos: 0, - }; + let end_timestamp = Timestamp { + seconds: start + duration, + nanos: 0, + }; - TimeSpan::from_start_and_end(&start_timestamp, &end_timestamp, "") + TimeSpan::from_start_and_end(&start_timestamp, &end_timestamp, "") } #[test] fn time_span_from_prost_timestamp() { - let span = time_span_from_start_and_duration_in_seconds(42, 10).unwrap(); - assert_eq!( - TimeSpan { - start: Duration::new(42, 0), - duration: Duration::new(10, 0), - }, - span - ); + let span = time_span_from_start_and_duration_in_seconds(42, 10).unwrap(); + assert_eq!( + TimeSpan { + start: Duration::new(42, 0), + duration: Duration::new(10, 0), + }, + span + ); - // A negative duration is invalid. - let span = time_span_from_start_and_duration_in_seconds(42, -10); - assert!(span.is_err()); + // A negative duration is invalid. + let span = time_span_from_start_and_duration_in_seconds(42, -10); + assert!(span.is_err()); } diff --git a/src/rust/engine/dep_inference/build.rs b/src/rust/engine/dep_inference/build.rs index 1909ab9b03e..223be570d3d 100644 --- a/src/rust/engine/dep_inference/build.rs +++ b/src/rust/engine/dep_inference/build.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -39,11 +39,10 @@ use walkdir::WalkDir; /// For most names, there will only be 1 symbol; for those, we create a const u16 for convenience. /// For the names with multiple symbols, we generate a const array (hashmaps would have been nice, but I couldn't figure out how to make them const) fn gen_constants_file(language: &tree_sitter::Language, out_dir: &Path) { - let mut file = std::fs::File::create(out_dir.join("constants.rs")).unwrap(); + let mut file = std::fs::File::create(out_dir.join("constants.rs")).unwrap(); - file - .write_all( - b"\ + file.write_all( + b"\ // Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). 
// Licensed under the Apache License, Version 2.0 (see LICENSE). @@ -55,51 +54,50 @@ impl KindID { ) .unwrap(); - let mut kinds_to_ids: HashMap> = HashMap::new(); + let mut kinds_to_ids: HashMap> = HashMap::new(); - // Collect the mapping of name->symbols - for id in (0_u16..(language.node_kind_count() as u16)) - .chain([language.id_for_node_kind("ERROR", true)].iter().cloned()) - { - if language.node_kind_is_named(id) { - let kind = language.node_kind_for_id(id).unwrap().to_uppercase(); - kinds_to_ids.entry(kind).or_default().insert(id); + // Collect the mapping of name->symbols + for id in (0_u16..(language.node_kind_count() as u16)) + .chain([language.id_for_node_kind("ERROR", true)].iter().cloned()) + { + if language.node_kind_is_named(id) { + let kind = language.node_kind_for_id(id).unwrap().to_uppercase(); + kinds_to_ids.entry(kind).or_default().insert(id); + } } - } - // Codegen for each name->symbol mapping - for (kind, ids) in kinds_to_ids { - let text = match ids.len() { - 1 => { - let single = ids.iter().next().unwrap(); - format!(" pub const {kind}: u16 = {single};\n") - } - _ => { - let items: String = ids - .iter() - .map(|id| id.to_string()) - .collect::>() - .join(", "); - format!( - " pub const {}: [u16; {}] = [{}];\n", - kind, - ids.len(), - items - ) - } - }; - file.write_all(text.as_bytes()).unwrap(); - } + // Codegen for each name->symbol mapping + for (kind, ids) in kinds_to_ids { + let text = match ids.len() { + 1 => { + let single = ids.iter().next().unwrap(); + format!(" pub const {kind}: u16 = {single};\n") + } + _ => { + let items: String = ids + .iter() + .map(|id| id.to_string()) + .collect::>() + .join(", "); + format!( + " pub const {}: [u16; {}] = [{}];\n", + kind, + ids.len(), + items + ) + } + }; + file.write_all(text.as_bytes()).unwrap(); + } - file.write_all(b"}\n").unwrap(); + file.write_all(b"}\n").unwrap(); } fn gen_visitor_file(language: &tree_sitter::Language, out_dir: &Path) { - let mut file = std::fs::File::create(out_dir.join("visitor.rs")).unwrap(); + let mut file = std::fs::File::create(out_dir.join("visitor.rs")).unwrap(); - file - .write_all( - b"\ + file.write_all( + b"\ // Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). 
@@ -145,39 +143,36 @@ pub trait Visitor { ) .unwrap(); - let mut kinds_seen = HashSet::new(); - for id in 0..language.node_kind_count() { - let id = id as u16; - if language.node_kind_is_named(id) { - let kind = language.node_kind_for_id(id).unwrap(); - if kinds_seen.insert(kind) { - file.write_all( + let mut kinds_seen = HashSet::new(); + for id in 0..language.node_kind_count() { + let id = id as u16; + if language.node_kind_is_named(id) { + let kind = language.node_kind_for_id(id).unwrap(); + if kinds_seen.insert(kind) { + file.write_all( format!(" fn visit_{kind}(&mut self, node: tree_sitter::Node) -> ChildBehavior {{\n ChildBehavior::Visit\n }}\n").as_bytes(), ) .unwrap(); - } + } + } } - } - file - .write_all( - b" fn visit(&mut self, node: tree_sitter::Node) -> ChildBehavior { + file.write_all( + b" fn visit(&mut self, node: tree_sitter::Node) -> ChildBehavior { match node.kind_id() { ", ) .unwrap(); - for id in 0..language.node_kind_count() { - let id = id as u16; - if language.node_kind_is_named(id) { - let kind = language.node_kind_for_id(id).unwrap(); - file - .write_all(format!(" {id} => self.visit_{kind}(node),\n").as_bytes()) - .unwrap(); + for id in 0..language.node_kind_count() { + let id = id as u16; + if language.node_kind_is_named(id) { + let kind = language.node_kind_for_id(id).unwrap(); + file.write_all(format!(" {id} => self.visit_{kind}(node),\n").as_bytes()) + .unwrap(); + } } - } - file - .write_all( - b" _ => ChildBehavior::Visit, + file.write_all( + b" _ => ChildBehavior::Visit, } } } @@ -187,68 +182,68 @@ pub trait Visitor { } fn gen_impl_hash_file(name: &'static str, source_dir: &Path, impl_dir: &Path, out_dir: &Path) { - let mut hasher = Sha256::default(); - for entry in WalkDir::new(impl_dir) - .sort_by_file_name() - .into_iter() - .chain(WalkDir::new(source_dir).sort_by_file_name().into_iter()) - .flatten() - { - if entry.file_type().is_file() && entry.path().file_name().unwrap() != "tests.rs" { - let mut reader = std::fs::File::open(entry.path()).expect("Failed to open file"); - let _ = std::io::copy(&mut reader, &mut hasher).expect("Failed to copy bytes"); + let mut hasher = Sha256::default(); + for entry in WalkDir::new(impl_dir) + .sort_by_file_name() + .into_iter() + .chain(WalkDir::new(source_dir).sort_by_file_name().into_iter()) + .flatten() + { + if entry.file_type().is_file() && entry.path().file_name().unwrap() != "tests.rs" { + let mut reader = std::fs::File::open(entry.path()).expect("Failed to open file"); + let _ = std::io::copy(&mut reader, &mut hasher).expect("Failed to copy bytes"); + } + } + hasher + .write_all(env::var("CARGO_PKG_VERSION").unwrap().as_bytes()) + .unwrap(); + let hash_bytes = &hasher.finalize(); + let hash = hex::encode(hash_bytes); + let mut file = std::fs::File::create(out_dir.join(format!("{name}_impl_hash.rs"))).unwrap(); + file.write_all(format!("pub const IMPL_HASH: &str = {hash:?};").as_bytes()) + .unwrap(); + if env::var_os("PANTS_PRINT_IMPL_HASHES") == Some("1".into()) { + println!("cargo:warning={name} hash impl hash: {hash}"); } - } - hasher - .write_all(env::var("CARGO_PKG_VERSION").unwrap().as_bytes()) - .unwrap(); - let hash_bytes = &hasher.finalize(); - let hash = hex::encode(hash_bytes); - let mut file = std::fs::File::create(out_dir.join(format!("{name}_impl_hash.rs"))).unwrap(); - file - .write_all(format!("pub const IMPL_HASH: &str = {hash:?};").as_bytes()) - .unwrap(); - if env::var_os("PANTS_PRINT_IMPL_HASHES") == Some("1".into()) { - println!("cargo:warning={name} hash impl hash: {hash}"); - } } fn 
gen_files_for_language( - language: tree_sitter::Language, - name: &'static str, - source_dir: &Path, - out_dir: &Path, + language: tree_sitter::Language, + name: &'static str, + source_dir: &Path, + out_dir: &Path, ) -> Result<(), Box> { - let subdir = out_dir.join(name); - fs::create_dir_all(&subdir)?; - gen_constants_file(&language, subdir.as_path()); - gen_visitor_file(&language, subdir.as_path()); - - // NB: This MUST be last in the list - let source_subdir = source_dir.join(name); - gen_impl_hash_file(name, source_subdir.as_path(), subdir.as_path(), out_dir); - Ok(()) + let subdir = out_dir.join(name); + fs::create_dir_all(&subdir)?; + gen_constants_file(&language, subdir.as_path()); + gen_visitor_file(&language, subdir.as_path()); + + // NB: This MUST be last in the list + let source_subdir = source_dir.join(name); + gen_impl_hash_file(name, source_subdir.as_path(), subdir.as_path(), out_dir); + Ok(()) } fn main() -> Result<(), Box> { - let source_dir = env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR env var not set."); - let source_dir = Path::new(&source_dir).join("src"); - let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR env var not set."); - let out_dir = Path::new(&out_dir); - gen_files_for_language( - tree_sitter_python::language(), - "python", - &source_dir, - out_dir, - )?; - gen_files_for_language( - tree_sitter_javascript::language(), - "javascript", - &source_dir, - out_dir, - )?; - println!("cargo:rerun-if-env-changed=PANTS_PRINT_IMPL_HASHES"); - println!("cargo:rerun-if-changed=build.rs"); - println!("cargo:rerun-if-changed=src"); - Ok(()) + let source_dir = + env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR env var not set."); + let source_dir = Path::new(&source_dir).join("src"); + let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR env var not set."); + let out_dir = Path::new(&out_dir); + gen_files_for_language( + tree_sitter_python::language(), + "python", + &source_dir, + out_dir, + )?; + gen_files_for_language( + tree_sitter_javascript::language(), + "javascript", + &source_dir, + out_dir, + )?; + println!("cargo:rerun-if-env-changed=PANTS_PRINT_IMPL_HASHES"); + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=src"); + Ok(()) } diff --git a/src/rust/engine/dep_inference/src/javascript/import_pattern.rs b/src/rust/engine/dep_inference/src/javascript/import_pattern.rs index a39892cb4c2..ef121ea5838 100644 --- a/src/rust/engine/dep_inference/src/javascript/import_pattern.rs +++ b/src/rust/engine/dep_inference/src/javascript/import_pattern.rs @@ -25,113 +25,113 @@ pub struct StarMatch<'a>(pub &'a str); /// #[derive(Debug, PartialEq, Eq)] pub enum Pattern<'a> { - Match(usize, Option>), - NoMatch, + Match(usize, Option>), + NoMatch, } impl<'a> Pattern<'a> { - fn from_prefix(prefix: &str) -> Self { - Pattern::Match(prefix.len(), None) - } + fn from_prefix(prefix: &str) -> Self { + Pattern::Match(prefix.len(), None) + } - fn from_prefix_match(prefix: &str, star_match: &'a str) -> Self { - Pattern::Match(prefix.len(), Some(StarMatch(star_match))) - } + fn from_prefix_match(prefix: &str, star_match: &'a str) -> Self { + Pattern::Match(prefix.len(), Some(StarMatch(star_match))) + } - pub fn matches(pattern: &str, import: &'a str) -> Self { - let mut pattern_parts = pattern.split('*'); - let prefix = pattern_parts.next(); - let suffix = pattern_parts.next(); - if pattern_parts.next().is_some() || import.is_empty() { - // Multiple '*' is not spec compliant, so never match. 
- // Empty import strings aren't interesting. - return Self::NoMatch; - }; - match (prefix, suffix) { - (Some(specifier), None) if specifier == import => Self::from_prefix(import), - (None, _) | (Some(""), _) => Self::NoMatch, // "*" isn't valid, or pattern is empty string which is also not interesting. - (Some(prefix), Some("")) => { - // "*" - if let Some(star_match) = import.strip_prefix(prefix) { - Self::from_prefix_match(prefix, star_match) - } else { - Self::NoMatch + pub fn matches(pattern: &str, import: &'a str) -> Self { + let mut pattern_parts = pattern.split('*'); + let prefix = pattern_parts.next(); + let suffix = pattern_parts.next(); + if pattern_parts.next().is_some() || import.is_empty() { + // Multiple '*' is not spec compliant, so never match. + // Empty import strings aren't interesting. + return Self::NoMatch; + }; + match (prefix, suffix) { + (Some(specifier), None) if specifier == import => Self::from_prefix(import), + (None, _) | (Some(""), _) => Self::NoMatch, // "*" isn't valid, or pattern is empty string which is also not interesting. + (Some(prefix), Some("")) => { + // "*" + if let Some(star_match) = import.strip_prefix(prefix) { + Self::from_prefix_match(prefix, star_match) + } else { + Self::NoMatch + } + } + (Some(prefix), Some(suffix)) => { + // "*" + if let Some(star_match) = import + .strip_prefix(prefix) + .and_then(|prefix_stripped| prefix_stripped.strip_suffix(suffix)) + { + Self::from_prefix_match(prefix, star_match) + } else { + Self::NoMatch + } + } + _ => Self::NoMatch, } - } - (Some(prefix), Some(suffix)) => { - // "*" - if let Some(star_match) = import - .strip_prefix(prefix) - .and_then(|prefix_stripped| prefix_stripped.strip_suffix(suffix)) - { - Self::from_prefix_match(prefix, star_match) - } else { - Self::NoMatch - } - } - _ => Self::NoMatch, } - } } /// Replaces patterns provided on the form outlined in /// [NodeJS subpath patterns](https://nodejs.org/api/packages.html#subpath-patterns). /// If no pattern matches, the import string is returned unchanged. 
pub fn imports_from_patterns( - root: &str, - patterns: &HashMap>, - import: String, + root: &str, + patterns: &HashMap>, + import: String, ) -> HashSet { - if let Some((star_match, pattern)) = find_best_match(patterns, &import) { - let mut matches = patterns[pattern] - .iter() - .filter_map(move |replacement| apply_replacements_to_match(&star_match, replacement)) - .map(|new_import| add_root_dir_to_dot_slash(root, new_import)) - .peekable(); - if matches.peek().is_some() { - Either::Right(matches) + if let Some((star_match, pattern)) = find_best_match(patterns, &import) { + let mut matches = patterns[pattern] + .iter() + .filter_map(move |replacement| apply_replacements_to_match(&star_match, replacement)) + .map(|new_import| add_root_dir_to_dot_slash(root, new_import)) + .peekable(); + if matches.peek().is_some() { + Either::Right(matches) + } else { + Either::Left(once(import)) + } } else { - Either::Left(once(import)) + Either::Left(once(import)) } - } else { - Either::Left(once(import)) - } - .collect() + .collect() } fn apply_replacements_to_match( - star_match: &Option, - replacement: &str, + star_match: &Option, + replacement: &str, ) -> Option { - if let Some(StarMatch(star_match)) = star_match { - if replacement.matches('*').count() != 1 { - return None; + if let Some(StarMatch(star_match)) = star_match { + if replacement.matches('*').count() != 1 { + return None; + } + Some(replacement.replace('*', star_match)) + } else { + Some(replacement.to_string()) } - Some(replacement.replace('*', star_match)) - } else { - Some(replacement.to_string()) - } } fn add_root_dir_to_dot_slash(root: &str, new_import: String) -> String { - if let Some(("", rest)) = new_import.split_once("./") { - Path::new(root).join(rest).to_str().unwrap().to_string() - } else { - new_import - } + if let Some(("", rest)) = new_import.split_once("./") { + Path::new(root).join(rest).to_str().unwrap().to_string() + } else { + new_import + } } fn find_best_match<'a, 'b>( - patterns: &'a FnvHashMap>, - import: &'b str, + patterns: &'a FnvHashMap>, + import: &'b str, ) -> Option<(Option>, &'a String)> { - patterns - .keys() - .map(|pattern| (pattern, Pattern::matches(pattern, import))) - .filter_map(|(pattern, matched)| match matched { - Pattern::Match(rank, star_match) => Some((rank, star_match, pattern)), - _ => None, - }) - .max_by(|(rank_x, _, _), (rank_y, _, _)| rank_x.cmp(rank_y)) - .map(|(_, pattern, star_match)| (pattern, star_match)) + patterns + .keys() + .map(|pattern| (pattern, Pattern::matches(pattern, import))) + .filter_map(|(pattern, matched)| match matched { + Pattern::Match(rank, star_match) => Some((rank, star_match, pattern)), + _ => None, + }) + .max_by(|(rank_x, _, _), (rank_y, _, _)| rank_x.cmp(rank_y)) + .map(|(_, pattern, star_match)| (pattern, star_match)) } diff --git a/src/rust/engine/dep_inference/src/javascript/mod.rs b/src/rust/engine/dep_inference/src/javascript/mod.rs index 0717e080143..4c47caf1606 100644 --- a/src/rust/engine/dep_inference/src/javascript/mod.rs +++ b/src/rust/engine/dep_inference/src/javascript/mod.rs @@ -20,174 +20,171 @@ include!(concat!(env!("OUT_DIR"), "/javascript_impl_hash.rs")); #[derive(Serialize, Deserialize)] pub struct ParsedJavascriptDependencies { - pub file_imports: HashSet, - pub package_imports: HashSet, + pub file_imports: HashSet, + pub package_imports: HashSet, } pub fn get_dependencies( - contents: &str, - filepath: PathBuf, - metadata: JavascriptInferenceMetadata, + contents: &str, + filepath: PathBuf, + metadata: JavascriptInferenceMetadata, ) -> 
Result { - let patterns = metadata - .import_patterns - .into_iter() - .map(|pattern| (pattern.pattern, pattern.replacements)) - .collect(); - let mut collector = ImportCollector::new(contents); - collector.collect(); - let (relative_files, packages): (HashSet, HashSet) = collector - .imports - .into_iter() - .flat_map(|import| imports_from_patterns(&metadata.package_root, &patterns, import)) - .partition(|import| { - import.starts_with('.') - || import.starts_with('/') - || (!metadata.package_root.is_empty() && import.starts_with(&metadata.package_root)) - }); - Ok(ParsedJavascriptDependencies { - file_imports: normalize_from_path(&metadata.package_root, filepath, relative_files), - package_imports: packages, - }) + let patterns = metadata + .import_patterns + .into_iter() + .map(|pattern| (pattern.pattern, pattern.replacements)) + .collect(); + let mut collector = ImportCollector::new(contents); + collector.collect(); + let (relative_files, packages): (HashSet, HashSet) = collector + .imports + .into_iter() + .flat_map(|import| imports_from_patterns(&metadata.package_root, &patterns, import)) + .partition(|import| { + import.starts_with('.') + || import.starts_with('/') + || (!metadata.package_root.is_empty() && import.starts_with(&metadata.package_root)) + }); + Ok(ParsedJavascriptDependencies { + file_imports: normalize_from_path(&metadata.package_root, filepath, relative_files), + package_imports: packages, + }) } fn normalize_from_path( - root: &str, - filepath: PathBuf, - file_imports: HashSet, + root: &str, + filepath: PathBuf, + file_imports: HashSet, ) -> HashSet { - let directory = filepath.parent().unwrap_or(Path::new("")); - file_imports - .into_iter() - .map(|string| { - let path = Path::new(&string); - if path.has_root() { - string - } else if path.starts_with(root) && !root.is_empty() { - normalize_path(path).map_or(string, |path| path.to_string_lossy().to_string()) - } else { - normalize_path(&directory.join(path)) - .map_or(string, |path| path.to_string_lossy().to_string()) - } - }) - .collect() + let directory = filepath.parent().unwrap_or(Path::new("")); + file_imports + .into_iter() + .map(|string| { + let path = Path::new(&string); + if path.has_root() { + string + } else if path.starts_with(root) && !root.is_empty() { + normalize_path(path).map_or(string, |path| path.to_string_lossy().to_string()) + } else { + normalize_path(&directory.join(path)) + .map_or(string, |path| path.to_string_lossy().to_string()) + } + }) + .collect() } struct ImportCollector<'a> { - pub imports: Vec, - code: &'a str, + pub imports: Vec, + code: &'a str, } impl ImportCollector<'_> { - pub fn new(code: &'_ str) -> ImportCollector<'_> { - ImportCollector { - imports: Vec::new(), - code, + pub fn new(code: &'_ str) -> ImportCollector<'_> { + ImportCollector { + imports: Vec::new(), + code, + } } - } - - pub fn collect(&mut self) { - let mut parser = Parser::new(); - parser - .set_language(tree_sitter_javascript::language()) - .expect("Error loading Javascript grammar"); - let parsed = parser.parse(self.code, None); - let tree = parsed.unwrap(); - let mut cursor = tree.walk(); - - self.walk(&mut cursor); - } - - fn code_at(&self, range: tree_sitter::Range) -> &str { - &self.code[range.start_byte..range.end_byte] - } - - fn is_pragma_ignored(&self, node: Node) -> bool { - fn comment_after_semicolon(node: Node) -> Option { - node - .next_named_sibling() - .filter(|comment| comment.kind_id() == KindID::COMMENT) + + pub fn collect(&mut self) { + let mut parser = Parser::new(); + parser + 
.set_language(tree_sitter_javascript::language()) + .expect("Error loading Javascript grammar"); + let parsed = parser.parse(self.code, None); + let tree = parsed.unwrap(); + let mut cursor = tree.walk(); + + self.walk(&mut cursor); } - fn comment_after_no_semicolon(node: Node) -> Option { - node - .children(&mut node.walk()) - .find(|node| node.kind_id() == KindID::COMMENT) + + fn code_at(&self, range: tree_sitter::Range) -> &str { + &self.code[range.start_byte..range.end_byte] } - let contains_pragma = |node: Node, comment: Node| -> bool { - let comment_range = comment.range(); - node.range().end_point.row == comment_range.start_point.row - && self - .code_at(comment_range) - .contains("// pants: no-infer-dep") - }; - comment_after_semicolon(node) - .or_else(|| comment_after_no_semicolon(node)) - .map_or(false, |comment| contains_pragma(node, comment)) - } - - fn insert_import(&mut self, import_string: Option) { - if let Some(import_string) = import_string { - let import_string = self.code_at(import_string.range()); - self - .imports - .push(import_string.strip_first_last().to_string()) + + fn is_pragma_ignored(&self, node: Node) -> bool { + fn comment_after_semicolon(node: Node) -> Option { + node.next_named_sibling() + .filter(|comment| comment.kind_id() == KindID::COMMENT) + } + fn comment_after_no_semicolon(node: Node) -> Option { + node.children(&mut node.walk()) + .find(|node| node.kind_id() == KindID::COMMENT) + } + let contains_pragma = |node: Node, comment: Node| -> bool { + let comment_range = comment.range(); + node.range().end_point.row == comment_range.start_point.row + && self + .code_at(comment_range) + .contains("// pants: no-infer-dep") + }; + comment_after_semicolon(node) + .or_else(|| comment_after_no_semicolon(node)) + .map_or(false, |comment| contains_pragma(node, comment)) } - } - fn propagate_pragma(&self, node: Node) -> ChildBehavior { - if !self.is_pragma_ignored(node) { - return ChildBehavior::Visit; + fn insert_import(&mut self, import_string: Option) { + if let Some(import_string) = import_string { + let import_string = self.code_at(import_string.range()); + self.imports + .push(import_string.strip_first_last().to_string()) + } + } + + fn propagate_pragma(&self, node: Node) -> ChildBehavior { + if !self.is_pragma_ignored(node) { + return ChildBehavior::Visit; + } + ChildBehavior::Ignore } - ChildBehavior::Ignore - } } impl Visitor for ImportCollector<'_> { - fn visit_import_statement(&mut self, node: Node) -> ChildBehavior { - if !self.is_pragma_ignored(node) { - self.insert_import(node.child_by_field_name("source")); + fn visit_import_statement(&mut self, node: Node) -> ChildBehavior { + if !self.is_pragma_ignored(node) { + self.insert_import(node.child_by_field_name("source")); + } + ChildBehavior::Ignore } - ChildBehavior::Ignore - } - fn visit_expression_statement(&mut self, node: Node) -> ChildBehavior { - if node.children(&mut node.walk()).any(|child| { - let id = child.kind_id(); - KindID::CALL_EXPRESSION.contains(&id) || id == KindID::AWAIT_EXPRESSION - }) { - return self.propagate_pragma(node); + fn visit_expression_statement(&mut self, node: Node) -> ChildBehavior { + if node.children(&mut node.walk()).any(|child| { + let id = child.kind_id(); + KindID::CALL_EXPRESSION.contains(&id) || id == KindID::AWAIT_EXPRESSION + }) { + return self.propagate_pragma(node); + } + ChildBehavior::Ignore + } + + fn visit_lexical_declaration(&mut self, node: Node) -> ChildBehavior { + self.propagate_pragma(node) } - ChildBehavior::Ignore - } - - fn 
visit_lexical_declaration(&mut self, node: Node) -> ChildBehavior { - self.propagate_pragma(node) - } - - fn visit_call_expression(&mut self, node: Node) -> ChildBehavior { - if let (Some(function), Some(args)) = (node.named_child(0), node.named_child(1)) { - if let "require" | "import" = self.code_at(function.range()) { - for arg in args.children(&mut args.walk()) { - if arg.kind_id() == KindID::STRING { - self.insert_import(Some(arg)) - } + + fn visit_call_expression(&mut self, node: Node) -> ChildBehavior { + if let (Some(function), Some(args)) = (node.named_child(0), node.named_child(1)) { + if let "require" | "import" = self.code_at(function.range()) { + for arg in args.children(&mut args.walk()) { + if arg.kind_id() == KindID::STRING { + self.insert_import(Some(arg)) + } + } + } } - } + ChildBehavior::Ignore } - ChildBehavior::Ignore - } } trait StripFirstLast { - fn strip_first_last(&self) -> &Self; + fn strip_first_last(&self) -> &Self; } impl StripFirstLast for str { - fn strip_first_last(&self) -> &Self { - let mut chars = self.chars(); - chars.next(); - chars.next_back(); - chars.as_str() - } + fn strip_first_last(&self) -> &Self { + let mut chars = self.chars(); + chars.next(); + chars.next_back(); + chars.as_str() + } } #[cfg(test)] diff --git a/src/rust/engine/dep_inference/src/javascript/tests.rs b/src/rust/engine/dep_inference/src/javascript/tests.rs index b2b8e6cbe9f..e6e04fb370b 100644 --- a/src/rust/engine/dep_inference/src/javascript/tests.rs +++ b/src/rust/engine/dep_inference/src/javascript/tests.rs @@ -10,404 +10,404 @@ use javascript_inference_metadata::ImportPattern; use protos::gen::pants::cache::{javascript_inference_metadata, JavascriptInferenceMetadata}; fn assert_imports(code: &str, imports: &[&str]) { - let mut collector = ImportCollector::new(code); - collector.collect(); - assert_eq!( - HashSet::from_iter(imports.iter().map(|s| s.to_string())), - collector.imports.into_iter().collect::>() - ); + let mut collector = ImportCollector::new(code); + collector.collect(); + assert_eq!( + HashSet::from_iter(imports.iter().map(|s| s.to_string())), + collector.imports.into_iter().collect::>() + ); } fn given_metadata( - root: &str, - pattern_replacements: HashMap>, + root: &str, + pattern_replacements: HashMap>, ) -> JavascriptInferenceMetadata { - let import_patterns: Vec = pattern_replacements - .iter() - .map(|(key, value)| ImportPattern { - pattern: key.clone(), - replacements: value.clone(), - }) - .collect(); - JavascriptInferenceMetadata { - package_root: root.to_string(), - import_patterns, - } + let import_patterns: Vec = pattern_replacements + .iter() + .map(|(key, value)| ImportPattern { + pattern: key.clone(), + replacements: value.clone(), + }) + .collect(); + JavascriptInferenceMetadata { + package_root: root.to_string(), + import_patterns, + } } fn assert_dependency_imports<'a>( - file_path: &str, - code: &str, - file_imports: impl IntoIterator, - package_imports: impl IntoIterator, - metadata: JavascriptInferenceMetadata, + file_path: &str, + code: &str, + file_imports: impl IntoIterator, + package_imports: impl IntoIterator, + metadata: JavascriptInferenceMetadata, ) { - let result = get_dependencies(code, PathBuf::from(file_path), metadata).unwrap(); - assert_eq!( - HashSet::from_iter(file_imports.into_iter().map(|s| s.to_string())), - result.file_imports, - ); - assert_eq!( - HashSet::from_iter(package_imports.into_iter().map(|s| s.to_string())), - result.package_imports, - ); + let result = get_dependencies(code, PathBuf::from(file_path), 
metadata).unwrap(); + assert_eq!( + HashSet::from_iter(file_imports.into_iter().map(|s| s.to_string())), + result.file_imports, + ); + assert_eq!( + HashSet::from_iter(package_imports.into_iter().map(|s| s.to_string())), + result.package_imports, + ); } #[test] fn simple_imports() { - assert_imports("import a from 'a'", &["a"]); - assert_imports("import('c')", &["c"]); - assert_imports("require('d')", &["d"]); - assert_imports("import('e');", &["e"]); - assert_imports("require('f');", &["f"]); - assert_imports("const g = import('g');", &["g"]); - assert_imports("const h = require('h');", &["h"]); + assert_imports("import a from 'a'", &["a"]); + assert_imports("import('c')", &["c"]); + assert_imports("require('d')", &["d"]); + assert_imports("import('e');", &["e"]); + assert_imports("require('f');", &["f"]); + assert_imports("const g = import('g');", &["g"]); + assert_imports("const h = require('h');", &["h"]); } #[test] fn await_import() { - assert_imports("const i = await import('i');", &["i"]); + assert_imports("const i = await import('i');", &["i"]); } #[test] fn ignore_imports() { - assert_imports("import a from 'b'; // pants: no-infer-dep", &[]); - assert_imports("import a from 'c' // pants: no-infer-dep", &[]); - assert_imports("import('e') // pants: no-infer-dep", &[]); - assert_imports("require('f') // pants: no-infer-dep", &[]); - assert_imports("import('e'); // pants: no-infer-dep", &[]); - assert_imports("require('f'); // pants: no-infer-dep", &[]); + assert_imports("import a from 'b'; // pants: no-infer-dep", &[]); + assert_imports("import a from 'c' // pants: no-infer-dep", &[]); + assert_imports("import('e') // pants: no-infer-dep", &[]); + assert_imports("require('f') // pants: no-infer-dep", &[]); + assert_imports("import('e'); // pants: no-infer-dep", &[]); + assert_imports("require('f'); // pants: no-infer-dep", &[]); } #[test] fn still_parses_from_syntax_error() { - assert_imports("import a from '.'; x=", &["."]); + assert_imports("import a from '.'; x=", &["."]); } #[test] fn non_string_literals() { - assert_imports( - r" + assert_imports( + r" const a = 5; require(a) ", - &[], - ); + &[], + ); } #[test] fn constructor_is_not_import() { - assert_imports( - r" + assert_imports( + r" new require('a') ", - &[], - ); + &[], + ); } #[test] fn dynamic_scope() { - assert_imports( - r" + assert_imports( + r" await import('some.wasm') ", - &["some.wasm"], - ); + &["some.wasm"], + ); } #[test] fn adds_dir_to_file_imports() -> Result<(), Box> { - let result = get_dependencies( - &"import a from './file.js'", - Path::new("dir/index.js").to_path_buf(), - Default::default(), - )?; - assert_eq!( - result.file_imports, - HashSet::from_iter(["dir/file.js".to_string()]) - ); - Ok(()) + let result = get_dependencies( + &"import a from './file.js'", + Path::new("dir/index.js").to_path_buf(), + Default::default(), + )?; + assert_eq!( + result.file_imports, + HashSet::from_iter(["dir/file.js".to_string()]) + ); + Ok(()) } #[test] fn root_level_files_have_no_dir() { - assert_dependency_imports( - "index.mjs", - &r#"import a from "./file.js""#, - ["file.js"], - [], - given_metadata(Default::default(), Default::default()), - ) + assert_dependency_imports( + "index.mjs", + &r#"import a from "./file.js""#, + ["file.js"], + [], + given_metadata(Default::default(), Default::default()), + ) } #[test] fn only_walks_one_dir_level_for_curdir() { - assert_dependency_imports( - "src/js/index.mjs", - &r#" + assert_dependency_imports( + "src/js/index.mjs", + &r#" import fs from "fs"; import { x } from 
"./xes.mjs"; "#, - ["src/js/xes.mjs"], - ["fs"], - given_metadata(Default::default(), Default::default()), - ) + ["src/js/xes.mjs"], + ["fs"], + given_metadata(Default::default(), Default::default()), + ) } #[test] fn walks_two_dir_levels_for_pardir() { - assert_dependency_imports( - "src/js/a/index.mjs", - &r#" + assert_dependency_imports( + "src/js/a/index.mjs", + &r#" import fs from "fs"; import { x } from "../xes.mjs"; "#, - ["src/js/xes.mjs"], - ["fs"], - given_metadata(Default::default(), Default::default()), - ) + ["src/js/xes.mjs"], + ["fs"], + given_metadata(Default::default(), Default::default()), + ) } #[test] fn silly_walking() { - assert_dependency_imports( - "src/js/a/index.mjs", - &r#" + assert_dependency_imports( + "src/js/a/index.mjs", + &r#" import { x } from "././///../../xes.mjs"; "#, - ["src/xes.mjs"], - [], - given_metadata(Default::default(), Default::default()), - ) + ["src/xes.mjs"], + [], + given_metadata(Default::default(), Default::default()), + ) } #[test] fn imports_outside_of_provided_source_root_are_unchanged() { - assert_dependency_imports( - "src/index.mjs", - &r#" + assert_dependency_imports( + "src/index.mjs", + &r#" import { x } from "../../xes.mjs"; "#, - ["../../xes.mjs"], - [], - given_metadata(Default::default(), Default::default()), - ); - - assert_dependency_imports( - "js/src/lib/index.mjs", - &r#" + ["../../xes.mjs"], + [], + given_metadata(Default::default(), Default::default()), + ); + + assert_dependency_imports( + "js/src/lib/index.mjs", + &r#" import { x } from "./../../../../lib2/xes.mjs"; "#, - ["./../../../../lib2/xes.mjs"], - [], - given_metadata(Default::default(), Default::default()), - ); + ["./../../../../lib2/xes.mjs"], + [], + given_metadata(Default::default(), Default::default()), + ); } #[test] fn subpath_package_import() { - assert_dependency_imports( - "js/src/lib/index.mjs", - &r#" + assert_dependency_imports( + "js/src/lib/index.mjs", + &r#" import chalk from '#myChalk'; "#, - [], - ["chalk"], - given_metadata( - "", - HashMap::from_iter([("#myChalk".to_string(), vec!["chalk".to_string()])]), - ), - ); + [], + ["chalk"], + given_metadata( + "", + HashMap::from_iter([("#myChalk".to_string(), vec!["chalk".to_string()])]), + ), + ); } #[test] fn subpath_file_import() { - assert_dependency_imports( - "js/src/lib/index.mjs", - &r#" + assert_dependency_imports( + "js/src/lib/index.mjs", + &r#" import stuff from '#nested/stuff.mjs'; "#, - ["js/src/lib/nested/stuff.mjs"], - [], - given_metadata( - "js", - HashMap::from_iter([( - "#nested/*.mjs".to_string(), - vec!["./src/lib/nested/*.mjs".to_string()], - )]), - ), - ); + ["js/src/lib/nested/stuff.mjs"], + [], + given_metadata( + "js", + HashMap::from_iter([( + "#nested/*.mjs".to_string(), + vec!["./src/lib/nested/*.mjs".to_string()], + )]), + ), + ); } #[test] fn polyfills() { - assert_dependency_imports( - "js/src/index.mjs", - &r#" + assert_dependency_imports( + "js/src/index.mjs", + &r#" import { ws } from '#websockets'; "#, - ["js/websockets-polyfill.js"], - ["websockets"], - given_metadata( - "js", - HashMap::from_iter([( - "#websockets".to_string(), - vec![ - "websockets".to_string(), - "./websockets-polyfill.js".to_string(), - ], - )]), - ), - ); + ["js/websockets-polyfill.js"], + ["websockets"], + given_metadata( + "js", + HashMap::from_iter([( + "#websockets".to_string(), + vec![ + "websockets".to_string(), + "./websockets-polyfill.js".to_string(), + ], + )]), + ), + ); } fn assert_matches_with_star<'a>( - pattern: Pattern, - matched: impl Into> + std::fmt::Debug, + 
pattern: Pattern, + matched: impl Into> + std::fmt::Debug, ) { - let matched = matched.into(); - let is_match = matches!( - pattern, - Pattern::Match(_, ref star_match) if star_match.as_ref().map(|StarMatch(string)| *string) == matched - ); - assert!( - is_match, - "pattern = {pattern:?}, expected_match = {matched:?}" - ) + let matched = matched.into(); + let is_match = matches!( + pattern, + Pattern::Match(_, ref star_match) if star_match.as_ref().map(|StarMatch(string)| *string) == matched + ); + assert!( + is_match, + "pattern = {pattern:?}, expected_match = {matched:?}" + ) } #[test] fn pattern_matches_trailing_star() { - let pattern = Pattern::matches("#lib/*", "#lib/something/index.js"); - assert_matches_with_star(pattern, "something/index.js") + let pattern = Pattern::matches("#lib/*", "#lib/something/index.js"); + assert_matches_with_star(pattern, "something/index.js") } #[test] fn pattern_matches_star() { - let pattern = Pattern::matches("#lib/*/index.js", "#lib/something/index.js"); - assert_matches_with_star(pattern, "something") + let pattern = Pattern::matches("#lib/*/index.js", "#lib/something/index.js"); + assert_matches_with_star(pattern, "something") } #[test] fn pattern_matches_star_with_extension() { - let pattern = Pattern::matches("#internal/*.js", "#internal/z.js"); - assert_matches_with_star(pattern, "z") + let pattern = Pattern::matches("#internal/*.js", "#internal/z.js"); + assert_matches_with_star(pattern, "z") } #[test] fn pattern_without_star_matches() { - let pattern = Pattern::matches("#some-lib", "#some-lib"); - assert_matches_with_star(pattern, None) + let pattern = Pattern::matches("#some-lib", "#some-lib"); + assert_matches_with_star(pattern, None) } #[test] fn static_pattern_mismatch() { - let pattern = Pattern::matches("#some-lib", "#some-other-lib"); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("#some-lib", "#some-other-lib"); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn mismatch_after_star_pattern() { - let pattern = Pattern::matches("#some-lib/*.mjs", "#some-lib/a.js"); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("#some-lib/*.mjs", "#some-lib/a.js"); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn mismatch_before_star_pattern() { - let pattern = Pattern::matches("#other-lib/*.js", "#some-lib/a.js"); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("#other-lib/*.js", "#some-lib/a.js"); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn trailing_star_pattern_mismatch() { - let pattern = Pattern::matches("#some-lib/*", "#some-other-lib"); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("#some-lib/*", "#some-other-lib"); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn star_only_pattern() { - // Users might do this. - // Nodejs / TS will crash on them later, so avoiding special casing seem ok. - let pattern = Pattern::matches("*", "some-other-lib"); - assert_eq!(pattern, Pattern::NoMatch) + // Users might do this. + // Nodejs / TS will crash on them later, so avoiding special casing seem ok. 
+ let pattern = Pattern::matches("*", "some-other-lib"); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn empty_pattern_does_not_match_import() { - let pattern = Pattern::matches("", "#some-other-lib"); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("", "#some-other-lib"); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn empty_import() { - let pattern = Pattern::matches("", ""); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("", ""); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn empty_import_and_star_pattern() { - let pattern = Pattern::matches("*", ""); - assert_eq!(pattern, Pattern::NoMatch) + let pattern = Pattern::matches("*", ""); + assert_eq!(pattern, Pattern::NoMatch) } #[test] fn unicode_shenanigans() { - assert_matches_with_star(Pattern::matches("#🔥*🔥", "#🔥asd🔥"), "asd"); + assert_matches_with_star(Pattern::matches("#🔥*🔥", "#🔥asd🔥"), "asd"); } #[test] fn more_unicode_shenanigans() { - assert_matches_with_star( - Pattern::matches("#我的氣墊船充滿了鱔魚/*.js", "#我的氣墊船充滿了鱔魚/asd.js"), - "asd", - ); + assert_matches_with_star( + Pattern::matches("#我的氣墊船充滿了鱔魚/*.js", "#我的氣墊船充滿了鱔魚/asd.js"), + "asd", + ); } #[test] fn matching_unicode_shenanigans() { - assert_matches_with_star( - Pattern::matches("#*/stuff.js", "#🔥asd🔥/stuff.js"), - "🔥asd🔥", - ); + assert_matches_with_star( + Pattern::matches("#*/stuff.js", "#🔥asd🔥/stuff.js"), + "🔥asd🔥", + ); } #[test] fn unicode_shenanigans_with_equal_start_byte() { - assert_matches_with_star(Pattern::matches("#á/*é.js", "#á/asdáé.js"), "asdá"); + assert_matches_with_star(Pattern::matches("#á/*é.js", "#á/asdáé.js"), "asdá"); } #[test] fn replaces_groups() { - let mut patterns = HashMap::default(); - patterns.insert( - "#internal/*.js".to_string(), - vec!["./src/internal/*.js".to_string()], - ); - let imports = imports_from_patterns("dir", &patterns, "#internal/z.js".to_string()); + let mut patterns = HashMap::default(); + patterns.insert( + "#internal/*.js".to_string(), + vec!["./src/internal/*.js".to_string()], + ); + let imports = imports_from_patterns("dir", &patterns, "#internal/z.js".to_string()); - assert_eq!( - imports, - HashSet::from_iter(["dir/src/internal/z.js".to_string()]) - ) + assert_eq!( + imports, + HashSet::from_iter(["dir/src/internal/z.js".to_string()]) + ) } #[test] fn longest_prefix_wins() { - let mut patterns = HashMap::default(); - - patterns.insert( - "#internal/stuff/*.js".to_string(), - vec!["./src/stuff/*.js".to_string()], - ); - patterns.insert( - "#internal/*.js".to_string(), - vec!["./src/things/*.js".to_string()], - ); - - let imports = imports_from_patterns("dir", &patterns, "#internal/stuff/index.js".to_string()); - - assert_eq!( - imports, - HashSet::from_iter(["dir/src/stuff/index.js".to_string()]) - ) + let mut patterns = HashMap::default(); + + patterns.insert( + "#internal/stuff/*.js".to_string(), + vec!["./src/stuff/*.js".to_string()], + ); + patterns.insert( + "#internal/*.js".to_string(), + vec!["./src/things/*.js".to_string()], + ); + + let imports = imports_from_patterns("dir", &patterns, "#internal/stuff/index.js".to_string()); + + assert_eq!( + imports, + HashSet::from_iter(["dir/src/stuff/index.js".to_string()]) + ) } diff --git a/src/rust/engine/dep_inference/src/javascript/util.rs b/src/rust/engine/dep_inference/src/javascript/util.rs index e0329bee08f..a38e749b9dc 100644 --- a/src/rust/engine/dep_inference/src/javascript/util.rs +++ b/src/rust/engine/dep_inference/src/javascript/util.rs @@ -9,30 +9,30 @@ use std::path::{Component, Path, PathBuf}; /// 
This is different to [`NormalizePath`](https://docs.rs/normalize-path/latest/normalize_path/trait.NormalizePath.html), /// which returns the file name in this case. pub fn normalize_path(path: &Path) -> Option { - let mut components = path.components().peekable(); - let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { - components.next(); - PathBuf::from(c.as_os_str()) - } else { - PathBuf::new() - }; + let mut components = path.components().peekable(); + let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { + components.next(); + PathBuf::from(c.as_os_str()) + } else { + PathBuf::new() + }; - for component in components { - match component { - Component::Prefix(..) => unreachable!(), - Component::RootDir => { - ret.push(component.as_os_str()); - } - Component::CurDir => {} - Component::ParentDir => { - if !ret.pop() { - return None; + for component in components { + match component { + Component::Prefix(..) => unreachable!(), + Component::RootDir => { + ret.push(component.as_os_str()); + } + Component::CurDir => {} + Component::ParentDir => { + if !ret.pop() { + return None; + } + } + Component::Normal(c) => { + ret.push(c); + } } - } - Component::Normal(c) => { - ret.push(c); - } } - } - Some(ret) + Some(ret) } diff --git a/src/rust/engine/dep_inference/src/lib.rs b/src/rust/engine/dep_inference/src/lib.rs index a48ef2887e4..05041026a88 100644 --- a/src/rust/engine/dep_inference/src/lib.rs +++ b/src/rust/engine/dep_inference/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] diff --git a/src/rust/engine/dep_inference/src/python/mod.rs b/src/rust/engine/dep_inference/src/python/mod.rs index 2e7c7341305..64045004aaf 100644 --- a/src/rust/engine/dep_inference/src/python/mod.rs +++ b/src/rust/engine/dep_inference/src/python/mod.rs @@ -12,376 +12,374 @@ use tree_sitter::Parser; #[derive(Serialize, Deserialize)] pub struct ParsedPythonDependencies { - pub imports: HashMap, - pub string_candidates: HashMap, + pub imports: HashMap, + pub string_candidates: HashMap, } pub fn get_dependencies( - contents: &str, - filepath: PathBuf, + contents: &str, + filepath: PathBuf, ) -> Result { - let mut collector = ImportCollector::new(contents); - collector.collect(); - - let mut import_map = collector.import_map; - - // NB: the import collector doesn't do anything special for relative imports, we need to fix - // those up. 
- let keys_to_replace: HashSet<_> = import_map - .keys() - .filter(|key| key.starts_with('.')) - .cloned() - .collect(); - let parent_path = filepath - .parent() - .expect("Expected a filepath that was non-root"); - let path_parts: Vec<&str> = parent_path - .iter() - .map(|p| { - p.to_str() - .expect("Expected UTF-8-compatible filepath parts") - }) - .collect(); - for key in keys_to_replace { - let nonrelative = key.trim_start_matches('.'); - let level = key.len() - nonrelative.len(); - if level > path_parts.len() { - // Don't mess with the key, let Pants error with the original string - continue; - } + let mut collector = ImportCollector::new(contents); + collector.collect(); + + let mut import_map = collector.import_map; + + // NB: the import collector doesn't do anything special for relative imports, we need to fix + // those up. + let keys_to_replace: HashSet<_> = import_map + .keys() + .filter(|key| key.starts_with('.')) + .cloned() + .collect(); + let parent_path = filepath + .parent() + .expect("Expected a filepath that was non-root"); + let path_parts: Vec<&str> = parent_path + .iter() + .map(|p| { + p.to_str() + .expect("Expected UTF-8-compatible filepath parts") + }) + .collect(); + for key in keys_to_replace { + let nonrelative = key.trim_start_matches('.'); + let level = key.len() - nonrelative.len(); + if level > path_parts.len() { + // Don't mess with the key, let Pants error with the original string + continue; + } - let mut new_key_parts = path_parts[0..((path_parts.len() - level) + 1)].to_vec(); - if !nonrelative.is_empty() { - // an import like `from .. import *` can end up with key == '..', and hence nonrelative == ""; - // the result should just be the raw parent traversal, without a suffix part - new_key_parts.push(nonrelative); - } + let mut new_key_parts = path_parts[0..((path_parts.len() - level) + 1)].to_vec(); + if !nonrelative.is_empty() { + // an import like `from .. 
import *` can end up with key == '..', and hence nonrelative == ""; + // the result should just be the raw parent traversal, without a suffix part + new_key_parts.push(nonrelative); + } - let old_value = import_map.remove(&key).unwrap(); - import_map.insert(new_key_parts.join("."), old_value); - } + let old_value = import_map.remove(&key).unwrap(); + import_map.insert(new_key_parts.join("."), old_value); + } - Ok(ParsedPythonDependencies { - imports: import_map, - string_candidates: collector.string_candidates, - }) + Ok(ParsedPythonDependencies { + imports: import_map, + string_candidates: collector.string_candidates, + }) } struct ImportCollector<'a> { - pub import_map: HashMap, - pub string_candidates: HashMap, - code: &'a str, - weaken_imports: bool, + pub import_map: HashMap, + pub string_candidates: HashMap, + code: &'a str, + weaken_imports: bool, } impl ImportCollector<'_> { - pub fn new(code: &'_ str) -> ImportCollector<'_> { - ImportCollector { - import_map: HashMap::default(), - string_candidates: HashMap::default(), - code, - weaken_imports: false, + pub fn new(code: &'_ str) -> ImportCollector<'_> { + ImportCollector { + import_map: HashMap::default(), + string_candidates: HashMap::default(), + code, + weaken_imports: false, + } } - } - - pub fn collect(&mut self) { - let mut parser = Parser::new(); - parser - .set_language(tree_sitter_python::language()) - .expect("Error loading Python grammar"); - let parsed = parser.parse(self.code, None); - let tree = parsed.unwrap(); - let mut cursor = tree.walk(); - - self.walk(&mut cursor); - } - - fn code_at(&self, range: tree_sitter::Range) -> &str { - &self.code[range.start_byte..range.end_byte] - } - - fn string_at(&self, range: tree_sitter::Range) -> &str { - // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals - self - .code_at(range) - .trim_start_matches(|c| "rRuUfFbB".contains(c)) - .trim_matches(|c| "'\"".contains(c)) - } - - fn is_pragma_ignored(&self, node: tree_sitter::Node) -> bool { - if let Some(sibling) = node.next_named_sibling() { - let next_node_range = sibling.range(); - if sibling.kind_id() == KindID::COMMENT - && node.range().end_point.row == next_node_range.start_point.row - && self - .code_at(next_node_range) - .contains("# pants: no-infer-dep") - { - return true; - } + + pub fn collect(&mut self) { + let mut parser = Parser::new(); + parser + .set_language(tree_sitter_python::language()) + .expect("Error loading Python grammar"); + let parsed = parser.parse(self.code, None); + let tree = parsed.unwrap(); + let mut cursor = tree.walk(); + + self.walk(&mut cursor); } - false - } - - fn unnest_alias(node: tree_sitter::Node) -> tree_sitter::Node { - match node.kind_id() { - KindID::ALIASED_IMPORT => node - .named_child(0) - .expect("aliased imports must have a child"), - _ => node, + + fn code_at(&self, range: tree_sitter::Range) -> &str { + &self.code[range.start_byte..range.end_byte] + } + + fn string_at(&self, range: tree_sitter::Range) -> &str { + // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + self.code_at(range) + .trim_start_matches(|c| "rRuUfFbB".contains(c)) + .trim_matches(|c| "'\"".contains(c)) + } + + fn is_pragma_ignored(&self, node: tree_sitter::Node) -> bool { + if let Some(sibling) = node.next_named_sibling() { + let next_node_range = sibling.range(); + if sibling.kind_id() == KindID::COMMENT + && node.range().end_point.row == next_node_range.start_point.row + && self + .code_at(next_node_range) + .contains("# pants: 
no-infer-dep") + { + return true; + } + } + false } - } - - /// Handle different styles of references to modules/imports - /// - /// ```python - /// import $base - /// "$base" # string import - /// from $base import * # (the * node is passed as `specific` too) - /// from $base import $specific - /// ``` - fn insert_import( - &mut self, - base: tree_sitter::Node, - specific: Option, - is_string: bool, - ) { - // the specifically-imported item takes precedence over the base name for ignoring and lines - // etc. - let most_specific = specific.unwrap_or(base); - - if self.is_pragma_ignored(most_specific) { - return; + + fn unnest_alias(node: tree_sitter::Node) -> tree_sitter::Node { + match node.kind_id() { + KindID::ALIASED_IMPORT => node + .named_child(0) + .expect("aliased imports must have a child"), + _ => node, + } } - let base = ImportCollector::unnest_alias(base); - // * and errors are the same as not having an specific import - let specific = specific - .map(ImportCollector::unnest_alias) - .filter(|n| !matches!(n.kind_id(), KindID::WILDCARD_IMPORT | KindID::ERROR)); - - let base_range = base.range(); - let base_ref = if is_string { - self.string_at(base_range) - } else { - self.code_at(base_range) - }; - - let full_name = match specific { - Some(specific) => { - let specific_ref = self.code_at(specific.range()); - // `from ... import a` => `...a` should concat base_ref and specific_ref directly, but `from - // x import a` => `x.a` needs to insert a . between them - let joiner = if base_ref.ends_with('.') { "" } else { "." }; - [base_ref, specific_ref].join(joiner) - } - None => base_ref.to_string(), - }; - - let line0 = most_specific.range().start_point.row; - - self - .import_map - .entry(full_name) - .and_modify(|v| *v = (v.0, v.1 && self.weaken_imports)) - .or_insert(((line0 as u64) + 1, self.weaken_imports)); - } + /// Handle different styles of references to modules/imports + /// + /// ```python + /// import $base + /// "$base" # string import + /// from $base import * # (the * node is passed as `specific` too) + /// from $base import $specific + /// ``` + fn insert_import( + &mut self, + base: tree_sitter::Node, + specific: Option, + is_string: bool, + ) { + // the specifically-imported item takes precedence over the base name for ignoring and lines + // etc. + let most_specific = specific.unwrap_or(base); + + if self.is_pragma_ignored(most_specific) { + return; + } + + let base = ImportCollector::unnest_alias(base); + // * and errors are the same as not having an specific import + let specific = specific + .map(ImportCollector::unnest_alias) + .filter(|n| !matches!(n.kind_id(), KindID::WILDCARD_IMPORT | KindID::ERROR)); + + let base_range = base.range(); + let base_ref = if is_string { + self.string_at(base_range) + } else { + self.code_at(base_range) + }; + + let full_name = match specific { + Some(specific) => { + let specific_ref = self.code_at(specific.range()); + // `from ... import a` => `...a` should concat base_ref and specific_ref directly, but `from + // x import a` => `x.a` needs to insert a . between them + let joiner = if base_ref.ends_with('.') { "" } else { "." 
}; + [base_ref, specific_ref].join(joiner) + } + None => base_ref.to_string(), + }; + + let line0 = most_specific.range().start_point.row; + + self.import_map + .entry(full_name) + .and_modify(|v| *v = (v.0, v.1 && self.weaken_imports)) + .or_insert(((line0 as u64) + 1, self.weaken_imports)); + } } // NB: https://tree-sitter.github.io/tree-sitter/playground is very helpful impl Visitor for ImportCollector<'_> { - fn visit_import_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { - if !self.is_pragma_ignored(node) { - self.insert_import(node.named_child(0).unwrap(), None, false); - } - ChildBehavior::Ignore - } - - fn visit_import_from_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { - if !self.is_pragma_ignored(node) { - // the grammar is something like `from $module_name import $($name),* | '*'`, where $... is a field - // name. - let module_name = node - .child_by_field_name("module_name") - .expect("`from ... import ...` must have module_name"); - - let mut any_inserted = false; - for child in node.children_by_field_name("name", &mut node.walk()) { - self.insert_import(module_name, Some(child), false); - any_inserted = true; - } - - if !any_inserted { - // There's no names (i.e. it's probably not `from ... import some, names`), let's look for - // the * in a wildcard import. (It doesn't have a field name, so we have to search for it - // manually.) - for child in node.children(&mut node.walk()) { - if child.kind_id() == KindID::WILDCARD_IMPORT { - self.insert_import(module_name, Some(child), false); - any_inserted = true - } + fn visit_import_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { + if !self.is_pragma_ignored(node) { + self.insert_import(node.named_child(0).unwrap(), None, false); } - } - - if !any_inserted { - // Still nothing inserted, which means something has probably gone wrong and/or we haven't - // understood the syntax tree! We're working on a definite import statement, so silently - // doing nothing with it is likely to be wrong. Let's insert the import node itself and let - // that be surfaced as an dep-inference failure. - self.insert_import(node, None, false) - } + ChildBehavior::Ignore } - ChildBehavior::Ignore - } - - fn visit_try_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { - let mut should_weaken = false; - let mut cursor = node.walk(); - let children: Vec<_> = node.named_children(&mut cursor).collect(); - for child in children.iter() { - if child.kind_id() == KindID::EXCEPT_CLAUSE { - // N.B. Python allows any arbitrary expression as an except handler. - // We only parse identifiers, or (Set/Tuple/List)-of-identifier expressions. - let except_expr = child.named_child(0).unwrap(); - should_weaken = match except_expr.kind_id() { - KindID::IDENTIFIER => self.code_at(except_expr.range()) == "ImportError", - KindID::SET | KindID::LIST | KindID::TUPLE => except_expr - .named_children(&mut except_expr.walk()) - .any(|expr| { - expr.kind_id() == KindID::IDENTIFIER && self.code_at(expr.range()) == "ImportError" - }), - _ => false, - }; - if should_weaken { - break; + + fn visit_import_from_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { + if !self.is_pragma_ignored(node) { + // the grammar is something like `from $module_name import $($name),* | '*'`, where $... is a field + // name. + let module_name = node + .child_by_field_name("module_name") + .expect("`from ... 
import ...` must have module_name"); + + let mut any_inserted = false; + for child in node.children_by_field_name("name", &mut node.walk()) { + self.insert_import(module_name, Some(child), false); + any_inserted = true; + } + + if !any_inserted { + // There's no names (i.e. it's probably not `from ... import some, names`), let's look for + // the * in a wildcard import. (It doesn't have a field name, so we have to search for it + // manually.) + for child in node.children(&mut node.walk()) { + if child.kind_id() == KindID::WILDCARD_IMPORT { + self.insert_import(module_name, Some(child), false); + any_inserted = true + } + } + } + + if !any_inserted { + // Still nothing inserted, which means something has probably gone wrong and/or we haven't + // understood the syntax tree! We're working on a definite import statement, so silently + // doing nothing with it is likely to be wrong. Let's insert the import node itself and let + // that be surfaced as an dep-inference failure. + self.insert_import(node, None, false) + } } - } + ChildBehavior::Ignore } - for child in children.iter() { - let previous_weaken = self.weaken_imports; - if KindID::BLOCK.contains(&child.kind_id()) { - self.weaken_imports = should_weaken; - } - self.walk(&mut child.walk()); - self.weaken_imports = previous_weaken; + fn visit_try_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { + let mut should_weaken = false; + let mut cursor = node.walk(); + let children: Vec<_> = node.named_children(&mut cursor).collect(); + for child in children.iter() { + if child.kind_id() == KindID::EXCEPT_CLAUSE { + // N.B. Python allows any arbitrary expression as an except handler. + // We only parse identifiers, or (Set/Tuple/List)-of-identifier expressions. + let except_expr = child.named_child(0).unwrap(); + should_weaken = match except_expr.kind_id() { + KindID::IDENTIFIER => self.code_at(except_expr.range()) == "ImportError", + KindID::SET | KindID::LIST | KindID::TUPLE => except_expr + .named_children(&mut except_expr.walk()) + .any(|expr| { + expr.kind_id() == KindID::IDENTIFIER + && self.code_at(expr.range()) == "ImportError" + }), + _ => false, + }; + if should_weaken { + break; + } + } + } + + for child in children.iter() { + let previous_weaken = self.weaken_imports; + if KindID::BLOCK.contains(&child.kind_id()) { + self.weaken_imports = should_weaken; + } + self.walk(&mut child.walk()); + self.weaken_imports = previous_weaken; + } + ChildBehavior::Ignore } - ChildBehavior::Ignore - } - fn visit_with_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { - let with_clause = node.named_child(0).unwrap(); + fn visit_with_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior { + let with_clause = node.named_child(0).unwrap(); - let are_suppressing_importerror = with_clause - .named_children(&mut with_clause.walk()) - .any(|x| self.suppressing_importerror(x)); + let are_suppressing_importerror = with_clause + .named_children(&mut with_clause.walk()) + .any(|x| self.suppressing_importerror(x)); - // remember to visit the withitems themselves - // for ex detecting imports in `with open("/foo/bar") as f` - for child in with_clause.named_children(&mut with_clause.walk()) { - self.walk(&mut child.walk()); - } + // remember to visit the withitems themselves + // for ex detecting imports in `with open("/foo/bar") as f` + for child in with_clause.named_children(&mut with_clause.walk()) { + self.walk(&mut child.walk()); + } + + let body_node = node.child_by_field_name("body").unwrap(); + let body: Vec<_> = 
body_node.named_children(&mut body_node.walk()).collect(); + + if are_suppressing_importerror { + let previous_weaken = self.weaken_imports; + self.weaken_imports = true; + + for child in body { + self.walk(&mut child.walk()); + } + self.weaken_imports = previous_weaken; + } else { + for child in body { + self.walk(&mut child.walk()); + } + } - let body_node = node.child_by_field_name("body").unwrap(); - let body: Vec<_> = body_node.named_children(&mut body_node.walk()).collect(); - - if are_suppressing_importerror { - let previous_weaken = self.weaken_imports; - self.weaken_imports = true; - - for child in body { - self.walk(&mut child.walk()); - } - self.weaken_imports = previous_weaken; - } else { - for child in body { - self.walk(&mut child.walk()); - } + ChildBehavior::Ignore } - ChildBehavior::Ignore - } + fn visit_call(&mut self, node: tree_sitter::Node) -> ChildBehavior { + let funcname = node.named_child(0).unwrap(); + if self.code_at(funcname.range()) != "__import__" { + return ChildBehavior::Visit; + } - fn visit_call(&mut self, node: tree_sitter::Node) -> ChildBehavior { - let funcname = node.named_child(0).unwrap(); - if self.code_at(funcname.range()) != "__import__" { - return ChildBehavior::Visit; + let args = node.named_child(1).unwrap(); + if let Some(arg) = args.named_child(0) { + if arg.kind_id() == KindID::STRING { + // NB: Call nodes are children of expression nodes. The comment is a sibling of the expression. + if !self.is_pragma_ignored(node.parent().unwrap()) { + self.insert_import(arg, None, true); + } + } + } + ChildBehavior::Ignore } - let args = node.named_child(1).unwrap(); - if let Some(arg) = args.named_child(0) { - if arg.kind_id() == KindID::STRING { - // NB: Call nodes are children of expression nodes. The comment is a sibling of the expression. 
- if !self.is_pragma_ignored(node.parent().unwrap()) { - self.insert_import(arg, None, true); + fn visit_string(&mut self, node: tree_sitter::Node) -> ChildBehavior { + let range = node.range(); + let text: &str = self.string_at(range); + if !text.contains(|c: char| c.is_ascii_whitespace() || c == '\\') { + self.string_candidates + .insert(text.to_string(), (range.start_point.row + 1) as u64); } - } - } - ChildBehavior::Ignore - } - - fn visit_string(&mut self, node: tree_sitter::Node) -> ChildBehavior { - let range = node.range(); - let text: &str = self.string_at(range); - if !text.contains(|c: char| c.is_ascii_whitespace() || c == '\\') { - self - .string_candidates - .insert(text.to_string(), (range.start_point.row + 1) as u64); + ChildBehavior::Ignore } - ChildBehavior::Ignore - } } impl ImportCollector<'_> { - fn suppressing_importerror(&mut self, with_node: tree_sitter::Node) -> bool { - if with_node.kind_id() == KindID::WITH_ITEM { - let node = with_node.child_by_field_name("value").unwrap(); // synthetic - - let call_maybe_of_suppress = if node.kind_id() == KindID::CALL { - Some(node) // if we have a call directly `with suppress(ImportError):` - } else if KindID::AS_PATTERN.contains(&node.kind_id()) { - node.named_child(0).and_then(|n| match n.kind_id() { - KindID::CALL => Some(n), - _ => None, - }) // if we have a call with an `as` item `with suppress(ImportError) as e:` - } else { - None - }; - - if call_maybe_of_suppress.is_none() { - return false; - } - - let function_name_expr = call_maybe_of_suppress - .unwrap() - .child_by_field_name("function") - .unwrap(); - let is_supress = match function_name_expr.kind_id() { - KindID::ATTRIBUTE => function_name_expr - .child_by_field_name("attribute") - .map(|identifier| self.code_at(identifier.range()) == "suppress") - .unwrap_or(false), - KindID::IDENTIFIER => self.code_at(function_name_expr.range()) == "suppress", - _ => false, - }; - if !is_supress { - return false; - } - let cur = &mut node.walk(); - - let has_importerror = call_maybe_of_suppress - .unwrap() - .child_by_field_name("arguments") - .map(|x| { - x.named_children(cur) - .any(|arg| self.code_at(arg.range()) == "ImportError") - }) - .unwrap_or(false); - is_supress && has_importerror - } else { - false + fn suppressing_importerror(&mut self, with_node: tree_sitter::Node) -> bool { + if with_node.kind_id() == KindID::WITH_ITEM { + let node = with_node.child_by_field_name("value").unwrap(); // synthetic + + let call_maybe_of_suppress = if node.kind_id() == KindID::CALL { + Some(node) // if we have a call directly `with suppress(ImportError):` + } else if KindID::AS_PATTERN.contains(&node.kind_id()) { + node.named_child(0).and_then(|n| match n.kind_id() { + KindID::CALL => Some(n), + _ => None, + }) // if we have a call with an `as` item `with suppress(ImportError) as e:` + } else { + None + }; + + if call_maybe_of_suppress.is_none() { + return false; + } + + let function_name_expr = call_maybe_of_suppress + .unwrap() + .child_by_field_name("function") + .unwrap(); + let is_supress = match function_name_expr.kind_id() { + KindID::ATTRIBUTE => function_name_expr + .child_by_field_name("attribute") + .map(|identifier| self.code_at(identifier.range()) == "suppress") + .unwrap_or(false), + KindID::IDENTIFIER => self.code_at(function_name_expr.range()) == "suppress", + _ => false, + }; + if !is_supress { + return false; + } + let cur = &mut node.walk(); + + let has_importerror = call_maybe_of_suppress + .unwrap() + .child_by_field_name("arguments") + .map(|x| { + 
x.named_children(cur) + .any(|arg| self.code_at(arg.range()) == "ImportError") + }) + .unwrap_or(false); + is_supress && has_importerror + } else { + false + } } - } } #[cfg(test)] diff --git a/src/rust/engine/dep_inference/src/python/tests.rs b/src/rust/engine/dep_inference/src/python/tests.rs index 57f5432a500..3036bdc0bd7 100644 --- a/src/rust/engine/dep_inference/src/python/tests.rs +++ b/src/rust/engine/dep_inference/src/python/tests.rs @@ -5,57 +5,57 @@ use std::collections::{HashMap, HashSet}; use std::path::PathBuf; fn assert_collected( - code: &str, - import_map: HashMap<&str, (u64, bool)>, - string_candidates: HashMap<&str, u64>, + code: &str, + import_map: HashMap<&str, (u64, bool)>, + string_candidates: HashMap<&str, u64>, ) { - let mut collector = ImportCollector::new(code); - collector.collect(); - assert_eq!( - HashMap::from_iter(import_map.iter().map(|(k, v)| (k.to_string(), v.clone()))), - collector.import_map - ); - assert_eq!( - HashMap::from_iter( - string_candidates - .iter() - .map(|(k, v)| (k.to_string(), v.clone())) - ), - collector.string_candidates - ); + let mut collector = ImportCollector::new(code); + collector.collect(); + assert_eq!( + HashMap::from_iter(import_map.iter().map(|(k, v)| (k.to_string(), v.clone()))), + collector.import_map + ); + assert_eq!( + HashMap::from_iter( + string_candidates + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + ), + collector.string_candidates + ); } fn assert_imports(code: &str, imports: &[&str]) { - let mut collector = ImportCollector::new(code); - collector.collect(); - assert_eq!( - HashSet::from_iter(imports.iter().map(|s| s.to_string())), - collector.import_map.keys().cloned().collect::>() - ); + let mut collector = ImportCollector::new(code); + collector.collect(); + assert_eq!( + HashSet::from_iter(imports.iter().map(|s| s.to_string())), + collector.import_map.keys().cloned().collect::>() + ); } #[test] fn simple_imports() { - assert_imports("import a", &["a"]); - assert_imports("import a.b", &["a.b"]); - assert_imports("import a as x", &["a"]); - assert_imports("from a import b", &["a.b"]); - assert_imports("from a import *", &["a"]); - assert_imports("from a.b import c", &["a.b.c"]); - assert_imports("from a.b import c.d", &["a.b.c.d"]); - assert_imports("from a.b import c, d, e", &["a.b.c", "a.b.d", "a.b.e"]); - assert_imports( - r" + assert_imports("import a", &["a"]); + assert_imports("import a.b", &["a.b"]); + assert_imports("import a as x", &["a"]); + assert_imports("from a import b", &["a.b"]); + assert_imports("from a import *", &["a"]); + assert_imports("from a.b import c", &["a.b.c"]); + assert_imports("from a.b import c.d", &["a.b.c.d"]); + assert_imports("from a.b import c, d, e", &["a.b.c", "a.b.d", "a.b.e"]); + assert_imports( + r" from a.b import ( c, d, e, ) ", - &["a.b.c", "a.b.d", "a.b.e"], - ); - assert_imports( - r" + &["a.b.c", "a.b.d", "a.b.e"], + ); + assert_imports( + r" from a.b import ( c, @@ -64,400 +64,400 @@ from a.b import ( e, ) ", - &["a.b.c", "a.b.d", "a.b.e"], - ); - assert_imports( - r" + &["a.b.c", "a.b.d", "a.b.e"], + ); + assert_imports( + r" from a.b import ( c as x, d as y, e as z, ) ", - &["a.b.c", "a.b.d", "a.b.e"], - ); - - assert_imports("from . import b", &[".b"]); - assert_imports("from .a import b", &[".a.b"]); - assert_imports("from .. import b", &["..b"]); - assert_imports("from ..a import b", &["..a.b"]); - assert_imports("from ..a import b.c", &["..a.b.c"]); - assert_imports("from ... 
import b.c", &["...b.c"]); - assert_imports("from ...a import b.c", &["...a.b.c"]); - assert_imports("from ....a import b.c", &["....a.b.c"]); - assert_imports("from ....a import b, c", &["....a.b", "....a.c"]); - assert_imports("from ....a import b as d, c", &["....a.b", "....a.c"]); - - assert_imports("from .a import *", &[".a"]); - assert_imports("from . import *", &["."]); - assert_imports("from ..a import *", &["..a"]); - assert_imports("from .. import *", &[".."]); - - assert_imports( - "class X: def method(): if True: while True: class Y: def f(): import a", - &["a"], - ); - assert_imports("try:\nexcept:import a", &["a"]); - - // NB: Doesn't collect __future__ imports - assert_imports("from __future__ import annotations", &[]); + &["a.b.c", "a.b.d", "a.b.e"], + ); + + assert_imports("from . import b", &[".b"]); + assert_imports("from .a import b", &[".a.b"]); + assert_imports("from .. import b", &["..b"]); + assert_imports("from ..a import b", &["..a.b"]); + assert_imports("from ..a import b.c", &["..a.b.c"]); + assert_imports("from ... import b.c", &["...b.c"]); + assert_imports("from ...a import b.c", &["...a.b.c"]); + assert_imports("from ....a import b.c", &["....a.b.c"]); + assert_imports("from ....a import b, c", &["....a.b", "....a.c"]); + assert_imports("from ....a import b as d, c", &["....a.b", "....a.c"]); + + assert_imports("from .a import *", &[".a"]); + assert_imports("from . import *", &["."]); + assert_imports("from ..a import *", &["..a"]); + assert_imports("from .. import *", &[".."]); + + assert_imports( + "class X: def method(): if True: while True: class Y: def f(): import a", + &["a"], + ); + assert_imports("try:\nexcept:import a", &["a"]); + + // NB: Doesn't collect __future__ imports + assert_imports("from __future__ import annotations", &[]); } #[test] fn pragma_ignore() { - assert_imports("import a # pants: no-infer-dep", &[]); - assert_imports("import a.b # pants: no-infer-dep", &[]); - assert_imports("import a.b as d # pants: no-infer-dep", &[]); - assert_imports("from a import b # pants: no-infer-dep", &[]); - assert_imports("from a import * # pants: no-infer-dep", &[]); - assert_imports("from a import b, c # pants: no-infer-dep", &[]); - assert_imports("from a import b, c as d # pants: no-infer-dep", &[]); - assert_imports( - r" + assert_imports("import a # pants: no-infer-dep", &[]); + assert_imports("import a.b # pants: no-infer-dep", &[]); + assert_imports("import a.b as d # pants: no-infer-dep", &[]); + assert_imports("from a import b # pants: no-infer-dep", &[]); + assert_imports("from a import * # pants: no-infer-dep", &[]); + assert_imports("from a import b, c # pants: no-infer-dep", &[]); + assert_imports("from a import b, c as d # pants: no-infer-dep", &[]); + assert_imports( + r" from a.b import ( c # pants: no-infer-dep )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b import ( c as d # pants: no-infer-dep )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b import ( a, c, # pants: no-infer-dep d, )", - &["a.b.a", "a.b.d"], - ); - assert_imports( - r" + &["a.b.a", "a.b.d"], + ); + assert_imports( + r" from a.b import ( c as cc, # pants: no-infer-dep )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b import ( c as dd, # pants: no-infer-dep )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b import ( c, d, e ) # pants: no-infer-dep", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a import (b, 
# pants: no-infer-dep c)", - &["a.c"], - ); + &["a.c"], + ); - // Now let's have fun with line continuations - assert_imports( - r" + // Now let's have fun with line continuations + assert_imports( + r" from a.b import \ c # pants: no-infer-dep", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b \ import \ c # pants: no-infer-dep", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b import ( c as \ dd, # pants: no-infer-dep )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" from a.b import \ * # pants: no-infer-dep", - &[], - ); - // Imports nested within other constructs - assert_imports( - r" + &[], + ); + // Imports nested within other constructs + assert_imports( + r" if x: import a # pants: no-infer-dep ", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" if x: import a # pants: no-infer-dep import b ", - &["b"], - ); - assert_imports( - r" + &["b"], + ); + assert_imports( + r" class X: def method(): if True: while True: class Y: def f(): import a # pants: no-infer-dep ", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" if x: import \ a # pants: no-infer-dep ", - &[], - ); + &[], + ); - // https://github.com/pantsbuild/pants/issues/19751 - assert_imports( - r" + // https://github.com/pantsbuild/pants/issues/19751 + assert_imports( + r" from typing import TYPE_CHECKING if TYPE_CHECKING: from a import ClassA # pants: no-infer-dep print('Hello, world!')", - &["typing.TYPE_CHECKING"], - ); + &["typing.TYPE_CHECKING"], + ); } #[test] fn dunder_import() { - assert_imports("__import__('pkg_resources')", &["pkg_resources"]); - assert_imports("__import__(b'pkg_resources')", &["pkg_resources"]); - assert_imports("__import__(u'pkg_resources')", &["pkg_resources"]); - assert_imports("__import__(f'pkg_resources')", &["pkg_resources"]); - assert_imports("__import__('''pkg_resources''')", &["pkg_resources"]); - assert_imports("__import__('ignored') # pants: no-infer-dep", &[]); - assert_imports( - r" + assert_imports("__import__('pkg_resources')", &["pkg_resources"]); + assert_imports("__import__(b'pkg_resources')", &["pkg_resources"]); + assert_imports("__import__(u'pkg_resources')", &["pkg_resources"]); + assert_imports("__import__(f'pkg_resources')", &["pkg_resources"]); + assert_imports("__import__('''pkg_resources''')", &["pkg_resources"]); + assert_imports("__import__('ignored') # pants: no-infer-dep", &[]); + assert_imports( + r" __import__( # pants: no-infer-dep 'ignored' )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" __import__( 'ignored' # pants: no-infer-dep )", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" __import__( 'ignored' ) # pants: no-infer-dep", - &[], - ); - assert_imports( - r" + &[], + ); + assert_imports( + r" __import__( 'not_ignored' \ # pants: no-infer-dep )", - &["not_ignored"], - ); - assert_imports( - r" + &["not_ignored"], + ); + assert_imports( + r" __import__( 'ignored' \ ) # pants: no-infer-dep", - &[], - ); + &[], + ); } fn assert_imports_strong_weak(code: &str, strong: &[&str], weak: &[&str]) { - let mut collector = ImportCollector::new(code); - collector.collect(); - let (actual_weak, actual_strong): (Vec<_>, Vec<_>) = - collector.import_map.iter().partition(|(_, v)| v.1); - let expected_weak = HashSet::from_iter(weak.iter().map(|s| s.to_string())); - let found_weak = actual_weak - .iter() - .map(|(k, _)| k.to_string()) - .collect::>(); - assert_eq!( - expected_weak, found_weak, - "weak imports 
did not match, expected={:?} found={:?}", - expected_weak, found_weak - ); - let expected_strong = HashSet::from_iter(strong.iter().map(|s| s.to_string())); - let found_strong = actual_strong - .iter() - .map(|(k, _)| k.to_string()) - .collect::>(); - assert_eq!( - expected_strong, found_strong, - "strong imports did not match, expected={:?} found={:?}", - expected_strong, found_strong - ); + let mut collector = ImportCollector::new(code); + collector.collect(); + let (actual_weak, actual_strong): (Vec<_>, Vec<_>) = + collector.import_map.iter().partition(|(_, v)| v.1); + let expected_weak = HashSet::from_iter(weak.iter().map(|s| s.to_string())); + let found_weak = actual_weak + .iter() + .map(|(k, _)| k.to_string()) + .collect::>(); + assert_eq!( + expected_weak, found_weak, + "weak imports did not match, expected={:?} found={:?}", + expected_weak, found_weak + ); + let expected_strong = HashSet::from_iter(strong.iter().map(|s| s.to_string())); + let found_strong = actual_strong + .iter() + .map(|(k, _)| k.to_string()) + .collect::>(); + assert_eq!( + expected_strong, found_strong, + "strong imports did not match, expected={:?} found={:?}", + expected_strong, found_strong + ); } #[test] fn tryexcept_weak_imports() { - assert_imports_strong_weak( - r" + assert_imports_strong_weak( + r" try: import strong except AssertionError: pass", - &["strong"], - &[], - ); - assert_imports_strong_weak( - r" + &["strong"], + &[], + ); + assert_imports_strong_weak( + r" try: import weak except ImportError: pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import weak except (AssertionError, ImportError): pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import weak except (AssertionError, ImportError): pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import weak except [AssertionError, ImportError]: pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import weak except {AssertionError, ImportError}: pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import weak except AssertionError: pass except ImportError: pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import weak except AssertionError: import strong1 except ImportError: import strong2 else: import strong3 finally: import strong4", - &["strong1", "strong2", "strong3", "strong4"], - &["weak"], - ); - assert_imports_strong_weak( - r" + &["strong1", "strong2", "strong3", "strong4"], + &["weak"], + ); + assert_imports_strong_weak( + r" try: pass except AssertionError: try: import weak except ImportError: import strong", - &["strong"], - &["weak"], - ); - assert_imports_strong_weak( - r" + &["strong"], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import strong # This would be too complicated to try and handle except (lambda: ImportError)(): pass", - &["strong"], - &[], - ); - assert_imports_strong_weak( - r" + &["strong"], + &[], + ); + assert_imports_strong_weak( + r" ImpError = ImportError try: import strong # This would be too complicated to try and handle except ImpError: pass", - &["strong"], - &[], - ); - assert_imports_strong_weak( - r" + &["strong"], + 
&[], + ); + assert_imports_strong_weak( + r" try: import ignored_weak # pants: no-infer-dep except ImportError: import strong", - &["strong"], - &[], - ); - // NB: The `pass` forces the comment to be parsed as inside the except clause. - // Otherwise it is parsed after the entire try statement. - assert_imports_strong_weak( - r" + &["strong"], + &[], + ); + // NB: The `pass` forces the comment to be parsed as inside the except clause. + // Otherwise it is parsed after the entire try statement. + assert_imports_strong_weak( + r" try: import weak except ImportError: import ignored_strong # pants: no-infer-dep pass", - &[], - &["weak"], - ); - assert_imports_strong_weak( - r" + &[], + &["weak"], + ); + assert_imports_strong_weak( + r" try: import ignored_weak # pants: no-infer-dep except ImportError: import ignored_strong # pants: no-infer-dep pass", - &[], - &[], - ); + &[], + &[], + ); - assert_imports_strong_weak( - r" + assert_imports_strong_weak( + r" try: # A comment import one.two.three from one import four except ImportError: pass", - &[], - &["one.two.three", "one.four"], - ); + &[], + &["one.two.three", "one.four"], + ); - // Some conflict in strength - assert_imports_strong_weak( - r" + // Some conflict in strength + assert_imports_strong_weak( + r" import one.two.three try: import one.two.three except ImportError: pass", - &["one.two.three"], - &[], - ); - assert_imports_strong_weak( - r" + &["one.two.three"], + &[], + ); + assert_imports_strong_weak( + r" try: import one.two.three except ImportError: pass import one.two.three", - &["one.two.three"], - &[], - ); - // Ensure we preserve the stack of weakens with try-except - assert_imports_strong_weak( - r" + &["one.two.three"], + &[], + ); + // Ensure we preserve the stack of weakens with try-except + assert_imports_strong_weak( + r" try: with suppress(ImportError): import weak0 @@ -468,89 +468,89 @@ fn tryexcept_weak_imports() { import strong0 import strong1 ", - &["strong0", "strong1"], - &["weak0", "weak1", "weak2"], - ); + &["strong0", "strong1"], + &["weak0", "weak1", "weak2"], + ); } #[test] fn tryexcept_weak_imports_dunder() { - assert_imports_strong_weak( - r" + assert_imports_strong_weak( + r" __import__('strong') try: __import__('weak') except ImportError: pass ", - &["strong"], - &["weak"], - ) + &["strong"], + &["weak"], + ) } #[test] fn contextlib_suppress_weak_imports() { - // standard contextlib.suppress - assert_imports_strong_weak( - r" + // standard contextlib.suppress + assert_imports_strong_weak( + r" with contextlib.suppress(ImportError): import weak0 ", - &[], - &["weak0"], - ); - // ensure we reset the weakened status - assert_imports_strong_weak( - r" + &[], + &["weak0"], + ); + // ensure we reset the weakened status + assert_imports_strong_weak( + r" with contextlib.suppress(ImportError): import weak0 import strong0 ", - &["strong0"], - &["weak0"], - ); - // Allow other error types to be suppressed - assert_imports_strong_weak( - r" + &["strong0"], + &["weak0"], + ); + // Allow other error types to be suppressed + assert_imports_strong_weak( + r" with suppress(NameError, ImportError): import weak0 ", - &[], - &["weak0"], - ); - // We should respect the intention of any function that is obviously suppressing ImportErrors - assert_imports_strong_weak( - r" + &[], + &["weak0"], + ); + // We should respect the intention of any function that is obviously suppressing ImportErrors + assert_imports_strong_weak( + r" with suppress(ImportError): import weak0 ", - &[], - &["weak0"], - ); - // We should not weaken 
because of other suppressions - assert_imports_strong_weak( - r" + &[], + &["weak0"], + ); + // We should not weaken because of other suppressions + assert_imports_strong_weak( + r" with contextlib.suppress(NameError): import strong0 ", - &["strong0"], - &[], - ); - // Ensure we preserve the stack of weakens - assert_imports_strong_weak( - r" + &["strong0"], + &[], + ); + // Ensure we preserve the stack of weakens + assert_imports_strong_weak( + r" with suppress(ImportError): import weak0 with suppress(ImportError): import weak1 import weak2 ", - &[], - &["weak0", "weak1", "weak2"], - ); - // Ensure we preserve the stack of weakens with try-except - assert_imports_strong_weak( - r" + &[], + &["weak0", "weak1", "weak2"], + ); + // Ensure we preserve the stack of weakens with try-except + assert_imports_strong_weak( + r" with suppress(ImportError): try: import weak0 @@ -558,139 +558,139 @@ fn contextlib_suppress_weak_imports() { import weak1 import weak2 ", - &[], - &["weak0", "weak1", "weak2"], - ); - // Ensure we aren't affected by weirdness in tree-sitter - // where in the viewer the second import wasn't assigned the correct parent - assert_imports_strong_weak( - r" + &[], + &["weak0", "weak1", "weak2"], + ); + // Ensure we aren't affected by weirdness in tree-sitter + // where in the viewer the second import wasn't assigned the correct parent + assert_imports_strong_weak( + r" with suppress(ImportError): import weak0 import weak1 ", - &[], - &["weak0", "weak1"], - ); - // Ensure that we still traverse withitems - let withitems_open = r" + &[], + &["weak0", "weak1"], + ); + // Ensure that we still traverse withitems + let withitems_open = r" with ( open('/dev/null') as f0, open('data/subdir1/a.json') as f1, ): pass "; - assert_imports_strong_weak(withitems_open, &[], &[]); - assert_strings(withitems_open, &["/dev/null", "data/subdir1/a.json"]); - // Ensure suppress bound to variable - assert_imports_strong_weak( - r" + assert_imports_strong_weak(withitems_open, &[], &[]); + assert_strings(withitems_open, &["/dev/null", "data/subdir1/a.json"]); + // Ensure suppress bound to variable + assert_imports_strong_weak( + r" with suppress(ImportError) as e: import weak0 ", - &[], - &["weak0"], - ); - // Ensure multiple items in `with` - assert_imports_strong_weak( - r" + &[], + &["weak0"], + ); + // Ensure multiple items in `with` + assert_imports_strong_weak( + r" with open('file') as f, suppress(ImportError): import weak0 ", - &[], - &["weak0"], - ); - // Ensure multiple with_items - assert_imports_strong_weak( - r" + &[], + &["weak0"], + ); + // Ensure multiple with_items + assert_imports_strong_weak( + r" with suppress(ImportError), open('file'): import weak0 ", - &[], - &["weak0"], - // &["weak0"], - ); - // Ensure multiple with_items in parens (with trailing comma) - assert_imports_strong_weak( - r" + &[], + &["weak0"], + // &["weak0"], + ); + // Ensure multiple with_items in parens (with trailing comma) + assert_imports_strong_weak( + r" with (suppress(ImportError), open('file'),): import weak0 ", - &[], - &["weak0"], - ); - // pathological: suppress without a child - assert_imports_strong_weak( - r" + &[], + &["weak0"], + ); + // pathological: suppress without a child + assert_imports_strong_weak( + r" with suppress(): import strong0 ", - &["strong0"], - &[], - ); - // pathological: nothing in `with` clause - assert_imports_strong_weak( - r" + &["strong0"], + &[], + ); + // pathological: nothing in `with` clause + assert_imports_strong_weak( + r" with: import strong0 ", - &["strong0"], - 
&[], - ); + &["strong0"], + &[], + ); } #[test] fn contextlib_suppress_weak_imports_dunder() { - assert_imports_strong_weak( - r" + assert_imports_strong_weak( + r" __import__('strong') with contextlib.suppress(ImportError): __import__('weak') ", - &["strong"], - &["weak"], - ) + &["strong"], + &["weak"], + ) } fn assert_strings(code: &str, strings: &[&str]) { - let mut collector = ImportCollector::new(code); - collector.collect(); - assert_eq!( - HashSet::from_iter(strings.iter().map(|s| s.to_string())), - collector - .string_candidates - .keys() - .cloned() - .collect::>() - ); + let mut collector = ImportCollector::new(code); + collector.collect(); + assert_eq!( + HashSet::from_iter(strings.iter().map(|s| s.to_string())), + collector + .string_candidates + .keys() + .cloned() + .collect::>() + ); } #[test] fn string_candidates() { - assert_strings("'a'", &["a"]); - assert_strings("'''a'''", &["a"]); - assert_strings("'a.b'", &["a.b"]); - assert_strings("'a.b.c_狗'", &["a.b.c_狗"]); - assert_strings("'..a.b.c.d'", &["..a.b.c.d"]); - - // Not candidates - assert_strings("'I\\\\have\\\\backslashes'", &[]); - assert_strings("'I have whitespace'", &[]); - assert_strings("'\ttabby'", &[]); - assert_strings("'\\ttabby'", &[]); - assert_strings("'\\nnewline'", &[]); - assert_strings("'''\na'''", &[]); - assert_strings("'''a\n'''", &[]); - - // Technically the value of the string doesn't contain whitespace, but the parser isn't that - // sophisticated yet. - assert_strings("'''\\\na'''", &[]); + assert_strings("'a'", &["a"]); + assert_strings("'''a'''", &["a"]); + assert_strings("'a.b'", &["a.b"]); + assert_strings("'a.b.c_狗'", &["a.b.c_狗"]); + assert_strings("'..a.b.c.d'", &["..a.b.c.d"]); + + // Not candidates + assert_strings("'I\\\\have\\\\backslashes'", &[]); + assert_strings("'I have whitespace'", &[]); + assert_strings("'\ttabby'", &[]); + assert_strings("'\\ttabby'", &[]); + assert_strings("'\\nnewline'", &[]); + assert_strings("'''\na'''", &[]); + assert_strings("'''a\n'''", &[]); + + // Technically the value of the string doesn't contain whitespace, but the parser isn't that + // sophisticated yet. + assert_strings("'''\\\na'''", &[]); } #[test] fn python2() { - assert_collected( - r"# -*- coding: utf-8 -*- + assert_collected( + r"# -*- coding: utf-8 -*- print 'Python 2 lives on.' 
import demo @@ -710,94 +710,94 @@ fn python2() { except ImportError: import strong1 else: import strong2 finally: import strong3", - HashMap::from([ - ("demo", (4, false)), - ("project.demo.Demo", (5, false)), - ("pkg_resources", (7, false)), - ("treat.as.a.regular.import.not.a.string.import", (8, false)), - ("weak1", (17, true)), - ("strong1", (18, false)), - ("strong2", (19, false)), - ("strong3", (20, false)), - ]), - HashMap::from([ - ("dep.from.bytes", 11), - ("dep.from.str", 12), - ("dep.from.str_狗", 13), - ]), - ); + HashMap::from([ + ("demo", (4, false)), + ("project.demo.Demo", (5, false)), + ("pkg_resources", (7, false)), + ("treat.as.a.regular.import.not.a.string.import", (8, false)), + ("weak1", (17, true)), + ("strong1", (18, false)), + ("strong2", (19, false)), + ("strong3", (20, false)), + ]), + HashMap::from([ + ("dep.from.bytes", 11), + ("dep.from.str", 12), + ("dep.from.str_狗", 13), + ]), + ); } #[test] fn still_parses_from_syntax_error() { - assert_imports("import a; x=", &["a"]); + assert_imports("import a; x=", &["a"]); } fn assert_relative_imports(filename: &str, code: &str, resolved_imports: &[&str]) { - let result = get_dependencies(code, PathBuf::from(filename)).unwrap(); - assert_eq!( - HashSet::from_iter(resolved_imports.iter().map(|s| s.to_string())), - result.imports.keys().cloned().collect::>() - ); + let result = get_dependencies(code, PathBuf::from(filename)).unwrap(); + assert_eq!( + HashSet::from_iter(resolved_imports.iter().map(|s| s.to_string())), + result.imports.keys().cloned().collect::>() + ); } #[test] fn relative_imports_resolution() { - let filename = "foo/bar/baz.py"; - assert_relative_imports(filename, "from . import b", &["foo.bar.b"]); - assert_relative_imports(filename, "from . import *", &["foo.bar"]); - assert_relative_imports(filename, "from .a import b", &["foo.bar.a.b"]); - assert_relative_imports(filename, "from .a import *", &["foo.bar.a"]); - assert_relative_imports(filename, "from .. import b", &["foo.b"]); - assert_relative_imports(filename, "from .. import *", &["foo"]); - assert_relative_imports(filename, "from ..a import b", &["foo.a.b"]); - assert_relative_imports(filename, "from .. import b.c", &["foo.b.c"]); - assert_relative_imports(filename, "from ..a import b.c", &["foo.a.b.c"]); - - let filename = "bingo/bango/bongo/himom.py"; - assert_relative_imports(filename, "from . import b", &["bingo.bango.bongo.b"]); - assert_relative_imports(filename, "from .a import b", &["bingo.bango.bongo.a.b"]); - assert_relative_imports(filename, "from ..a import b", &["bingo.bango.a.b"]); - assert_relative_imports(filename, "from ..a import b.c", &["bingo.bango.a.b.c"]); - assert_relative_imports(filename, "from ... import b.c", &["bingo.b.c"]); - assert_relative_imports(filename, "from ...a import b.c", &["bingo.a.b.c"]); - - // Left unchanged, since we blew through the top, let Pants error using this string as a message - assert_relative_imports(filename, "from .... import b.c", &["....b.c"]); - assert_relative_imports(filename, "from ....a import b.c", &["....a.b.c"]); - assert_relative_imports(filename, "from ....a import b, c", &["....a.b", "....a.c"]); - assert_relative_imports( - filename, - "from ....a import b as d, c", - &["....a.b", "....a.c"], - ); + let filename = "foo/bar/baz.py"; + assert_relative_imports(filename, "from . import b", &["foo.bar.b"]); + assert_relative_imports(filename, "from . 
import *", &["foo.bar"]); + assert_relative_imports(filename, "from .a import b", &["foo.bar.a.b"]); + assert_relative_imports(filename, "from .a import *", &["foo.bar.a"]); + assert_relative_imports(filename, "from .. import b", &["foo.b"]); + assert_relative_imports(filename, "from .. import *", &["foo"]); + assert_relative_imports(filename, "from ..a import b", &["foo.a.b"]); + assert_relative_imports(filename, "from .. import b.c", &["foo.b.c"]); + assert_relative_imports(filename, "from ..a import b.c", &["foo.a.b.c"]); + + let filename = "bingo/bango/bongo/himom.py"; + assert_relative_imports(filename, "from . import b", &["bingo.bango.bongo.b"]); + assert_relative_imports(filename, "from .a import b", &["bingo.bango.bongo.a.b"]); + assert_relative_imports(filename, "from ..a import b", &["bingo.bango.a.b"]); + assert_relative_imports(filename, "from ..a import b.c", &["bingo.bango.a.b.c"]); + assert_relative_imports(filename, "from ... import b.c", &["bingo.b.c"]); + assert_relative_imports(filename, "from ...a import b.c", &["bingo.a.b.c"]); + + // Left unchanged, since we blew through the top, let Pants error using this string as a message + assert_relative_imports(filename, "from .... import b.c", &["....b.c"]); + assert_relative_imports(filename, "from ....a import b.c", &["....a.b.c"]); + assert_relative_imports(filename, "from ....a import b, c", &["....a.b", "....a.c"]); + assert_relative_imports( + filename, + "from ....a import b as d, c", + &["....a.b", "....a.c"], + ); } #[test] fn syntax_errors_and_other_fun() { - // These tests aren't specifically testing what we parse, so much as we don't "crash and burn". - - assert_imports("imprt a", &[]); - assert_imports("form a import b", &["b"]); - assert_imports("import .b", &["."]); - assert_imports("import a....b", &["a....b"]); - assert_imports("import a.", &[]); - assert_imports("import *", &[]); - assert_imports("from a import", &[]); - assert_imports("from a import;", &["a."]); - assert_imports("from a import ()", &["a."]); - assert_imports("from a imp x", &[]); - assert_imports("from from import a as .as", &[]); - assert_imports("from a import ......g", &["a.g"]); - assert_imports("from a. import b", &[]); - assert_imports("from a as c import b as d", &["a.b"]); - assert_imports("from a import *, b", &["a"]); - assert_imports("from a import b, *", &["a.b"]); - assert_imports("from a import (*)", &[]); - assert_imports("from * import b", &["b"]); - assert_imports("try:...\nexcept:import a", &["a"]); - assert_imports("try:...\nexcept 1:import a", &["a"]); - assert_imports("try:...\nexcept x=1:import a", &["a"]); - assert_imports("try:...\nexcept (x=1):import a", &["a"]); - assert_imports("foo()", &[]); + // These tests aren't specifically testing what we parse, so much as we don't "crash and burn". + + assert_imports("imprt a", &[]); + assert_imports("form a import b", &["b"]); + assert_imports("import .b", &["."]); + assert_imports("import a....b", &["a....b"]); + assert_imports("import a.", &[]); + assert_imports("import *", &[]); + assert_imports("from a import", &[]); + assert_imports("from a import;", &["a."]); + assert_imports("from a import ()", &["a."]); + assert_imports("from a imp x", &[]); + assert_imports("from from import a as .as", &[]); + assert_imports("from a import ......g", &["a.g"]); + assert_imports("from a. 
import b", &[]); + assert_imports("from a as c import b as d", &["a.b"]); + assert_imports("from a import *, b", &["a"]); + assert_imports("from a import b, *", &["a.b"]); + assert_imports("from a import (*)", &[]); + assert_imports("from * import b", &["b"]); + assert_imports("try:...\nexcept:import a", &["a"]); + assert_imports("try:...\nexcept 1:import a", &["a"]); + assert_imports("try:...\nexcept x=1:import a", &["a"]); + assert_imports("try:...\nexcept (x=1):import a", &["a"]); + assert_imports("foo()", &[]); } diff --git a/src/rust/engine/fs/brfs/src/main.rs b/src/rust/engine/fs/brfs/src/main.rs index 11d5e1489d1..909ce4515d6 100644 --- a/src/rust/engine/fs/brfs/src/main.rs +++ b/src/rust/engine/fs/brfs/src/main.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -53,43 +53,43 @@ const TTL: time::Duration = time::Duration::from_secs(0); const CREATE_TIME: time::SystemTime = time::SystemTime::UNIX_EPOCH; fn dir_attr_for(inode: Inode) -> fuser::FileAttr { - attr_for(inode, 0, fuser::FileType::Directory, 0x555) + attr_for(inode, 0, fuser::FileType::Directory, 0x555) } fn attr_for(inode: Inode, size: u64, kind: fuser::FileType, perm: u16) -> fuser::FileAttr { - fuser::FileAttr { - ino: inode, - size: size, - // TODO: Find out whether blocks is actually important - blocks: 0, - atime: CREATE_TIME, - mtime: CREATE_TIME, - ctime: CREATE_TIME, - crtime: CREATE_TIME, - kind: kind, - perm: perm, - nlink: 1, - uid: 0, - gid: 0, - rdev: 0, - // TODO: Find out whether blksize is actually important - blksize: 1, - flags: 0, - } + fuser::FileAttr { + ino: inode, + size: size, + // TODO: Find out whether blocks is actually important + blocks: 0, + atime: CREATE_TIME, + mtime: CREATE_TIME, + ctime: CREATE_TIME, + crtime: CREATE_TIME, + kind: kind, + perm: perm, + nlink: 1, + uid: 0, + gid: 0, + rdev: 0, + // TODO: Find out whether blksize is actually important + blksize: 1, + flags: 0, + } } pub fn digest_from_filepath(str: &str) -> Result { - let mut parts = str.split('-'); - let fingerprint_str = parts - .next() - .ok_or_else(|| format!("Invalid digest: {str} wasn't of form fingerprint-size"))?; - let fingerprint = Fingerprint::from_hex_string(fingerprint_str)?; - let size_bytes = parts - .next() - .ok_or_else(|| format!("Invalid digest: {str} wasn't of form fingerprint-size"))? 
- .parse::() - .map_err(|err| format!("Invalid digest; size {str} not a number: {err}"))?; - Ok(Digest::new(fingerprint, size_bytes)) + let mut parts = str.split('-'); + let fingerprint_str = parts + .next() + .ok_or_else(|| format!("Invalid digest: {str} wasn't of form fingerprint-size"))?; + let fingerprint = Fingerprint::from_hex_string(fingerprint_str)?; + let size_bytes = parts + .next() + .ok_or_else(|| format!("Invalid digest: {str} wasn't of form fingerprint-size"))? + .parse::() + .map_err(|err| format!("Invalid digest; size {str} not a number: {err}"))?; + Ok(Digest::new(fingerprint, size_bytes)) } type Inode = u64; @@ -100,313 +100,322 @@ const DIRECTORY_ROOT: Inode = 3; #[derive(Clone, Copy, Debug)] enum EntryType { - File, - Directory, + File, + Directory, } #[derive(Clone, Copy, Debug)] struct InodeDetails { - digest: Digest, - entry_type: EntryType, - is_executable: bool, + digest: Digest, + entry_type: EntryType, + is_executable: bool, } #[derive(Debug)] struct ReaddirEntry { - inode: Inode, - kind: fuser::FileType, - name: OsString, + inode: Inode, + kind: fuser::FileType, + name: OsString, } enum Node { - Directory(remexec::DirectoryNode), - File(remexec::FileNode), + Directory(remexec::DirectoryNode), + File(remexec::FileNode), } #[derive(Clone, Copy, Debug)] pub enum BRFSEvent { - Init, - Destroy, + Init, + Destroy, } struct BuildResultFS { - sender: Sender, - runtime: task_executor::Executor, - store: Store, - inode_digest_cache: HashMap, - digest_inode_cache: HashMap, - directory_inode_cache: HashMap, - next_inode: Inode, -} - -impl BuildResultFS { - pub fn new( sender: Sender, runtime: task_executor::Executor, store: Store, - ) -> BuildResultFS { - BuildResultFS { - sender: sender, - runtime: runtime, - store: store, - inode_digest_cache: HashMap::new(), - digest_inode_cache: HashMap::new(), - directory_inode_cache: HashMap::new(), - next_inode: 4, - } - } + inode_digest_cache: HashMap, + digest_inode_cache: HashMap, + directory_inode_cache: HashMap, + next_inode: Inode, } impl BuildResultFS { - pub fn node_for_digest( - &mut self, - directory: &remexec::Directory, - filename: &str, - ) -> Option { - for file in &directory.files { - if file.name == filename { - return Some(Node::File(file.clone())); - } + pub fn new( + sender: Sender, + runtime: task_executor::Executor, + store: Store, + ) -> BuildResultFS { + BuildResultFS { + sender: sender, + runtime: runtime, + store: store, + inode_digest_cache: HashMap::new(), + digest_inode_cache: HashMap::new(), + directory_inode_cache: HashMap::new(), + next_inode: 4, + } } - for child in &directory.directories { - if child.name == filename { - return Some(Node::Directory(child.clone())); - } +} + +impl BuildResultFS { + pub fn node_for_digest( + &mut self, + directory: &remexec::Directory, + filename: &str, + ) -> Option { + for file in &directory.files { + if file.name == filename { + return Some(Node::File(file.clone())); + } + } + for child in &directory.directories { + if child.name == filename { + return Some(Node::Directory(child.clone())); + } + } + None } - None - } - pub fn inode_for_file( - &mut self, - digest: Digest, - is_executable: bool, - ) -> Result, String> { - match self.digest_inode_cache.entry(digest) { - Occupied(entry) => { - let (executable_inode, non_executable_inode) = *entry.get(); - Ok(Some(if is_executable { - executable_inode - } else { - non_executable_inode - })) - } - Vacant(entry) => { - let store = self.store.clone(); - match self - .runtime - .block_on(async move { 
store.load_file_bytes_with(digest, |_| ()).await }) - { - Ok(()) => { - let executable_inode = self.next_inode; - self.next_inode += 1; - let non_executable_inode = self.next_inode; - self.next_inode += 1; - entry.insert((executable_inode, non_executable_inode)); - self.inode_digest_cache.insert( - executable_inode, - InodeDetails { - digest: digest, - entry_type: EntryType::File, - is_executable: true, - }, - ); - self.inode_digest_cache.insert( - non_executable_inode, - InodeDetails { - digest: digest, - entry_type: EntryType::File, - is_executable: false, - }, - ); - Ok(Some(if is_executable { - executable_inode - } else { - non_executable_inode - })) - } - Err(StoreError::MissingDigest { .. }) => Ok(None), - Err(err) => Err(err.to_string()), + pub fn inode_for_file( + &mut self, + digest: Digest, + is_executable: bool, + ) -> Result, String> { + match self.digest_inode_cache.entry(digest) { + Occupied(entry) => { + let (executable_inode, non_executable_inode) = *entry.get(); + Ok(Some(if is_executable { + executable_inode + } else { + non_executable_inode + })) + } + Vacant(entry) => { + let store = self.store.clone(); + match self + .runtime + .block_on(async move { store.load_file_bytes_with(digest, |_| ()).await }) + { + Ok(()) => { + let executable_inode = self.next_inode; + self.next_inode += 1; + let non_executable_inode = self.next_inode; + self.next_inode += 1; + entry.insert((executable_inode, non_executable_inode)); + self.inode_digest_cache.insert( + executable_inode, + InodeDetails { + digest: digest, + entry_type: EntryType::File, + is_executable: true, + }, + ); + self.inode_digest_cache.insert( + non_executable_inode, + InodeDetails { + digest: digest, + entry_type: EntryType::File, + is_executable: false, + }, + ); + Ok(Some(if is_executable { + executable_inode + } else { + non_executable_inode + })) + } + Err(StoreError::MissingDigest { .. }) => Ok(None), + Err(err) => Err(err.to_string()), + } + } } - } } - } - - pub fn inode_for_directory(&mut self, digest: Digest) -> Result, String> { - match self.directory_inode_cache.entry(digest) { - Occupied(entry) => Ok(Some(*entry.get())), - Vacant(entry) => { - let store = self.store.clone(); - match self - .runtime - .block_on(async move { store.load_directory(digest).await }) - { - Ok(_) => { - // TODO: Kick off some background futures to pre-load the contents of this Directory into - // an in-memory cache. Keep a background CPU pool driving those Futures. - let inode = self.next_inode; - self.next_inode += 1; - entry.insert(inode); - self.inode_digest_cache.insert( - inode, - InodeDetails { - digest: digest, - entry_type: EntryType::Directory, - is_executable: true, - }, - ); - Ok(Some(inode)) - } - Err(StoreError::MissingDigest { .. }) => Ok(None), - Err(err) => Err(err.to_string()), + + pub fn inode_for_directory(&mut self, digest: Digest) -> Result, String> { + match self.directory_inode_cache.entry(digest) { + Occupied(entry) => Ok(Some(*entry.get())), + Vacant(entry) => { + let store = self.store.clone(); + match self + .runtime + .block_on(async move { store.load_directory(digest).await }) + { + Ok(_) => { + // TODO: Kick off some background futures to pre-load the contents of this Directory into + // an in-memory cache. Keep a background CPU pool driving those Futures. 
+ let inode = self.next_inode; + self.next_inode += 1; + entry.insert(inode); + self.inode_digest_cache.insert( + inode, + InodeDetails { + digest: digest, + entry_type: EntryType::Directory, + is_executable: true, + }, + ); + Ok(Some(inode)) + } + Err(StoreError::MissingDigest { .. }) => Ok(None), + Err(err) => Err(err.to_string()), + } + } } - } } - } - - pub fn file_attr_for(&mut self, inode: Inode) -> Option { - self.inode_digest_cache.get(&inode).map(|f| { - attr_for( - inode, - f.digest.size_bytes as u64, - fuser::FileType::RegularFile, - if f.is_executable { 0o555 } else { 0o444 }, - ) - }) - } - - pub fn dir_attr_for(&mut self, digest: Digest) -> Result { - match self.inode_for_directory(digest) { - Ok(Some(inode)) => Ok(dir_attr_for(inode)), - Ok(None) => Err(libc::ENOENT), - Err(err) => { - error!("Error getting directory for digest {:?}: {}", digest, err); - Err(libc::EINVAL) - } + + pub fn file_attr_for(&mut self, inode: Inode) -> Option { + self.inode_digest_cache.get(&inode).map(|f| { + attr_for( + inode, + f.digest.size_bytes as u64, + fuser::FileType::RegularFile, + if f.is_executable { 0o555 } else { 0o444 }, + ) + }) } - } - - pub fn readdir_entries(&mut self, inode: Inode) -> Result, i32> { - match inode { - ROOT => Ok(vec![ - ReaddirEntry { - inode: ROOT, - kind: fuser::FileType::Directory, - name: OsString::from("."), - }, - ReaddirEntry { - inode: ROOT, - kind: fuser::FileType::Directory, - name: OsString::from(".."), - }, - ReaddirEntry { - inode: DIGEST_ROOT, - kind: fuser::FileType::Directory, - name: OsString::from("digest"), - }, - ReaddirEntry { - inode: DIRECTORY_ROOT, - kind: fuser::FileType::Directory, - name: OsString::from("directory"), - }, - ]), - // readdir on /digest or /directory will return an empty set. - // readdir on /directory/abc123... will properly list the contents of that Directory. - // - // We skip directory listing for the roots because they will just be very long lists of - // digests. The only other behaviours we could reasonable use are: - // 1. Enumerate the entire contents of the local Store (which will be large), ignoring the - // remote Store (so the directory listing will still be incomplete - stuff which can be - // getattr'd/open'd will still not be present in the directory listing). - // 2. Store a cache of requests we've successfully served, and claim that the directory - // contains exactly those files/directories. - // All three of these end up with the same problem that readdir doesn't show things which, if - // you were to getattr/open would actually exist. So we choose the cheapest, and most - // consistent one: readdir is always empty. - DIGEST_ROOT | DIRECTORY_ROOT => Ok(vec![]), - inode => match self.inode_digest_cache.get(&inode) { - Some(&InodeDetails { - digest, - entry_type: EntryType::Directory, - .. 
- }) => { - let store = self.store.clone(); - let maybe_directory = self - .runtime - .block_on(async move { store.load_directory(digest).await }); - - match maybe_directory { - Ok(directory) => { - let mut entries = vec![ + + pub fn dir_attr_for(&mut self, digest: Digest) -> Result { + match self.inode_for_directory(digest) { + Ok(Some(inode)) => Ok(dir_attr_for(inode)), + Ok(None) => Err(libc::ENOENT), + Err(err) => { + error!("Error getting directory for digest {:?}: {}", digest, err); + Err(libc::EINVAL) + } + } + } + + pub fn readdir_entries(&mut self, inode: Inode) -> Result, i32> { + match inode { + ROOT => Ok(vec![ + ReaddirEntry { + inode: ROOT, + kind: fuser::FileType::Directory, + name: OsString::from("."), + }, + ReaddirEntry { + inode: ROOT, + kind: fuser::FileType::Directory, + name: OsString::from(".."), + }, ReaddirEntry { - inode: inode, - kind: fuser::FileType::Directory, - name: OsString::from("."), + inode: DIGEST_ROOT, + kind: fuser::FileType::Directory, + name: OsString::from("digest"), }, ReaddirEntry { - inode: DIRECTORY_ROOT, - kind: fuser::FileType::Directory, - name: OsString::from(".."), + inode: DIRECTORY_ROOT, + kind: fuser::FileType::Directory, + name: OsString::from("directory"), }, - ]; - - let directories = directory.directories.iter().map(|directory| { - ( - directory.digest.clone(), - directory.name.clone(), - fuser::FileType::Directory, - true, - ) - }); - let files = directory.files.iter().map(|file| { - ( - file.digest.clone(), - file.name.clone(), - fuser::FileType::RegularFile, - file.is_executable, - ) - }); - - for (digest, name, filetype, is_executable) in directories.chain(files) { - let child_digest = require_digest(digest.as_ref()).map_err(|err| { - error!("Error parsing digest: {:?}", err); - libc::ENOENT - })?; - let maybe_child_inode = match filetype { - fuser::FileType::Directory => self.inode_for_directory(child_digest), - fuser::FileType::RegularFile => self.inode_for_file(child_digest, is_executable), - _ => unreachable!(), - }; - match maybe_child_inode { - Ok(Some(child_inode)) => { - entries.push(ReaddirEntry { - inode: child_inode, - kind: filetype, - name: OsString::from(name), - }); - } - Ok(None) => { - return Err(libc::ENOENT); - } - Err(err) => { - error!("Error reading child directory {:?}: {}", child_digest, err); - return Err(libc::EINVAL); - } + ]), + // readdir on /digest or /directory will return an empty set. + // readdir on /directory/abc123... will properly list the contents of that Directory. + // + // We skip directory listing for the roots because they will just be very long lists of + // digests. The only other behaviours we could reasonable use are: + // 1. Enumerate the entire contents of the local Store (which will be large), ignoring the + // remote Store (so the directory listing will still be incomplete - stuff which can be + // getattr'd/open'd will still not be present in the directory listing). + // 2. Store a cache of requests we've successfully served, and claim that the directory + // contains exactly those files/directories. + // All three of these end up with the same problem that readdir doesn't show things which, if + // you were to getattr/open would actually exist. So we choose the cheapest, and most + // consistent one: readdir is always empty. + DIGEST_ROOT | DIRECTORY_ROOT => Ok(vec![]), + inode => match self.inode_digest_cache.get(&inode) { + Some(&InodeDetails { + digest, + entry_type: EntryType::Directory, + .. 
+ }) => { + let store = self.store.clone(); + let maybe_directory = self + .runtime + .block_on(async move { store.load_directory(digest).await }); + + match maybe_directory { + Ok(directory) => { + let mut entries = vec![ + ReaddirEntry { + inode: inode, + kind: fuser::FileType::Directory, + name: OsString::from("."), + }, + ReaddirEntry { + inode: DIRECTORY_ROOT, + kind: fuser::FileType::Directory, + name: OsString::from(".."), + }, + ]; + + let directories = directory.directories.iter().map(|directory| { + ( + directory.digest.clone(), + directory.name.clone(), + fuser::FileType::Directory, + true, + ) + }); + let files = directory.files.iter().map(|file| { + ( + file.digest.clone(), + file.name.clone(), + fuser::FileType::RegularFile, + file.is_executable, + ) + }); + + for (digest, name, filetype, is_executable) in directories.chain(files) + { + let child_digest = + require_digest(digest.as_ref()).map_err(|err| { + error!("Error parsing digest: {:?}", err); + libc::ENOENT + })?; + let maybe_child_inode = match filetype { + fuser::FileType::Directory => { + self.inode_for_directory(child_digest) + } + fuser::FileType::RegularFile => { + self.inode_for_file(child_digest, is_executable) + } + _ => unreachable!(), + }; + match maybe_child_inode { + Ok(Some(child_inode)) => { + entries.push(ReaddirEntry { + inode: child_inode, + kind: filetype, + name: OsString::from(name), + }); + } + Ok(None) => { + return Err(libc::ENOENT); + } + Err(err) => { + error!( + "Error reading child directory {:?}: {}", + child_digest, err + ); + return Err(libc::EINVAL); + } + } + } + + Ok(entries) + } + Err(StoreError::MissingDigest { .. }) => Err(libc::ENOENT), + Err(err) => { + error!("Error loading directory {:?}: {}", digest, err); + Err(libc::EINVAL) + } + } } - } - - Ok(entries) - } - Err(StoreError::MissingDigest { .. }) => Err(libc::ENOENT), - Err(err) => { - error!("Error loading directory {:?}: {}", digest, err); - Err(libc::EINVAL) - } - } + _ => Err(libc::ENOENT), + }, } - _ => Err(libc::ENOENT), - }, } - } } // inodes: @@ -415,277 +424,285 @@ impl BuildResultFS { // 3: /directory // ... created on demand and cached for the lifetime of the program. 
impl fuser::Filesystem for BuildResultFS { - fn init( - &mut self, - _req: &fuser::Request, - _config: &mut fuser::KernelConfig, - ) -> Result<(), libc::c_int> { - self.sender.send(BRFSEvent::Init).map_err(|_| 1) - } - - fn destroy(&mut self) { - self - .sender - .send(BRFSEvent::Destroy) - .unwrap_or_else(|err| warn!("Failed to send {:?} event: {}", BRFSEvent::Destroy, err)) - } - - // Used to answer stat calls - fn lookup( - &mut self, - _req: &fuser::Request<'_>, - parent: Inode, - name: &OsStr, - reply: fuser::ReplyEntry, - ) { - let runtime = self.runtime.clone(); - runtime.enter(|| { - let r = match (parent, name.to_str()) { - (ROOT, Some("digest")) => Ok(dir_attr_for(DIGEST_ROOT)), - (ROOT, Some("directory")) => Ok(dir_attr_for(DIRECTORY_ROOT)), - (DIGEST_ROOT, Some(digest_str)) => match digest_from_filepath(digest_str) { - Ok(digest) => self - .inode_for_file(digest, true) - .map_err(|err| { - error!("Error loading file by digest {}: {}", digest_str, err); - libc::EINVAL - }) - .and_then(|maybe_inode| { - maybe_inode - .and_then(|inode| self.file_attr_for(inode)) - .ok_or(libc::ENOENT) - }), - Err(err) => { - warn!("Invalid digest for file in digest root: {}", err); - Err(libc::ENOENT) - } - }, - (DIRECTORY_ROOT, Some(digest_str)) => match digest_from_filepath(digest_str) { - Ok(digest) => self.dir_attr_for(digest), - Err(err) => { - warn!("Invalid digest for directory in directory root: {}", err); - Err(libc::ENOENT) - } - }, - (parent, Some(filename)) => { - let maybe_cache_entry = self - .inode_digest_cache - .get(&parent) - .cloned() - .ok_or(libc::ENOENT); - maybe_cache_entry - .and_then(|cache_entry| { - let store = self.store.clone(); - let parent_digest = cache_entry.digest; - let directory = self - .runtime - .block_on(async move { store.load_directory(parent_digest).await }) - .map_err(|err| match err { - StoreError::MissingDigest { .. } => libc::ENOENT, - err => { - error!("Error reading directory {:?}: {}", parent_digest, err); - libc::EINVAL - } - })?; - self - .node_for_digest(&directory, filename) - .ok_or(libc::ENOENT) - }) - .and_then(|node| match node { - Node::Directory(directory_node) => { - let digest = require_digest(directory_node.digest.as_ref()).map_err(|err| { - error!("Error parsing digest: {:?}", err); - libc::ENOENT - })?; - self.dir_attr_for(digest) - } - Node::File(file_node) => { - let digest = require_digest(file_node.digest.as_ref()).map_err(|err| { - error!("Error parsing digest: {:?}", err); - libc::ENOENT - })?; - self - .inode_for_file(digest, file_node.is_executable) - .map_err(|err| { - error!("Error loading file by digest {}: {}", filename, err); - libc::EINVAL - }) - .and_then(|maybe_inode| { - maybe_inode - .and_then(|inode| self.file_attr_for(inode)) - .ok_or(libc::ENOENT) - }) - } - }) - } - _ => Err(libc::ENOENT), - }; - match r { - Ok(r) => reply.entry(&TTL, &r, 1), - Err(err) => reply.error(err), - } - }) - } - - fn getattr(&mut self, _req: &fuser::Request<'_>, inode: Inode, reply: fuser::ReplyAttr) { - let runtime = self.runtime.clone(); - runtime.enter(|| match inode { - ROOT => reply.attr(&TTL, &dir_attr_for(ROOT)), - DIGEST_ROOT => reply.attr(&TTL, &dir_attr_for(DIGEST_ROOT)), - DIRECTORY_ROOT => reply.attr(&TTL, &dir_attr_for(DIRECTORY_ROOT)), - _ => match self.inode_digest_cache.get(&inode) { - Some(&InodeDetails { - entry_type: EntryType::File, - .. 
- }) => match self.file_attr_for(inode) { - Some(file_attr) => reply.attr(&TTL, &file_attr), - None => reply.error(libc::ENOENT), - }, - Some(&InodeDetails { - entry_type: EntryType::Directory, - .. - }) => reply.attr(&TTL, &dir_attr_for(inode)), - _ => reply.error(libc::ENOENT), - }, - }) - } - - // TODO: Find out whether fh is ever passed if open isn't explicitly implemented (and whether offset is ever negative) - fn read( - &mut self, - _req: &fuser::Request<'_>, - inode: Inode, - _fh: u64, - offset: i64, - size: u32, - _flags: i32, - _lock_owner: Option, - reply: fuser::ReplyData, - ) { - let runtime = self.runtime.clone(); - runtime.enter(|| { - match self.inode_digest_cache.get(&inode) { - Some(&InodeDetails { - digest, - entry_type: EntryType::File, - .. - }) => { - let reply = Arc::new(Mutex::new(Some(reply))); - let reply2 = reply.clone(); - // TODO: Read from a cache of Futures driven from a CPU pool, so we can merge in-flight - // requests, rather than reading from the store directly here. - let store = self.store.clone(); - let result: Result<(), ()> = self - .runtime - .block_on(async move { - store - .load_file_bytes_with(digest, move |bytes| { - let begin = std::cmp::min(offset as usize, bytes.len()); - let end = std::cmp::min(offset as usize + size as usize, bytes.len()); - let mut reply = reply.lock(); - reply.take().unwrap().data(&bytes[begin..end]); - }) - .await - }) - .or_else(|err| { - let maybe_reply = reply2.lock().take(); - match err { - StoreError::MissingDigest { .. } => { - if let Some(reply) = maybe_reply { - reply.error(libc::ENOENT); - } - } - err => { - error!("Error loading bytes for {:?}: {}", digest, err); - if let Some(reply) = maybe_reply { - reply.error(libc::EINVAL); - } + fn init( + &mut self, + _req: &fuser::Request, + _config: &mut fuser::KernelConfig, + ) -> Result<(), libc::c_int> { + self.sender.send(BRFSEvent::Init).map_err(|_| 1) + } + + fn destroy(&mut self) { + self.sender + .send(BRFSEvent::Destroy) + .unwrap_or_else(|err| warn!("Failed to send {:?} event: {}", BRFSEvent::Destroy, err)) + } + + // Used to answer stat calls + fn lookup( + &mut self, + _req: &fuser::Request<'_>, + parent: Inode, + name: &OsStr, + reply: fuser::ReplyEntry, + ) { + let runtime = self.runtime.clone(); + runtime.enter(|| { + let r = match (parent, name.to_str()) { + (ROOT, Some("digest")) => Ok(dir_attr_for(DIGEST_ROOT)), + (ROOT, Some("directory")) => Ok(dir_attr_for(DIRECTORY_ROOT)), + (DIGEST_ROOT, Some(digest_str)) => match digest_from_filepath(digest_str) { + Ok(digest) => self + .inode_for_file(digest, true) + .map_err(|err| { + error!("Error loading file by digest {}: {}", digest_str, err); + libc::EINVAL + }) + .and_then(|maybe_inode| { + maybe_inode + .and_then(|inode| self.file_attr_for(inode)) + .ok_or(libc::ENOENT) + }), + Err(err) => { + warn!("Invalid digest for file in digest root: {}", err); + Err(libc::ENOENT) + } + }, + (DIRECTORY_ROOT, Some(digest_str)) => match digest_from_filepath(digest_str) { + Ok(digest) => self.dir_attr_for(digest), + Err(err) => { + warn!("Invalid digest for directory in directory root: {}", err); + Err(libc::ENOENT) + } + }, + (parent, Some(filename)) => { + let maybe_cache_entry = self + .inode_digest_cache + .get(&parent) + .cloned() + .ok_or(libc::ENOENT); + maybe_cache_entry + .and_then(|cache_entry| { + let store = self.store.clone(); + let parent_digest = cache_entry.digest; + let directory = self + .runtime + .block_on(async move { store.load_directory(parent_digest).await }) + .map_err(|err| match err { + 
StoreError::MissingDigest { .. } => libc::ENOENT, + err => { + error!( + "Error reading directory {:?}: {}", + parent_digest, err + ); + libc::EINVAL + } + })?; + self.node_for_digest(&directory, filename) + .ok_or(libc::ENOENT) + }) + .and_then(|node| match node { + Node::Directory(directory_node) => { + let digest = require_digest(directory_node.digest.as_ref()) + .map_err(|err| { + error!("Error parsing digest: {:?}", err); + libc::ENOENT + })?; + self.dir_attr_for(digest) + } + Node::File(file_node) => { + let digest = + require_digest(file_node.digest.as_ref()).map_err(|err| { + error!("Error parsing digest: {:?}", err); + libc::ENOENT + })?; + self.inode_for_file(digest, file_node.is_executable) + .map_err(|err| { + error!( + "Error loading file by digest {}: {}", + filename, err + ); + libc::EINVAL + }) + .and_then(|maybe_inode| { + maybe_inode + .and_then(|inode| self.file_attr_for(inode)) + .ok_or(libc::ENOENT) + }) + } + }) } - } - Ok(()) - }); - result.expect("Error from read future which should have been handled in the future "); - } - _ => reply.error(libc::ENOENT), - } - }) - } - - fn readdir( - &mut self, - _req: &fuser::Request<'_>, - inode: Inode, + _ => Err(libc::ENOENT), + }; + match r { + Ok(r) => reply.entry(&TTL, &r, 1), + Err(err) => reply.error(err), + } + }) + } + + fn getattr(&mut self, _req: &fuser::Request<'_>, inode: Inode, reply: fuser::ReplyAttr) { + let runtime = self.runtime.clone(); + runtime.enter(|| match inode { + ROOT => reply.attr(&TTL, &dir_attr_for(ROOT)), + DIGEST_ROOT => reply.attr(&TTL, &dir_attr_for(DIGEST_ROOT)), + DIRECTORY_ROOT => reply.attr(&TTL, &dir_attr_for(DIRECTORY_ROOT)), + _ => match self.inode_digest_cache.get(&inode) { + Some(&InodeDetails { + entry_type: EntryType::File, + .. + }) => match self.file_attr_for(inode) { + Some(file_attr) => reply.attr(&TTL, &file_attr), + None => reply.error(libc::ENOENT), + }, + Some(&InodeDetails { + entry_type: EntryType::Directory, + .. + }) => reply.attr(&TTL, &dir_attr_for(inode)), + _ => reply.error(libc::ENOENT), + }, + }) + } + // TODO: Find out whether fh is ever passed if open isn't explicitly implemented (and whether offset is ever negative) - _fh: u64, - offset: i64, - mut reply: fuser::ReplyDirectory, - ) { - let runtime = self.runtime.clone(); - runtime.enter(|| { - match self.readdir_entries(inode) { - Ok(entries) => { - // 0 is a magic offset which means no offset, whereas a non-zero offset means start - // _after_ that entry. Inconsistency is fun. - let to_skip = if offset == 0 { 0 } else { offset + 1 } as usize; - let mut i = offset; - for entry in entries.into_iter().skip(to_skip) { - if reply.add(entry.inode, i, entry.kind, entry.name) { - // Buffer is full, don't add more entries. - break; + fn read( + &mut self, + _req: &fuser::Request<'_>, + inode: Inode, + _fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: fuser::ReplyData, + ) { + let runtime = self.runtime.clone(); + runtime.enter(|| { + match self.inode_digest_cache.get(&inode) { + Some(&InodeDetails { + digest, + entry_type: EntryType::File, + .. + }) => { + let reply = Arc::new(Mutex::new(Some(reply))); + let reply2 = reply.clone(); + // TODO: Read from a cache of Futures driven from a CPU pool, so we can merge in-flight + // requests, rather than reading from the store directly here. 
+ let store = self.store.clone(); + let result: Result<(), ()> = self + .runtime + .block_on(async move { + store + .load_file_bytes_with(digest, move |bytes| { + let begin = std::cmp::min(offset as usize, bytes.len()); + let end = + std::cmp::min(offset as usize + size as usize, bytes.len()); + let mut reply = reply.lock(); + reply.take().unwrap().data(&bytes[begin..end]); + }) + .await + }) + .or_else(|err| { + let maybe_reply = reply2.lock().take(); + match err { + StoreError::MissingDigest { .. } => { + if let Some(reply) = maybe_reply { + reply.error(libc::ENOENT); + } + } + err => { + error!("Error loading bytes for {:?}: {}", digest, err); + if let Some(reply) = maybe_reply { + reply.error(libc::EINVAL); + } + } + } + Ok(()) + }); + result.expect( + "Error from read future which should have been handled in the future ", + ); + } + _ => reply.error(libc::ENOENT), } - i += 1; - } - reply.ok(); - } - Err(err) => reply.error(err), - } - }) - } - - // If this isn't implemented, OSX will try to manipulate ._ files to manage xattrs out of band, which adds both overhead and logspam. - fn listxattr( - &mut self, - _req: &fuser::Request<'_>, - _inode: Inode, - _size: u32, - reply: fuser::ReplyXattr, - ) { - let runtime = self.runtime.clone(); - runtime.enter(|| { - reply.size(0); - }) - } + }) + } + + fn readdir( + &mut self, + _req: &fuser::Request<'_>, + inode: Inode, + // TODO: Find out whether fh is ever passed if open isn't explicitly implemented (and whether offset is ever negative) + _fh: u64, + offset: i64, + mut reply: fuser::ReplyDirectory, + ) { + let runtime = self.runtime.clone(); + runtime.enter(|| { + match self.readdir_entries(inode) { + Ok(entries) => { + // 0 is a magic offset which means no offset, whereas a non-zero offset means start + // _after_ that entry. Inconsistency is fun. + let to_skip = if offset == 0 { 0 } else { offset + 1 } as usize; + let mut i = offset; + for entry in entries.into_iter().skip(to_skip) { + if reply.add(entry.inode, i, entry.kind, entry.name) { + // Buffer is full, don't add more entries. + break; + } + i += 1; + } + reply.ok(); + } + Err(err) => reply.error(err), + } + }) + } + + // If this isn't implemented, OSX will try to manipulate ._ files to manage xattrs out of band, which adds both overhead and logspam. + fn listxattr( + &mut self, + _req: &fuser::Request<'_>, + _inode: Inode, + _size: u32, + reply: fuser::ReplyXattr, + ) { + let runtime = self.runtime.clone(); + runtime.enter(|| { + reply.size(0); + }) + } } pub fn mount>( - mount_path: P, - store: Store, - runtime: task_executor::Executor, + mount_path: P, + store: Store, + runtime: task_executor::Executor, ) -> std::io::Result<(fuser::BackgroundSession, Receiver)> { - // TODO: Work out how to disable caching in the filesystem - let options = vec![ - fuser::MountOption::RO, - fuser::MountOption::FSName("brfs".to_owned()), - fuser::MountOption::CUSTOM("noapplexattr".to_owned()), - ]; - - let (sender, receiver) = channel(); - let brfs = BuildResultFS::new(sender, runtime, store); - - debug!("About to spawn_mount with options {:?}", options); - let result = fuser::spawn_mount2(brfs, &mount_path, &options); - // N.B.: The session won't be used by the caller, but we return it since a reference must be - // maintained to prevent early dropping which unmounts the filesystem. 
- result.map(|session| (session, receiver)) + // TODO: Work out how to disable caching in the filesystem + let options = vec![ + fuser::MountOption::RO, + fuser::MountOption::FSName("brfs".to_owned()), + fuser::MountOption::CUSTOM("noapplexattr".to_owned()), + ]; + + let (sender, receiver) = channel(); + let brfs = BuildResultFS::new(sender, runtime, store); + + debug!("About to spawn_mount with options {:?}", options); + let result = fuser::spawn_mount2(brfs, &mount_path, &options); + // N.B.: The session won't be used by the caller, but we return it since a reference must be + // maintained to prevent early dropping which unmounts the filesystem. + result.map(|session| (session, receiver)) } #[tokio::main] async fn main() { - env_logger::init(); + env_logger::init(); - let default_store_path = Store::default_path(); + let default_store_path = Store::default_path(); - let args = Command::new("brfs") + let args = Command::new("brfs") .arg( Arg::new("local-store-path") .takes_value(true) @@ -736,128 +753,128 @@ async fn main() { ) .get_matches(); - let mount_path = args.value_of("mount-path").unwrap(); - let store_path = args.value_of("local-store-path").unwrap(); - - let root_ca_certs = args - .value_of("root-ca-cert-file") - .map(|path| std::fs::read(path).expect("Error reading root CA certs file")); - - let mut headers = BTreeMap::new(); - if let Some(oauth_path) = args.value_of("oauth-bearer-token-file") { - let token = match std::fs::read_to_string(oauth_path) { - Ok(token) => token, - Err(err) => { - error!( - "Error reading oauth bearer token from {:?}: {}", - oauth_path, err + let mount_path = args.value_of("mount-path").unwrap(); + let store_path = args.value_of("local-store-path").unwrap(); + + let root_ca_certs = args + .value_of("root-ca-cert-file") + .map(|path| std::fs::read(path).expect("Error reading root CA certs file")); + + let mut headers = BTreeMap::new(); + if let Some(oauth_path) = args.value_of("oauth-bearer-token-file") { + let token = match std::fs::read_to_string(oauth_path) { + Ok(token) => token, + Err(err) => { + error!( + "Error reading oauth bearer token from {:?}: {}", + oauth_path, err + ); + std::process::exit(1); + } + }; + headers.insert( + "authorization".to_owned(), + format!("Bearer {}", token.trim()), ); - std::process::exit(1); - } + } + + let runtime = task_executor::Executor::new(); + + let tls_config = match tls::Config::new(root_ca_certs, None) { + Ok(tls_config) => tls_config, + Err(err) => { + error!("Error when creating TLS configuration: {err:?}"); + std::process::exit(1); + } + }; + + let local_only_store = + Store::local_only(runtime.clone(), store_path).expect("Error making local store."); + let store = match args.value_of("server-address") { + Some(address) => local_only_store + .into_with_remote(RemoteOptions { + cas_address: address.to_owned(), + instance_name: args.value_of("remote-instance-name").map(str::to_owned), + tls_config, + headers, + chunk_size_bytes: 4 * 1024 * 1024, + rpc_timeout: std::time::Duration::from_secs(5 * 60), + rpc_retries: 1, + rpc_concurrency_limit: args + .value_of_t::("rpc-concurrency-limit") + .expect("Bad rpc-concurrency-limit flag"), + capabilities_cell_opt: None, + batch_api_size_limit: args + .value_of_t::("batch-api-size-limit") + .expect("Bad batch-api-size-limit flag"), + }) + .await + .expect("Error making remote store"), + None => local_only_store, }; - headers.insert( - "authorization".to_owned(), - format!("Bearer {}", token.trim()), - ); - } - - let runtime = task_executor::Executor::new(); - - 
let tls_config = match tls::Config::new(root_ca_certs, None) { - Ok(tls_config) => tls_config, - Err(err) => { - error!("Error when creating TLS configuration: {err:?}"); - std::process::exit(1); + + #[derive(Clone, Copy, Debug)] + enum Sig { + Int, + Term, + Unmount, } - }; - - let local_only_store = - Store::local_only(runtime.clone(), store_path).expect("Error making local store."); - let store = match args.value_of("server-address") { - Some(address) => local_only_store - .into_with_remote(RemoteOptions { - cas_address: address.to_owned(), - instance_name: args.value_of("remote-instance-name").map(str::to_owned), - tls_config, - headers, - chunk_size_bytes: 4 * 1024 * 1024, - rpc_timeout: std::time::Duration::from_secs(5 * 60), - rpc_retries: 1, - rpc_concurrency_limit: args - .value_of_t::("rpc-concurrency-limit") - .expect("Bad rpc-concurrency-limit flag"), - capabilities_cell_opt: None, - batch_api_size_limit: args - .value_of_t::("batch-api-size-limit") - .expect("Bad batch-api-size-limit flag"), - }) - .await - .expect("Error making remote store"), - None => local_only_store, - }; - - #[derive(Clone, Copy, Debug)] - enum Sig { - Int, - Term, - Unmount, - } - - fn install_handler(install_fn: F, sig: Sig) -> impl StreamExt> - where - F: Fn() -> SignalKind, - { - SignalStream::new( - signal(install_fn()).unwrap_or_else(|_| panic!("Failed to install SIG{sig:?} handler")), - ) - .map(move |_| Some(sig)) - } - - let sigint = install_handler(SignalKind::interrupt, Sig::Int); - let sigterm = install_handler(SignalKind::terminate, Sig::Term); - - match mount(mount_path, store, runtime.clone()) { - Err(err) => { - error!( - "Store {} failed to mount at {}: {}", - store_path, mount_path, err - ); - std::process::exit(1); + + fn install_handler(install_fn: F, sig: Sig) -> impl StreamExt> + where + F: Fn() -> SignalKind, + { + SignalStream::new( + signal(install_fn()).unwrap_or_else(|_| panic!("Failed to install SIG{sig:?} handler")), + ) + .map(move |_| Some(sig)) } - Ok((_, receiver)) => { - match receiver.recv().unwrap() { - BRFSEvent::Init => debug!("Store {} mounted at {}", store_path, mount_path), - BRFSEvent::Destroy => { - warn!("Externally unmounted before we could mount."); - return; - } - } - - let unmount = task::spawn_blocking(move || { - // N.B.: In practice recv always errs and we exercise the or branch. It seems the sender - // side thread always exits (which drops our BuildResultFS) before we get a chance to - // complete the read. 
- match receiver.recv().unwrap_or(BRFSEvent::Destroy) { - BRFSEvent::Destroy => Some(Sig::Unmount), - event => { - warn!("Received unexpected event {:?}", event); - None - } + + let sigint = install_handler(SignalKind::interrupt, Sig::Int); + let sigterm = install_handler(SignalKind::terminate, Sig::Term); + + match mount(mount_path, store, runtime.clone()) { + Err(err) => { + error!( + "Store {} failed to mount at {}: {}", + store_path, mount_path, err + ); + std::process::exit(1); } - }) - .map(|res| res.unwrap()) - .into_stream(); - - let mut shutdown_signal = sigint.merge(sigterm).merge(unmount).filter_map(|x| x); - debug!("Awaiting shutdown signal ..."); - if let Some(sig) = shutdown_signal.next().await { - match sig { - Sig::Unmount => debug!("Externally unmounted"), - sig => debug!("Received SIG{:?}", sig), + Ok((_, receiver)) => { + match receiver.recv().unwrap() { + BRFSEvent::Init => debug!("Store {} mounted at {}", store_path, mount_path), + BRFSEvent::Destroy => { + warn!("Externally unmounted before we could mount."); + return; + } + } + + let unmount = task::spawn_blocking(move || { + // N.B.: In practice recv always errs and we exercise the or branch. It seems the sender + // side thread always exits (which drops our BuildResultFS) before we get a chance to + // complete the read. + match receiver.recv().unwrap_or(BRFSEvent::Destroy) { + BRFSEvent::Destroy => Some(Sig::Unmount), + event => { + warn!("Received unexpected event {:?}", event); + None + } + } + }) + .map(|res| res.unwrap()) + .into_stream(); + + let mut shutdown_signal = sigint.merge(sigterm).merge(unmount).filter_map(|x| x); + debug!("Awaiting shutdown signal ..."); + if let Some(sig) = shutdown_signal.next().await { + match sig { + Sig::Unmount => debug!("Externally unmounted"), + sig => debug!("Received SIG{:?}", sig), + } + } } - } } - } } #[cfg(test)] diff --git a/src/rust/engine/fs/brfs/src/syscall_tests.rs b/src/rust/engine/fs/brfs/src/syscall_tests.rs index d49d7e3bb3e..93f73f1e4e3 100644 --- a/src/rust/engine/fs/brfs/src/syscall_tests.rs +++ b/src/rust/engine/fs/brfs/src/syscall_tests.rs @@ -14,45 +14,45 @@ use testutil::data::TestData; #[tokio::test] async fn read_file_by_digest_exact_bytes() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - let test_bytes = TestData::roland(); + let test_bytes = TestData::roland(); - store - .store_file_bytes(test_bytes.bytes(), false) - .await - .expect("Storing bytes"); + store + .store_file_bytes(test_bytes.bytes(), false) + .await + .expect("Storing bytes"); - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let path = mount_dir - .path() - .join("digest") - .join(digest_to_filepath(&test_bytes.digest())); + let path = mount_dir + .path() + .join("digest") + .join(digest_to_filepath(&test_bytes.digest())); - let mut buf = make_buffer(test_bytes.len()); + let mut buf = make_buffer(test_bytes.len()); - unsafe { - let fd = libc::open(path_to_cstring(&path).as_ptr(), 0); - assert!(fd > 0, "Bad fd {}", fd); - let read_bytes = libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()); - assert_eq!(test_bytes.len() as isize, 
read_bytes); - assert_eq!(0, libc::close(fd)); - } + unsafe { + let fd = libc::open(path_to_cstring(&path).as_ptr(), 0); + assert!(fd > 0, "Bad fd {}", fd); + let read_bytes = libc::read(fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()); + assert_eq!(test_bytes.len() as isize, read_bytes); + assert_eq!(0, libc::close(fd)); + } - assert_eq!(test_bytes.string(), String::from_utf8(buf).unwrap()); + assert_eq!(test_bytes.string(), String::from_utf8(buf).unwrap()); } fn path_to_cstring(path: &Path) -> CString { - CString::new(path.to_string_lossy().as_bytes().to_owned()).unwrap() + CString::new(path.to_string_lossy().as_bytes().to_owned()).unwrap() } fn make_buffer(size: usize) -> Vec { - let mut buf: Vec = Vec::new(); - buf.resize(size, 0); - buf + let mut buf: Vec = Vec::new(); + buf.resize(size, 0); + buf } diff --git a/src/rust/engine/fs/brfs/src/tests.rs b/src/rust/engine/fs/brfs/src/tests.rs index 57afa7991a6..30e58f48911 100644 --- a/src/rust/engine/fs/brfs/src/tests.rs +++ b/src/rust/engine/fs/brfs/src/tests.rs @@ -7,231 +7,231 @@ use crate::mount; use hashing; use store::Store; use testutil::{ - data::{TestData, TestDirectory}, - file, + data::{TestData, TestDirectory}, + file, }; #[tokio::test] async fn missing_digest() { - let (store_dir, mount_dir) = make_dirs(); + let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); + let runtime = task_executor::Executor::new(); - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - assert!(!&mount_dir - .path() - .join("digest") - .join(digest_to_filepath(&TestData::roland().digest())) - .exists()); + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + assert!(!&mount_dir + .path() + .join("digest") + .join(digest_to_filepath(&TestData::roland().digest())) + .exists()); } #[tokio::test] async fn read_file_by_digest() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); - - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - - let test_bytes = TestData::roland(); - - store - .store_file_bytes(test_bytes.bytes(), false) - .await - .expect("Storing bytes"); - - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let file_path = mount_dir - .path() - .join("digest") - .join(digest_to_filepath(&test_bytes.digest())); - assert_eq!(test_bytes.bytes(), file::contents(&file_path)); - assert!(file::is_executable(&file_path)); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); + + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + + let test_bytes = TestData::roland(); + + store + .store_file_bytes(test_bytes.bytes(), false) + .await + .expect("Storing bytes"); + + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let file_path = mount_dir + .path() + .join("digest") + .join(digest_to_filepath(&test_bytes.digest())); + assert_eq!(test_bytes.bytes(), file::contents(&file_path)); + assert!(file::is_executable(&file_path)); } #[tokio::test] async fn list_directory() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); - - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error 
creating local store"); - - let test_bytes = TestData::roland(); - let test_directory = TestDirectory::containing_roland(); - - store - .store_file_bytes(test_bytes.bytes(), false) - .await - .expect("Storing bytes"); - store - .record_directory(&test_directory.directory(), false) - .await - .expect("Storing directory"); - - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let virtual_dir = mount_dir - .path() - .join("directory") - .join(digest_to_filepath(&test_directory.digest())); - assert_eq!(vec!["roland.ext"], file::list_dir(&virtual_dir)); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); + + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + + let test_bytes = TestData::roland(); + let test_directory = TestDirectory::containing_roland(); + + store + .store_file_bytes(test_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .record_directory(&test_directory.directory(), false) + .await + .expect("Storing directory"); + + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let virtual_dir = mount_dir + .path() + .join("directory") + .join(digest_to_filepath(&test_directory.digest())); + assert_eq!(vec!["roland.ext"], file::list_dir(&virtual_dir)); } #[tokio::test] async fn read_file_from_directory() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); - - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - - let test_bytes = TestData::roland(); - let test_directory = TestDirectory::containing_roland(); - - store - .store_file_bytes(test_bytes.bytes(), false) - .await - .expect("Storing bytes"); - store - .record_directory(&test_directory.directory(), false) - .await - .expect("Storing directory"); - - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let roland = mount_dir - .path() - .join("directory") - .join(digest_to_filepath(&test_directory.digest())) - .join("roland.ext"); - assert_eq!(test_bytes.bytes(), file::contents(&roland)); - assert!(!file::is_executable(&roland)); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); + + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + + let test_bytes = TestData::roland(); + let test_directory = TestDirectory::containing_roland(); + + store + .store_file_bytes(test_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .record_directory(&test_directory.directory(), false) + .await + .expect("Storing directory"); + + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let roland = mount_dir + .path() + .join("directory") + .join(digest_to_filepath(&test_directory.digest())) + .join("roland.ext"); + assert_eq!(test_bytes.bytes(), file::contents(&roland)); + assert!(!file::is_executable(&roland)); } #[tokio::test] async fn list_recursive_directory() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); - - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - - let test_bytes = TestData::roland(); - let treat_bytes = TestData::catnip(); - let test_directory = TestDirectory::containing_roland(); - let recursive_directory = TestDirectory::recursive(); - - store - .store_file_bytes(test_bytes.bytes(), false) - .await - .expect("Storing bytes"); - 
store - .store_file_bytes(treat_bytes.bytes(), false) - .await - .expect("Storing bytes"); - store - .record_directory(&test_directory.directory(), false) - .await - .expect("Storing directory"); - store - .record_directory(&recursive_directory.directory(), false) - .await - .expect("Storing directory"); - - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let virtual_dir = mount_dir - .path() - .join("directory") - .join(digest_to_filepath(&recursive_directory.digest())); - assert_eq!(vec!["cats", "treats.ext"], file::list_dir(&virtual_dir)); - assert_eq!( - vec!["roland.ext"], - file::list_dir(&virtual_dir.join("cats")) - ); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); + + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + + let test_bytes = TestData::roland(); + let treat_bytes = TestData::catnip(); + let test_directory = TestDirectory::containing_roland(); + let recursive_directory = TestDirectory::recursive(); + + store + .store_file_bytes(test_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .store_file_bytes(treat_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .record_directory(&test_directory.directory(), false) + .await + .expect("Storing directory"); + store + .record_directory(&recursive_directory.directory(), false) + .await + .expect("Storing directory"); + + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let virtual_dir = mount_dir + .path() + .join("directory") + .join(digest_to_filepath(&recursive_directory.digest())); + assert_eq!(vec!["cats", "treats.ext"], file::list_dir(&virtual_dir)); + assert_eq!( + vec!["roland.ext"], + file::list_dir(&virtual_dir.join("cats")) + ); } #[tokio::test] async fn read_file_from_recursive_directory() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); - - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - - let test_bytes = TestData::roland(); - let treat_bytes = TestData::catnip(); - let test_directory = TestDirectory::containing_roland(); - let recursive_directory = TestDirectory::recursive(); - - store - .store_file_bytes(test_bytes.bytes(), false) - .await - .expect("Storing bytes"); - store - .store_file_bytes(treat_bytes.bytes(), false) - .await - .expect("Storing bytes"); - store - .record_directory(&test_directory.directory(), false) - .await - .expect("Storing directory"); - store - .record_directory(&recursive_directory.directory(), false) - .await - .expect("Storing directory"); - - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let virtual_dir = mount_dir - .path() - .join("directory") - .join(digest_to_filepath(&recursive_directory.digest())); - let treats = virtual_dir.join("treats.ext"); - assert_eq!(treat_bytes.bytes(), file::contents(&treats)); - assert!(!file::is_executable(&treats)); - - let roland = virtual_dir.join("cats").join("roland.ext"); - assert_eq!(test_bytes.bytes(), file::contents(&roland)); - assert!(!file::is_executable(&roland)); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); + + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + + let test_bytes = TestData::roland(); + let treat_bytes = TestData::catnip(); + let test_directory = TestDirectory::containing_roland(); + let recursive_directory = 
TestDirectory::recursive(); + + store + .store_file_bytes(test_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .store_file_bytes(treat_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .record_directory(&test_directory.directory(), false) + .await + .expect("Storing directory"); + store + .record_directory(&recursive_directory.directory(), false) + .await + .expect("Storing directory"); + + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let virtual_dir = mount_dir + .path() + .join("directory") + .join(digest_to_filepath(&recursive_directory.digest())); + let treats = virtual_dir.join("treats.ext"); + assert_eq!(treat_bytes.bytes(), file::contents(&treats)); + assert!(!file::is_executable(&treats)); + + let roland = virtual_dir.join("cats").join("roland.ext"); + assert_eq!(test_bytes.bytes(), file::contents(&roland)); + assert!(!file::is_executable(&roland)); } #[tokio::test] async fn files_are_correctly_executable() { - let (store_dir, mount_dir) = make_dirs(); - let runtime = task_executor::Executor::new(); - - let store = - Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); - - let treat_bytes = TestData::catnip(); - let directory = TestDirectory::with_maybe_executable_files(true); - - store - .store_file_bytes(treat_bytes.bytes(), false) - .await - .expect("Storing bytes"); - store - .record_directory(&directory.directory(), false) - .await - .expect("Storing directory"); - - let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); - let virtual_dir = mount_dir - .path() - .join("directory") - .join(digest_to_filepath(&directory.digest())); - assert_eq!(vec!["feed.ext", "food.ext"], file::list_dir(&virtual_dir)); - assert!(file::is_executable(&virtual_dir.join("feed.ext"))); - assert!(!file::is_executable(&virtual_dir.join("food.ext"))); + let (store_dir, mount_dir) = make_dirs(); + let runtime = task_executor::Executor::new(); + + let store = + Store::local_only(runtime.clone(), store_dir.path()).expect("Error creating local store"); + + let treat_bytes = TestData::catnip(); + let directory = TestDirectory::with_maybe_executable_files(true); + + store + .store_file_bytes(treat_bytes.bytes(), false) + .await + .expect("Storing bytes"); + store + .record_directory(&directory.directory(), false) + .await + .expect("Storing directory"); + + let _fs = mount(mount_dir.path(), store, runtime).expect("Mounting"); + let virtual_dir = mount_dir + .path() + .join("directory") + .join(digest_to_filepath(&directory.digest())); + assert_eq!(vec!["feed.ext", "food.ext"], file::list_dir(&virtual_dir)); + assert!(file::is_executable(&virtual_dir.join("feed.ext"))); + assert!(!file::is_executable(&virtual_dir.join("food.ext"))); } pub fn digest_to_filepath(digest: &hashing::Digest) -> String { - format!("{}-{}", digest.hash, digest.size_bytes) + format!("{}-{}", digest.hash, digest.size_bytes) } pub fn make_dirs() -> (tempfile::TempDir, tempfile::TempDir) { - let store_dir = tempfile::Builder::new().prefix("store").tempdir().unwrap(); - let mount_dir = tempfile::Builder::new().prefix("mount").tempdir().unwrap(); - (store_dir, mount_dir) + let store_dir = tempfile::Builder::new().prefix("store").tempdir().unwrap(); + let mount_dir = tempfile::Builder::new().prefix("mount").tempdir().unwrap(); + (store_dir, mount_dir) } diff --git a/src/rust/engine/fs/fs_util/src/main.rs b/src/rust/engine/fs/fs_util/src/main.rs index fc27a1b31e2..57a9cfd307a 100644 --- 
a/src/rust/engine/fs/fs_util/src/main.rs +++ b/src/rust/engine/fs/fs_util/src/main.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -35,8 +35,8 @@ use std::time::Duration; use bytes::Bytes; use clap::{Arg, Command}; use fs::{ - DirectoryDigest, GlobExpansionConjunction, GlobMatching, Permissions, PreparedPathGlobs, - RelativePath, StrictGlobMatching, SymlinkBehavior, + DirectoryDigest, GlobExpansionConjunction, GlobMatching, Permissions, PreparedPathGlobs, + RelativePath, StrictGlobMatching, SymlinkBehavior, }; use futures::future::{self, BoxFuture}; use futures::FutureExt; @@ -48,48 +48,48 @@ use protos::require_digest; use serde_derive::Serialize; use std::collections::{BTreeMap, BTreeSet}; use store::{ - RemoteOptions, Snapshot, SnapshotOps, Store, StoreError, StoreFileByDigest, SubsetParams, - UploadSummary, + RemoteOptions, Snapshot, SnapshotOps, Store, StoreError, StoreFileByDigest, SubsetParams, + UploadSummary, }; use workunit_store::WorkunitStore; #[derive(Debug)] enum ExitCode { - UnknownError = 1, - NotFound = 2, + UnknownError = 1, + NotFound = 2, } #[derive(Debug)] struct ExitError(String, ExitCode); impl From for ExitError { - fn from(s: StoreError) -> Self { - match s { - md @ StoreError::MissingDigest { .. } => ExitError(md.to_string(), ExitCode::NotFound), - StoreError::Unclassified(s) => ExitError(s, ExitCode::UnknownError), + fn from(s: StoreError) -> Self { + match s { + md @ StoreError::MissingDigest { .. } => ExitError(md.to_string(), ExitCode::NotFound), + StoreError::Unclassified(s) => ExitError(s, ExitCode::UnknownError), + } } - } } impl From for ExitError { - fn from(s: String) -> Self { - ExitError(s, ExitCode::UnknownError) - } + fn from(s: String) -> Self { + ExitError(s, ExitCode::UnknownError) + } } #[derive(Serialize)] struct SummaryWithDigest { - digest: Digest, - summary: Option, + digest: Digest, + summary: Option, } #[tokio::main] async fn main() { - env_logger::init(); - let workunit_store = WorkunitStore::new(false, log::Level::Debug); - workunit_store.init_thread_state(None); + env_logger::init(); + let workunit_store = WorkunitStore::new(false, log::Level::Debug); + workunit_store.init_thread_state(None); - match execute( + match execute( &Command::new("fs_util") .subcommand( Command::new("file") @@ -333,476 +333,492 @@ Destination must not exist before this command is run.", // TODO: Sure, it's a bit long... 
#[allow(clippy::cognitive_complexity)] async fn execute(top_match: &clap::ArgMatches) -> Result<(), ExitError> { - let store_dir = top_match - .value_of("local-store-path") - .map(PathBuf::from) - .unwrap_or_else(Store::default_path); - let runtime = task_executor::Executor::new(); - let (store, store_has_remote) = { - let local_only = Store::local_only(runtime.clone(), &store_dir) - .map_err(|e| format!("Failed to open/create store for directory {store_dir:?}: {e}"))?; - let (store_result, store_has_remote) = match top_match.value_of("server-address") { - Some(cas_address) => { - let chunk_size_bytes = top_match - .value_of_t::("chunk-bytes") - .expect("Bad chunk-bytes flag"); - - let root_ca_certs = if let Some(path) = top_match.value_of("root-ca-cert-file") { - Some( - std::fs::read(path) - .map_err(|err| format!("Error reading root CA certs file {path}: {err}"))?, - ) - } else { - None - }; - - let mtls = match ( - top_match.value_of("mtls-client-certificate-chain-path"), - top_match.value_of("mtls-client-key-path"), - ) { - (Some(cert_chain_path), Some(key_path)) => { - let key = std::fs::read(key_path).map_err(|err| { - format!("Failed to read mtls-client-key-path from {key_path:?}: {err:?}") - })?; - let cert_chain = std::fs::read(cert_chain_path).map_err(|err| { + let store_dir = top_match + .value_of("local-store-path") + .map(PathBuf::from) + .unwrap_or_else(Store::default_path); + let runtime = task_executor::Executor::new(); + let (store, store_has_remote) = { + let local_only = Store::local_only(runtime.clone(), &store_dir) + .map_err(|e| format!("Failed to open/create store for directory {store_dir:?}: {e}"))?; + let (store_result, store_has_remote) = match top_match.value_of("server-address") { + Some(cas_address) => { + let chunk_size_bytes = top_match + .value_of_t::("chunk-bytes") + .expect("Bad chunk-bytes flag"); + + let root_ca_certs = + if let Some(path) = top_match.value_of("root-ca-cert-file") { + Some(std::fs::read(path).map_err(|err| { + format!("Error reading root CA certs file {path}: {err}") + })?) 
+ } else { + None + }; + + let mtls = match ( + top_match.value_of("mtls-client-certificate-chain-path"), + top_match.value_of("mtls-client-key-path"), + ) { + (Some(cert_chain_path), Some(key_path)) => { + let key = std::fs::read(key_path).map_err(|err| { + format!( + "Failed to read mtls-client-key-path from {key_path:?}: {err:?}" + ) + })?; + let cert_chain = std::fs::read(cert_chain_path).map_err(|err| { format!( "Failed to read mtls-client-certificate-chain-path from {cert_chain_path:?}: {err:?}" ) })?; - Some((cert_chain, key)) - } - (None, None) => None, - _ => { - return Err("Must specify both or neither of mtls-client-certificate-chain-path and mtls-client-key-path".to_owned().into()); - } - }; - - let certificate_check = if top_match.is_present("dangerously-ignore-certificates") { - CertificateCheck::DangerouslyDisabled - } else { - CertificateCheck::Enabled - }; - - let mut tls_config = grpc_util::tls::Config::new(root_ca_certs, mtls)?; - - tls_config.certificate_check = certificate_check; - - let mut headers = BTreeMap::new(); - - if let Some(headers_from_flag) = top_match.values_of("header") { - for h in headers_from_flag { - if let Some((key, value)) = h.split_once('=') { - headers.insert(key.to_owned(), value.to_owned()); - } else { - panic!("Expected --header flag to contain = but was {h}"); + Some((cert_chain, key)) + } + (None, None) => None, + _ => { + return Err("Must specify both or neither of mtls-client-certificate-chain-path and mtls-client-key-path".to_owned().into()); + } + }; + + let certificate_check = if top_match.is_present("dangerously-ignore-certificates") { + CertificateCheck::DangerouslyDisabled + } else { + CertificateCheck::Enabled + }; + + let mut tls_config = grpc_util::tls::Config::new(root_ca_certs, mtls)?; + + tls_config.certificate_check = certificate_check; + + let mut headers = BTreeMap::new(); + + if let Some(headers_from_flag) = top_match.values_of("header") { + for h in headers_from_flag { + if let Some((key, value)) = h.split_once('=') { + headers.insert(key.to_owned(), value.to_owned()); + } else { + panic!("Expected --header flag to contain = but was {h}"); + } + } + } + + if let Some(oauth_path) = top_match.value_of("oauth-bearer-token-file") { + let token = std::fs::read_to_string(oauth_path).map_err(|err| { + format!("Error reading oauth bearer token from {oauth_path:?}: {err}") + })?; + headers.insert( + "authorization".to_owned(), + format!("Bearer {}", token.trim()), + ); + } + + ( + local_only + .into_with_remote(RemoteOptions { + cas_address: cas_address.to_owned(), + instance_name: top_match + .value_of("remote-instance-name") + .map(str::to_owned), + tls_config, + headers, + chunk_size_bytes, + // This deadline is really only in place because otherwise DNS failures + // leave this hanging forever. + // + // Make fs_util have a very long deadline (because it's not configurable, + // like it is inside pants). 
+ rpc_timeout: Duration::from_secs(30 * 60), + rpc_retries: top_match + .value_of_t::("rpc-attempts") + .expect("Bad rpc-attempts flag"), + rpc_concurrency_limit: top_match + .value_of_t::("rpc-concurrency-limit") + .expect("Bad rpc-concurrency-limit flag"), + capabilities_cell_opt: None, + batch_api_size_limit: top_match + .value_of_t::("batch-api-size-limit") + .expect("Bad batch-api-size-limit flag"), + }) + .await, + true, + ) } - } - } - - if let Some(oauth_path) = top_match.value_of("oauth-bearer-token-file") { - let token = std::fs::read_to_string(oauth_path).map_err(|err| { - format!("Error reading oauth bearer token from {oauth_path:?}: {err}") - })?; - headers.insert( - "authorization".to_owned(), - format!("Bearer {}", token.trim()), - ); - } - - ( - local_only - .into_with_remote(RemoteOptions { - cas_address: cas_address.to_owned(), - instance_name: top_match - .value_of("remote-instance-name") - .map(str::to_owned), - tls_config, - headers, - chunk_size_bytes, - // This deadline is really only in place because otherwise DNS failures - // leave this hanging forever. - // - // Make fs_util have a very long deadline (because it's not configurable, - // like it is inside pants). - rpc_timeout: Duration::from_secs(30 * 60), - rpc_retries: top_match - .value_of_t::("rpc-attempts") - .expect("Bad rpc-attempts flag"), - rpc_concurrency_limit: top_match - .value_of_t::("rpc-concurrency-limit") - .expect("Bad rpc-concurrency-limit flag"), - capabilities_cell_opt: None, - batch_api_size_limit: top_match - .value_of_t::("batch-api-size-limit") - .expect("Bad batch-api-size-limit flag"), - }) - .await, - true, - ) - } - None => (Ok(local_only), false), + None => (Ok(local_only), false), + }; + (store_result?, store_has_remote) }; - (store_result?, store_has_remote) - }; - match expect_subcommand(top_match) { - ("file", sub_match) => { - match expect_subcommand(sub_match) { - ("cat", args) => { - let fingerprint = Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; - let size_bytes = args - .value_of("size_bytes") - .unwrap() - .parse::() - .expect("size_bytes must be a non-negative number"); - let digest = Digest::new(fingerprint, size_bytes); - Ok( - store - .load_file_bytes_with(digest, |bytes| io::stdout().write_all(bytes).unwrap()) - .await?, - ) + match expect_subcommand(top_match) { + ("file", sub_match) => { + match expect_subcommand(sub_match) { + ("cat", args) => { + let fingerprint = + Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; + let size_bytes = args + .value_of("size_bytes") + .unwrap() + .parse::() + .expect("size_bytes must be a non-negative number"); + let digest = Digest::new(fingerprint, size_bytes); + Ok(store + .load_file_bytes_with(digest, |bytes| { + io::stdout().write_all(bytes).unwrap() + }) + .await?) + } + ("save", args) => { + let path = PathBuf::from(args.value_of("path").unwrap()); + // Canonicalize path to guarantee that a relative path has a parent. + let posix_fs = make_posix_fs( + runtime.clone(), + path.canonicalize() + .map_err(|e| format!("Error canonicalizing path {path:?}: {e:?}"))? 
+ .parent() + .ok_or_else(|| { + format!("File being saved must have parent but {path:?} did not") + })?, + ); + let file = posix_fs + .stat_sync(Path::new(path.file_name().unwrap())) + .unwrap() + .ok_or_else(|| { + format!("Tried to save file {path:?} but it did not exist") + })?; + match file { + fs::Stat::File(f) => { + let digest = store::OneOffStoreFileByDigest::new( + store.clone(), + Arc::new(posix_fs), + false, + ) + .store_by_digest(f) + .await + .unwrap(); + + let report = + ensure_uploaded_to_remote(&store, store_has_remote, digest) + .await + .unwrap(); + print_upload_summary(args.value_of("output-mode"), &report); + + Ok(()) + } + o => Err(format!( + "Tried to save file {path:?} but it was not a file, was a {o:?}" + ) + .into()), + } + } + (_, _) => unimplemented!(), + } } - ("save", args) => { - let path = PathBuf::from(args.value_of("path").unwrap()); - // Canonicalize path to guarantee that a relative path has a parent. - let posix_fs = make_posix_fs( - runtime.clone(), - path - .canonicalize() - .map_err(|e| format!("Error canonicalizing path {path:?}: {e:?}"))? - .parent() - .ok_or_else(|| format!("File being saved must have parent but {path:?} did not"))?, - ); - let file = posix_fs - .stat_sync(Path::new(path.file_name().unwrap())) - .unwrap() - .ok_or_else(|| format!("Tried to save file {path:?} but it did not exist"))?; - match file { - fs::Stat::File(f) => { - let digest = - store::OneOffStoreFileByDigest::new(store.clone(), Arc::new(posix_fs), false) - .store_by_digest(f) - .await - .unwrap(); - - let report = ensure_uploaded_to_remote(&store, store_has_remote, digest) - .await - .unwrap(); - print_upload_summary(args.value_of("output-mode"), &report); - - Ok(()) + ("tree", sub_match) => match expect_subcommand(sub_match) { + ("materialize", args) => { + let destination = PathBuf::from(args.value_of("destination").unwrap()); + // NB: We use `destination` as the root directory, because there is no need to + // memoize a check for whether some other parent directory is hardlinkable. + let destination_root = destination.clone(); + let fingerprint = + Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; + let size_bytes = args + .value_of("size_bytes") + .unwrap() + .parse::() + .expect("size_bytes must be a non-negative number"); + let digest = Digest::new(fingerprint, size_bytes); + let output_digest_opt = store + .load_tree_from_remote(digest) + .await + .expect("protocol error"); + let output_digest = output_digest_opt + .ok_or_else(|| ExitError("not found".into(), ExitCode::NotFound))?; + Ok(store + .materialize_directory( + destination, + &destination_root, + output_digest, + false, + &BTreeSet::new(), + Permissions::Writable, + ) + .await?) } - o => { - Err(format!("Tried to save file {path:?} but it was not a file, was a {o:?}").into()) + (_, _) => unimplemented!(), + }, + ("directory", sub_match) => match expect_subcommand(sub_match) { + ("materialize", args) => { + let destination = PathBuf::from(args.value_of("destination").unwrap()); + // NB: We use `destination` as the root directory, because there is no need to + // memoize a check for whether some other parent directory is hardlinkable. 
+ let destination_root = destination.clone(); + let fingerprint = + Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; + let size_bytes = args + .value_of("size_bytes") + .unwrap() + .parse::() + .expect("size_bytes must be a non-negative number"); + let digest = + DirectoryDigest::from_persisted_digest(Digest::new(fingerprint, size_bytes)); + Ok(store + .materialize_directory( + destination, + &destination_root, + digest, + false, + &BTreeSet::new(), + Permissions::Writable, + ) + .await?) } - } + ("save", args) => { + let posix_fs = Arc::new(make_posix_fs( + runtime.clone(), + args.value_of("root").unwrap(), + )); + let store_copy = store.clone(); + let path_globs = fs::PathGlobs::new( + args.values_of("globs") + .unwrap() + .map(str::to_string) + .collect::>(), + // By using `Ignore`, we say that we don't care if some globs fail to expand. Is + // that a valid assumption? + fs::StrictGlobMatching::Ignore, + fs::GlobExpansionConjunction::AllMatch, + ) + .parse()?; + let paths = posix_fs + .expand_globs(path_globs, SymlinkBehavior::Oblivious, None) + .await + .map_err(|e| format!("Error expanding globs: {e:?}"))?; + + let snapshot = Snapshot::from_path_stats( + store::OneOffStoreFileByDigest::new(store_copy, posix_fs, false), + paths, + ) + .await?; + + let ((), report) = futures::try_join!( + store.ensure_directory_digest_persisted(snapshot.clone().into()), + ensure_uploaded_to_remote(&store, store_has_remote, snapshot.digest), + )?; + print_upload_summary(args.value_of("output-mode"), &report); + + Ok(()) + } + ("cat-proto", args) => { + let fingerprint = + Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; + let size_bytes = args + .value_of("size_bytes") + .unwrap() + .parse::() + .expect("size_bytes must be a non-negative number"); + let mut digest = + DirectoryDigest::from_persisted_digest(Digest::new(fingerprint, size_bytes)); + + if let Some(prefix_to_strip) = args.value_of("child-dir") { + let mut result = store + .subset( + digest, + SubsetParams { + globs: PreparedPathGlobs::create( + vec![format!("{prefix_to_strip}/**")], + StrictGlobMatching::Ignore, + GlobExpansionConjunction::AnyMatch, + )?, + }, + ) + .await; + // It's a shame we can't just .and_then here, because we can't use async closures. + if let Ok(subset_digest) = result { + result = store + .strip_prefix(subset_digest, &RelativePath::new(prefix_to_strip)?) + .await; + } + digest = result?; + + // TODO: The below operations don't strictly need persistence: we could render the + // relevant `DigestTrie` directly. See #13112. + store + .ensure_directory_digest_persisted(digest.clone()) + .await?; + } + + let proto_bytes: Vec = match args.value_of("output-format").unwrap() { + "binary" => store + .load_directory(digest.as_digest()) + .await? + .to_bytes() + .to_vec(), + "text" => format!("{:?}\n", store.load_directory(digest.as_digest()).await?) + .as_bytes() + .to_vec(), + "recursive-file-list" => expand_files(store, digest.as_digest()) + .await? + .into_iter() + .map(|(name, _digest)| format!("{name}\n")) + .collect::>() + .join("") + .into_bytes(), + "recursive-file-list-with-digests" => expand_files(store, digest.as_digest()) + .await? 
+ .into_iter() + .map(|(name, digest)| { + format!("{} {:<16} {}\n", digest.hash, digest.size_bytes, name) + }) + .collect::>() + .join("") + .into_bytes(), + format => { + return Err( + format!("Unexpected value of --output-format arg: {format}").into() + ) + } + }; + + io::stdout().write_all(&proto_bytes).unwrap(); + Ok(()) + } + (_, _) => unimplemented!(), + }, + ("cat", args) => { + let fingerprint = Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; + let size_bytes = args + .value_of("size_bytes") + .unwrap() + .parse::() + .expect("size_bytes must be a non-negative number"); + let digest = Digest::new(fingerprint, size_bytes); + let bytes = match store + .load_file_bytes_with(digest, Bytes::copy_from_slice) + .await + { + Err(StoreError::MissingDigest { .. }) => { + store.load_directory(digest).await?.to_bytes() + } + Err(e) => return Err(e.into()), + Ok(bytes) => bytes, + }; + + io::stdout().write_all(&bytes).unwrap(); + Ok(()) } - (_, _) => unimplemented!(), - } - } - ("tree", sub_match) => match expect_subcommand(sub_match) { - ("materialize", args) => { - let destination = PathBuf::from(args.value_of("destination").unwrap()); - // NB: We use `destination` as the root directory, because there is no need to - // memoize a check for whether some other parent directory is hardlinkable. - let destination_root = destination.clone(); - let fingerprint = Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; - let size_bytes = args - .value_of("size_bytes") - .unwrap() - .parse::() - .expect("size_bytes must be a non-negative number"); - let digest = Digest::new(fingerprint, size_bytes); - let output_digest_opt = store - .load_tree_from_remote(digest) - .await - .expect("protocol error"); - let output_digest = - output_digest_opt.ok_or_else(|| ExitError("not found".into(), ExitCode::NotFound))?; - Ok( - store - .materialize_directory( - destination, - &destination_root, - output_digest, - false, - &BTreeSet::new(), - Permissions::Writable, - ) - .await?, - ) - } - (_, _) => unimplemented!(), - }, - ("directory", sub_match) => match expect_subcommand(sub_match) { - ("materialize", args) => { - let destination = PathBuf::from(args.value_of("destination").unwrap()); - // NB: We use `destination` as the root directory, because there is no need to - // memoize a check for whether some other parent directory is hardlinkable. - let destination_root = destination.clone(); - let fingerprint = Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; - let size_bytes = args - .value_of("size_bytes") - .unwrap() - .parse::() - .expect("size_bytes must be a non-negative number"); - let digest = DirectoryDigest::from_persisted_digest(Digest::new(fingerprint, size_bytes)); - Ok( - store - .materialize_directory( - destination, - &destination_root, - digest, - false, - &BTreeSet::new(), - Permissions::Writable, - ) - .await?, - ) - } - ("save", args) => { - let posix_fs = Arc::new(make_posix_fs( - runtime.clone(), - args.value_of("root").unwrap(), - )); - let store_copy = store.clone(); - let path_globs = fs::PathGlobs::new( - args - .values_of("globs") - .unwrap() - .map(str::to_string) - .collect::>(), - // By using `Ignore`, we say that we don't care if some globs fail to expand. Is - // that a valid assumption? 
- fs::StrictGlobMatching::Ignore, - fs::GlobExpansionConjunction::AllMatch, - ) - .parse()?; - let paths = posix_fs - .expand_globs(path_globs, SymlinkBehavior::Oblivious, None) - .await - .map_err(|e| format!("Error expanding globs: {e:?}"))?; - - let snapshot = Snapshot::from_path_stats( - store::OneOffStoreFileByDigest::new(store_copy, posix_fs, false), - paths, - ) - .await?; - - let ((), report) = futures::try_join!( - store.ensure_directory_digest_persisted(snapshot.clone().into()), - ensure_uploaded_to_remote(&store, store_has_remote, snapshot.digest), - )?; - print_upload_summary(args.value_of("output-mode"), &report); - - Ok(()) - } - ("cat-proto", args) => { - let fingerprint = Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; - let size_bytes = args - .value_of("size_bytes") - .unwrap() - .parse::() - .expect("size_bytes must be a non-negative number"); - let mut digest = - DirectoryDigest::from_persisted_digest(Digest::new(fingerprint, size_bytes)); - - if let Some(prefix_to_strip) = args.value_of("child-dir") { - let mut result = store - .subset( - digest, - SubsetParams { - globs: PreparedPathGlobs::create( - vec![format!("{prefix_to_strip}/**")], - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AnyMatch, - )?, - }, - ) - .await; - // It's a shame we can't just .and_then here, because we can't use async closures. - if let Ok(subset_digest) = result { - result = store - .strip_prefix(subset_digest, &RelativePath::new(prefix_to_strip)?) - .await; - } - digest = result?; - - // TODO: The below operations don't strictly need persistence: we could render the - // relevant `DigestTrie` directly. See #13112. - store - .ensure_directory_digest_persisted(digest.clone()) - .await?; + ("directories", sub_match) => match expect_subcommand(sub_match) { + ("list", _) => { + for digest in store + .all_local_digests(::store::EntryType::Directory) + .await + .expect("Error opening store") + { + println!("{} {}", digest.hash, digest.size_bytes); + } + Ok(()) + } + _ => unimplemented!(), + }, + ("gc", args) => { + let target_size_bytes = args + .value_of_t::("target-size-bytes") + .expect("--target-size-bytes must be passed as a non-negative integer"); + store + .garbage_collect(target_size_bytes, store::ShrinkBehavior::Compact) + .await?; + Ok(()) } - let proto_bytes: Vec = match args.value_of("output-format").unwrap() { - "binary" => store - .load_directory(digest.as_digest()) - .await? - .to_bytes() - .to_vec(), - "text" => format!("{:?}\n", store.load_directory(digest.as_digest()).await?) - .as_bytes() - .to_vec(), - "recursive-file-list" => expand_files(store, digest.as_digest()) - .await? - .into_iter() - .map(|(name, _digest)| format!("{name}\n")) - .collect::>() - .join("") - .into_bytes(), - "recursive-file-list-with-digests" => expand_files(store, digest.as_digest()) - .await? 
- .into_iter() - .map(|(name, digest)| format!("{} {:<16} {}\n", digest.hash, digest.size_bytes, name)) - .collect::>() - .join("") - .into_bytes(), - format => { - return Err(format!("Unexpected value of --output-format arg: {format}").into()) - } - }; - - io::stdout().write_all(&proto_bytes).unwrap(); - Ok(()) - } - (_, _) => unimplemented!(), - }, - ("cat", args) => { - let fingerprint = Fingerprint::from_hex_string(args.value_of("fingerprint").unwrap())?; - let size_bytes = args - .value_of("size_bytes") - .unwrap() - .parse::() - .expect("size_bytes must be a non-negative number"); - let digest = Digest::new(fingerprint, size_bytes); - let bytes = match store - .load_file_bytes_with(digest, Bytes::copy_from_slice) - .await - { - Err(StoreError::MissingDigest { .. }) => store.load_directory(digest).await?.to_bytes(), - Err(e) => return Err(e.into()), - Ok(bytes) => bytes, - }; - - io::stdout().write_all(&bytes).unwrap(); - Ok(()) - } - ("directories", sub_match) => match expect_subcommand(sub_match) { - ("list", _) => { - for digest in store - .all_local_digests(::store::EntryType::Directory) - .await - .expect("Error opening store") - { - println!("{} {}", digest.hash, digest.size_bytes); - } - Ok(()) - } - _ => unimplemented!(), - }, - ("gc", args) => { - let target_size_bytes = args - .value_of_t::("target-size-bytes") - .expect("--target-size-bytes must be passed as a non-negative integer"); - store - .garbage_collect(target_size_bytes, store::ShrinkBehavior::Compact) - .await?; - Ok(()) + (_, _) => unimplemented!(), } - - (_, _) => unimplemented!(), - } } fn expect_subcommand(matches: &clap::ArgMatches) -> (&str, &clap::ArgMatches) { - matches - .subcommand() - .unwrap_or_else(|| panic!("Expected subcommand. See `--help`.")) + matches + .subcommand() + .unwrap_or_else(|| panic!("Expected subcommand. 
See `--help`.")) } async fn expand_files(store: Store, digest: Digest) -> Result, StoreError> { - let files = Arc::new(Mutex::new(Vec::new())); - expand_files_helper(store, digest, String::new(), files.clone()).await?; + let files = Arc::new(Mutex::new(Vec::new())); + expand_files_helper(store, digest, String::new(), files.clone()).await?; - let mut v = Arc::try_unwrap(files).unwrap().into_inner(); - v.sort_by(|(l, _), (r, _)| l.cmp(r)); - Ok(v) + let mut v = Arc::try_unwrap(files).unwrap().into_inner(); + v.sort_by(|(l, _), (r, _)| l.cmp(r)); + Ok(v) } fn expand_files_helper( - store: Store, - digest: Digest, - prefix: String, - files: Arc>>, + store: Store, + digest: Digest, + prefix: String, + files: Arc>>, ) -> BoxFuture<'static, Result<(), StoreError>> { - async move { - let dir = store.load_directory(digest).await?; - { - let mut files_unlocked = files.lock(); - for file in &dir.files { - let file_digest = require_digest(file.digest.as_ref())?; - files_unlocked.push((format!("{}{}", prefix, file.name), file_digest)); - } + async move { + let dir = store.load_directory(digest).await?; + { + let mut files_unlocked = files.lock(); + for file in &dir.files { + let file_digest = require_digest(file.digest.as_ref())?; + files_unlocked.push((format!("{}{}", prefix, file.name), file_digest)); + } + } + let subdirs_and_digests = dir + .directories + .iter() + .map(move |subdir| { + let digest = require_digest(subdir.digest.as_ref()); + digest.map(|digest| (subdir, digest)) + }) + .collect::, _>>()?; + future::try_join_all( + subdirs_and_digests + .into_iter() + .map(move |(subdir, digest)| { + expand_files_helper( + store.clone(), + digest, + format!("{}{}/", prefix, subdir.name), + files.clone(), + ) + }) + .collect::>(), + ) + .await + .map(|_| ()) } - let subdirs_and_digests = dir - .directories - .iter() - .map(move |subdir| { - let digest = require_digest(subdir.digest.as_ref()); - digest.map(|digest| (subdir, digest)) - }) - .collect::, _>>()?; - future::try_join_all( - subdirs_and_digests - .into_iter() - .map(move |(subdir, digest)| { - expand_files_helper( - store.clone(), - digest, - format!("{}{}/", prefix, subdir.name), - files.clone(), - ) - }) - .collect::>(), - ) - .await - .map(|_| ()) - } - .boxed() + .boxed() } fn make_posix_fs>(executor: task_executor::Executor, root: P) -> fs::PosixFS { - // Unwrapping the output of creating the git ignorer with no patterns is infallible. - fs::PosixFS::new( - &root, - fs::GitignoreStyleExcludes::create(vec![]).unwrap(), - executor, - ) - .unwrap() + // Unwrapping the output of creating the git ignorer with no patterns is infallible. 
+ fs::PosixFS::new( + &root, + fs::GitignoreStyleExcludes::create(vec![]).unwrap(), + executor, + ) + .unwrap() } async fn ensure_uploaded_to_remote( - store: &Store, - store_has_remote: bool, - digest: Digest, + store: &Store, + store_has_remote: bool, + digest: Digest, ) -> Result { - let summary = if store_has_remote { - store - .ensure_remote_has_recursive(vec![digest]) - .await - .map(Some) - } else { - Ok(None) - }; - summary.map(move |summary| SummaryWithDigest { digest, summary }) + let summary = if store_has_remote { + store + .ensure_remote_has_recursive(vec![digest]) + .await + .map(Some) + } else { + Ok(None) + }; + summary.map(move |summary| SummaryWithDigest { digest, summary }) } fn print_upload_summary(mode: Option<&str>, report: &SummaryWithDigest) { - match mode { - Some("json") => println!("{}", serde_json::to_string_pretty(&report).unwrap()), - Some("simple") => println!("{} {}", report.digest.hash, report.digest.size_bytes), - // This should never be reached, as clap should error with unknown formats. - _ => eprintln!("Unknown summary format."), - }; + match mode { + Some("json") => println!("{}", serde_json::to_string_pretty(&report).unwrap()), + Some("simple") => println!("{} {}", report.digest.hash, report.digest.size_bytes), + // This should never be reached, as clap should error with unknown formats. + _ => eprintln!("Unknown summary format."), + }; } diff --git a/src/rust/engine/fs/src/directory.rs b/src/rust/engine/fs/src/directory.rs index 00c737e015e..43fd4080aec 100644 --- a/src/rust/engine/fs/src/directory.rs +++ b/src/rust/engine/fs/src/directory.rs @@ -25,19 +25,19 @@ use protos::require_digest; use crate::{LinkDepth, PathStat, RelativePath, MAX_LINK_DEPTH}; lazy_static! { - pub static ref EMPTY_DIGEST_TREE: DigestTrie = DigestTrie(vec![].into()); - pub static ref EMPTY_DIRECTORY_DIGEST: DirectoryDigest = DirectoryDigest { - digest: EMPTY_DIGEST, - tree: Some(EMPTY_DIGEST_TREE.clone()), - }; + pub static ref EMPTY_DIGEST_TREE: DigestTrie = DigestTrie(vec![].into()); + pub static ref EMPTY_DIRECTORY_DIGEST: DirectoryDigest = DirectoryDigest { + digest: EMPTY_DIGEST, + tree: Some(EMPTY_DIGEST_TREE.clone()), + }; } #[derive(Clone, Copy)] pub enum SymlinkBehavior { - /// Treat symlinks as a distinctive element. - Aware, - /// Follow symlinks to their target. - Oblivious, + /// Treat symlinks as a distinctive element. + Aware, + /// Follow symlinks to their target. + Oblivious, } /// A Digest for a directory, optionally with its content stored as a DigestTrie. @@ -49,100 +49,100 @@ pub enum SymlinkBehavior { /// before the Digest may be operated on. #[derive(Clone, DeepSizeOf, Serialize)] pub struct DirectoryDigest { - // NB: Private in order to force a choice between `todo_as_digest` and `as_digest`. - digest: Digest, - #[serde(skip_serializing)] - pub tree: Option, + // NB: Private in order to force a choice between `todo_as_digest` and `as_digest`. 
+ digest: Digest, + #[serde(skip_serializing)] + pub tree: Option, } impl workunit_store::DirectoryDigest for DirectoryDigest { - fn as_any(&self) -> &dyn std::any::Any { - self - } + fn as_any(&self) -> &dyn std::any::Any { + self + } } impl Eq for DirectoryDigest {} impl PartialEq for DirectoryDigest { - fn eq(&self, other: &Self) -> bool { - self.digest == other.digest - } + fn eq(&self, other: &Self) -> bool { + self.digest == other.digest + } } impl Hash for DirectoryDigest { - fn hash(&self, state: &mut H) { - self.digest.hash(state); - } + fn hash(&self, state: &mut H) { + self.digest.hash(state); + } } impl Debug for DirectoryDigest { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // NB: To avoid over-large output, we don't render the Trie. It would likely be best rendered - // as e.g. JSON. - let tree = if self.tree.is_some() { - "Some(..)" - } else { - "None" - }; - f.debug_struct("DirectoryDigest") - .field("digest", &self.digest) - .field("tree", &tree) - .finish() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // NB: To avoid over-large output, we don't render the Trie. It would likely be best rendered + // as e.g. JSON. + let tree = if self.tree.is_some() { + "Some(..)" + } else { + "None" + }; + f.debug_struct("DirectoryDigest") + .field("digest", &self.digest) + .field("tree", &tree) + .finish() + } } impl DirectoryDigest { - /// Construct a DirectoryDigest from a Digest and DigestTrie (by asserting that the Digest - /// identifies the DigestTrie). - pub fn new(digest: Digest, tree: DigestTrie) -> Self { - if cfg!(debug_assertions) { - let actual = tree.compute_root_digest(); - assert!(digest == actual, "Expected {digest:?} but got {actual:?}"); + /// Construct a DirectoryDigest from a Digest and DigestTrie (by asserting that the Digest + /// identifies the DigestTrie). + pub fn new(digest: Digest, tree: DigestTrie) -> Self { + if cfg!(debug_assertions) { + let actual = tree.compute_root_digest(); + assert!(digest == actual, "Expected {digest:?} but got {actual:?}"); + } + Self { + digest, + tree: Some(tree), + } + } + + /// Creates a DirectoryDigest which asserts that the given Digest represents a Directory structure + /// which is persisted in a Store. + /// + /// Use of this method should be rare: code should prefer to pass around a `DirectoryDigest` rather + /// than to create one from a `Digest` (as the latter requires loading the content from disk). + pub fn from_persisted_digest(digest: Digest) -> Self { + Self { digest, tree: None } + } + + /// Returns the `Digest` for this `DirectoryDigest`. + /// + /// TODO: If a callsite needs to convert to `Digest` as a convenience (i.e. in a location where + /// its signature could be changed to return a `DirectoryDigest` instead) during the porting + /// effort of #13112, it should use `todo_as_digest` rather than `as_digest`. + pub fn as_digest(&self) -> Digest { + self.digest } - Self { - digest, - tree: Some(tree), + + /// Marks a callsite that is discarding the `DigestTrie` held by this `DirectoryDigest` as a + /// temporary convenience, rather than updating its signature to return a `DirectoryDigest`. All + /// usages of this method should be removed before closing #13112. + pub fn todo_as_digest(&self) -> Digest { + self.digest } - } - - /// Creates a DirectoryDigest which asserts that the given Digest represents a Directory structure - /// which is persisted in a Store. 
- /// - /// Use of this method should be rare: code should prefer to pass around a `DirectoryDigest` rather - /// than to create one from a `Digest` (as the latter requires loading the content from disk). - pub fn from_persisted_digest(digest: Digest) -> Self { - Self { digest, tree: None } - } - - /// Returns the `Digest` for this `DirectoryDigest`. - /// - /// TODO: If a callsite needs to convert to `Digest` as a convenience (i.e. in a location where - /// its signature could be changed to return a `DirectoryDigest` instead) during the porting - /// effort of #13112, it should use `todo_as_digest` rather than `as_digest`. - pub fn as_digest(&self) -> Digest { - self.digest - } - - /// Marks a callsite that is discarding the `DigestTrie` held by this `DirectoryDigest` as a - /// temporary convenience, rather than updating its signature to return a `DirectoryDigest`. All - /// usages of this method should be removed before closing #13112. - pub fn todo_as_digest(&self) -> Digest { - self.digest - } - - /// Returns the digests reachable from this DirectoryDigest. - /// - /// If this DirectoryDigest has been persisted to disk (i.e., does not have a DigestTrie) then - /// this will only include the root. - pub fn digests(&self) -> Vec { - if let Some(tree) = &self.tree { - let mut digests = tree.digests(); - digests.push(self.digest); - digests - } else { - vec![self.digest] + + /// Returns the digests reachable from this DirectoryDigest. + /// + /// If this DirectoryDigest has been persisted to disk (i.e., does not have a DigestTrie) then + /// this will only include the root. + pub fn digests(&self) -> Vec { + if let Some(tree) = &self.tree { + let mut digests = tree.digests(); + digests.push(self.digest); + digests + } else { + vec![self.digest] + } } - } } /// A single component of a filesystem path. 
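For orientation while reviewing this hunk, here is a minimal usage sketch of the `DirectoryDigest` API reformatted above. It is illustrative only and not part of the patch: the helper name is hypothetical, it is written as if it lived in this module (so `DirectoryDigest`, `DigestTrie`, and `Digest` are in scope), and it relies only on the signatures visible in the hunk.

// Hypothetical helper contrasting the two constructors shown in the hunk above.
fn directory_digest_sketch(tree: DigestTrie, persisted_root: Digest) {
    // Fully loaded: the DigestTrie is held in memory, so `digests()` can
    // enumerate every digest reachable from the root.
    let loaded = DirectoryDigest::new(tree.compute_root_digest(), tree);
    assert!(loaded.digests().contains(&loaded.as_digest()));

    // Persisted only: `tree` is None, so only the root digest is reachable
    // until the directory structure is loaded back from a Store.
    let lazy = DirectoryDigest::from_persisted_digest(persisted_root);
    assert_eq!(lazy.digests(), vec![lazy.as_digest()]);
}

As the doc comments above note, the persisted-only form should be rare: callers are expected to pass a `DirectoryDigest` around rather than reconstruct one from a bare `Digest`, since the latter requires loading the directory content from disk.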
@@ -156,244 +156,244 @@ pub struct Name(Intern); known_deep_size!(0; Name); impl Name { - pub fn new(name: &str) -> Self { - if cfg!(debug_assertions) { - assert!(Path::new(name).components().count() < 2) + pub fn new(name: &str) -> Self { + if cfg!(debug_assertions) { + assert!(Path::new(name).components().count() < 2) + } + Name(Intern::from(name)) } - Name(Intern::from(name)) - } } impl Deref for Name { - type Target = Intern; + type Target = Intern; - fn deref(&self) -> &Intern { - &self.0 - } + fn deref(&self) -> &Intern { + &self.0 + } } impl Display for Name { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "{}", self.0.as_ref()) - } + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}", self.0.as_ref()) + } } #[derive(Clone, Debug, DeepSizeOf)] pub enum Entry { - Directory(Directory), - File(File), - Symlink(Symlink), + Directory(Directory), + File(File), + Symlink(Symlink), } impl Entry { - pub fn name(&self) -> Name { - match self { - Entry::Directory(d) => d.name, - Entry::File(f) => f.name, - Entry::Symlink(s) => s.name, + pub fn name(&self) -> Name { + match self { + Entry::Directory(d) => d.name, + Entry::File(f) => f.name, + Entry::Symlink(s) => s.name, + } } - } - pub fn digest(&self) -> Digest { - match self { - Entry::Directory(d) => d.digest, - Entry::File(f) => f.digest, - Entry::Symlink(_) => EMPTY_DIGEST, + pub fn digest(&self) -> Digest { + match self { + Entry::Directory(d) => d.digest, + Entry::File(f) => f.digest, + Entry::Symlink(_) => EMPTY_DIGEST, + } } - } } #[derive(Clone, DeepSizeOf)] pub struct Directory { - name: Name, - digest: Digest, - tree: DigestTrie, + name: Name, + digest: Digest, + tree: DigestTrie, } impl Directory { - pub(crate) fn new(name: Name, entries: Vec) -> Self { - Self::from_digest_tree(name, DigestTrie(entries.into())) - } - - fn from_remexec_directory_node( - dir_node: &remexec::DirectoryNode, - directories_by_digest: &HashMap, - ) -> Result { - let digest = require_digest(&dir_node.digest)?; - let directory = directories_by_digest.get(&digest).ok_or_else(|| { - format!( - "Child of {name} with {digest:?} was not present.", - name = dir_node.name - ) - })?; - Ok(Self { - name: Name(Intern::from(&dir_node.name)), - digest, - tree: DigestTrie::from_remexec_directories(directory, directories_by_digest)?, - }) - } - - fn from_digest_tree(name: Name, tree: DigestTrie) -> Self { - Self { - name, - digest: tree.compute_root_digest(), - tree, + pub(crate) fn new(name: Name, entries: Vec) -> Self { + Self::from_digest_tree(name, DigestTrie(entries.into())) + } + + fn from_remexec_directory_node( + dir_node: &remexec::DirectoryNode, + directories_by_digest: &HashMap, + ) -> Result { + let digest = require_digest(&dir_node.digest)?; + let directory = directories_by_digest.get(&digest).ok_or_else(|| { + format!( + "Child of {name} with {digest:?} was not present.", + name = dir_node.name + ) + })?; + Ok(Self { + name: Name(Intern::from(&dir_node.name)), + digest, + tree: DigestTrie::from_remexec_directories(directory, directories_by_digest)?, + }) } - } - pub fn name(&self) -> Name { - self.name - } + fn from_digest_tree(name: Name, tree: DigestTrie) -> Self { + Self { + name, + digest: tree.compute_root_digest(), + tree, + } + } - pub fn digest(&self) -> Digest { - self.digest - } + pub fn name(&self) -> Name { + self.name + } - pub fn tree(&self) -> &DigestTrie { - &self.tree - } + pub fn digest(&self) -> Digest { + self.digest + } - pub fn as_remexec_directory(&self) -> 
remexec::Directory { - self.tree.as_remexec_directory() - } + pub fn tree(&self) -> &DigestTrie { + &self.tree + } + + pub fn as_remexec_directory(&self) -> remexec::Directory { + self.tree.as_remexec_directory() + } } impl Debug for Directory { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // NB: To avoid over-large output, we don't render the Trie. It would likely be best rendered - // as e.g. JSON. - f.debug_struct("Directory") - .field("name", &self.name) - .field("digest", &self.digest) - .field("tree", &"..") - .finish() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // NB: To avoid over-large output, we don't render the Trie. It would likely be best rendered + // as e.g. JSON. + f.debug_struct("Directory") + .field("name", &self.name) + .field("digest", &self.digest) + .field("tree", &"..") + .finish() + } } impl From<&Directory> for remexec::DirectoryNode { - fn from(dir: &Directory) -> Self { - remexec::DirectoryNode { - name: dir.name.as_ref().to_owned(), - digest: Some((&dir.digest).into()), + fn from(dir: &Directory) -> Self { + remexec::DirectoryNode { + name: dir.name.as_ref().to_owned(), + digest: Some((&dir.digest).into()), + } } - } } #[derive(Clone, Debug, DeepSizeOf)] pub struct File { - name: Name, - digest: Digest, - is_executable: bool, + name: Name, + digest: Digest, + is_executable: bool, } impl File { - pub fn name(&self) -> Name { - self.name - } + pub fn name(&self) -> Name { + self.name + } - pub fn digest(&self) -> Digest { - self.digest - } + pub fn digest(&self) -> Digest { + self.digest + } - pub fn is_executable(&self) -> bool { - self.is_executable - } + pub fn is_executable(&self) -> bool { + self.is_executable + } } impl TryFrom<&remexec::FileNode> for File { - type Error = String; - - fn try_from(file_node: &remexec::FileNode) -> Result { - Ok(Self { - name: Name(Intern::from(&file_node.name)), - digest: require_digest(&file_node.digest)?, - is_executable: file_node.is_executable, - }) - } + type Error = String; + + fn try_from(file_node: &remexec::FileNode) -> Result { + Ok(Self { + name: Name(Intern::from(&file_node.name)), + digest: require_digest(&file_node.digest)?, + is_executable: file_node.is_executable, + }) + } } impl From<&File> for remexec::FileNode { - fn from(file: &File) -> Self { - remexec::FileNode { - name: file.name.as_ref().to_owned(), - digest: Some(file.digest.into()), - is_executable: file.is_executable, - ..remexec::FileNode::default() + fn from(file: &File) -> Self { + remexec::FileNode { + name: file.name.as_ref().to_owned(), + digest: Some(file.digest.into()), + is_executable: file.is_executable, + ..remexec::FileNode::default() + } } - } } #[derive(Clone, Debug, DeepSizeOf)] pub struct Symlink { - name: Name, - target: PathBuf, + name: Name, + target: PathBuf, } impl Symlink { - pub fn name(&self) -> Name { - self.name - } + pub fn name(&self) -> Name { + self.name + } - pub fn target(&self) -> &Path { - &self.target - } + pub fn target(&self) -> &Path { + &self.target + } } impl TryFrom<&remexec::SymlinkNode> for Symlink { - type Error = String; - - fn try_from(symlink_node: &remexec::SymlinkNode) -> Result { - Ok(Self { - name: Name(Intern::from(&symlink_node.name)), - target: PathBuf::from(&symlink_node.target), - }) - } + type Error = String; + + fn try_from(symlink_node: &remexec::SymlinkNode) -> Result { + Ok(Self { + name: Name(Intern::from(&symlink_node.name)), + target: PathBuf::from(&symlink_node.target), + }) + } } impl From<&Symlink> for remexec::SymlinkNode { - fn 
from(symlink: &Symlink) -> Self { - remexec::SymlinkNode { - name: symlink.name.as_ref().to_owned(), - target: symlink.target.to_str().unwrap().to_string(), - ..remexec::SymlinkNode::default() + fn from(symlink: &Symlink) -> Self { + remexec::SymlinkNode { + name: symlink.name.as_ref().to_owned(), + target: symlink.target.to_str().unwrap().to_string(), + ..remexec::SymlinkNode::default() + } } - } } // TODO: `PathStat` owns its path, which means it can't be used via recursive slicing. See // whether these types can be merged. pub enum TypedPath<'a> { - File { path: &'a Path, is_executable: bool }, - Link { path: &'a Path, target: &'a Path }, - Dir(&'a Path), + File { path: &'a Path, is_executable: bool }, + Link { path: &'a Path, target: &'a Path }, + Dir(&'a Path), } impl<'a> Deref for TypedPath<'a> { - type Target = Path; + type Target = Path; - fn deref(&self) -> &Path { - match self { - TypedPath::File { path, .. } => path, - TypedPath::Link { path, .. } => path, - TypedPath::Dir(d) => d, + fn deref(&self) -> &Path { + match self { + TypedPath::File { path, .. } => path, + TypedPath::Link { path, .. } => path, + TypedPath::Dir(d) => d, + } } - } } impl<'a> From<&'a PathStat> for TypedPath<'a> { - fn from(p: &'a PathStat) -> Self { - match p { - PathStat::File { path, stat } => TypedPath::File { - path, - is_executable: stat.is_executable, - }, - PathStat::Link { path, stat } => TypedPath::Link { - path, - target: &stat.target, - }, - PathStat::Dir { path, .. } => TypedPath::Dir(path), + fn from(p: &'a PathStat) -> Self { + match p { + PathStat::File { path, stat } => TypedPath::File { + path, + is_executable: stat.is_executable, + }, + PathStat::Link { path, stat } => TypedPath::Link { + path, + target: &stat.target, + }, + PathStat::Dir { path, .. } => TypedPath::Dir(path), + } } - } } #[derive(Clone, DeepSizeOf)] @@ -403,876 +403,885 @@ pub struct DigestTrie(Arc<[Entry]>); unsafe impl Sync for DigestTrie {} impl From for DirectoryDigest { - fn from(tree: DigestTrie) -> Self { - Self { - digest: tree.compute_root_digest(), - tree: Some(tree), + fn from(tree: DigestTrie) -> Self { + Self { + digest: tree.compute_root_digest(), + tree: Some(tree), + } } - } } impl DigestTrie { - /// Create a DigestTrie from unique TypedPath. Fails for duplicate items. - pub fn from_unique_paths( - mut path_stats: Vec, - file_digests: &HashMap, - ) -> Result { - // Sort and ensure that there were no duplicate entries. - #[allow(clippy::unnecessary_sort_by)] - path_stats.sort_by(|a, b| (**a).cmp(&**b)); - - // The helper assumes that if a Path has multiple children, it must be a directory. - // Proactively error if we run into identically named files, because otherwise we will treat - // them like empty directories. - let pre_dedupe_len = path_stats.len(); - path_stats.dedup_by(|a, b| **a == **b); - if path_stats.len() != pre_dedupe_len { - return Err(format!( - "Snapshots must be constructed from unique path stats; got duplicates in {:?}", - path_stats - .iter() - .map(|p| (**p).to_str()) - .collect::>() - )); + /// Create a DigestTrie from unique TypedPath. Fails for duplicate items. + pub fn from_unique_paths( + mut path_stats: Vec, + file_digests: &HashMap, + ) -> Result { + // Sort and ensure that there were no duplicate entries. + #[allow(clippy::unnecessary_sort_by)] + path_stats.sort_by(|a, b| (**a).cmp(&**b)); + + // The helper assumes that if a Path has multiple children, it must be a directory. 
+ // Proactively error if we run into identically named files, because otherwise we will treat + // them like empty directories. + let pre_dedupe_len = path_stats.len(); + path_stats.dedup_by(|a, b| **a == **b); + if path_stats.len() != pre_dedupe_len { + return Err(format!( + "Snapshots must be constructed from unique path stats; got duplicates in {:?}", + path_stats + .iter() + .map(|p| (**p).to_str()) + .collect::>() + )); + } + + Self::from_sorted_paths(PathBuf::new(), path_stats, file_digests) } - Self::from_sorted_paths(PathBuf::new(), path_stats, file_digests) - } - - fn from_sorted_paths( - prefix: PathBuf, - paths: Vec, - file_digests: &HashMap, - ) -> Result { - let mut entries = Vec::new(); - - for (name_res, group) in &paths - .into_iter() - .group_by(|s| first_path_component_to_name(s)) - { - let name = name_res?; - let mut path_group: Vec = group.collect(); - if path_group.len() == 1 && path_group[0].components().count() == 1 { - // Exactly one entry with exactly one component indicates either a file in this directory, - // or an empty directory. - // If the child is a non-empty directory, or a file therein, there must be multiple - // PathStats with that prefix component, and we will handle that recursively. - - match path_group.pop().unwrap() { - TypedPath::File { - path, - is_executable, - } => { - let digest = *file_digests.get(prefix.join(path).as_path()).unwrap(); - - entries.push(Entry::File(File { - name, - digest, - is_executable, - })); - } - TypedPath::Link { target, .. } => { - entries.push(Entry::Symlink(Symlink { - name, - target: target.to_path_buf(), - })); - } - TypedPath::Dir { .. } => { - // Because there are no children of this Dir, it must be empty. - entries.push(Entry::Directory(Directory::new(name, vec![]))); - } + fn from_sorted_paths( + prefix: PathBuf, + paths: Vec, + file_digests: &HashMap, + ) -> Result { + let mut entries = Vec::new(); + + for (name_res, group) in &paths + .into_iter() + .group_by(|s| first_path_component_to_name(s)) + { + let name = name_res?; + let mut path_group: Vec = group.collect(); + if path_group.len() == 1 && path_group[0].components().count() == 1 { + // Exactly one entry with exactly one component indicates either a file in this directory, + // or an empty directory. + // If the child is a non-empty directory, or a file therein, there must be multiple + // PathStats with that prefix component, and we will handle that recursively. + + match path_group.pop().unwrap() { + TypedPath::File { + path, + is_executable, + } => { + let digest = *file_digests.get(prefix.join(path).as_path()).unwrap(); + + entries.push(Entry::File(File { + name, + digest, + is_executable, + })); + } + TypedPath::Link { target, .. } => { + entries.push(Entry::Symlink(Symlink { + name, + target: target.to_path_buf(), + })); + } + TypedPath::Dir { .. } => { + // Because there are no children of this Dir, it must be empty. + entries.push(Entry::Directory(Directory::new(name, vec![]))); + } + } + } else { + entries.push(Entry::Directory(Directory::from_digest_tree( + name, + Self::from_sorted_paths( + prefix.join(name.as_ref()), + paths_of_child_dir(name, path_group), + file_digests, + )?, + ))); + } } - } else { - entries.push(Entry::Directory(Directory::from_digest_tree( - name, - Self::from_sorted_paths( - prefix.join(name.as_ref()), - paths_of_child_dir(name, path_group), - file_digests, - )?, - ))); - } - } - Ok(Self(entries.into())) - } - - /// Create a DigestTrie from a root remexec::Directory and a map of its transitive children. 
- fn from_remexec_directories( - root: &remexec::Directory, - children_by_digest: &HashMap, - ) -> Result { - let mut entries = root - .files - .iter() - .map(|f| File::try_from(f).map(Entry::File)) - .chain( - root - .symlinks - .iter() - .map(|s| Symlink::try_from(s).map(Entry::Symlink)), - ) - .chain(root.directories.iter().map(|d| { - Directory::from_remexec_directory_node(d, children_by_digest).map(Entry::Directory) - })) - .collect::, _>>()?; - entries.sort_by_key(|e| e.name()); - Ok(Self(entries.into())) - } - - pub fn as_remexec_directory(&self) -> remexec::Directory { - let mut files = Vec::new(); - let mut directories = Vec::new(); - let mut symlinks = Vec::new(); - - for entry in &*self.0 { - match entry { - Entry::File(f) => files.push(f.into()), - Entry::Symlink(s) => symlinks.push(s.into()), - Entry::Directory(d) => directories.push(d.into()), - } + Ok(Self(entries.into())) } - remexec::Directory { - directories, - files, - symlinks, - ..remexec::Directory::default() + /// Create a DigestTrie from a root remexec::Directory and a map of its transitive children. + fn from_remexec_directories( + root: &remexec::Directory, + children_by_digest: &HashMap, + ) -> Result { + let mut entries = root + .files + .iter() + .map(|f| File::try_from(f).map(Entry::File)) + .chain( + root.symlinks + .iter() + .map(|s| Symlink::try_from(s).map(Entry::Symlink)), + ) + .chain(root.directories.iter().map(|d| { + Directory::from_remexec_directory_node(d, children_by_digest).map(Entry::Directory) + })) + .collect::, _>>()?; + entries.sort_by_key(|e| e.name()); + Ok(Self(entries.into())) } - } - pub fn compute_root_digest(&self) -> Digest { - if self.0.is_empty() { - return EMPTY_DIGEST; - } + pub fn as_remexec_directory(&self) -> remexec::Directory { + let mut files = Vec::new(); + let mut directories = Vec::new(); + let mut symlinks = Vec::new(); - Digest::of_bytes(&self.as_remexec_directory().to_bytes()) - } - - pub fn entries(&self) -> &[Entry] { - &self.0 - } - - /// Returns the digests reachable from this DigestTrie. - pub fn digests(&self) -> Vec { - // Walk the tree and collect Digests. - let mut digests = Vec::new(); - let mut stack = self.0.iter().collect::>(); - while let Some(entry) = stack.pop() { - match entry { - Entry::Directory(d) => { - digests.push(d.digest); - stack.extend(d.tree.0.iter()); - } - Entry::File(f) => { - digests.push(f.digest); + for entry in &*self.0 { + match entry { + Entry::File(f) => files.push(f.into()), + Entry::Symlink(s) => symlinks.push(s.into()), + Entry::Directory(d) => directories.push(d.into()), + } } - // There is no digest for a symlink - Entry::Symlink(_) => (), - } - } - digests - } - - pub fn files(&self, symlink_behavior: SymlinkBehavior) -> Vec { - let mut files = Vec::new(); - self.walk(symlink_behavior, &mut |path, entry| { - if let Entry::File(_) = entry { - files.push(path.to_owned()) - } - }); - files - } - - pub fn directories(&self, symlink_behavior: SymlinkBehavior) -> Vec { - let mut directories = Vec::new(); - self.walk(symlink_behavior, &mut |path, entry| { - match entry { - Entry::Directory(d) if d.name.is_empty() => { - // Is the root directory, which is not emitted here. 
+ + remexec::Directory { + directories, + files, + symlinks, + ..remexec::Directory::default() } - Entry::Directory(_) => directories.push(path.to_owned()), - _ => (), - } - }); - directories - } - - pub fn symlinks(&self) -> Vec { - let mut symlinks = Vec::new(); - self.walk(SymlinkBehavior::Aware, &mut |path, entry| { - if let Entry::Symlink(_) = entry { - symlinks.push(path.to_owned()) - } - }); - symlinks - } - - /// The paths of all "leaf" nodes of the DigestTrie: empty directories, files, or symlinks. - pub fn leaf_paths(&self) -> Vec { - let mut paths = Vec::new(); - self.walk(SymlinkBehavior::Aware, &mut |path, entry| match entry { - Entry::Directory(d) if d.tree.0.is_empty() => paths.push(path.to_owned()), - Entry::Directory(_) => {} - Entry::File(_) | Entry::Symlink(_) => paths.push(path.to_owned()), - }); - paths - } - - /// Visit every node in the tree, calling the given function with the path to the Node, and its - /// entries. - /// NOTE: if SymlinkBehavior::Oblivious, `f` will never be called with a `SymlinkEntry`. - pub fn walk(&self, symlink_behavior: SymlinkBehavior, f: &mut impl FnMut(&Path, &Entry)) { - { - // TODO: It's likely that a DigestTrie should hold its own Digest, to avoid re-computing it - // here. - let root = Entry::Directory(Directory::from_digest_tree( - Name(Intern::from("")), - self.clone(), - )); - f(&PathBuf::new(), &root); } - self.walk_helper(self, PathBuf::new(), symlink_behavior, 0, f) - } - - fn walk_helper( - &self, - root: &DigestTrie, - path_so_far: PathBuf, - symlink_behavior: SymlinkBehavior, - mut link_depth: LinkDepth, - f: &mut impl FnMut(&Path, &Entry), - ) { - for entry in &*self.0 { - let path = path_so_far.join(entry.name().as_ref()); - let mut entry = entry; - if let SymlinkBehavior::Oblivious = symlink_behavior { - if let Entry::Symlink(s) = entry { - link_depth += 1; - if s.target == Component::CurDir.as_os_str() { - if link_depth >= MAX_LINK_DEPTH { - warn!("Exceeded the maximum link depth while traversing links. Stopping traversal."); - return; - } - self.walk_helper(root, path.clone(), symlink_behavior, link_depth, f); - return; - } - - let destination_path = path_so_far.join(s.target.clone()); - let destination_entry = root.entry_helper(root, &destination_path, link_depth); - if let Ok(Some(valid_entry)) = destination_entry { - entry = valid_entry; - } else { - continue; - } - } - } - match entry { - Entry::Directory(d) => { - f(&path, entry); - d.tree - .walk_helper(root, path.to_path_buf(), symlink_behavior, link_depth, f); + pub fn compute_root_digest(&self) -> Digest { + if self.0.is_empty() { + return EMPTY_DIGEST; } - _ => f(&path, entry), - }; + + Digest::of_bytes(&self.as_remexec_directory().to_bytes()) } - } - - pub fn diff(&self, other: &DigestTrie) -> DigestTrieDiff { - let mut result = DigestTrieDiff::default(); - self.diff_helper(other, PathBuf::new(), &mut result); - result - } - - // NB: The current implementation assumes that the entries are sorted (by name, irrespective of - // whether the entry is a file/dir). 
- fn diff_helper(&self, them: &DigestTrie, path_so_far: PathBuf, result: &mut DigestTrieDiff) { - let mut our_iter = self.0.iter(); - let mut their_iter = them.0.iter(); - let mut ours = our_iter.next(); - let mut theirs = their_iter.next(); - - let add_unique = |entry: &Entry, - unique_files: &mut Vec, - unique_dirs: &mut Vec, - unique_symlinks: &mut Vec| { - let path = path_so_far.join(entry.name().as_ref()); - match entry { - Entry::File(_) => unique_files.push(path), - Entry::Symlink(_) => unique_symlinks.push(path), - Entry::Directory(_) => unique_dirs.push(path), - } - }; - let add_ours = |entry: &Entry, diff: &mut DigestTrieDiff| { - add_unique( - entry, - &mut diff.our_unique_files, - &mut diff.our_unique_dirs, - &mut diff.our_unique_symlinks, - ); - }; - let add_theirs = |entry: &Entry, diff: &mut DigestTrieDiff| { - add_unique( - entry, - &mut diff.their_unique_files, - &mut diff.their_unique_dirs, - &mut diff.their_unique_symlinks, - ); - }; + pub fn entries(&self) -> &[Entry] { + &self.0 + } - while let Some(our_entry) = ours { - match theirs { - Some(their_entry) => match our_entry.name().cmp(&their_entry.name()) { - Ordering::Less => { - add_ours(our_entry, result); - ours = our_iter.next(); - continue; - } - Ordering::Greater => { - add_theirs(their_entry, result); - theirs = their_iter.next(); - continue; - } - Ordering::Equal => match (our_entry, their_entry) { - (Entry::File(our_file), Entry::File(their_file)) => { - if our_file.digest != their_file.digest { - result - .changed_files - .push(path_so_far.join(our_file.name().as_ref())); - } - ours = our_iter.next(); - theirs = their_iter.next(); + /// Returns the digests reachable from this DigestTrie. + pub fn digests(&self) -> Vec { + // Walk the tree and collect Digests. + let mut digests = Vec::new(); + let mut stack = self.0.iter().collect::>(); + while let Some(entry) = stack.pop() { + match entry { + Entry::Directory(d) => { + digests.push(d.digest); + stack.extend(d.tree.0.iter()); + } + Entry::File(f) => { + digests.push(f.digest); + } + // There is no digest for a symlink + Entry::Symlink(_) => (), } - (Entry::Symlink(our_symlink), Entry::Symlink(their_symlink)) => { - if our_symlink.target != their_symlink.target { - result - .changed_symlinks - .push(path_so_far.join(our_symlink.name.as_ref())); - } - ours = our_iter.next(); - theirs = their_iter.next(); + } + digests + } + + pub fn files(&self, symlink_behavior: SymlinkBehavior) -> Vec { + let mut files = Vec::new(); + self.walk(symlink_behavior, &mut |path, entry| { + if let Entry::File(_) = entry { + files.push(path.to_owned()) } - (Entry::Directory(our_dir), Entry::Directory(their_dir)) => { - if our_dir.digest != their_dir.digest { - our_dir.tree.diff_helper( - &their_dir.tree, - path_so_far.join(our_dir.name().as_ref()), - result, - ) - } - ours = our_iter.next(); - theirs = their_iter.next(); + }); + files + } + + pub fn directories(&self, symlink_behavior: SymlinkBehavior) -> Vec { + let mut directories = Vec::new(); + self.walk(symlink_behavior, &mut |path, entry| { + match entry { + Entry::Directory(d) if d.name.is_empty() => { + // Is the root directory, which is not emitted here. 
+ } + Entry::Directory(_) => directories.push(path.to_owned()), + _ => (), } - _ => { - add_ours(our_entry, result); - add_theirs(their_entry, result); - ours = our_iter.next(); - theirs = their_iter.next(); + }); + directories + } + + pub fn symlinks(&self) -> Vec { + let mut symlinks = Vec::new(); + self.walk(SymlinkBehavior::Aware, &mut |path, entry| { + if let Entry::Symlink(_) = entry { + symlinks.push(path.to_owned()) } - }, - }, - None => { - add_ours(our_entry, result); - ours = our_iter.next(); + }); + symlinks + } + + /// The paths of all "leaf" nodes of the DigestTrie: empty directories, files, or symlinks. + pub fn leaf_paths(&self) -> Vec { + let mut paths = Vec::new(); + self.walk(SymlinkBehavior::Aware, &mut |path, entry| match entry { + Entry::Directory(d) if d.tree.0.is_empty() => paths.push(path.to_owned()), + Entry::Directory(_) => {} + Entry::File(_) | Entry::Symlink(_) => paths.push(path.to_owned()), + }); + paths + } + + /// Visit every node in the tree, calling the given function with the path to the Node, and its + /// entries. + /// NOTE: if SymlinkBehavior::Oblivious, `f` will never be called with a `SymlinkEntry`. + pub fn walk(&self, symlink_behavior: SymlinkBehavior, f: &mut impl FnMut(&Path, &Entry)) { + { + // TODO: It's likely that a DigestTrie should hold its own Digest, to avoid re-computing it + // here. + let root = Entry::Directory(Directory::from_digest_tree( + Name(Intern::from("")), + self.clone(), + )); + f(&PathBuf::new(), &root); } - } + self.walk_helper(self, PathBuf::new(), symlink_behavior, 0, f) } - while let Some(their_entry) = &theirs { - add_theirs(their_entry, result); - theirs = their_iter.next(); + fn walk_helper( + &self, + root: &DigestTrie, + path_so_far: PathBuf, + symlink_behavior: SymlinkBehavior, + mut link_depth: LinkDepth, + f: &mut impl FnMut(&Path, &Entry), + ) { + for entry in &*self.0 { + let path = path_so_far.join(entry.name().as_ref()); + let mut entry = entry; + if let SymlinkBehavior::Oblivious = symlink_behavior { + if let Entry::Symlink(s) = entry { + link_depth += 1; + if s.target == Component::CurDir.as_os_str() { + if link_depth >= MAX_LINK_DEPTH { + warn!("Exceeded the maximum link depth while traversing links. Stopping traversal."); + return; + } + self.walk_helper(root, path.clone(), symlink_behavior, link_depth, f); + return; + } + + let destination_path = path_so_far.join(s.target.clone()); + let destination_entry = root.entry_helper(root, &destination_path, link_depth); + if let Ok(Some(valid_entry)) = destination_entry { + entry = valid_entry; + } else { + continue; + } + } + } + + match entry { + Entry::Directory(d) => { + f(&path, entry); + d.tree + .walk_helper(root, path.to_path_buf(), symlink_behavior, link_depth, f); + } + _ => f(&path, entry), + }; + } } - } - - /// Add the given path as a prefix for this trie, returning the resulting trie. - pub fn add_prefix(self, prefix: &RelativePath) -> Result { - let mut prefix_iter = prefix.iter(); - let mut tree = self; - while let Some(parent) = prefix_iter.next_back() { - let directory = - Directory::from_digest_tree(first_path_component_to_name(parent.as_ref())?, tree); - tree = DigestTrie(vec![Entry::Directory(directory)].into()); + + pub fn diff(&self, other: &DigestTrie) -> DigestTrieDiff { + let mut result = DigestTrieDiff::default(); + self.diff_helper(other, PathBuf::new(), &mut result); + result } - Ok(tree) - } - - /// Remove the given prefix from this trie, returning the resulting trie. 
- pub fn remove_prefix(self, prefix: &RelativePath) -> Result { - let root = self.clone(); - let mut tree = self; - let mut already_stripped = PathBuf::new(); - for component_to_strip in prefix.components() { - let component_to_strip = component_to_strip.as_os_str(); - let mut matching_dir = None; - let mut extra_directories = Vec::new(); - let mut files = Vec::new(); - let mut symlinks = Vec::new(); - for entry in tree.entries() { - match entry { - Entry::Directory(d) if Path::new(d.name.as_ref()).as_os_str() == component_to_strip => { - matching_dir = Some(d) - } - Entry::Directory(d) => extra_directories.push(d.name.as_ref().to_owned()), - Entry::File(f) => files.push(f.name.as_ref().to_owned()), - Entry::Symlink(s) => symlinks.push(s.name.as_ref().to_owned()), + // NB: The current implementation assumes that the entries are sorted (by name, irrespective of + // whether the entry is a file/dir). + fn diff_helper(&self, them: &DigestTrie, path_so_far: PathBuf, result: &mut DigestTrieDiff) { + let mut our_iter = self.0.iter(); + let mut their_iter = them.0.iter(); + let mut ours = our_iter.next(); + let mut theirs = their_iter.next(); + + let add_unique = |entry: &Entry, + unique_files: &mut Vec, + unique_dirs: &mut Vec, + unique_symlinks: &mut Vec| { + let path = path_so_far.join(entry.name().as_ref()); + match entry { + Entry::File(_) => unique_files.push(path), + Entry::Symlink(_) => unique_symlinks.push(path), + Entry::Directory(_) => unique_dirs.push(path), + } + }; + + let add_ours = |entry: &Entry, diff: &mut DigestTrieDiff| { + add_unique( + entry, + &mut diff.our_unique_files, + &mut diff.our_unique_dirs, + &mut diff.our_unique_symlinks, + ); + }; + let add_theirs = |entry: &Entry, diff: &mut DigestTrieDiff| { + add_unique( + entry, + &mut diff.their_unique_files, + &mut diff.their_unique_dirs, + &mut diff.their_unique_symlinks, + ); + }; + + while let Some(our_entry) = ours { + match theirs { + Some(their_entry) => match our_entry.name().cmp(&their_entry.name()) { + Ordering::Less => { + add_ours(our_entry, result); + ours = our_iter.next(); + continue; + } + Ordering::Greater => { + add_theirs(their_entry, result); + theirs = their_iter.next(); + continue; + } + Ordering::Equal => match (our_entry, their_entry) { + (Entry::File(our_file), Entry::File(their_file)) => { + if our_file.digest != their_file.digest { + result + .changed_files + .push(path_so_far.join(our_file.name().as_ref())); + } + ours = our_iter.next(); + theirs = their_iter.next(); + } + (Entry::Symlink(our_symlink), Entry::Symlink(their_symlink)) => { + if our_symlink.target != their_symlink.target { + result + .changed_symlinks + .push(path_so_far.join(our_symlink.name.as_ref())); + } + ours = our_iter.next(); + theirs = their_iter.next(); + } + (Entry::Directory(our_dir), Entry::Directory(their_dir)) => { + if our_dir.digest != their_dir.digest { + our_dir.tree.diff_helper( + &their_dir.tree, + path_so_far.join(our_dir.name().as_ref()), + result, + ) + } + ours = our_iter.next(); + theirs = their_iter.next(); + } + _ => { + add_ours(our_entry, result); + add_theirs(their_entry, result); + ours = our_iter.next(); + theirs = their_iter.next(); + } + }, + }, + None => { + add_ours(our_entry, result); + ours = our_iter.next(); + } + } } - } - - let has_already_stripped_any = already_stripped.components().next().is_some(); - match ( - matching_dir, - extra_directories.is_empty() && files.is_empty() && symlinks.is_empty(), - ) { - (None, true) => { - tree = EMPTY_DIGEST_TREE.clone(); - break; + + while let 
Some(their_entry) = &theirs { + add_theirs(their_entry, result); + theirs = their_iter.next(); } - (None, false) => { - // Prefer "No subdirectory found" error to "had extra files" error. - return Err(format!( - "Cannot strip prefix {} from root directory (Digest with hash {:?}) - \ - {}directory{} didn't contain a directory named {}{}", - prefix.display(), - root.compute_root_digest().hash, - if has_already_stripped_any { - "sub" - } else { - "root " - }, - if has_already_stripped_any { - format!(" {}", already_stripped.display()) - } else { - String::new() - }, - Path::new(component_to_strip).display(), - if !extra_directories.is_empty() || !files.is_empty() || !symlinks.is_empty() { - format!( - " but did contain {}", - format_entries(&extra_directories, &files, &symlinks) - ) - } else { - String::new() - }, - )); + } + + /// Add the given path as a prefix for this trie, returning the resulting trie. + pub fn add_prefix(self, prefix: &RelativePath) -> Result { + let mut prefix_iter = prefix.iter(); + let mut tree = self; + while let Some(parent) = prefix_iter.next_back() { + let directory = + Directory::from_digest_tree(first_path_component_to_name(parent.as_ref())?, tree); + tree = DigestTrie(vec![Entry::Directory(directory)].into()); } - (Some(_), false) => { - return Err(format!( - "Cannot strip prefix {} from root directory (Digest with hash {:?}) - \ + + Ok(tree) + } + + /// Remove the given prefix from this trie, returning the resulting trie. + pub fn remove_prefix(self, prefix: &RelativePath) -> Result { + let root = self.clone(); + let mut tree = self; + let mut already_stripped = PathBuf::new(); + for component_to_strip in prefix.components() { + let component_to_strip = component_to_strip.as_os_str(); + let mut matching_dir = None; + let mut extra_directories = Vec::new(); + let mut files = Vec::new(); + let mut symlinks = Vec::new(); + for entry in tree.entries() { + match entry { + Entry::Directory(d) + if Path::new(d.name.as_ref()).as_os_str() == component_to_strip => + { + matching_dir = Some(d) + } + Entry::Directory(d) => extra_directories.push(d.name.as_ref().to_owned()), + Entry::File(f) => files.push(f.name.as_ref().to_owned()), + Entry::Symlink(s) => symlinks.push(s.name.as_ref().to_owned()), + } + } + + let has_already_stripped_any = already_stripped.components().next().is_some(); + match ( + matching_dir, + extra_directories.is_empty() && files.is_empty() && symlinks.is_empty(), + ) { + (None, true) => { + tree = EMPTY_DIGEST_TREE.clone(); + break; + } + (None, false) => { + // Prefer "No subdirectory found" error to "had extra files" error. 
+ return Err(format!( + "Cannot strip prefix {} from root directory (Digest with hash {:?}) - \ + {}directory{} didn't contain a directory named {}{}", + prefix.display(), + root.compute_root_digest().hash, + if has_already_stripped_any { + "sub" + } else { + "root " + }, + if has_already_stripped_any { + format!(" {}", already_stripped.display()) + } else { + String::new() + }, + Path::new(component_to_strip).display(), + if !extra_directories.is_empty() + || !files.is_empty() + || !symlinks.is_empty() + { + format!( + " but did contain {}", + format_entries(&extra_directories, &files, &symlinks) + ) + } else { + String::new() + }, + )); + } + (Some(_), false) => { + return Err(format!( + "Cannot strip prefix {} from root directory (Digest with hash {:?}) - \ {}directory{} contained non-matching {}", - prefix.display(), - root.compute_root_digest().hash, - if has_already_stripped_any { - "sub" - } else { - "root " - }, - if has_already_stripped_any { - format!(" {}", already_stripped.display()) - } else { - String::new() - }, - format_entries(&extra_directories, &files, &symlinks), - )) - } - (Some(d), true) => { - already_stripped = already_stripped.join(component_to_strip); - tree = d.tree.clone(); + prefix.display(), + root.compute_root_digest().hash, + if has_already_stripped_any { + "sub" + } else { + "root " + }, + if has_already_stripped_any { + format!(" {}", already_stripped.display()) + } else { + String::new() + }, + format_entries(&extra_directories, &files, &symlinks), + )) + } + (Some(d), true) => { + already_stripped = already_stripped.join(component_to_strip); + tree = d.tree.clone(); + } + } } - } + + Ok(tree) + } + + /// Return the Entry at the given relative path in the trie, or None if no such path was present. + /// If a directory component is a symlink, will follow the symlink. In cases where a symlink points + /// to a parent or current dir, the return may be None due to exceeding the link depth. + /// + /// Cannot follow a symlink above `self` (returns None). + /// + /// An error will be returned if the given path attempts to traverse below a file entry. + pub fn entry<'a>(&'a self, path: &Path) -> Result, String> { + self.entry_helper(self, path, 0) } - Ok(tree) - } - - /// Return the Entry at the given relative path in the trie, or None if no such path was present. - /// If a directory component is a symlink, will follow the symlink. In cases where a symlink points - /// to a parent or current dir, the return may be None due to exceeding the link depth. - /// - /// Cannot follow a symlink above `self` (returns None). - /// - /// An error will be returned if the given path attempts to traverse below a file entry. - pub fn entry<'a>(&'a self, path: &Path) -> Result, String> { - self.entry_helper(self, path, 0) - } - - fn entry_helper<'a>( - &'a self, - root: &'a DigestTrie, - requested_path: &Path, - link_depth: LinkDepth, - ) -> Result, String> { - let mut tree = self; - let mut path_so_far = PathBuf::new(); - // Identical to path_so_far, but doesn't have components for "CurDir" symlinks - // E.g. If path_so_far is "dir/self/self/foo" and "dir/self -> .", then logical_path will be - // "dir/foo". - let mut logical_path = PathBuf::new(); - let mut components = requested_path.components(); - let mut current_entry: Option<&Entry> = None; - while let Some(component) = components.next() { - if component == Component::CurDir { - // NB: This only happens if "." is the first component in a path. 
- continue; - } - - if let Some(Entry::File(_)) = current_entry { - return Err(format!( - "{tree_digest:?} cannot contain a path at {requested_path:?}, \ + fn entry_helper<'a>( + &'a self, + root: &'a DigestTrie, + requested_path: &Path, + link_depth: LinkDepth, + ) -> Result, String> { + let mut tree = self; + let mut path_so_far = PathBuf::new(); + // Identical to path_so_far, but doesn't have components for "CurDir" symlinks + // E.g. If path_so_far is "dir/self/self/foo" and "dir/self -> .", then logical_path will be + // "dir/foo". + let mut logical_path = PathBuf::new(); + let mut components = requested_path.components(); + let mut current_entry: Option<&Entry> = None; + while let Some(component) = components.next() { + if component == Component::CurDir { + // NB: This only happens if "." is the first component in a path. + continue; + } + + if let Some(Entry::File(_)) = current_entry { + return Err(format!( + "{tree_digest:?} cannot contain a path at {requested_path:?}, \ because a file was encountered at {path_so_far:?}.", - tree_digest = self.compute_root_digest() - )); - } - - if let Some(Entry::Directory(d)) = current_entry { - tree = &d.tree; - } - - path_so_far.push(component); - logical_path.push(component); - if component == Component::ParentDir { - if let Some(grandparent) = logical_path.parent().unwrap().parent() { - let full_path = grandparent.join(components.as_path()); - return root.entry_helper(root, &full_path, link_depth); - } - return Ok(None); - } - - let component = component.as_os_str(); - let maybe_matching_entry = tree - .entries() - .binary_search_by_key(&component, |entry| { - Path::new(entry.name().as_ref()).as_os_str() - }) - .ok() - .map(|idx| &tree.entries()[idx]); - if maybe_matching_entry.is_none() { - return Ok(None); - } - - if let Some(Entry::Symlink(s)) = maybe_matching_entry { - if link_depth >= MAX_LINK_DEPTH { - warn!("Exceeded the maximum link depth while traversing links. Stopping traversal."); - return Ok(None); - } + tree_digest = self.compute_root_digest() + )); + } + + if let Some(Entry::Directory(d)) = current_entry { + tree = &d.tree; + } + + path_so_far.push(component); + logical_path.push(component); + if component == Component::ParentDir { + if let Some(grandparent) = logical_path.parent().unwrap().parent() { + let full_path = grandparent.join(components.as_path()); + return root.entry_helper(root, &full_path, link_depth); + } + return Ok(None); + } - if s.target.as_os_str() == Component::CurDir.as_os_str() { - logical_path = logical_path.parent().unwrap().to_path_buf(); - continue; + let component = component.as_os_str(); + let maybe_matching_entry = tree + .entries() + .binary_search_by_key(&component, |entry| { + Path::new(entry.name().as_ref()).as_os_str() + }) + .ok() + .map(|idx| &tree.entries()[idx]); + if maybe_matching_entry.is_none() { + return Ok(None); + } + + if let Some(Entry::Symlink(s)) = maybe_matching_entry { + if link_depth >= MAX_LINK_DEPTH { + warn!("Exceeded the maximum link depth while traversing links. 
Stopping traversal."); + return Ok(None); + } + + if s.target.as_os_str() == Component::CurDir.as_os_str() { + logical_path = logical_path.parent().unwrap().to_path_buf(); + continue; + } + let full_path = path_so_far + .parent() + .unwrap() + .join(&s.target) + .join(components.as_path()); + return root.entry_helper(root, &full_path, link_depth + 1); + } + + current_entry = maybe_matching_entry; } - let full_path = path_so_far - .parent() - .unwrap() - .join(&s.target) - .join(components.as_path()); - return root.entry_helper(root, &full_path, link_depth + 1); - } - - current_entry = maybe_matching_entry; - } - Ok(current_entry) - } - - /// Given DigestTries, merge them recursively into a single DigestTrie. - /// - /// If a file is present with the same name and contents multiple times, it will appear once. - /// If a file is present with the same name, but different contents, an error will be returned. - pub fn merge(trees: Vec) -> Result { - Self::merge_helper(PathBuf::new(), trees) - } - - fn merge_helper(parent_path: PathBuf, trees: Vec) -> Result { - if trees.is_empty() { - return Ok(EMPTY_DIGEST_TREE.clone()); - } else if trees.len() == 1 { - let mut trees = trees; - return Ok(trees.pop().unwrap()); + Ok(current_entry) } - // Merge sorted Entries. - let input_entries = trees - .iter() - .map(|tree| tree.entries().iter()) - .kmerge_by(|a, b| a.name() < b.name()); - - // Then group by name, and merge into an output list. - let mut entries: Vec = Vec::new(); - for (name, group) in &input_entries.group_by(|e| e.name()) { - let mut group = group.peekable(); - let first = group.next().unwrap(); - if group.peek().is_none() { - // There was only one Entry: emit it. - entries.push(first.clone()); - continue; - } - - match first { - Entry::File(f) => { - // If any Entry is a File, then they must all be identical. - let (mut mismatched_files, mismatched_dirs, mismatched_symlinks) = - collisions(f.digest, group); - if !mismatched_files.is_empty() - || !mismatched_dirs.is_empty() - || !mismatched_symlinks.is_empty() - { - mismatched_files.push(f); - return Err(MergeError::duplicates( - parent_path, - mismatched_files, - mismatched_dirs, - mismatched_symlinks, - )); - } + /// Given DigestTries, merge them recursively into a single DigestTrie. + /// + /// If a file is present with the same name and contents multiple times, it will appear once. + /// If a file is present with the same name, but different contents, an error will be returned. + pub fn merge(trees: Vec) -> Result { + Self::merge_helper(PathBuf::new(), trees) + } - // All entries matched: emit one copy. - entries.push(first.clone()); + fn merge_helper( + parent_path: PathBuf, + trees: Vec, + ) -> Result { + if trees.is_empty() { + return Ok(EMPTY_DIGEST_TREE.clone()); + } else if trees.len() == 1 { + let mut trees = trees; + return Ok(trees.pop().unwrap()); } - Entry::Symlink(s) => { - let mut mismatched_files = Vec::new(); - let mut mismatched_dirs = Vec::new(); - let mut mismatched_symlinks = Vec::new(); - for entry in group { - match entry { - Entry::File(other) => mismatched_files.push(other), - Entry::Symlink(other) if other.target != s.target => mismatched_symlinks.push(other), - Entry::Directory(other) => mismatched_dirs.push(other), - _ => (), + + // Merge sorted Entries. + let input_entries = trees + .iter() + .map(|tree| tree.entries().iter()) + .kmerge_by(|a, b| a.name() < b.name()); + + // Then group by name, and merge into an output list. 
+ let mut entries: Vec = Vec::new(); + for (name, group) in &input_entries.group_by(|e| e.name()) { + let mut group = group.peekable(); + let first = group.next().unwrap(); + if group.peek().is_none() { + // There was only one Entry: emit it. + entries.push(first.clone()); + continue; } - } - - if !mismatched_files.is_empty() - || !mismatched_dirs.is_empty() - || !mismatched_symlinks.is_empty() - { - mismatched_symlinks.push(s); - return Err(MergeError::duplicates( - parent_path, - mismatched_files, - mismatched_dirs, - mismatched_symlinks, - )); - } - // All entries matched: emit one copy. - entries.push(first.clone()); - } - Entry::Directory(d) => { - // If any Entry is a Directory, then they must all be Directories which will be merged. - let (mismatched_files, mut mismatched_dirs, mismatched_symlinks) = - collisions(d.digest, group); - - // If there were any Files, error. - if !mismatched_files.is_empty() || !mismatched_symlinks.is_empty() { - mismatched_dirs.push(d); - return Err(MergeError::duplicates( - parent_path, - mismatched_files, - mismatched_dirs, - mismatched_symlinks, - )); - } - - if mismatched_dirs.is_empty() { - // All directories matched: emit one copy. - entries.push(first.clone()); - } else { - // Some directories mismatched, so merge all of them into a new entry and emit that. - mismatched_dirs.push(d); - let merged_tree = Self::merge_helper( - parent_path.join(name.as_ref()), - mismatched_dirs - .into_iter() - .map(|d| d.tree.clone()) - .collect(), - )?; - entries.push(Entry::Directory(Directory::from_digest_tree( - name, - merged_tree, - ))); - } + match first { + Entry::File(f) => { + // If any Entry is a File, then they must all be identical. + let (mut mismatched_files, mismatched_dirs, mismatched_symlinks) = + collisions(f.digest, group); + if !mismatched_files.is_empty() + || !mismatched_dirs.is_empty() + || !mismatched_symlinks.is_empty() + { + mismatched_files.push(f); + return Err(MergeError::duplicates( + parent_path, + mismatched_files, + mismatched_dirs, + mismatched_symlinks, + )); + } + + // All entries matched: emit one copy. + entries.push(first.clone()); + } + Entry::Symlink(s) => { + let mut mismatched_files = Vec::new(); + let mut mismatched_dirs = Vec::new(); + let mut mismatched_symlinks = Vec::new(); + for entry in group { + match entry { + Entry::File(other) => mismatched_files.push(other), + Entry::Symlink(other) if other.target != s.target => { + mismatched_symlinks.push(other) + } + Entry::Directory(other) => mismatched_dirs.push(other), + _ => (), + } + } + + if !mismatched_files.is_empty() + || !mismatched_dirs.is_empty() + || !mismatched_symlinks.is_empty() + { + mismatched_symlinks.push(s); + return Err(MergeError::duplicates( + parent_path, + mismatched_files, + mismatched_dirs, + mismatched_symlinks, + )); + } + + // All entries matched: emit one copy. + entries.push(first.clone()); + } + Entry::Directory(d) => { + // If any Entry is a Directory, then they must all be Directories which will be merged. + let (mismatched_files, mut mismatched_dirs, mismatched_symlinks) = + collisions(d.digest, group); + + // If there were any Files, error. + if !mismatched_files.is_empty() || !mismatched_symlinks.is_empty() { + mismatched_dirs.push(d); + return Err(MergeError::duplicates( + parent_path, + mismatched_files, + mismatched_dirs, + mismatched_symlinks, + )); + } + + if mismatched_dirs.is_empty() { + // All directories matched: emit one copy. 
+ entries.push(first.clone()); + } else { + // Some directories mismatched, so merge all of them into a new entry and emit that. + mismatched_dirs.push(d); + let merged_tree = Self::merge_helper( + parent_path.join(name.as_ref()), + mismatched_dirs + .into_iter() + .map(|d| d.tree.clone()) + .collect(), + )?; + entries.push(Entry::Directory(Directory::from_digest_tree( + name, + merged_tree, + ))); + } + } + } } - } - } - Ok(DigestTrie(entries.into())) - } + Ok(DigestTrie(entries.into())) + } } impl TryFrom for DigestTrie { - type Error = String; - - fn try_from(tree: remexec::Tree) -> Result { - let root = tree - .root - .as_ref() - .ok_or_else(|| format!("Corrupt tree, no root: {tree:?}"))?; - let children = tree - .children - .into_iter() - .map(|d| (Digest::of_bytes(&d.to_bytes()), d)) - .collect::>(); - - Self::from_remexec_directories(root, &children) - } + type Error = String; + + fn try_from(tree: remexec::Tree) -> Result { + let root = tree + .root + .as_ref() + .ok_or_else(|| format!("Corrupt tree, no root: {tree:?}"))?; + let children = tree + .children + .into_iter() + .map(|d| (Digest::of_bytes(&d.to_bytes()), d)) + .collect::>(); + + Self::from_remexec_directories(root, &children) + } } impl From<&DigestTrie> for remexec::Tree { - fn from(trie: &DigestTrie) -> Self { - let mut tree = remexec::Tree::default(); - trie.walk(SymlinkBehavior::Aware, &mut |_, entry| { - match entry { - Entry::File(_) => (), - Entry::Symlink(_) => (), - Entry::Directory(d) if d.name.is_empty() => { - // Is the root directory. - tree.root = Some(d.tree.as_remexec_directory()); - } - Entry::Directory(d) => { - // Is a child directory. - tree.children.push(d.tree.as_remexec_directory()); - } - } - }); - tree - } + fn from(trie: &DigestTrie) -> Self { + let mut tree = remexec::Tree::default(); + trie.walk(SymlinkBehavior::Aware, &mut |_, entry| { + match entry { + Entry::File(_) => (), + Entry::Symlink(_) => (), + Entry::Directory(d) if d.name.is_empty() => { + // Is the root directory. + tree.root = Some(d.tree.as_remexec_directory()); + } + Entry::Directory(d) => { + // Is a child directory. 
+ tree.children.push(d.tree.as_remexec_directory()); + } + } + }); + tree + } } #[derive(Default)] pub struct DigestTrieDiff { - pub our_unique_files: Vec, - pub our_unique_symlinks: Vec, - pub our_unique_dirs: Vec, - pub their_unique_files: Vec, - pub their_unique_symlinks: Vec, - pub their_unique_dirs: Vec, - pub changed_files: Vec, - pub changed_symlinks: Vec, + pub our_unique_files: Vec, + pub our_unique_symlinks: Vec, + pub our_unique_dirs: Vec, + pub their_unique_files: Vec, + pub their_unique_symlinks: Vec, + pub their_unique_dirs: Vec, + pub changed_files: Vec, + pub changed_symlinks: Vec, } pub enum MergeError { - Duplicates { - parent_path: PathBuf, - files: Vec, - directories: Vec, - symlinks: Vec, - }, + Duplicates { + parent_path: PathBuf, + files: Vec, + directories: Vec, + symlinks: Vec, + }, } impl MergeError { - fn duplicates( - parent_path: PathBuf, - files: Vec<&File>, - directories: Vec<&Directory>, - symlinks: Vec<&Symlink>, - ) -> Self { - MergeError::Duplicates { - parent_path, - files: files.into_iter().cloned().collect(), - directories: directories.into_iter().cloned().collect(), - symlinks: symlinks.into_iter().cloned().collect(), + fn duplicates( + parent_path: PathBuf, + files: Vec<&File>, + directories: Vec<&Directory>, + symlinks: Vec<&Symlink>, + ) -> Self { + MergeError::Duplicates { + parent_path, + files: files.into_iter().cloned().collect(), + directories: directories.into_iter().cloned().collect(), + symlinks: symlinks.into_iter().cloned().collect(), + } } - } } fn paths_of_child_dir(name: Name, paths: Vec) -> Vec { - paths - .into_iter() - .filter_map(|s| { - if s.components().count() == 1 { - return None; - } - Some(match s { - TypedPath::File { - path, - is_executable, - } => TypedPath::File { - path: path.strip_prefix(name.as_ref()).unwrap(), - is_executable, - }, - TypedPath::Link { path, target } => TypedPath::Link { - path: path.strip_prefix(name.as_ref()).unwrap(), - target: target.strip_prefix(name.as_ref()).unwrap_or(target), - }, - TypedPath::Dir(path) => TypedPath::Dir(path.strip_prefix(name.as_ref()).unwrap()), - }) - }) - .collect() + paths + .into_iter() + .filter_map(|s| { + if s.components().count() == 1 { + return None; + } + Some(match s { + TypedPath::File { + path, + is_executable, + } => TypedPath::File { + path: path.strip_prefix(name.as_ref()).unwrap(), + is_executable, + }, + TypedPath::Link { path, target } => TypedPath::Link { + path: path.strip_prefix(name.as_ref()).unwrap(), + target: target.strip_prefix(name.as_ref()).unwrap_or(target), + }, + TypedPath::Dir(path) => TypedPath::Dir(path.strip_prefix(name.as_ref()).unwrap()), + }) + }) + .collect() } fn first_path_component_to_name(path: &Path) -> Result { - let first_path_component = path - .components() - .next() - .ok_or_else(|| format!("Path `{}` was unexpectedly empty", path.display()))?; - let name = first_path_component - .as_os_str() - .to_str() - .ok_or_else(|| format!("{first_path_component:?} is not representable in UTF8"))?; - Ok(Name(Intern::from(name))) + let first_path_component = path + .components() + .next() + .ok_or_else(|| format!("Path `{}` was unexpectedly empty", path.display()))?; + let name = first_path_component + .as_os_str() + .to_str() + .ok_or_else(|| format!("{first_path_component:?} is not representable in UTF8"))?; + Ok(Name(Intern::from(name))) } /// Return any entries which did not have the same Digest as the given Entry. 
fn collisions<'a>( - digest: Digest, - entries: impl Iterator, + digest: Digest, + entries: impl Iterator, ) -> (Vec<&'a File>, Vec<&'a Directory>, Vec<&'a Symlink>) { - let mut mismatched_files = Vec::new(); - let mut mismatched_dirs = Vec::new(); - let mut mismatched_symlinks = Vec::new(); - for entry in entries { - match entry { - Entry::File(other) if other.digest != digest => mismatched_files.push(other), - // Symlinks can't have the same digest as files/directories, as they have no digest - Entry::Symlink(other) => mismatched_symlinks.push(other), - Entry::Directory(other) if other.digest != digest => mismatched_dirs.push(other), - _ => (), + let mut mismatched_files = Vec::new(); + let mut mismatched_dirs = Vec::new(); + let mut mismatched_symlinks = Vec::new(); + for entry in entries { + match entry { + Entry::File(other) if other.digest != digest => mismatched_files.push(other), + // Symlinks can't have the same digest as files/directories, as they have no digest + Entry::Symlink(other) => mismatched_symlinks.push(other), + Entry::Directory(other) if other.digest != digest => mismatched_dirs.push(other), + _ => (), + } } - } - (mismatched_files, mismatched_dirs, mismatched_symlinks) + (mismatched_files, mismatched_dirs, mismatched_symlinks) } /// Format entries as a human readable string. fn format_entries(directories: &[String], files: &[String], symlinks: &[String]) -> String { - format!( - "{}{}{}{}{}", - if directories.is_empty() { - String::new() - } else { - format!( - "director{} named: {}", - if directories.len() == 1 { "y" } else { "ies" }, - directories.join(", ") - ) - }, - if !directories.is_empty() && (!files.is_empty() || !symlinks.is_empty()) { - " and " - } else { - "" - }, - if files.is_empty() { - String::new() - } else { - format!( - "file{} named: {}", - if files.len() == 1 { "" } else { "s" }, - files.join(", ") - ) - }, - if (!directories.is_empty() || !files.is_empty()) && !symlinks.is_empty() { - " and " - } else { - "" - }, - if symlinks.is_empty() { - String::new() - } else { - format!( - "symlink{} named: {}", - if symlinks.len() == 1 { "" } else { "s" }, - symlinks.join(", ") - ) - }, - ) + format!( + "{}{}{}{}{}", + if directories.is_empty() { + String::new() + } else { + format!( + "director{} named: {}", + if directories.len() == 1 { "y" } else { "ies" }, + directories.join(", ") + ) + }, + if !directories.is_empty() && (!files.is_empty() || !symlinks.is_empty()) { + " and " + } else { + "" + }, + if files.is_empty() { + String::new() + } else { + format!( + "file{} named: {}", + if files.len() == 1 { "" } else { "s" }, + files.join(", ") + ) + }, + if (!directories.is_empty() || !files.is_empty()) && !symlinks.is_empty() { + " and " + } else { + "" + }, + if symlinks.is_empty() { + String::new() + } else { + format!( + "symlink{} named: {}", + if symlinks.len() == 1 { "" } else { "s" }, + symlinks.join(", ") + ) + }, + ) } diff --git a/src/rust/engine/fs/src/directory_tests.rs b/src/rust/engine/fs/src/directory_tests.rs index 6350e184000..88180106b61 100644 --- a/src/rust/engine/fs/src/directory_tests.rs +++ b/src/rust/engine/fs/src/directory_tests.rs @@ -9,386 +9,388 @@ use hashing::EMPTY_DIGEST; use std::path::{Path, PathBuf}; fn make_tree(path_stats: Vec) -> DigestTrie { - let mut file_digests = HashMap::new(); - file_digests.extend( - path_stats - .iter() - .map(|path| (path.to_path_buf(), EMPTY_DIGEST)), - ); + let mut file_digests = HashMap::new(); + file_digests.extend( + path_stats + .iter() + .map(|path| (path.to_path_buf(), EMPTY_DIGEST)), 
+ ); - DigestTrie::from_unique_paths(path_stats, &file_digests).unwrap() + DigestTrie::from_unique_paths(path_stats, &file_digests).unwrap() } fn assert_entry_is_none(tree: &DigestTrie, path: &str) { - assert!(tree.entry(Path::new(path)).unwrap().is_none()); + assert!(tree.entry(Path::new(path)).unwrap().is_none()); } fn assert_entry_is_some(tree: &DigestTrie, path: &str) { - assert!(tree.entry(Path::new(path)).unwrap().is_some()); + assert!(tree.entry(Path::new(path)).unwrap().is_some()); } fn assert_entry_is_err(tree: &DigestTrie, path: &str) { - assert!(tree.entry(Path::new(path)).is_err()); + assert!(tree.entry(Path::new(path)).is_err()); } #[test] fn entry_simple() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("linkfile"), - target: Path::new("dir/file.txt"), - }, - TypedPath::File { - path: Path::new("dir/file.txt"), - is_executable: false, - }, - ]); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("linkfile"), + target: Path::new("dir/file.txt"), + }, + TypedPath::File { + path: Path::new("dir/file.txt"), + is_executable: false, + }, + ]); - assert_entry_is_some(&tree, "dir/file.txt"); - assert_entry_is_some(&tree, "linkfile"); + assert_entry_is_some(&tree, "dir/file.txt"); + assert_entry_is_some(&tree, "linkfile"); } #[test] fn entry_self_referencing_symlink() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("self"), - target: Path::new("."), - }, - TypedPath::File { - path: Path::new("file.txt"), - is_executable: false, - }, - ]); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("self"), + target: Path::new("."), + }, + TypedPath::File { + path: Path::new("file.txt"), + is_executable: false, + }, + ]); - let assert_is_file = |path: &str| match tree.entry(Path::new(path)).unwrap().unwrap() { - Entry::File(file) => assert_eq!(file.name(), Name::new("file.txt")), - _ => assert!(false), - }; + let assert_is_file = |path: &str| match tree.entry(Path::new(path)).unwrap().unwrap() { + Entry::File(file) => assert_eq!(file.name(), Name::new("file.txt")), + _ => assert!(false), + }; - for n in 0..(MAX_LINK_DEPTH + 1) { - let path = "".to_owned() + &"self/".repeat(n.into()) + "file.txt"; - assert_is_file(&path); - } + for n in 0..(MAX_LINK_DEPTH + 1) { + let path = "".to_owned() + &"self/".repeat(n.into()) + "file.txt"; + assert_is_file(&path); + } } #[test] fn entry_self_referencing_symlink_subdir() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("a/self"), - target: Path::new("."), - }, - TypedPath::File { - path: Path::new("a/file.txt"), - is_executable: false, - }, - ]); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("a/self"), + target: Path::new("."), + }, + TypedPath::File { + path: Path::new("a/file.txt"), + is_executable: false, + }, + ]); - let assert_is_file = |path: &str| match tree.entry(Path::new(path)).unwrap().unwrap() { - Entry::File(file) => assert_eq!(file.name(), Name::new("file.txt")), - _ => assert!(false), - }; + let assert_is_file = |path: &str| match tree.entry(Path::new(path)).unwrap().unwrap() { + Entry::File(file) => assert_eq!(file.name(), Name::new("file.txt")), + _ => assert!(false), + }; - let assert_is_a = |path: &str| match tree.entry(Path::new(path)).unwrap().unwrap() { - Entry::Directory(dir) => assert_eq!(dir.name(), Name::new("a")), - _ => assert!(false), - }; + let assert_is_a = |path: &str| match tree.entry(Path::new(path)).unwrap().unwrap() { + Entry::Directory(dir) => assert_eq!(dir.name(), Name::new("a")), + _ => assert!(false), + 
}; - // Max link depth isn't relevant here because we'll always land at something "real". - for n in 0..MAX_LINK_DEPTH + 2 { - let dirpath = "a/".to_owned() + &"self/".repeat(n.into()); - assert_is_a(&dirpath); - let path = dirpath + "file.txt"; - assert_is_file(&path); - } + // Max link depth isn't relevant here because we'll always land at something "real". + for n in 0..MAX_LINK_DEPTH + 2 { + let dirpath = "a/".to_owned() + &"self/".repeat(n.into()); + assert_is_a(&dirpath); + let path = dirpath + "file.txt"; + assert_is_file(&path); + } } #[test] fn entry_too_far_up() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("up1"), - target: Path::new(".."), - }, - TypedPath::Link { - path: Path::new("dir/up2"), - target: Path::new("../.."), - }, - TypedPath::Link { - path: Path::new("dir/up2-self"), - target: Path::new("../../."), - }, - TypedPath::Link { - path: Path::new("selfdir"), - target: Path::new("."), - }, - TypedPath::File { - path: Path::new("file.txt"), - is_executable: false, - }, - ]); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("up1"), + target: Path::new(".."), + }, + TypedPath::Link { + path: Path::new("dir/up2"), + target: Path::new("../.."), + }, + TypedPath::Link { + path: Path::new("dir/up2-self"), + target: Path::new("../../."), + }, + TypedPath::Link { + path: Path::new("selfdir"), + target: Path::new("."), + }, + TypedPath::File { + path: Path::new("file.txt"), + is_executable: false, + }, + ]); - assert_entry_is_none(&tree, "up1"); - assert_entry_is_none(&tree, "dir/up2"); - assert_entry_is_none(&tree, "dir/up2-self"); - assert_entry_is_none(&tree, "selfdir/dir/up2"); - assert_entry_is_none(&tree, "selfdir/dir/up2/file.txt"); - assert_entry_is_none(&tree, "selfdir/dir/up2-self/file.txt"); - assert_entry_is_none(&tree, "selfdir/dir/up2/selfdir/up1/file.txt"); + assert_entry_is_none(&tree, "up1"); + assert_entry_is_none(&tree, "dir/up2"); + assert_entry_is_none(&tree, "dir/up2-self"); + assert_entry_is_none(&tree, "selfdir/dir/up2"); + assert_entry_is_none(&tree, "selfdir/dir/up2/file.txt"); + assert_entry_is_none(&tree, "selfdir/dir/up2-self/file.txt"); + assert_entry_is_none(&tree, "selfdir/dir/up2/selfdir/up1/file.txt"); } #[test] fn entry_traverse_through_file() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("self"), - target: Path::new("."), - }, - TypedPath::Link { - path: Path::new("dir/up"), - target: Path::new(".."), - }, - TypedPath::File { - path: Path::new("file"), - is_executable: false, - }, - TypedPath::Link { - path: Path::new("filelink"), - target: Path::new("file"), - }, - TypedPath::Link { - path: Path::new("badlink"), - target: Path::new("file/anything"), - }, - TypedPath::Link { - path: Path::new("dir/badlink1"), - target: Path::new("../badlink"), - }, - TypedPath::Link { - path: Path::new("dir/badlink2"), - target: Path::new("../file/anything"), - }, - ]); - assert_entry_is_err(&tree, "file/anything"); - assert_entry_is_err(&tree, "filelink/anything"); - assert_entry_is_err(&tree, "self/file/anything"); - assert_entry_is_err(&tree, "self/filelink/anything"); - assert_entry_is_err(&tree, "dir/up/file/anything"); - assert_entry_is_err(&tree, "dir/up/filelink/anything"); - assert_entry_is_err(&tree, "badlink"); - assert_entry_is_err(&tree, "dir/badlink1"); - assert_entry_is_err(&tree, "dir/badlink2"); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("self"), + target: Path::new("."), + }, + TypedPath::Link { + path: Path::new("dir/up"), + target: Path::new(".."), + }, + 
TypedPath::File { + path: Path::new("file"), + is_executable: false, + }, + TypedPath::Link { + path: Path::new("filelink"), + target: Path::new("file"), + }, + TypedPath::Link { + path: Path::new("badlink"), + target: Path::new("file/anything"), + }, + TypedPath::Link { + path: Path::new("dir/badlink1"), + target: Path::new("../badlink"), + }, + TypedPath::Link { + path: Path::new("dir/badlink2"), + target: Path::new("../file/anything"), + }, + ]); + assert_entry_is_err(&tree, "file/anything"); + assert_entry_is_err(&tree, "filelink/anything"); + assert_entry_is_err(&tree, "self/file/anything"); + assert_entry_is_err(&tree, "self/filelink/anything"); + assert_entry_is_err(&tree, "dir/up/file/anything"); + assert_entry_is_err(&tree, "dir/up/filelink/anything"); + assert_entry_is_err(&tree, "badlink"); + assert_entry_is_err(&tree, "dir/badlink1"); + assert_entry_is_err(&tree, "dir/badlink2"); } #[test] fn entry_infinite_loop() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("self"), - target: Path::new("self"), - }, - TypedPath::Link { - path: Path::new("also-self"), - target: Path::new("./self"), - }, - TypedPath::Link { - path: Path::new("subdir/self-through-parent"), - target: Path::new("../self-through-parent"), - }, - TypedPath::Link { - path: Path::new("chain1"), - target: Path::new("chain2"), - }, - TypedPath::Link { - path: Path::new("chain2"), - target: Path::new("chain3"), - }, - TypedPath::Link { - path: Path::new("chain3"), - target: Path::new("chain1"), - }, - ]); - assert_entry_is_none(&tree, "self"); - assert_entry_is_none(&tree, "also-self"); - assert_entry_is_none(&tree, "chain1"); - assert_entry_is_none(&tree, "chain2"); - assert_entry_is_none(&tree, "chain3"); - assert_entry_is_none(&tree, "subdir/self-through-parent"); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("self"), + target: Path::new("self"), + }, + TypedPath::Link { + path: Path::new("also-self"), + target: Path::new("./self"), + }, + TypedPath::Link { + path: Path::new("subdir/self-through-parent"), + target: Path::new("../self-through-parent"), + }, + TypedPath::Link { + path: Path::new("chain1"), + target: Path::new("chain2"), + }, + TypedPath::Link { + path: Path::new("chain2"), + target: Path::new("chain3"), + }, + TypedPath::Link { + path: Path::new("chain3"), + target: Path::new("chain1"), + }, + ]); + assert_entry_is_none(&tree, "self"); + assert_entry_is_none(&tree, "also-self"); + assert_entry_is_none(&tree, "chain1"); + assert_entry_is_none(&tree, "chain2"); + assert_entry_is_none(&tree, "chain3"); + assert_entry_is_none(&tree, "subdir/self-through-parent"); } #[test] fn entry_absolute() { - let tree = make_tree(vec![TypedPath::Link { - path: Path::new("absolute"), - target: Path::new("/etc/gitconfig"), - }]); - assert_entry_is_none(&tree, "absolute"); + let tree = make_tree(vec![TypedPath::Link { + path: Path::new("absolute"), + target: Path::new("/etc/gitconfig"), + }]); + assert_entry_is_none(&tree, "absolute"); } #[test] fn entry_dead_link() { - let tree = make_tree(vec![TypedPath::Link { - path: Path::new("dead"), - target: Path::new("nonexistant"), - }]); - assert_entry_is_none(&tree, "dead"); + let tree = make_tree(vec![TypedPath::Link { + path: Path::new("dead"), + target: Path::new("nonexistant"), + }]); + assert_entry_is_none(&tree, "dead"); } #[test] fn entry_gnarly_symlinks() { - let tree = make_tree(vec![ - TypedPath::Link { - path: Path::new("dir/parent"), - target: Path::new(".."), - }, - TypedPath::Link { - path: Path::new("dir/self"), - 
target: Path::new("."), - }, - TypedPath::Link { - path: Path::new("dir/self_obtusely"), - target: Path::new("../dir"), - }, - TypedPath::Link { - path: Path::new("dir/self_but_oh_so_obtusely"), - target: Path::new("self/self/self/self/self_obtusely/parent/dir/parent/dir/parent/dir/self"), - }, - TypedPath::File { - path: Path::new("dir/file.txt"), - is_executable: false, - }, - ]); - assert_entry_is_some(&tree, "dir/self_but_oh_so_obtusely"); - assert_entry_is_some(&tree, "dir/self_but_oh_so_obtusely/file.txt"); + let tree = make_tree(vec![ + TypedPath::Link { + path: Path::new("dir/parent"), + target: Path::new(".."), + }, + TypedPath::Link { + path: Path::new("dir/self"), + target: Path::new("."), + }, + TypedPath::Link { + path: Path::new("dir/self_obtusely"), + target: Path::new("../dir"), + }, + TypedPath::Link { + path: Path::new("dir/self_but_oh_so_obtusely"), + target: Path::new( + "self/self/self/self/self_obtusely/parent/dir/parent/dir/parent/dir/self", + ), + }, + TypedPath::File { + path: Path::new("dir/file.txt"), + is_executable: false, + }, + ]); + assert_entry_is_some(&tree, "dir/self_but_oh_so_obtusely"); + assert_entry_is_some(&tree, "dir/self_but_oh_so_obtusely/file.txt"); } fn assert_walk(tree: &DigestTrie, expected_filenames: Vec, expected_dirnames: Vec) { - let mut filenames = Vec::new(); - let mut dirnames = Vec::new(); - tree.walk( - crate::SymlinkBehavior::Oblivious, - &mut |path, entry| match entry { - Entry::Symlink(_) => panic!("But we're oblivious!"), - Entry::Directory(_) => dirnames.push(path.to_path_buf()), - Entry::File(_) => filenames.push(path.to_path_buf()), - }, - ); - assert_eq!( - filenames, - expected_filenames - .iter() - .map(PathBuf::from) - .collect::>() - ); - assert_eq!( - dirnames, - expected_dirnames - .iter() - .map(PathBuf::from) - .collect::>() - ); + let mut filenames = Vec::new(); + let mut dirnames = Vec::new(); + tree.walk( + crate::SymlinkBehavior::Oblivious, + &mut |path, entry| match entry { + Entry::Symlink(_) => panic!("But we're oblivious!"), + Entry::Directory(_) => dirnames.push(path.to_path_buf()), + Entry::File(_) => filenames.push(path.to_path_buf()), + }, + ); + assert_eq!( + filenames, + expected_filenames + .iter() + .map(PathBuf::from) + .collect::>() + ); + assert_eq!( + dirnames, + expected_dirnames + .iter() + .map(PathBuf::from) + .collect::>() + ); } #[test] fn walk_simple() { - let tree = make_tree(vec![ - TypedPath::File { - path: Path::new("file.txt"), - is_executable: false, - }, - TypedPath::Link { - path: Path::new("symlink"), - target: Path::new("file.txt"), - }, - TypedPath::Link { - path: Path::new("relsymlink"), - target: Path::new("./file.txt"), - }, - TypedPath::Link { - path: Path::new("a/symlink"), - target: Path::new("../file.txt"), - }, - TypedPath::Link { - path: Path::new("a/b/symlink"), - target: Path::new("../../file.txt"), - }, - ]); - assert_walk( - &tree, - vec![ - "a/b/symlink".to_string(), - "a/symlink".to_string(), - "file.txt".to_string(), - "relsymlink".to_string(), - "symlink".to_string(), - ], - vec!["".to_string(), "a".to_string(), "a/b".to_string()], - ); + let tree = make_tree(vec![ + TypedPath::File { + path: Path::new("file.txt"), + is_executable: false, + }, + TypedPath::Link { + path: Path::new("symlink"), + target: Path::new("file.txt"), + }, + TypedPath::Link { + path: Path::new("relsymlink"), + target: Path::new("./file.txt"), + }, + TypedPath::Link { + path: Path::new("a/symlink"), + target: Path::new("../file.txt"), + }, + TypedPath::Link { + path: Path::new("a/b/symlink"), 
+ target: Path::new("../../file.txt"), + }, + ]); + assert_walk( + &tree, + vec![ + "a/b/symlink".to_string(), + "a/symlink".to_string(), + "file.txt".to_string(), + "relsymlink".to_string(), + "symlink".to_string(), + ], + vec!["".to_string(), "a".to_string(), "a/b".to_string()], + ); } #[test] fn walk_too_many_links_rootdir() { - let tree = make_tree(vec![ - TypedPath::File { - path: Path::new("file.txt"), - is_executable: false, - }, - TypedPath::Link { - path: Path::new("self"), - target: Path::new("."), - }, - ]); - assert_walk( - &tree, - (0..MAX_LINK_DEPTH) - .into_iter() - .map(|n| ("self/".repeat(n.into()) + "file.txt")) - .collect::>(), - vec!["".to_string()], - ); + let tree = make_tree(vec![ + TypedPath::File { + path: Path::new("file.txt"), + is_executable: false, + }, + TypedPath::Link { + path: Path::new("self"), + target: Path::new("."), + }, + ]); + assert_walk( + &tree, + (0..MAX_LINK_DEPTH) + .into_iter() + .map(|n| ("self/".repeat(n.into()) + "file.txt")) + .collect::>(), + vec!["".to_string()], + ); } #[test] fn walk_too_many_links_subdir() { - let tree = make_tree(vec![ - TypedPath::File { - path: Path::new("a/file.txt"), - is_executable: false, - }, - TypedPath::Link { - path: Path::new("a/self"), - target: Path::new("."), - }, - ]); - assert_walk( - &tree, - (0..MAX_LINK_DEPTH) - .into_iter() - .map(|n| ("a/".to_string() + &"self/".repeat(n.into()) + "file.txt")) - .collect::>(), - vec!["".to_string(), "a".to_string()], - ); + let tree = make_tree(vec![ + TypedPath::File { + path: Path::new("a/file.txt"), + is_executable: false, + }, + TypedPath::Link { + path: Path::new("a/self"), + target: Path::new("."), + }, + ]); + assert_walk( + &tree, + (0..MAX_LINK_DEPTH) + .into_iter() + .map(|n| ("a/".to_string() + &"self/".repeat(n.into()) + "file.txt")) + .collect::>(), + vec!["".to_string(), "a".to_string()], + ); } #[test] fn leaf_paths() { - let file = PathBuf::from("parent/file.txt"); - let link = PathBuf::from("parent/link"); - let empty_dir = PathBuf::from("empty_dir"); - let tree = make_tree(vec![ - TypedPath::File { - path: &file, - is_executable: false, - }, - TypedPath::Link { - path: &link, - target: Path::new("file.txt"), - }, - TypedPath::Dir(&empty_dir), - ]); + let file = PathBuf::from("parent/file.txt"); + let link = PathBuf::from("parent/link"); + let empty_dir = PathBuf::from("empty_dir"); + let tree = make_tree(vec![ + TypedPath::File { + path: &file, + is_executable: false, + }, + TypedPath::Link { + path: &link, + target: Path::new("file.txt"), + }, + TypedPath::Dir(&empty_dir), + ]); - let leaf_paths = tree.leaf_paths(); + let leaf_paths = tree.leaf_paths(); - assert_eq!(leaf_paths, vec![empty_dir, file, link]) + assert_eq!(leaf_paths, vec![empty_dir, file, link]) } diff --git a/src/rust/engine/fs/src/gitignore.rs b/src/rust/engine/fs/src/gitignore.rs index b7d183a2004..5a5a3e8161b 100644 --- a/src/rust/engine/fs/src/gitignore.rs +++ b/src/rust/engine/fs/src/gitignore.rs @@ -10,250 +10,252 @@ use lazy_static::lazy_static; use crate::Stat; lazy_static! 
{ - static ref EMPTY_IGNORE: Arc = Arc::new(GitignoreStyleExcludes { - patterns: vec![], - gitignore: Gitignore::empty(), - }); + static ref EMPTY_IGNORE: Arc = Arc::new(GitignoreStyleExcludes { + patterns: vec![], + gitignore: Gitignore::empty(), + }); } #[derive(Debug)] pub struct GitignoreStyleExcludes { - patterns: Vec, - gitignore: Gitignore, + patterns: Vec, + gitignore: Gitignore, } impl GitignoreStyleExcludes { - pub fn create(patterns: Vec) -> Result, String> { - Self::create_with_gitignore_files(patterns, vec![]) - } - - pub fn empty() -> Arc { - EMPTY_IGNORE.clone() - } - - /// Create with patterns and possibly multiple files. - /// - /// Later paths in `gitignore_paths` take precedence. `patterns` takes precedence over all - /// `gitignore_paths`. - pub fn create_with_gitignore_files( - patterns: Vec, - gitignore_paths: Vec, - ) -> Result, String> { - if patterns.is_empty() && gitignore_paths.is_empty() { - return Ok(EMPTY_IGNORE.clone()); + pub fn create(patterns: Vec) -> Result, String> { + Self::create_with_gitignore_files(patterns, vec![]) } - let mut ignore_builder = GitignoreBuilder::new(""); - - for path in gitignore_paths { - if let Some(err) = ignore_builder.add(&path) { - return Err(format!("Error adding the path {}: {err:?}", path.display())); - } + pub fn empty() -> Arc { + EMPTY_IGNORE.clone() } - for pattern in &patterns { - ignore_builder - .add_line(None, pattern) - .map_err(|e| format!("Could not parse glob exclude pattern `{pattern:?}`: {e:?}"))?; + + /// Create with patterns and possibly multiple files. + /// + /// Later paths in `gitignore_paths` take precedence. `patterns` takes precedence over all + /// `gitignore_paths`. + pub fn create_with_gitignore_files( + patterns: Vec, + gitignore_paths: Vec, + ) -> Result, String> { + if patterns.is_empty() && gitignore_paths.is_empty() { + return Ok(EMPTY_IGNORE.clone()); + } + + let mut ignore_builder = GitignoreBuilder::new(""); + + for path in gitignore_paths { + if let Some(err) = ignore_builder.add(&path) { + return Err(format!("Error adding the path {}: {err:?}", path.display())); + } + } + for pattern in &patterns { + ignore_builder.add_line(None, pattern).map_err(|e| { + format!("Could not parse glob exclude pattern `{pattern:?}`: {e:?}") + })?; + } + + let gitignore = ignore_builder + .build() + .map_err(|e| format!("Could not build ignore patterns: {e:?}"))?; + + Ok(Arc::new(Self { + patterns, + gitignore, + })) } - let gitignore = ignore_builder - .build() - .map_err(|e| format!("Could not build ignore patterns: {e:?}"))?; - - Ok(Arc::new(Self { - patterns, - gitignore, - })) - } - - /// Return the absolute file paths to the global gitignore, `/.gitignore`, and - /// `/.git/info/exclude`, in that order. - /// - /// Will only add the files if they exist. - pub fn gitignore_file_paths(build_root: &Path) -> Vec { - let mut result = vec![]; - - if let Some(global_ignore_path) = - ignore::gitignore::gitconfig_excludes_path().filter(|fp| fp.is_file()) - { - result.push(global_ignore_path); + /// Return the absolute file paths to the global gitignore, `/.gitignore`, and + /// `/.git/info/exclude`, in that order. + /// + /// Will only add the files if they exist. 
+ pub fn gitignore_file_paths(build_root: &Path) -> Vec { + let mut result = vec![]; + + if let Some(global_ignore_path) = + ignore::gitignore::gitconfig_excludes_path().filter(|fp| fp.is_file()) + { + result.push(global_ignore_path); + } + + let gitignore_path = build_root.join(".gitignore"); + if Path::is_file(&gitignore_path) { + result.push(gitignore_path); + } + + // Unlike Git, we hardcode `.git` and don't look for `$GIT_DIR`. See + // https://github.com/BurntSushi/ripgrep/blob/041544853c86dde91c49983e5ddd0aa799bd2831/crates/ignore/src/dir.rs#L786-L794 + // for why. + let exclude_path = build_root.join(".git/info/exclude"); + if Path::is_file(&exclude_path) { + result.push(exclude_path) + } + result } - let gitignore_path = build_root.join(".gitignore"); - if Path::is_file(&gitignore_path) { - result.push(gitignore_path); + pub(crate) fn exclude_patterns(&self) -> &[String] { + self.patterns.as_slice() } - // Unlike Git, we hardcode `.git` and don't look for `$GIT_DIR`. See - // https://github.com/BurntSushi/ripgrep/blob/041544853c86dde91c49983e5ddd0aa799bd2831/crates/ignore/src/dir.rs#L786-L794 - // for why. - let exclude_path = build_root.join(".git/info/exclude"); - if Path::is_file(&exclude_path) { - result.push(exclude_path) + pub(crate) fn is_ignored(&self, stat: &Stat) -> bool { + let is_dir = matches!(stat, &Stat::Dir(_)); + self.is_ignored_path(stat.path(), is_dir) } - result - } - - pub(crate) fn exclude_patterns(&self) -> &[String] { - self.patterns.as_slice() - } - - pub(crate) fn is_ignored(&self, stat: &Stat) -> bool { - let is_dir = matches!(stat, &Stat::Dir(_)); - self.is_ignored_path(stat.path(), is_dir) - } - - pub fn is_ignored_path(&self, path: &Path, is_dir: bool) -> bool { - match self.gitignore.matched(path, is_dir) { - ::ignore::Match::None | ::ignore::Match::Whitelist(_) => false, - ::ignore::Match::Ignore(_) => true, + + pub fn is_ignored_path(&self, path: &Path, is_dir: bool) -> bool { + match self.gitignore.matched(path, is_dir) { + ::ignore::Match::None | ::ignore::Match::Whitelist(_) => false, + ::ignore::Match::Ignore(_) => true, + } } - } - pub fn is_ignored_or_child_of_ignored_path(&self, path: &Path, is_dir: bool) -> bool { - match self.gitignore.matched_path_or_any_parents(path, is_dir) { - ::ignore::Match::None | ::ignore::Match::Whitelist(_) => false, - ::ignore::Match::Ignore(_) => true, + pub fn is_ignored_or_child_of_ignored_path(&self, path: &Path, is_dir: bool) -> bool { + match self.gitignore.matched_path_or_any_parents(path, is_dir) { + ::ignore::Match::None | ::ignore::Match::Whitelist(_) => false, + ::ignore::Match::Ignore(_) => true, + } } - } } #[cfg(test)] mod tests { - use std::fs; - use std::path::PathBuf; - use std::sync::Arc; - - use crate::{GitignoreStyleExcludes, PosixFS, Stat}; - use testutil::make_file; - - async fn read_mock_files(input: Vec, posix_fs: &Arc) -> Vec { - input - .iter() - .map(|p| posix_fs.stat_sync(p).unwrap().unwrap()) - .collect() - } - - #[tokio::test] - async fn test_basic_gitignore_functionality() { - let root = tempfile::TempDir::new().unwrap(); - let root_path = root.path(); - - for fp in [ - "non-ignored", - "ignored-file.tmp", - "important.x", - "unimportant.x", - ] { - make_file(&root_path.join(fp), b"content", 0o700); + use std::fs; + use std::path::PathBuf; + use std::sync::Arc; + + use crate::{GitignoreStyleExcludes, PosixFS, Stat}; + use testutil::make_file; + + async fn read_mock_files(input: Vec, posix_fs: &Arc) -> Vec { + input + .iter() + .map(|p| posix_fs.stat_sync(p).unwrap().unwrap()) + 
.collect() } - let gitignore_path = root_path.join(".gitignore"); - let git_info_exclude_path = root_path.join(".git/info/exclude"); - make_file(&gitignore_path, b"*.tmp\n!*.x", 0o700); - fs::create_dir_all(git_info_exclude_path.parent().unwrap()).unwrap(); - make_file(&git_info_exclude_path, b"unimportant.x", 0o700); - - let create_posix_fx = |patterns, gitignore_paths| { - let ignorer = - GitignoreStyleExcludes::create_with_gitignore_files(patterns, gitignore_paths).unwrap(); - Arc::new(PosixFS::new(root.as_ref(), ignorer, task_executor::Executor::new()).unwrap()) - }; - - let posix_fs = create_posix_fx(vec![], vec![gitignore_path.clone()]); - - let stats = read_mock_files( - vec![ - PathBuf::from("non-ignored"), - PathBuf::from("ignored-file.tmp"), - PathBuf::from("important.x"), - PathBuf::from("unimportant.x"), - ], - &posix_fs, - ) - .await; - - assert!(posix_fs.is_ignored(&stats[1])); - for fp in [&stats[0], &stats[2], &stats[3]] { - assert!(!posix_fs.is_ignored(fp)); + #[tokio::test] + async fn test_basic_gitignore_functionality() { + let root = tempfile::TempDir::new().unwrap(); + let root_path = root.path(); + + for fp in [ + "non-ignored", + "ignored-file.tmp", + "important.x", + "unimportant.x", + ] { + make_file(&root_path.join(fp), b"content", 0o700); + } + + let gitignore_path = root_path.join(".gitignore"); + let git_info_exclude_path = root_path.join(".git/info/exclude"); + make_file(&gitignore_path, b"*.tmp\n!*.x", 0o700); + fs::create_dir_all(git_info_exclude_path.parent().unwrap()).unwrap(); + make_file(&git_info_exclude_path, b"unimportant.x", 0o700); + + let create_posix_fx = |patterns, gitignore_paths| { + let ignorer = + GitignoreStyleExcludes::create_with_gitignore_files(patterns, gitignore_paths) + .unwrap(); + Arc::new(PosixFS::new(root.as_ref(), ignorer, task_executor::Executor::new()).unwrap()) + }; + + let posix_fs = create_posix_fx(vec![], vec![gitignore_path.clone()]); + + let stats = read_mock_files( + vec![ + PathBuf::from("non-ignored"), + PathBuf::from("ignored-file.tmp"), + PathBuf::from("important.x"), + PathBuf::from("unimportant.x"), + ], + &posix_fs, + ) + .await; + + assert!(posix_fs.is_ignored(&stats[1])); + for fp in [&stats[0], &stats[2], &stats[3]] { + assert!(!posix_fs.is_ignored(fp)); + } + + // Test that .gitignore files work in tandem with explicit ignores. + // + // Patterns override file paths: note how the gitignore says `!*.x` but that gets + // overridden here. + let posix_fs2 = create_posix_fx( + vec!["unimportant.x".to_owned()], + vec![gitignore_path.clone()], + ); + for fp in [&stats[1], &stats[3]] { + assert!(posix_fs2.is_ignored(fp)); + } + for fp in [&stats[0], &stats[2]] { + assert!(!posix_fs2.is_ignored(fp)); + } + + // Test that later gitignore files override earlier ones. + let posix_fs3 = create_posix_fx( + vec![], + vec![gitignore_path.clone(), git_info_exclude_path.clone()], + ); + for fp in [&stats[1], &stats[3]] { + assert!(posix_fs3.is_ignored(fp)); + } + for fp in [&stats[0], &stats[2]] { + assert!(!posix_fs3.is_ignored(fp)); + } + let posix_fs4 = create_posix_fx( + vec![], + vec![git_info_exclude_path.clone(), gitignore_path.clone()], + ); + assert!(posix_fs4.is_ignored(&stats[1])); + for fp in [&stats[0], &stats[2], &stats[3]] { + assert!(!posix_fs4.is_ignored(fp)); + } } - // Test that .gitignore files work in tandem with explicit ignores. - // - // Patterns override file paths: note how the gitignore says `!*.x` but that gets - // overridden here. 
- let posix_fs2 = create_posix_fx( - vec!["unimportant.x".to_owned()], - vec![gitignore_path.clone()], - ); - for fp in [&stats[1], &stats[3]] { - assert!(posix_fs2.is_ignored(fp)); - } - for fp in [&stats[0], &stats[2]] { - assert!(!posix_fs2.is_ignored(fp)); - } - - // Test that later gitignore files override earlier ones. - let posix_fs3 = create_posix_fx( - vec![], - vec![gitignore_path.clone(), git_info_exclude_path.clone()], - ); - for fp in [&stats[1], &stats[3]] { - assert!(posix_fs3.is_ignored(fp)); - } - for fp in [&stats[0], &stats[2]] { - assert!(!posix_fs3.is_ignored(fp)); - } - let posix_fs4 = create_posix_fx( - vec![], - vec![git_info_exclude_path.clone(), gitignore_path.clone()], - ); - assert!(posix_fs4.is_ignored(&stats[1])); - for fp in [&stats[0], &stats[2], &stats[3]] { - assert!(!posix_fs4.is_ignored(fp)); + #[test] + fn test_gitignore_file_paths() { + let root = tempfile::TempDir::new().unwrap(); + let root_path = root.path(); + + // The behavior of gitignore_file_paths depends on whether the machine has a global config + // file or not. We do not want to muck around with people's global config, so instead we + // update what we expect from the test. + let global_config_path = + ignore::gitignore::gitconfig_excludes_path().filter(|fp| fp.is_file()); + + let expected = match global_config_path.clone() { + Some(global_fp) => vec![global_fp], + None => vec![], + }; + assert_eq!( + GitignoreStyleExcludes::gitignore_file_paths(root_path), + expected + ); + + let gitignore_path = root_path.join(".gitignore"); + make_file(&gitignore_path, b"", 0o700); + let expected = match global_config_path.clone() { + Some(global_fp) => vec![global_fp, gitignore_path.clone()], + None => vec![gitignore_path.clone()], + }; + assert_eq!( + GitignoreStyleExcludes::gitignore_file_paths(root_path), + expected + ); + + let git_info_exclude_path = root_path.join(".git/info/exclude"); + fs::create_dir_all(git_info_exclude_path.parent().unwrap()).unwrap(); + make_file(&git_info_exclude_path, b"", 0o700); + let expected = match global_config_path.clone() { + Some(global_fp) => vec![global_fp, gitignore_path.clone(), git_info_exclude_path], + None => vec![gitignore_path.clone(), git_info_exclude_path], + }; + assert_eq!( + GitignoreStyleExcludes::gitignore_file_paths(root_path), + expected + ); } - } - - #[test] - fn test_gitignore_file_paths() { - let root = tempfile::TempDir::new().unwrap(); - let root_path = root.path(); - - // The behavior of gitignore_file_paths depends on whether the machine has a global config - // file or not. We do not want to muck around with people's global config, so instead we - // update what we expect from the test. 
- let global_config_path = ignore::gitignore::gitconfig_excludes_path().filter(|fp| fp.is_file()); - - let expected = match global_config_path.clone() { - Some(global_fp) => vec![global_fp], - None => vec![], - }; - assert_eq!( - GitignoreStyleExcludes::gitignore_file_paths(root_path), - expected - ); - - let gitignore_path = root_path.join(".gitignore"); - make_file(&gitignore_path, b"", 0o700); - let expected = match global_config_path.clone() { - Some(global_fp) => vec![global_fp, gitignore_path.clone()], - None => vec![gitignore_path.clone()], - }; - assert_eq!( - GitignoreStyleExcludes::gitignore_file_paths(root_path), - expected - ); - - let git_info_exclude_path = root_path.join(".git/info/exclude"); - fs::create_dir_all(git_info_exclude_path.parent().unwrap()).unwrap(); - make_file(&git_info_exclude_path, b"", 0o700); - let expected = match global_config_path.clone() { - Some(global_fp) => vec![global_fp, gitignore_path.clone(), git_info_exclude_path], - None => vec![gitignore_path.clone(), git_info_exclude_path], - }; - assert_eq!( - GitignoreStyleExcludes::gitignore_file_paths(root_path), - expected - ); - } } diff --git a/src/rust/engine/fs/src/glob_matching.rs b/src/rust/engine/fs/src/glob_matching.rs index 9cc34688f4d..35c2b609dd3 100644 --- a/src/rust/engine/fs/src/glob_matching.rs +++ b/src/rust/engine/fs/src/glob_matching.rs @@ -16,38 +16,38 @@ use log::warn; use parking_lot::Mutex; use crate::{ - Dir, GitignoreStyleExcludes, GlobExpansionConjunction, Link, LinkDepth, PathStat, Stat, - StrictGlobMatching, SymlinkBehavior, Vfs, MAX_LINK_DEPTH, + Dir, GitignoreStyleExcludes, GlobExpansionConjunction, Link, LinkDepth, PathStat, Stat, + StrictGlobMatching, SymlinkBehavior, Vfs, MAX_LINK_DEPTH, }; static DOUBLE_STAR: &str = "**"; lazy_static! 
{ - pub static ref SINGLE_STAR_GLOB: Pattern = Pattern::new("*").unwrap(); - pub static ref DOUBLE_STAR_GLOB: Pattern = Pattern::new(DOUBLE_STAR).unwrap(); - static ref MISSING_GLOB_SOURCE: GlobParsedSource = GlobParsedSource(String::from("")); - static ref PATTERN_MATCH_OPTIONS: MatchOptions = MatchOptions { - case_sensitive: true, - require_literal_separator: true, - require_literal_leading_dot: false, - }; + pub static ref SINGLE_STAR_GLOB: Pattern = Pattern::new("*").unwrap(); + pub static ref DOUBLE_STAR_GLOB: Pattern = Pattern::new(DOUBLE_STAR).unwrap(); + static ref MISSING_GLOB_SOURCE: GlobParsedSource = GlobParsedSource(String::from("")); + static ref PATTERN_MATCH_OPTIONS: MatchOptions = MatchOptions { + case_sensitive: true, + require_literal_separator: true, + require_literal_leading_dot: false, + }; } #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum PathGlob { - Wildcard { - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - link_depth: LinkDepth, - }, - DirWildcard { - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - remainder: Vec, - link_depth: LinkDepth, - }, + Wildcard { + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + link_depth: LinkDepth, + }, + DirWildcard { + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + remainder: Vec, + link_depth: LinkDepth, + }, } #[derive(Clone, Debug, Eq, Hash, PartialEq)] @@ -55,351 +55,349 @@ struct GlobParsedSource(String); #[derive(Clone, Debug, PartialEq)] pub(crate) struct PathGlobIncludeEntry { - input: GlobParsedSource, - globs: Vec, + input: GlobParsedSource, + globs: Vec, } impl PathGlob { - fn wildcard( - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - link_depth: LinkDepth, - ) -> PathGlob { - PathGlob::Wildcard { - canonical_dir, - symbolic_path, - wildcard, - link_depth, - } - } - - fn dir_wildcard( - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - remainder: Vec, - link_depth: LinkDepth, - ) -> PathGlob { - PathGlob::DirWildcard { - canonical_dir, - symbolic_path, - wildcard, - remainder, - link_depth, - } - } - - pub fn create(filespecs: Vec) -> Result, String> { - // Getting a Vec per filespec is needed to create a `PreparedPathGlobs`, but we don't - // need that here. - Ok( - Self::spread_filespecs(filespecs)? - .into_iter() - .flat_map(|entry| entry.globs) - .collect(), - ) - } - - pub(crate) fn spread_filespecs( - filespecs: Vec, - ) -> Result, String> { - let mut spec_globs_map = Vec::new(); - for filespec in filespecs { - let canonical_dir = Dir(PathBuf::new()); - let symbolic_path = PathBuf::new(); - let globs = PathGlob::parse(canonical_dir, symbolic_path, &filespec)?; - spec_globs_map.push(PathGlobIncludeEntry { - input: GlobParsedSource(filespec), - globs, - }); - } - Ok(spec_globs_map) - } - - /// - /// Normalize the given glob pattern string by splitting it into path components, and dropping - /// references to the current directory, and consecutive '**'s. - /// - fn normalize_pattern(pattern: &str) -> Result, String> { - let mut parts = Vec::new(); - let mut prev_was_doublestar = false; - for component in Path::new(pattern).components() { - let part = match component { - Component::Prefix(..) 
| Component::RootDir => { - return Err(format!("Absolute paths not supported: {pattern:?}")); + fn wildcard( + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + link_depth: LinkDepth, + ) -> PathGlob { + PathGlob::Wildcard { + canonical_dir, + symbolic_path, + wildcard, + link_depth, } - Component::CurDir => continue, - c => c.as_os_str(), - }; - - // Ignore repeated doublestar instances. - let cur_is_doublestar = DOUBLE_STAR == part; - if prev_was_doublestar && cur_is_doublestar { - continue; - } - prev_was_doublestar = cur_is_doublestar; - - parts.push(part); } - Ok(parts) - } - - /// - /// Given a filespec String relative to a canonical Dir and path, parse it to a normalized - /// series of PathGlob objects. - /// - fn parse( - canonical_dir: Dir, - symbolic_path: PathBuf, - filespec: &str, - ) -> Result, String> { - // NB: Because the filespec is a String input, calls to `to_str_lossy` are not lossy; the - // use of `Path` is strictly for os-independent Path parsing. - let parts = Self::normalize_pattern(filespec)? - .into_iter() - .map(|part| { - Pattern::new(&part.to_string_lossy()) - .map_err(|e| format!("Could not parse {filespec:?} as a glob: {e:?}")) - }) - .collect::, _>>()?; - - PathGlob::parse_globs(canonical_dir, symbolic_path, &parts, 0) - } - - /// - /// Given a filespec as Patterns, create a series of PathGlob objects. - /// - fn parse_globs( - canonical_dir: Dir, - symbolic_path: PathBuf, - parts: &[Pattern], - link_depth: LinkDepth, - ) -> Result, String> { - if parts.is_empty() { - Ok(vec![]) - } else if DOUBLE_STAR == parts[0].as_str() { - if parts.len() == 1 { - // Per https://git-scm.com/docs/gitignore: - // "A trailing '/**' matches everything inside. For example, 'abc/**' matches all files - // inside directory "abc", relative to the location of the .gitignore file, with infinite - // depth." - return Ok(vec![ - PathGlob::dir_wildcard( - canonical_dir.clone(), - symbolic_path.clone(), - SINGLE_STAR_GLOB.clone(), - vec![DOUBLE_STAR_GLOB.clone()], - link_depth, - ), - PathGlob::wildcard( + + fn dir_wildcard( + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + remainder: Vec, + link_depth: LinkDepth, + ) -> PathGlob { + PathGlob::DirWildcard { canonical_dir, symbolic_path, - SINGLE_STAR_GLOB.clone(), + wildcard, + remainder, link_depth, - ), - ]); - } - - // There is a double-wildcard in a dirname of the path: double wildcards are recursive, - // so there are two remainder possibilities: one with the double wildcard included, and the - // other without. - let pathglob_with_doublestar = PathGlob::dir_wildcard( - canonical_dir.clone(), - symbolic_path.clone(), - SINGLE_STAR_GLOB.clone(), - parts[0..].to_vec(), - link_depth, - ); - let pathglob_no_doublestar = if parts.len() == 2 { - PathGlob::wildcard(canonical_dir, symbolic_path, parts[1].clone(), link_depth) - } else { - PathGlob::dir_wildcard( - canonical_dir, - symbolic_path, - parts[1].clone(), - parts[2..].to_vec(), - link_depth, - ) - }; - Ok(vec![pathglob_with_doublestar, pathglob_no_doublestar]) - } else if parts[0].as_str() == Component::ParentDir.as_os_str().to_str().unwrap() { - // A request for the parent of `canonical_dir`: since we've already expanded the directory - // to make it canonical, we can safely drop it directly and recurse without this component. - // The resulting symbolic path will continue to contain a literal `..`. 
- let mut canonical_dir_parent = canonical_dir; - let mut symbolic_path_parent = symbolic_path; - if !canonical_dir_parent.0.pop() { - let mut symbolic_path = symbolic_path_parent; - symbolic_path.extend(parts.iter().map(Pattern::as_str)); - return Err(format!( - "Globs may not traverse outside of the buildroot: {symbolic_path:?}", - )); - } - symbolic_path_parent.push(Path::new(&Component::ParentDir)); - PathGlob::parse_globs( - canonical_dir_parent, - symbolic_path_parent, - &parts[1..], - link_depth, - ) - } else if parts.len() == 1 { - // This is the path basename. - Ok(vec![PathGlob::wildcard( - canonical_dir, - symbolic_path, - parts[0].clone(), - link_depth, - )]) - } else { - // This is a path dirname. - Ok(vec![PathGlob::dir_wildcard( - canonical_dir, - symbolic_path, - parts[0].clone(), - parts[1..].to_vec(), - link_depth, - )]) + } + } + + pub fn create(filespecs: Vec) -> Result, String> { + // Getting a Vec per filespec is needed to create a `PreparedPathGlobs`, but we don't + // need that here. + Ok(Self::spread_filespecs(filespecs)? + .into_iter() + .flat_map(|entry| entry.globs) + .collect()) + } + + pub(crate) fn spread_filespecs( + filespecs: Vec, + ) -> Result, String> { + let mut spec_globs_map = Vec::new(); + for filespec in filespecs { + let canonical_dir = Dir(PathBuf::new()); + let symbolic_path = PathBuf::new(); + let globs = PathGlob::parse(canonical_dir, symbolic_path, &filespec)?; + spec_globs_map.push(PathGlobIncludeEntry { + input: GlobParsedSource(filespec), + globs, + }); + } + Ok(spec_globs_map) + } + + /// + /// Normalize the given glob pattern string by splitting it into path components, and dropping + /// references to the current directory, and consecutive '**'s. + /// + fn normalize_pattern(pattern: &str) -> Result, String> { + let mut parts = Vec::new(); + let mut prev_was_doublestar = false; + for component in Path::new(pattern).components() { + let part = match component { + Component::Prefix(..) | Component::RootDir => { + return Err(format!("Absolute paths not supported: {pattern:?}")); + } + Component::CurDir => continue, + c => c.as_os_str(), + }; + + // Ignore repeated doublestar instances. + let cur_is_doublestar = DOUBLE_STAR == part; + if prev_was_doublestar && cur_is_doublestar { + continue; + } + prev_was_doublestar = cur_is_doublestar; + + parts.push(part); + } + Ok(parts) + } + + /// + /// Given a filespec String relative to a canonical Dir and path, parse it to a normalized + /// series of PathGlob objects. + /// + fn parse( + canonical_dir: Dir, + symbolic_path: PathBuf, + filespec: &str, + ) -> Result, String> { + // NB: Because the filespec is a String input, calls to `to_str_lossy` are not lossy; the + // use of `Path` is strictly for os-independent Path parsing. + let parts = Self::normalize_pattern(filespec)? + .into_iter() + .map(|part| { + Pattern::new(&part.to_string_lossy()) + .map_err(|e| format!("Could not parse {filespec:?} as a glob: {e:?}")) + }) + .collect::, _>>()?; + + PathGlob::parse_globs(canonical_dir, symbolic_path, &parts, 0) + } + + /// + /// Given a filespec as Patterns, create a series of PathGlob objects. + /// + fn parse_globs( + canonical_dir: Dir, + symbolic_path: PathBuf, + parts: &[Pattern], + link_depth: LinkDepth, + ) -> Result, String> { + if parts.is_empty() { + Ok(vec![]) + } else if DOUBLE_STAR == parts[0].as_str() { + if parts.len() == 1 { + // Per https://git-scm.com/docs/gitignore: + // "A trailing '/**' matches everything inside. 
For example, 'abc/**' matches all files + // inside directory "abc", relative to the location of the .gitignore file, with infinite + // depth." + return Ok(vec![ + PathGlob::dir_wildcard( + canonical_dir.clone(), + symbolic_path.clone(), + SINGLE_STAR_GLOB.clone(), + vec![DOUBLE_STAR_GLOB.clone()], + link_depth, + ), + PathGlob::wildcard( + canonical_dir, + symbolic_path, + SINGLE_STAR_GLOB.clone(), + link_depth, + ), + ]); + } + + // There is a double-wildcard in a dirname of the path: double wildcards are recursive, + // so there are two remainder possibilities: one with the double wildcard included, and the + // other without. + let pathglob_with_doublestar = PathGlob::dir_wildcard( + canonical_dir.clone(), + symbolic_path.clone(), + SINGLE_STAR_GLOB.clone(), + parts[0..].to_vec(), + link_depth, + ); + let pathglob_no_doublestar = if parts.len() == 2 { + PathGlob::wildcard(canonical_dir, symbolic_path, parts[1].clone(), link_depth) + } else { + PathGlob::dir_wildcard( + canonical_dir, + symbolic_path, + parts[1].clone(), + parts[2..].to_vec(), + link_depth, + ) + }; + Ok(vec![pathglob_with_doublestar, pathglob_no_doublestar]) + } else if parts[0].as_str() == Component::ParentDir.as_os_str().to_str().unwrap() { + // A request for the parent of `canonical_dir`: since we've already expanded the directory + // to make it canonical, we can safely drop it directly and recurse without this component. + // The resulting symbolic path will continue to contain a literal `..`. + let mut canonical_dir_parent = canonical_dir; + let mut symbolic_path_parent = symbolic_path; + if !canonical_dir_parent.0.pop() { + let mut symbolic_path = symbolic_path_parent; + symbolic_path.extend(parts.iter().map(Pattern::as_str)); + return Err(format!( + "Globs may not traverse outside of the buildroot: {symbolic_path:?}", + )); + } + symbolic_path_parent.push(Path::new(&Component::ParentDir)); + PathGlob::parse_globs( + canonical_dir_parent, + symbolic_path_parent, + &parts[1..], + link_depth, + ) + } else if parts.len() == 1 { + // This is the path basename. + Ok(vec![PathGlob::wildcard( + canonical_dir, + symbolic_path, + parts[0].clone(), + link_depth, + )]) + } else { + // This is a path dirname. 
+ Ok(vec![PathGlob::dir_wildcard( + canonical_dir, + symbolic_path, + parts[0].clone(), + parts[1..].to_vec(), + link_depth, + )]) + } } - } } #[derive(Debug, Clone)] pub struct PreparedPathGlobs { - pub(crate) include: Vec, - pub(crate) exclude: Arc, - strict_match_behavior: StrictGlobMatching, - conjunction: GlobExpansionConjunction, + pub(crate) include: Vec, + pub(crate) exclude: Arc, + strict_match_behavior: StrictGlobMatching, + conjunction: GlobExpansionConjunction, } impl PreparedPathGlobs { - pub fn create( - globs: Vec, - strict_match_behavior: StrictGlobMatching, - conjunction: GlobExpansionConjunction, - ) -> Result { - let mut include_globs = Vec::new(); - let mut exclude_globs = Vec::new(); - for glob in globs { - if glob.starts_with('!') { - let normalized_exclude: String = glob.chars().skip(1).collect(); - exclude_globs.push(normalized_exclude); - } else { - include_globs.push(glob); - } + pub fn create( + globs: Vec, + strict_match_behavior: StrictGlobMatching, + conjunction: GlobExpansionConjunction, + ) -> Result { + let mut include_globs = Vec::new(); + let mut exclude_globs = Vec::new(); + for glob in globs { + if glob.starts_with('!') { + let normalized_exclude: String = glob.chars().skip(1).collect(); + exclude_globs.push(normalized_exclude); + } else { + include_globs.push(glob); + } + } + let include = PathGlob::spread_filespecs(include_globs)?; + let exclude = GitignoreStyleExcludes::create(exclude_globs)?; + + Ok(PreparedPathGlobs { + include, + exclude, + strict_match_behavior, + conjunction, + }) + } + + fn from_globs(include: Vec) -> Result { + let include: Vec = include + .into_iter() + .map(|glob| PathGlobIncludeEntry { + input: MISSING_GLOB_SOURCE.clone(), + globs: vec![glob], + }) + .collect(); + + Ok(PreparedPathGlobs { + include, + // An empty exclude becomes EMPTY_IGNORE. + exclude: GitignoreStyleExcludes::create(vec![])?, + strict_match_behavior: StrictGlobMatching::Ignore, + conjunction: GlobExpansionConjunction::AllMatch, + }) } - let include = PathGlob::spread_filespecs(include_globs)?; - let exclude = GitignoreStyleExcludes::create(exclude_globs)?; - - Ok(PreparedPathGlobs { - include, - exclude, - strict_match_behavior, - conjunction, - }) - } - - fn from_globs(include: Vec) -> Result { - let include: Vec = include - .into_iter() - .map(|glob| PathGlobIncludeEntry { - input: MISSING_GLOB_SOURCE.clone(), - globs: vec![glob], - }) - .collect(); - - Ok(PreparedPathGlobs { - include, - // An empty exclude becomes EMPTY_IGNORE. - exclude: GitignoreStyleExcludes::create(vec![])?, - strict_match_behavior: StrictGlobMatching::Ignore, - conjunction: GlobExpansionConjunction::AllMatch, - }) - } } /// Allows checking in-memory if paths match the patterns. #[derive(Debug)] pub struct FilespecMatcher { - includes: Vec, - excludes: Arc, + includes: Vec, + excludes: Arc, } impl FilespecMatcher { - pub fn new(includes: Vec, excludes: Vec) -> Result { - let includes = includes - .iter() - .map(|glob| { - PathGlob::normalize_pattern(glob).and_then(|components| { - let normalized_pattern: PathBuf = components.into_iter().collect(); - Pattern::new(normalized_pattern.to_str().unwrap()) - .map_err(|e| format!("Could not parse {glob:?} as a glob: {e:?}")) - }) - }) - .collect::, String>>()?; - let excludes = GitignoreStyleExcludes::create(excludes)?; - Ok(Self { includes, excludes }) - } - - /// - /// Matches the patterns against the given paths. 
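(Usage note for the `FilespecMatcher` introduced just above: a minimal sketch, not part of the patch, relying only on the `new` and `matches` signatures visible in this hunk.)

    use std::path::Path;

    fn sketch() -> Result<(), String> {
        // Includes are glob patterns; excludes are gitignore-style patterns.
        let matcher = FilespecMatcher::new(
            vec!["*.rs".to_string()],
            vec!["generated.rs".to_string()],
        )?;
        assert!(matcher.matches(Path::new("lib.rs"))); // hits an include
        assert!(!matcher.matches(Path::new("generated.rs"))); // include hit, but excluded
        assert!(!matcher.matches(Path::new("lib.py"))); // no include matches
        Ok(())
    }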
- /// - /// NB: This implementation is independent from GlobMatchingImplementation::expand, and must be - /// kept in sync via unit tests (in particular: the python filespec_test.py) in order to allow for - /// owners detection of deleted files (see #6790 and #5636 for more info). The lazy filesystem - /// traversal in expand is (currently) too expensive to use for that in-memory matching (such as - /// via MemFS). - /// - pub fn matches(&self, path: &Path) -> bool { - let matches_includes = self - .includes - .iter() - .any(|pattern| pattern.matches_path_with(path, *PATTERN_MATCH_OPTIONS)); - matches_includes && !self.excludes.is_ignored_path(path, false) - } - - pub fn include_globs(&self) -> &[Pattern] { - self.includes.as_slice() - } - - pub fn exclude_globs(&self) -> &[String] { - self.excludes.exclude_patterns() - } + pub fn new(includes: Vec, excludes: Vec) -> Result { + let includes = includes + .iter() + .map(|glob| { + PathGlob::normalize_pattern(glob).and_then(|components| { + let normalized_pattern: PathBuf = components.into_iter().collect(); + Pattern::new(normalized_pattern.to_str().unwrap()) + .map_err(|e| format!("Could not parse {glob:?} as a glob: {e:?}")) + }) + }) + .collect::, String>>()?; + let excludes = GitignoreStyleExcludes::create(excludes)?; + Ok(Self { includes, excludes }) + } + + /// + /// Matches the patterns against the given paths. + /// + /// NB: This implementation is independent from GlobMatchingImplementation::expand, and must be + /// kept in sync via unit tests (in particular: the python filespec_test.py) in order to allow for + /// owners detection of deleted files (see #6790 and #5636 for more info). The lazy filesystem + /// traversal in expand is (currently) too expensive to use for that in-memory matching (such as + /// via MemFS). + /// + pub fn matches(&self, path: &Path) -> bool { + let matches_includes = self + .includes + .iter() + .any(|pattern| pattern.matches_path_with(path, *PATTERN_MATCH_OPTIONS)); + matches_includes && !self.excludes.is_ignored_path(path, false) + } + + pub fn include_globs(&self) -> &[Pattern] { + self.includes.as_slice() + } + + pub fn exclude_globs(&self) -> &[String] { + self.excludes.exclude_patterns() + } } #[async_trait] pub trait GlobMatching: Vfs { - /// - /// Canonicalize the Link for the given Path to an underlying File or Dir. May result - /// in None if the PathStat represents a broken Link. - /// - /// Skips ignored paths both before and after expansion. - /// - async fn canonicalize_link( - &self, - symbolic_path: PathBuf, - link: Link, - ) -> Result, E> { - GlobMatchingImplementation::canonicalize_link(self, symbolic_path, link).await - } - - /// - /// Recursively expands PathGlobs into PathStats while applying excludes. - /// - async fn expand_globs( - &self, - path_globs: PreparedPathGlobs, - symlink_behavior: SymlinkBehavior, - unmatched_globs_additional_context: Option, - ) -> Result, E> { - GlobMatchingImplementation::expand_globs( - self, - path_globs, - symlink_behavior, - unmatched_globs_additional_context, - ) - .await - } + /// + /// Canonicalize the Link for the given Path to an underlying File or Dir. May result + /// in None if the PathStat represents a broken Link. + /// + /// Skips ignored paths both before and after expansion. 
+ /// + async fn canonicalize_link( + &self, + symbolic_path: PathBuf, + link: Link, + ) -> Result, E> { + GlobMatchingImplementation::canonicalize_link(self, symbolic_path, link).await + } + + /// + /// Recursively expands PathGlobs into PathStats while applying excludes. + /// + async fn expand_globs( + &self, + path_globs: PreparedPathGlobs, + symlink_behavior: SymlinkBehavior, + unmatched_globs_additional_context: Option, + ) -> Result, E> { + GlobMatchingImplementation::expand_globs( + self, + path_globs, + symlink_behavior, + unmatched_globs_additional_context, + ) + .await + } } impl> GlobMatching for T {} @@ -410,359 +408,363 @@ impl> GlobMatching for T {} // The methods of `GlobMatching` are forwarded to methods here. #[async_trait] trait GlobMatchingImplementation: Vfs { - async fn directory_listing( - &self, - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - exclude: &Arc, - symlink_behavior: SymlinkBehavior, - link_depth: LinkDepth, - ) -> Result, E> { - // List the directory to create relative Stats. - let dir_listing = self.scandir(canonical_dir.clone()).await?; - - // Match any relevant Stats, and join them into PathStats. - let path_stats = future::try_join_all( - dir_listing - .0 - .iter() - .filter(|stat| { - // Match relevant filenames. - stat - .path() - .file_name() - .map(|file_name| wildcard.matches_path(Path::new(file_name))) - .unwrap_or(false) - }) - .filter_map(|stat| { - // Append matched filenames. - stat - .path() - .file_name() - .map(|file_name| symbolic_path.join(file_name)) - .map(|symbolic_stat_path| (symbolic_stat_path, stat)) - }) - .map(|(stat_symbolic_path, stat)| { - let context = self.clone(); - let exclude = exclude.clone(); - let stat = stat.within(&canonical_dir.0); - async move { - // Canonicalize matched PathStats, and filter paths that are ignored by local excludes. - // Context ("global") ignore patterns are applied during `scandir`. - if exclude.is_ignored(&stat) { - Ok(None) - } else { - match stat { - Stat::Link(l) => { - // NB: When traversing a link, we increment the link_depth. - if link_depth >= MAX_LINK_DEPTH { - return Err(Self::mk_error(&format!( + async fn directory_listing( + &self, + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + exclude: &Arc, + symlink_behavior: SymlinkBehavior, + link_depth: LinkDepth, + ) -> Result, E> { + // List the directory to create relative Stats. + let dir_listing = self.scandir(canonical_dir.clone()).await?; + + // Match any relevant Stats, and join them into PathStats. + let path_stats = future::try_join_all( + dir_listing + .0 + .iter() + .filter(|stat| { + // Match relevant filenames. + stat.path() + .file_name() + .map(|file_name| wildcard.matches_path(Path::new(file_name))) + .unwrap_or(false) + }) + .filter_map(|stat| { + // Append matched filenames. + stat.path() + .file_name() + .map(|file_name| symbolic_path.join(file_name)) + .map(|symbolic_stat_path| (symbolic_stat_path, stat)) + }) + .map(|(stat_symbolic_path, stat)| { + let context = self.clone(); + let exclude = exclude.clone(); + let stat = stat.within(&canonical_dir.0); + async move { + // Canonicalize matched PathStats, and filter paths that are ignored by local excludes. + // Context ("global") ignore patterns are applied during `scandir`. + if exclude.is_ignored(&stat) { + Ok(None) + } else { + match stat { + Stat::Link(l) => { + // NB: When traversing a link, we increment the link_depth. 
+ if link_depth >= MAX_LINK_DEPTH { + return Err(Self::mk_error(&format!( "Maximum link depth exceeded at {l:?} for {stat_symbolic_path:?}" ))); - } + } + + if let SymlinkBehavior::Aware = symlink_behavior { + return Ok(Some(( + PathStat::link(stat_symbolic_path, l), + link_depth + 1, + ))); + } + + let dest = + context.canonicalize_link(stat_symbolic_path, l).await?; + + Ok(dest.map(|ps| (ps, link_depth + 1))) + } + Stat::Dir(d) => { + Ok(Some((PathStat::dir(stat_symbolic_path, d), link_depth))) + } + Stat::File(f) => { + Ok(Some((PathStat::file(stat_symbolic_path, f), link_depth))) + } + } + } + } + }) + .collect::>(), + ) + .await?; + // See the note above. + Ok(path_stats.into_iter().flatten().collect()) + } - if let SymlinkBehavior::Aware = symlink_behavior { - return Ok(Some(( - PathStat::link(stat_symbolic_path, l), - link_depth + 1, - ))); - } + async fn expand_globs( + &self, + path_globs: PreparedPathGlobs, + symlink_behavior: SymlinkBehavior, + unmatched_globs_additional_context: Option, + ) -> Result, E> { + let PreparedPathGlobs { + include, + exclude, + strict_match_behavior, + conjunction, + .. + } = path_globs; - let dest = context.canonicalize_link(stat_symbolic_path, l).await?; + if include.is_empty() { + return Ok(vec![]); + } + + let result = Arc::new(Mutex::new(Vec::new())); + + let mut sources = Vec::new(); + let mut roots = Vec::new(); + for pgie in include { + let source = Arc::new(pgie.input); + for path_glob in pgie.globs { + sources.push(source.clone()); + roots.push(self.expand_single( + result.clone(), + exclude.clone(), + path_glob, + symlink_behavior, + )); + } + } - Ok(dest.map(|ps| (ps, link_depth + 1))) + let matched = future::try_join_all(roots).await?; + + if strict_match_behavior.should_check_glob_matches() { + // Get all the inputs which didn't transitively expand to any files. + let matching_inputs = sources + .iter() + .zip(matched) + .filter_map( + |(source, matched)| { + if matched { + Some(source.clone()) + } else { + None + } + }, + ) + .collect::>(); + + let non_matching_inputs = sources + .into_iter() + .filter(|s| !matching_inputs.contains(s)) + .collect::>(); + + let match_failed = match conjunction { + // All must match. + GlobExpansionConjunction::AllMatch => !non_matching_inputs.is_empty(), + // Only one needs to match. 
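(The rule implemented by the conjunction match arms around this point can be stated as a standalone predicate; an illustrative sketch, not part of the patch.)

    // AllMatch fails if any input glob matched nothing; AnyMatch fails only if none matched.
    enum Conjunction {
        AllMatch,
        AnyMatch,
    }

    fn match_failed(conjunction: &Conjunction, matched_per_input: &[bool]) -> bool {
        match conjunction {
            Conjunction::AllMatch => matched_per_input.iter().any(|m| !*m),
            Conjunction::AnyMatch => !matched_per_input.iter().any(|m| *m),
        }
    }

    // match_failed(&Conjunction::AllMatch, &[true, false]) == true
    // match_failed(&Conjunction::AnyMatch, &[true, false]) == false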
+ GlobExpansionConjunction::AnyMatch => matching_inputs.is_empty(), + }; + + if match_failed { + let mut non_matching_inputs = non_matching_inputs + .iter() + .map(|parsed_source| parsed_source.0.clone()) + .collect::>(); + non_matching_inputs.sort(); + let single_glob = non_matching_inputs.len() == 1; + let prefix = format!("Unmatched glob{}", if single_glob { "" } else { "s" }); + let origin = match &strict_match_behavior { + StrictGlobMatching::Warn(description) + | StrictGlobMatching::Error(description) => { + format!(" from {description}: ") + } + _ => ": ".to_string(), + }; + let unmatched_globs = if single_glob { + format!("{:?}", non_matching_inputs[0]) + } else { + format!("{non_matching_inputs:?}") + }; + let exclude_patterns = exclude.exclude_patterns(); + let excludes_portion = if exclude_patterns.is_empty() { + "".to_string() + } else { + let single_exclude = exclude_patterns.len() == 1; + if single_exclude { + format!(", exclude: {:?}", exclude_patterns[0]) + } else { + format!(", excludes: {exclude_patterns:?}") + } + }; + let msg = format!( + "{}{}{}{}{}", + prefix, + origin, + unmatched_globs, + excludes_portion, + unmatched_globs_additional_context.unwrap_or_else(|| "".to_owned()) + ); + if strict_match_behavior.should_throw_on_error() { + return Err(Self::mk_error(&msg)); + } else { + warn!("{}", msg); } - Stat::Dir(d) => Ok(Some((PathStat::dir(stat_symbolic_path, d), link_depth))), - Stat::File(f) => Ok(Some((PathStat::file(stat_symbolic_path, f), link_depth))), - } } - } - }) - .collect::>(), - ) - .await?; - // See the note above. - Ok(path_stats.into_iter().flatten().collect()) - } - - async fn expand_globs( - &self, - path_globs: PreparedPathGlobs, - symlink_behavior: SymlinkBehavior, - unmatched_globs_additional_context: Option, - ) -> Result, E> { - let PreparedPathGlobs { - include, - exclude, - strict_match_behavior, - conjunction, - .. - } = path_globs; - - if include.is_empty() { - return Ok(vec![]); - } + } - let result = Arc::new(Mutex::new(Vec::new())); - - let mut sources = Vec::new(); - let mut roots = Vec::new(); - for pgie in include { - let source = Arc::new(pgie.input); - for path_glob in pgie.globs { - sources.push(source.clone()); - roots.push(self.expand_single( - result.clone(), - exclude.clone(), - path_glob, - symlink_behavior, - )); - } + let mut path_stats = Arc::try_unwrap(result) + .unwrap_or_else(|_| panic!("expand violated its contract.")) + .into_inner() + .into_iter() + .collect::>(); + #[allow(clippy::unnecessary_sort_by)] + path_stats.sort_by(|a, b| a.path().cmp(b.path())); + path_stats.dedup_by(|a, b| a.path() == b.path()); + Ok(path_stats) } - let matched = future::try_join_all(roots).await?; - - if strict_match_behavior.should_check_glob_matches() { - // Get all the inputs which didn't transitively expand to any files. 
- let matching_inputs = sources - .iter() - .zip(matched) - .filter_map( - |(source, matched)| { - if matched { - Some(source.clone()) - } else { - None + async fn expand_single( + &self, + result: Arc>>, + exclude: Arc, + path_glob: PathGlob, + symlink_behavior: SymlinkBehavior, + ) -> Result { + match path_glob { + PathGlob::Wildcard { + canonical_dir, + symbolic_path, + wildcard, + link_depth, + } => { + self.expand_wildcard( + result, + exclude, + canonical_dir, + symbolic_path, + wildcard, + symlink_behavior, + link_depth, + ) + .await + } + PathGlob::DirWildcard { + canonical_dir, + symbolic_path, + wildcard, + remainder, + link_depth, + } => { + self.expand_dir_wildcard( + result, + exclude, + canonical_dir, + symbolic_path, + wildcard, + remainder, + symlink_behavior, + link_depth, + ) + .await } - }, - ) - .collect::>(); - - let non_matching_inputs = sources - .into_iter() - .filter(|s| !matching_inputs.contains(s)) - .collect::>(); - - let match_failed = match conjunction { - // All must match. - GlobExpansionConjunction::AllMatch => !non_matching_inputs.is_empty(), - // Only one needs to match. - GlobExpansionConjunction::AnyMatch => matching_inputs.is_empty(), - }; - - if match_failed { - let mut non_matching_inputs = non_matching_inputs - .iter() - .map(|parsed_source| parsed_source.0.clone()) - .collect::>(); - non_matching_inputs.sort(); - let single_glob = non_matching_inputs.len() == 1; - let prefix = format!("Unmatched glob{}", if single_glob { "" } else { "s" }); - let origin = match &strict_match_behavior { - StrictGlobMatching::Warn(description) | StrictGlobMatching::Error(description) => { - format!(" from {description}: ") - } - _ => ": ".to_string(), - }; - let unmatched_globs = if single_glob { - format!("{:?}", non_matching_inputs[0]) - } else { - format!("{non_matching_inputs:?}") - }; - let exclude_patterns = exclude.exclude_patterns(); - let excludes_portion = if exclude_patterns.is_empty() { - "".to_string() - } else { - let single_exclude = exclude_patterns.len() == 1; - if single_exclude { - format!(", exclude: {:?}", exclude_patterns[0]) - } else { - format!(", excludes: {exclude_patterns:?}") - } - }; - let msg = format!( - "{}{}{}{}{}", - prefix, - origin, - unmatched_globs, - excludes_portion, - unmatched_globs_additional_context.unwrap_or_else(|| "".to_owned()) - ); - if strict_match_behavior.should_throw_on_error() { - return Err(Self::mk_error(&msg)); - } else { - warn!("{}", msg); } - } } - let mut path_stats = Arc::try_unwrap(result) - .unwrap_or_else(|_| panic!("expand violated its contract.")) - .into_inner() - .into_iter() - .collect::>(); - #[allow(clippy::unnecessary_sort_by)] - path_stats.sort_by(|a, b| a.path().cmp(b.path())); - path_stats.dedup_by(|a, b| a.path() == b.path()); - Ok(path_stats) - } - - async fn expand_single( - &self, - result: Arc>>, - exclude: Arc, - path_glob: PathGlob, - symlink_behavior: SymlinkBehavior, - ) -> Result { - match path_glob { - PathGlob::Wildcard { - canonical_dir, - symbolic_path, - wildcard, - link_depth, - } => { - self - .expand_wildcard( - result, - exclude, - canonical_dir, - symbolic_path, - wildcard, - symlink_behavior, - link_depth, - ) - .await - } - PathGlob::DirWildcard { - canonical_dir, - symbolic_path, - wildcard, - remainder, - link_depth, - } => { - self - .expand_dir_wildcard( - result, - exclude, - canonical_dir, - symbolic_path, - wildcard, - remainder, - symlink_behavior, - link_depth, - ) - .await - } + async fn expand_wildcard( + &self, + result: Arc>>, + exclude: Arc, + 
canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + symlink_behavior: SymlinkBehavior, + link_depth: LinkDepth, + ) -> Result { + // Filter directory listing to append PathStats, with no continuation. + let path_stats = self + .directory_listing( + canonical_dir, + symbolic_path, + wildcard, + &exclude, + symlink_behavior, + link_depth, + ) + .await?; + + let mut result = result.lock(); + let matched = !path_stats.is_empty(); + result.extend(path_stats.into_iter().map(|(ps, _)| ps)); + Ok(matched) + } + + async fn expand_dir_wildcard( + &self, + result: Arc>>, + exclude: Arc, + canonical_dir: Dir, + symbolic_path: PathBuf, + wildcard: Pattern, + remainder: Vec, + symlink_behavior: SymlinkBehavior, + link_depth: LinkDepth, + ) -> Result { + // Filter directory listing and recurse for matched Dirs. + let context = self.clone(); + let path_stats = self + .directory_listing( + canonical_dir, + symbolic_path, + wildcard, + &exclude, + symlink_behavior, + link_depth, + ) + .await?; + + let path_globs = path_stats + .into_iter() + .filter_map(|(ps, link_depth)| match ps { + PathStat::Dir { path, stat } => Some( + PathGlob::parse_globs(stat, path, &remainder, link_depth) + .map_err(|e| Self::mk_error(e.as_str())), + ), + PathStat::Link { .. } => None, + PathStat::File { .. } => None, + }) + .collect::, E>>()?; + + let child_globs = path_globs + .into_iter() + .flat_map(Vec::into_iter) + .map(|pg| context.expand_single(result.clone(), exclude.clone(), pg, symlink_behavior)) + .collect::>(); + + let child_matches = future::try_join_all(child_globs).await?; + Ok(child_matches.into_iter().any(|m| m)) + } + + async fn canonicalize_link( + &self, + symbolic_path: PathBuf, + link: Link, + ) -> Result, E> { + // Read the link, which may result in PathGlob(s) that match 0 or 1 Path. + let context = self.clone(); + // If the link destination can't be parsed as PathGlob(s), it is broken. + let link_globs = self + .read_link(&link) + .await? + .to_str() + .and_then(|dest_str| { + // Escape any globs in the parsed dest, which should guarantee one output PathGlob. + PathGlob::create(vec![Pattern::escape(dest_str)]).ok() + }) + .unwrap_or_default(); + + let path_globs = + PreparedPathGlobs::from_globs(link_globs).map_err(|e| Self::mk_error(e.as_str()))?; + let mut path_stats = context + .expand_globs(path_globs, SymlinkBehavior::Oblivious, None) + .map_err(move |e| { + Self::mk_error(&format!("While expanding link {:?}: {}", link.path, e)) + }) + .await?; + + // Since we've escaped any globs in the parsed path, expect either 0 or 1 destination. + Ok(path_stats.pop().map(|ps| match ps { + PathStat::Dir { stat, .. } => PathStat::dir(symbolic_path, stat), + PathStat::File { stat, .. } => PathStat::file(symbolic_path, stat), + PathStat::Link { stat, .. } => PathStat::link(symbolic_path, stat), + })) } - } - - async fn expand_wildcard( - &self, - result: Arc>>, - exclude: Arc, - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - symlink_behavior: SymlinkBehavior, - link_depth: LinkDepth, - ) -> Result { - // Filter directory listing to append PathStats, with no continuation. 
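(One detail of the new `canonicalize_link` above is worth a note: escaping the link destination before building a `PathGlob` is what guarantees at most one expansion result. A minimal sketch of the effect using the `glob` crate's `Pattern::escape`; illustrative only, not part of the patch.)

    // Escaping makes glob metacharacters literal, so a link target that happens to
    // contain "*" or "[" is looked up verbatim instead of being expanded as a glob.
    let dest = "notes[draft]*.txt";
    let escaped = glob::Pattern::escape(dest);
    // Per the crate's contract, the escaped pattern matches the original string and nothing else.
    assert!(glob::Pattern::new(&escaped).unwrap().matches(dest));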
- let path_stats = self - .directory_listing( - canonical_dir, - symbolic_path, - wildcard, - &exclude, - symlink_behavior, - link_depth, - ) - .await?; - - let mut result = result.lock(); - let matched = !path_stats.is_empty(); - result.extend(path_stats.into_iter().map(|(ps, _)| ps)); - Ok(matched) - } - - async fn expand_dir_wildcard( - &self, - result: Arc>>, - exclude: Arc, - canonical_dir: Dir, - symbolic_path: PathBuf, - wildcard: Pattern, - remainder: Vec, - symlink_behavior: SymlinkBehavior, - link_depth: LinkDepth, - ) -> Result { - // Filter directory listing and recurse for matched Dirs. - let context = self.clone(); - let path_stats = self - .directory_listing( - canonical_dir, - symbolic_path, - wildcard, - &exclude, - symlink_behavior, - link_depth, - ) - .await?; - - let path_globs = path_stats - .into_iter() - .filter_map(|(ps, link_depth)| match ps { - PathStat::Dir { path, stat } => Some( - PathGlob::parse_globs(stat, path, &remainder, link_depth) - .map_err(|e| Self::mk_error(e.as_str())), - ), - PathStat::Link { .. } => None, - PathStat::File { .. } => None, - }) - .collect::, E>>()?; - - let child_globs = path_globs - .into_iter() - .flat_map(Vec::into_iter) - .map(|pg| context.expand_single(result.clone(), exclude.clone(), pg, symlink_behavior)) - .collect::>(); - - let child_matches = future::try_join_all(child_globs).await?; - Ok(child_matches.into_iter().any(|m| m)) - } - - async fn canonicalize_link( - &self, - symbolic_path: PathBuf, - link: Link, - ) -> Result, E> { - // Read the link, which may result in PathGlob(s) that match 0 or 1 Path. - let context = self.clone(); - // If the link destination can't be parsed as PathGlob(s), it is broken. - let link_globs = self - .read_link(&link) - .await? - .to_str() - .and_then(|dest_str| { - // Escape any globs in the parsed dest, which should guarantee one output PathGlob. - PathGlob::create(vec![Pattern::escape(dest_str)]).ok() - }) - .unwrap_or_default(); - - let path_globs = - PreparedPathGlobs::from_globs(link_globs).map_err(|e| Self::mk_error(e.as_str()))?; - let mut path_stats = context - .expand_globs(path_globs, SymlinkBehavior::Oblivious, None) - .map_err(move |e| Self::mk_error(&format!("While expanding link {:?}: {}", link.path, e))) - .await?; - - // Since we've escaped any globs in the parsed path, expect either 0 or 1 destination. - Ok(path_stats.pop().map(|ps| match ps { - PathStat::Dir { stat, .. } => PathStat::dir(symbolic_path, stat), - PathStat::File { stat, .. } => PathStat::file(symbolic_path, stat), - PathStat::Link { stat, .. 
} => PathStat::link(symbolic_path, stat), - })) - } } impl> GlobMatchingImplementation for T {} diff --git a/src/rust/engine/fs/src/glob_matching_tests.rs b/src/rust/engine/fs/src/glob_matching_tests.rs index 74738bce0c5..e9fd71503bc 100644 --- a/src/rust/engine/fs/src/glob_matching_tests.rs +++ b/src/rust/engine/fs/src/glob_matching_tests.rs @@ -6,29 +6,29 @@ use crate::{GitignoreStyleExcludes, GlobExpansionConjunction, PathGlobs, StrictG #[test] fn path_globs_create_distinguishes_between_includes_and_excludes() { - let include_globs = vec!["foo.rs".to_string(), "bar.rs".to_string()]; - let parsed_exclude_globs = vec!["ignore.rs".to_string(), "**/*.rs".to_string()]; + let include_globs = vec!["foo.rs".to_string(), "bar.rs".to_string()]; + let parsed_exclude_globs = vec!["ignore.rs".to_string(), "**/*.rs".to_string()]; - let mut glob_inputs: Vec = vec![]; - glob_inputs.extend_from_slice(&include_globs); - glob_inputs.extend(parsed_exclude_globs.iter().map(|glob| format!("!{glob}"))); + let mut glob_inputs: Vec = vec![]; + glob_inputs.extend_from_slice(&include_globs); + glob_inputs.extend(parsed_exclude_globs.iter().map(|glob| format!("!{glob}"))); - let pg = PathGlobs::new( - glob_inputs, - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AllMatch, - ) - .parse() - .expect("Path globs failed to be created"); + let pg = PathGlobs::new( + glob_inputs, + StrictGlobMatching::Ignore, + GlobExpansionConjunction::AllMatch, + ) + .parse() + .expect("Path globs failed to be created"); - assert_eq!( - pg.include, - PathGlob::spread_filespecs(include_globs).expect("Include globs failed to expand") - ); - assert_eq!( - pg.exclude.exclude_patterns(), - GitignoreStyleExcludes::create(parsed_exclude_globs) - .expect("Exclude globs failed to expand") - .exclude_patterns() - ); + assert_eq!( + pg.include, + PathGlob::spread_filespecs(include_globs).expect("Include globs failed to expand") + ); + assert_eq!( + pg.exclude.exclude_patterns(), + GitignoreStyleExcludes::create(parsed_exclude_globs) + .expect("Exclude globs failed to expand") + .exclude_patterns() + ); } diff --git a/src/rust/engine/fs/src/lib.rs b/src/rust/engine/fs/src/lib.rs index ba9bb56e298..4ad73da3471 100644 --- a/src/rust/engine/fs/src/lib.rs +++ b/src/rust/engine/fs/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -36,12 +36,12 @@ mod glob_matching_tests; mod posixfs_tests; pub use crate::directory::{ - DigestTrie, DirectoryDigest, Entry, SymlinkBehavior, TypedPath, EMPTY_DIGEST_TREE, - EMPTY_DIRECTORY_DIGEST, + DigestTrie, DirectoryDigest, Entry, SymlinkBehavior, TypedPath, EMPTY_DIGEST_TREE, + EMPTY_DIRECTORY_DIGEST, }; pub use crate::gitignore::GitignoreStyleExcludes; pub use crate::glob_matching::{ - FilespecMatcher, GlobMatching, PathGlob, PreparedPathGlobs, DOUBLE_STAR_GLOB, SINGLE_STAR_GLOB, + FilespecMatcher, GlobMatching, PathGlob, PreparedPathGlobs, DOUBLE_STAR_GLOB, SINGLE_STAR_GLOB, }; use std::cmp::min; @@ -72,135 +72,137 @@ type LinkDepth = u8; /// Follows the unix XDB base spec: . pub fn default_cache_path() -> PathBuf { - let cache_path = std::env::var(XDG_CACHE_HOME) - .ok() - .filter(|v| !v.is_empty()) - .map(PathBuf::from) - .or_else(|| dirs_next::home_dir().map(|home| home.join(".cache"))) - .unwrap_or_else(|| panic!("Could not find home dir or {XDG_CACHE_HOME}.")); - cache_path.join("pants") + let cache_path = std::env::var(XDG_CACHE_HOME) + .ok() + .filter(|v| !v.is_empty()) + .map(PathBuf::from) + .or_else(|| dirs_next::home_dir().map(|home| home.join(".cache"))) + .unwrap_or_else(|| panic!("Could not find home dir or {XDG_CACHE_HOME}.")); + cache_path.join("pants") } /// Simplified filesystem Permissions. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum Permissions { - ReadOnly, - Writable, + ReadOnly, + Writable, } #[derive(Clone, Debug, DeepSizeOf, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize)] pub struct RelativePath(PathBuf); impl RelativePath { - pub fn empty() -> RelativePath { - RelativePath(PathBuf::new()) - } - - pub fn new>(path: P) -> Result { - let mut relative_path = PathBuf::new(); - let candidate = path.as_ref(); - for component in candidate.components() { - match component { - Component::Prefix(_) => { - return Err(format!("Windows paths are not allowed: {candidate:?}")) - } - Component::RootDir => return Err(format!("Absolute paths are not allowed: {candidate:?}")), - Component::CurDir => continue, - Component::ParentDir => { - if !relative_path.pop() { - return Err(format!( - "Relative paths that escape the root are not allowed: {candidate:?}" - )); - } + pub fn empty() -> RelativePath { + RelativePath(PathBuf::new()) + } + + pub fn new>(path: P) -> Result { + let mut relative_path = PathBuf::new(); + let candidate = path.as_ref(); + for component in candidate.components() { + match component { + Component::Prefix(_) => { + return Err(format!("Windows paths are not allowed: {candidate:?}")) + } + Component::RootDir => { + return Err(format!("Absolute paths are not allowed: {candidate:?}")) + } + Component::CurDir => continue, + Component::ParentDir => { + if !relative_path.pop() { + return Err(format!( + "Relative paths that escape the root are not allowed: {candidate:?}" + )); + } + } + Component::Normal(path) => relative_path.push(path), + } } - Component::Normal(path) => relative_path.push(path), - } + Ok(RelativePath(relative_path)) } - Ok(RelativePath(relative_path)) - } - pub fn to_str(&self) -> Option<&str> { - self.0.to_str() - } + pub fn to_str(&self) -> Option<&str> { + self.0.to_str() + } - pub fn join(&self, other: Self) -> RelativePath { - RelativePath(self.0.join(other)) - } + pub fn join(&self, other: Self) -> RelativePath { + RelativePath(self.0.join(other)) + } } impl Deref for RelativePath { - type Target = PathBuf; + type Target = PathBuf; - fn deref(&self) -> 
&PathBuf { - &self.0 - } + fn deref(&self) -> &PathBuf { + &self.0 + } } impl AsRef for RelativePath { - fn as_ref(&self) -> &Path { - self.0.as_path() - } + fn as_ref(&self) -> &Path { + self.0.as_path() + } } impl From for PathBuf { - fn from(p: RelativePath) -> Self { - p.0 - } + fn from(p: RelativePath) -> Self { + p.0 + } } #[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] pub enum Stat { - Link(Link), - Dir(Dir), - File(File), + Link(Link), + Dir(Dir), + File(File), } impl Stat { - pub fn path(&self) -> &Path { - match self { - &Stat::Dir(Dir(ref p)) => p.as_path(), - &Stat::File(File { path: ref p, .. }) => p.as_path(), - &Stat::Link(Link { path: ref p, .. }) => p.as_path(), - } - } - - pub fn dir(path: PathBuf) -> Stat { - Stat::Dir(Dir(path)) - } - - pub fn file(path: PathBuf, is_executable: bool) -> Stat { - Stat::File(File { - path, - is_executable, - }) - } - - pub fn link(path: PathBuf, target: PathBuf) -> Stat { - Stat::Link(Link { path, target }) - } - - pub fn within(&self, directory: &Path) -> Stat { - match self { - Stat::Dir(Dir(p)) => Stat::Dir(Dir(directory.join(p))), - Stat::File(File { - path, - is_executable, - }) => Stat::File(File { - path: directory.join(path), - is_executable: *is_executable, - }), - Stat::Link(Link { path, target }) => Stat::Link(Link { - path: directory.join(path), - target: target.to_owned(), - }), - } - } + pub fn path(&self) -> &Path { + match self { + &Stat::Dir(Dir(ref p)) => p.as_path(), + &Stat::File(File { path: ref p, .. }) => p.as_path(), + &Stat::Link(Link { path: ref p, .. }) => p.as_path(), + } + } + + pub fn dir(path: PathBuf) -> Stat { + Stat::Dir(Dir(path)) + } + + pub fn file(path: PathBuf, is_executable: bool) -> Stat { + Stat::File(File { + path, + is_executable, + }) + } + + pub fn link(path: PathBuf, target: PathBuf) -> Stat { + Stat::Link(Link { path, target }) + } + + pub fn within(&self, directory: &Path) -> Stat { + match self { + Stat::Dir(Dir(p)) => Stat::Dir(Dir(directory.join(p))), + Stat::File(File { + path, + is_executable, + }) => Stat::File(File { + path: directory.join(path), + is_executable: *is_executable, + }), + Stat::Link(Link { path, target }) => Stat::Link(Link { + path: directory.join(path), + target: target.to_owned(), + }), + } + } } #[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] pub struct Link { - pub path: PathBuf, - pub target: PathBuf, + pub path: PathBuf, + pub target: PathBuf, } #[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] @@ -208,52 +210,52 @@ pub struct Dir(pub PathBuf); #[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] pub struct File { - pub path: PathBuf, - pub is_executable: bool, + pub path: PathBuf, + pub is_executable: bool, } #[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] pub enum PathStat { - Dir { - // The symbolic name of some filesystem Path, which is context specific. - path: PathBuf, - // The canonical Stat that underlies the Path. - stat: Dir, - }, - File { - // The symbolic name of some filesystem Path, which is context specific. - path: PathBuf, - // The canonical Stat that underlies the Path. - stat: File, - }, - Link { - // The symbolic name of some filesystem Path, which is context specific. - path: PathBuf, - // The canonical Stat that underlies the Path. - stat: Link, - }, + Dir { + // The symbolic name of some filesystem Path, which is context specific. + path: PathBuf, + // The canonical Stat that underlies the Path. + stat: Dir, + }, + File { + // The symbolic name of some filesystem Path, which is context specific. 
+ path: PathBuf, + // The canonical Stat that underlies the Path. + stat: File, + }, + Link { + // The symbolic name of some filesystem Path, which is context specific. + path: PathBuf, + // The canonical Stat that underlies the Path. + stat: Link, + }, } impl PathStat { - pub fn dir(path: PathBuf, stat: Dir) -> PathStat { - PathStat::Dir { path, stat } - } + pub fn dir(path: PathBuf, stat: Dir) -> PathStat { + PathStat::Dir { path, stat } + } - pub fn file(path: PathBuf, stat: File) -> PathStat { - PathStat::File { path, stat } - } + pub fn file(path: PathBuf, stat: File) -> PathStat { + PathStat::File { path, stat } + } - pub fn link(path: PathBuf, stat: Link) -> PathStat { - PathStat::Link { path, stat } - } + pub fn link(path: PathBuf, stat: Link) -> PathStat { + PathStat::Link { path, stat } + } - pub fn path(&self) -> &Path { - match self { - PathStat::Dir { path, .. } => path.as_path(), - PathStat::File { path, .. } => path.as_path(), - PathStat::Link { path, .. } => path.as_path(), + pub fn path(&self) -> &Path { + match self { + PathStat::Dir { path, .. } => path.as_path(), + PathStat::File { path, .. } => path.as_path(), + PathStat::Link { path, .. } => path.as_path(), + } } - } } #[derive(Debug, DeepSizeOf, Eq, PartialEq)] @@ -261,16 +263,16 @@ pub struct DirectoryListing(pub Vec); #[derive(Debug, DeepSizeOf, Clone, Eq, Hash, PartialEq)] pub enum StrictGlobMatching { - // NB: the Error and Warn variants store a description of the origin of the PathGlob - // request so that we can make the error message more helpful to users when globs fail to match. - Error(String), - Warn(String), - Ignore, + // NB: the Error and Warn variants store a description of the origin of the PathGlob + // request so that we can make the error message more helpful to users when globs fail to match. 
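(For the `create` constructor that follows in this hunk, the behavior/origin pairing can be summarized with a hypothetical sketch; not part of the patch, and the origin strings are made up.)

    fn sketch() -> Result<(), String> {
        // "ignore" takes no origin; "warn"/"error" require one so that unmatched-glob
        // messages can say where the globs came from. Unknown behaviors are rejected.
        let _ = StrictGlobMatching::create("ignore", None)?;
        let _ = StrictGlobMatching::create("warn", Some("the `sources` field".to_string()))?;
        let _ = StrictGlobMatching::create("error", Some("the `--paths` flag".to_string()))?;
        assert!(StrictGlobMatching::create("warn", None).is_err());
        assert!(StrictGlobMatching::create("bogus", None).is_err());
        Ok(())
    }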
+ Error(String), + Warn(String), + Ignore, } impl StrictGlobMatching { - pub fn create(behavior: &str, description_of_origin: Option) -> Result { - match (behavior, description_of_origin) { + pub fn create(behavior: &str, description_of_origin: Option) -> Result { + match (behavior, description_of_origin) { ("ignore", None) => Ok(StrictGlobMatching::Ignore), ("warn", Some(origin)) => Ok(StrictGlobMatching::Warn(origin)), ("error", Some(origin)) => Ok(StrictGlobMatching::Error(origin)), @@ -285,66 +287,66 @@ impl StrictGlobMatching { "Unrecognized strict glob matching behavior: {behavior}.", )), } - } + } - pub fn should_check_glob_matches(&self) -> bool { - !matches!(self, &StrictGlobMatching::Ignore) - } + pub fn should_check_glob_matches(&self) -> bool { + !matches!(self, &StrictGlobMatching::Ignore) + } - pub fn should_throw_on_error(&self) -> bool { - matches!(self, &StrictGlobMatching::Error(_)) - } + pub fn should_throw_on_error(&self) -> bool { + matches!(self, &StrictGlobMatching::Error(_)) + } } #[derive(Debug, DeepSizeOf, Clone, Eq, Hash, PartialEq)] pub enum GlobExpansionConjunction { - AllMatch, - AnyMatch, + AllMatch, + AnyMatch, } impl GlobExpansionConjunction { - pub fn create(spec: &str) -> Result { - match spec { - "all_match" => Ok(GlobExpansionConjunction::AllMatch), - "any_match" => Ok(GlobExpansionConjunction::AnyMatch), - _ => Err(format!("Unrecognized conjunction: {spec}.")), + pub fn create(spec: &str) -> Result { + match spec { + "all_match" => Ok(GlobExpansionConjunction::AllMatch), + "any_match" => Ok(GlobExpansionConjunction::AnyMatch), + _ => Err(format!("Unrecognized conjunction: {spec}.")), + } } - } } #[derive(Debug, DeepSizeOf, Clone, Eq, PartialEq, Hash)] pub struct PathGlobs { - globs: Vec, - strict_match_behavior: StrictGlobMatching, - conjunction: GlobExpansionConjunction, -} - -impl PathGlobs { - pub fn new( globs: Vec, strict_match_behavior: StrictGlobMatching, conjunction: GlobExpansionConjunction, - ) -> PathGlobs { - PathGlobs { - globs, - strict_match_behavior, - conjunction, - } - } - - pub fn parse(self) -> Result { - glob_matching::PreparedPathGlobs::create( - self.globs, - self.strict_match_behavior, - self.conjunction, - ) - } +} + +impl PathGlobs { + pub fn new( + globs: Vec, + strict_match_behavior: StrictGlobMatching, + conjunction: GlobExpansionConjunction, + ) -> PathGlobs { + PathGlobs { + globs, + strict_match_behavior, + conjunction, + } + } + + pub fn parse(self) -> Result { + glob_matching::PreparedPathGlobs::create( + self.globs, + self.strict_match_behavior, + self.conjunction, + ) + } } impl fmt::Display for PathGlobs { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.globs.join(", ")) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.globs.join(", ")) + } } /// @@ -357,323 +359,324 @@ impl fmt::Display for PathGlobs { /// #[derive(Clone)] pub struct PosixFS { - root: Dir, - ignore: Arc, - executor: task_executor::Executor, - symlink_behavior: SymlinkBehavior, + root: Dir, + ignore: Arc, + executor: task_executor::Executor, + symlink_behavior: SymlinkBehavior, } impl PosixFS { - pub fn new>( - root: P, - ignorer: Arc, - executor: task_executor::Executor, - ) -> Result { - Self::new_with_symlink_behavior(root, ignorer, executor, SymlinkBehavior::Aware) - } + pub fn new>( + root: P, + ignorer: Arc, + executor: task_executor::Executor, + ) -> Result { + Self::new_with_symlink_behavior(root, ignorer, executor, SymlinkBehavior::Aware) + } - pub fn 
new_with_symlink_behavior>( - root: P, - ignorer: Arc, - executor: task_executor::Executor, - symlink_behavior: SymlinkBehavior, - ) -> Result { - let root: &Path = root.as_ref(); - let canonical_root = root - .canonicalize() - .and_then(|canonical| { - canonical.metadata().and_then(|metadata| { - if metadata.is_dir() { - Ok(Dir(canonical)) - } else { - Err(io::Error::new( - io::ErrorKind::InvalidInput, - "Not a directory.", - )) - } + pub fn new_with_symlink_behavior>( + root: P, + ignorer: Arc, + executor: task_executor::Executor, + symlink_behavior: SymlinkBehavior, + ) -> Result { + let root: &Path = root.as_ref(); + let canonical_root = root + .canonicalize() + .and_then(|canonical| { + canonical.metadata().and_then(|metadata| { + if metadata.is_dir() { + Ok(Dir(canonical)) + } else { + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Not a directory.", + )) + } + }) + }) + .map_err(|e| format!("Could not canonicalize root {root:?}: {e:?}"))?; + + Ok(PosixFS { + root: canonical_root, + ignore: ignorer, + executor: executor, + symlink_behavior: symlink_behavior, }) - }) - .map_err(|e| format!("Could not canonicalize root {root:?}: {e:?}"))?; - - Ok(PosixFS { - root: canonical_root, - ignore: ignorer, - executor: executor, - symlink_behavior: symlink_behavior, - }) - } - - pub async fn scandir(&self, dir_relative_to_root: Dir) -> Result { - let vfs = self.clone(); - self - .executor - .spawn_blocking( - move || vfs.scandir_sync(&dir_relative_to_root), - |e| { - Err(io::Error::new( - io::ErrorKind::Other, - format!("Synchronous scandir failed: {e}"), - )) - }, - ) - .await - } - - fn scandir_sync(&self, dir_relative_to_root: &Dir) -> Result { - let dir_abs = self.root.0.join(&dir_relative_to_root.0); - let mut stats: Vec = dir_abs - .read_dir()? - .map(|readdir| { - let dir_entry = readdir?; - let (file_type, compute_metadata): (_, Box Result<_, _>>) = - match self.symlink_behavior { - SymlinkBehavior::Aware => { - // Use the dir_entry metadata, which is symlink aware. - (dir_entry.file_type()?, Box::new(|| dir_entry.metadata())) - } - SymlinkBehavior::Oblivious => { - // Use an independent stat call to get metadata, which is symlink oblivious. - let metadata = std::fs::metadata(dir_abs.join(dir_entry.file_name()))?; - (metadata.file_type(), Box::new(|| Ok(metadata))) - } - }; - PosixFS::stat_internal( - &dir_abs.join(dir_entry.file_name()), - file_type, - compute_metadata, - ) - }) - .filter_map(|s| match s { - Ok(Some(s)) - if !self.ignore.is_ignored_path( - &dir_relative_to_root.0.join(s.path()), - matches!(s, Stat::Dir(_)), - ) => - { - // It would be nice to be able to ignore paths before stat'ing them, but in order to apply - // git-style ignore patterns, we need to know whether a path represents a directory. 
- Some(Ok(s)) - } - Ok(_) => None, - Err(e) => Some(Err(e)), - }) - .collect::, io::Error>>() - .map_err(|e| { - io::Error::new( - e.kind(), - format!("Failed to scan directory {dir_abs:?}: {e}"), - ) - })?; - stats.sort_by(|s1, s2| s1.path().cmp(s2.path())); - Ok(DirectoryListing(stats)) - } - - pub fn is_ignored(&self, stat: &Stat) -> bool { - self.ignore.is_ignored(stat) - } - - pub fn file_path(&self, file: &File) -> PathBuf { - self.root.0.join(&file.path) - } - - pub async fn read_link(&self, link: &Link) -> Result { - let link_parent = link.path.parent().map(Path::to_owned); - let link_abs = self.root.0.join(link.path.as_path()); - tokio::fs::read_link(&link_abs) - .await - .and_then(|path_buf| { - if path_buf.is_absolute() { - Err(io::Error::new( - io::ErrorKind::InvalidData, - format!("Absolute symlink: {path_buf:?}"), - )) - } else { - link_parent - .map(|parent| parent.join(&path_buf)) - .ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Symlink without a parent?: {path_buf:?}"), - ) + } + + pub async fn scandir(&self, dir_relative_to_root: Dir) -> Result { + let vfs = self.clone(); + self.executor + .spawn_blocking( + move || vfs.scandir_sync(&dir_relative_to_root), + |e| { + Err(io::Error::new( + io::ErrorKind::Other, + format!("Synchronous scandir failed: {e}"), + )) + }, + ) + .await + } + + fn scandir_sync(&self, dir_relative_to_root: &Dir) -> Result { + let dir_abs = self.root.0.join(&dir_relative_to_root.0); + let mut stats: Vec = dir_abs + .read_dir()? + .map(|readdir| { + let dir_entry = readdir?; + let (file_type, compute_metadata): (_, Box Result<_, _>>) = + match self.symlink_behavior { + SymlinkBehavior::Aware => { + // Use the dir_entry metadata, which is symlink aware. + (dir_entry.file_type()?, Box::new(|| dir_entry.metadata())) + } + SymlinkBehavior::Oblivious => { + // Use an independent stat call to get metadata, which is symlink oblivious. + let metadata = std::fs::metadata(dir_abs.join(dir_entry.file_name()))?; + (metadata.file_type(), Box::new(|| Ok(metadata))) + } + }; + PosixFS::stat_internal( + &dir_abs.join(dir_entry.file_name()), + file_type, + compute_metadata, + ) }) - } - }) - .map_err(|e| io::Error::new(e.kind(), format!("Failed to read link {link_abs:?}: {e}"))) - } - - /// - /// Makes a Stat for path_to_stat relative to its containing directory. - /// - /// This method takes both a `FileType` and a getter for `Metadata` because on Unixes, - /// directory walks cheaply return the `FileType` without extra syscalls, but other - /// metadata requires additional syscall(s) to compute. We can avoid those calls for - /// Dirs and Links. - /// - fn stat_internal( - path_to_stat: &Path, - file_type: std::fs::FileType, - compute_metadata: F, - ) -> Result, io::Error> - where - F: FnOnce() -> Result, - { - let Some(file_name) = path_to_stat.file_name() else { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "Argument path_to_stat to PosixFS::stat_internal must have a file name.", - )); - }; - if cfg!(debug_assertions) && !path_to_stat.is_absolute() { - return Err(io::Error::new( + .filter_map(|s| match s { + Ok(Some(s)) + if !self.ignore.is_ignored_path( + &dir_relative_to_root.0.join(s.path()), + matches!(s, Stat::Dir(_)), + ) => + { + // It would be nice to be able to ignore paths before stat'ing them, but in order to apply + // git-style ignore patterns, we need to know whether a path represents a directory. 
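(That constraint is also why `is_ignored_path` takes an `is_dir` flag; a small illustration under standard gitignore semantics, a hypothetical sketch that is not part of the patch.)

    fn sketch() -> Result<(), String> {
        // A gitignore pattern ending in "/" only applies to directories, so the same
        // relative path can be ignored as a Dir but kept as a File.
        let excludes = GitignoreStyleExcludes::create(vec!["build/".to_string()])?;
        let p = std::path::Path::new("build");
        let _ignored_if_dir = excludes.is_ignored_path(p, true); // "build/" applies
        let _ignored_if_file = excludes.is_ignored_path(p, false); // dir-only pattern does not
        Ok(())
    }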
+ Some(Ok(s)) + } + Ok(_) => None, + Err(e) => Some(Err(e)), + }) + .collect::, io::Error>>() + .map_err(|e| { + io::Error::new( + e.kind(), + format!("Failed to scan directory {dir_abs:?}: {e}"), + ) + })?; + stats.sort_by(|s1, s2| s1.path().cmp(s2.path())); + Ok(DirectoryListing(stats)) + } + + pub fn is_ignored(&self, stat: &Stat) -> bool { + self.ignore.is_ignored(stat) + } + + pub fn file_path(&self, file: &File) -> PathBuf { + self.root.0.join(&file.path) + } + + pub async fn read_link(&self, link: &Link) -> Result { + let link_parent = link.path.parent().map(Path::to_owned); + let link_abs = self.root.0.join(link.path.as_path()); + tokio::fs::read_link(&link_abs) + .await + .and_then(|path_buf| { + if path_buf.is_absolute() { + Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Absolute symlink: {path_buf:?}"), + )) + } else { + link_parent + .map(|parent| parent.join(&path_buf)) + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Symlink without a parent?: {path_buf:?}"), + ) + }) + } + }) + .map_err(|e| io::Error::new(e.kind(), format!("Failed to read link {link_abs:?}: {e}"))) + } + + /// + /// Makes a Stat for path_to_stat relative to its containing directory. + /// + /// This method takes both a `FileType` and a getter for `Metadata` because on Unixes, + /// directory walks cheaply return the `FileType` without extra syscalls, but other + /// metadata requires additional syscall(s) to compute. We can avoid those calls for + /// Dirs and Links. + /// + fn stat_internal( + path_to_stat: &Path, + file_type: std::fs::FileType, + compute_metadata: F, + ) -> Result, io::Error> + where + F: FnOnce() -> Result, + { + let Some(file_name) = path_to_stat.file_name() else { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Argument path_to_stat to PosixFS::stat_internal must have a file name.", + )); + }; + if cfg!(debug_assertions) && !path_to_stat.is_absolute() { + return Err(io::Error::new( io::ErrorKind::InvalidInput, format!( "Argument path_to_stat to PosixFS::stat_internal must be absolute path, got {path_to_stat:?}" ), )); + } + let path = file_name.to_owned().into(); + if file_type.is_symlink() { + Ok(Some(Stat::Link(Link { + path, + target: std::fs::read_link(path_to_stat)?, + }))) + } else if file_type.is_file() { + let is_executable = compute_metadata()?.permissions().mode() & 0o100 == 0o100; + Ok(Some(Stat::File(File { + path, + is_executable: is_executable, + }))) + } else if file_type.is_dir() { + Ok(Some(Stat::Dir(Dir(path)))) + } else { + Ok(None) + } } - let path = file_name.to_owned().into(); - if file_type.is_symlink() { - Ok(Some(Stat::Link(Link { - path, - target: std::fs::read_link(path_to_stat)?, - }))) - } else if file_type.is_file() { - let is_executable = compute_metadata()?.permissions().mode() & 0o100 == 0o100; - Ok(Some(Stat::File(File { - path, - is_executable: is_executable, - }))) - } else if file_type.is_dir() { - Ok(Some(Stat::Dir(Dir(path)))) - } else { - Ok(None) - } - } - - /// - /// Returns a Stat relative to its containing directory. - /// - /// NB: This method is synchronous because it is used to stat all files in a directory as one - /// blocking operation as part of `scandir_sync` (as recommended by the `tokio` documentation, to - /// avoid many small spawned tasks). - /// - pub fn stat_sync(&self, relative_path: &Path) -> Result, io::Error> { - if cfg!(debug_assertions) && relative_path.is_absolute() { - return Err(io::Error::new( + + /// + /// Returns a Stat relative to its containing directory. 
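(The executable check inside `stat_internal` above comes down to a single permission-bit test; a standalone sketch, illustrative only and not part of the patch.)

    // Only the owner-execute bit is consulted when marking a File as executable.
    fn is_executable(mode: u32) -> bool {
        mode & 0o100 == 0o100
    }

    // is_executable(0o755) == true
    // is_executable(0o644) == false
    // is_executable(0o611) == false  (cf. the `is_executable_false` test further below)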
+ /// + /// NB: This method is synchronous because it is used to stat all files in a directory as one + /// blocking operation as part of `scandir_sync` (as recommended by the `tokio` documentation, to + /// avoid many small spawned tasks). + /// + pub fn stat_sync(&self, relative_path: &Path) -> Result, io::Error> { + if cfg!(debug_assertions) && relative_path.is_absolute() { + return Err(io::Error::new( io::ErrorKind::InvalidInput, format!( "Argument relative_path to PosixFS::stat_sync must be relative path, got {relative_path:?}" ), )); + } + let abs_path = self.root.0.join(relative_path); + let metadata = match self.symlink_behavior { + SymlinkBehavior::Aware => fs::symlink_metadata(&abs_path), + SymlinkBehavior::Oblivious => fs::metadata(&abs_path), + }; + metadata + .and_then(|metadata| { + PosixFS::stat_internal(&abs_path, metadata.file_type(), || Ok(metadata)) + }) + .or_else(|err| match err.kind() { + io::ErrorKind::NotFound => Ok(None), + _ => Err(err), + }) } - let abs_path = self.root.0.join(relative_path); - let metadata = match self.symlink_behavior { - SymlinkBehavior::Aware => fs::symlink_metadata(&abs_path), - SymlinkBehavior::Oblivious => fs::metadata(&abs_path), - }; - metadata - .and_then(|metadata| PosixFS::stat_internal(&abs_path, metadata.file_type(), || Ok(metadata))) - .or_else(|err| match err.kind() { - io::ErrorKind::NotFound => Ok(None), - _ => Err(err), - }) - } } #[async_trait] impl Vfs for Arc { - async fn read_link(&self, link: &Link) -> Result { - PosixFS::read_link(self, link).await - } + async fn read_link(&self, link: &Link) -> Result { + PosixFS::read_link(self, link).await + } - async fn scandir(&self, dir: Dir) -> Result, io::Error> { - Ok(Arc::new(PosixFS::scandir(self, dir).await?)) - } + async fn scandir(&self, dir: Dir) -> Result, io::Error> { + Ok(Arc::new(PosixFS::scandir(self, dir).await?)) + } - fn is_ignored(&self, stat: &Stat) -> bool { - PosixFS::is_ignored(self, stat) - } + fn is_ignored(&self, stat: &Stat) -> bool { + PosixFS::is_ignored(self, stat) + } - fn mk_error(msg: &str) -> io::Error { - io::Error::new(io::ErrorKind::Other, msg) - } + fn mk_error(msg: &str) -> io::Error { + io::Error::new(io::ErrorKind::Other, msg) + } } #[async_trait] impl Vfs for DigestTrie { - async fn read_link(&self, link: &Link) -> Result { - let entry = self - .entry(&link.path)? - .ok_or_else(|| format!("{link:?} does not exist within this Snapshot."))?; - let target = match entry { - directory::Entry::File(_) => { - return Err(format!( - "Path `{}` was a file rather than a symlink.", - link.path.display() - )) - } - directory::Entry::Symlink(s) => s.target(), - directory::Entry::Directory(_) => { - return Err(format!( - "Path `{}` was a directory rather than a symlink.", - link.path.display() - )) - } - }; - Ok(target.to_path_buf()) - } - - async fn scandir(&self, dir: Dir) -> Result, String> { - // TODO(#14890): Change interface to take a reference to an Entry. That would avoid both the - // need to handle this root case, and the need to recurse in `entry` down into children. - let entries = if dir.0.components().next().is_none() { - self.entries() - } else { - let entry = self - .entry(&dir.0)? 
- .ok_or_else(|| format!("{dir:?} does not exist within this Snapshot."))?; - match entry { - directory::Entry::File(_) => { - return Err(format!( - "Path `{}` was a file rather than a directory.", - dir.0.display() - )) - } - directory::Entry::Symlink(_) => { - return Err(format!( - "Path `{}` was a symlink rather than a directory.", - dir.0.display() - )) - } - directory::Entry::Directory(d) => d.tree().entries(), - } - }; - - Ok(Arc::new(DirectoryListing( - entries - .iter() - .map(|child| match child { - directory::Entry::File(f) => Stat::File(File { - path: f.name().as_ref().into(), - is_executable: f.is_executable(), - }), - directory::Entry::Symlink(s) => Stat::Link(Link { - path: s.name().as_ref().into(), - target: s.target().to_path_buf(), - }), - directory::Entry::Directory(d) => Stat::Dir(Dir(d.name().as_ref().into())), - }) - .collect(), - ))) - } + async fn read_link(&self, link: &Link) -> Result { + let entry = self + .entry(&link.path)? + .ok_or_else(|| format!("{link:?} does not exist within this Snapshot."))?; + let target = match entry { + directory::Entry::File(_) => { + return Err(format!( + "Path `{}` was a file rather than a symlink.", + link.path.display() + )) + } + directory::Entry::Symlink(s) => s.target(), + directory::Entry::Directory(_) => { + return Err(format!( + "Path `{}` was a directory rather than a symlink.", + link.path.display() + )) + } + }; + Ok(target.to_path_buf()) + } - fn is_ignored(&self, _stat: &Stat) -> bool { - false - } + async fn scandir(&self, dir: Dir) -> Result, String> { + // TODO(#14890): Change interface to take a reference to an Entry. That would avoid both the + // need to handle this root case, and the need to recurse in `entry` down into children. + let entries = if dir.0.components().next().is_none() { + self.entries() + } else { + let entry = self + .entry(&dir.0)? 
+ .ok_or_else(|| format!("{dir:?} does not exist within this Snapshot."))?; + match entry { + directory::Entry::File(_) => { + return Err(format!( + "Path `{}` was a file rather than a directory.", + dir.0.display() + )) + } + directory::Entry::Symlink(_) => { + return Err(format!( + "Path `{}` was a symlink rather than a directory.", + dir.0.display() + )) + } + directory::Entry::Directory(d) => d.tree().entries(), + } + }; + + Ok(Arc::new(DirectoryListing( + entries + .iter() + .map(|child| match child { + directory::Entry::File(f) => Stat::File(File { + path: f.name().as_ref().into(), + is_executable: f.is_executable(), + }), + directory::Entry::Symlink(s) => Stat::Link(Link { + path: s.name().as_ref().into(), + target: s.target().to_path_buf(), + }), + directory::Entry::Directory(d) => Stat::Dir(Dir(d.name().as_ref().into())), + }) + .collect(), + ))) + } + + fn is_ignored(&self, _stat: &Stat) -> bool { + false + } - fn mk_error(msg: &str) -> String { - msg.to_owned() - } + fn mk_error(msg: &str) -> String { + msg.to_owned() + } } /// @@ -681,65 +684,65 @@ impl Vfs for DigestTrie { /// #[async_trait] pub trait Vfs: Clone + Send + Sync + 'static { - async fn read_link(&self, link: &Link) -> Result; - async fn scandir(&self, dir: Dir) -> Result, E>; - fn is_ignored(&self, stat: &Stat) -> bool; - fn mk_error(msg: &str) -> E; + async fn read_link(&self, link: &Link) -> Result; + async fn scandir(&self, dir: Dir) -> Result, E>; + fn is_ignored(&self, stat: &Stat) -> bool; + fn mk_error(msg: &str) -> E; } pub struct FileContent { - pub path: PathBuf, - pub content: Bytes, - pub is_executable: bool, + pub path: PathBuf, + pub content: Bytes, + pub is_executable: bool, } impl fmt::Debug for FileContent { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let len = min(self.content.len(), 5); - let describer = if len < self.content.len() { - "starting " - } else { - "" - }; - write!( - f, - "FileContent(path={:?}, content={} bytes {}{:?})", - self.path, - self.content.len(), - describer, - &self.content[..len] - ) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let len = min(self.content.len(), 5); + let describer = if len < self.content.len() { + "starting " + } else { + "" + }; + write!( + f, + "FileContent(path={:?}, content={} bytes {}{:?})", + self.path, + self.content.len(), + describer, + &self.content[..len] + ) + } } #[derive(Debug, Eq, PartialEq)] pub struct FileEntry { - pub path: PathBuf, - pub digest: hashing::Digest, - pub is_executable: bool, + pub path: PathBuf, + pub digest: hashing::Digest, + pub is_executable: bool, } #[derive(Debug, Eq, PartialEq)] pub struct SymlinkEntry { - pub path: PathBuf, - pub target: PathBuf, + pub path: PathBuf, + pub target: PathBuf, } #[derive(Debug, Eq, PartialEq)] pub enum DigestEntry { - File(FileEntry), - Symlink(SymlinkEntry), - EmptyDirectory(PathBuf), + File(FileEntry), + Symlink(SymlinkEntry), + EmptyDirectory(PathBuf), } impl DigestEntry { - pub fn path(&self) -> &Path { - match self { - DigestEntry::File(file_entry) => &file_entry.path, - DigestEntry::Symlink(symlink_entry) => &symlink_entry.path, - DigestEntry::EmptyDirectory(path) => path, + pub fn path(&self) -> &Path { + match self { + DigestEntry::File(file_entry) => &file_entry.path, + DigestEntry::Symlink(symlink_entry) => &symlink_entry.path, + DigestEntry::EmptyDirectory(path) => path, + } } - } } /// @@ -748,34 +751,34 @@ impl DigestEntry { /// as a warning to be rendered rather than as something fatal. 
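(Given the doc comment above, a hypothetical call site for `increase_limits`, illustrative only and not part of the patch, would surface the Err as a warning rather than aborting.)

    match increase_limits() {
        // e.g. "File handle limit is: 10000"
        Ok(msg) => log::info!("{msg}"),
        // e.g. the capped-limit message; rendered as a warning, startup continues.
        Err(msg) => log::warn!("{msg}"),
    }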
/// pub fn increase_limits() -> Result { - loop { - let (cur, max) = rlimit::Resource::NOFILE - .get() - .map_err(|e| format!("Could not validate file handle limits: {e}"))?; - // If the limit is less than our target. - if cur < TARGET_NOFILE_LIMIT { - let err_suffix = format!( + loop { + let (cur, max) = rlimit::Resource::NOFILE + .get() + .map_err(|e| format!("Could not validate file handle limits: {e}"))?; + // If the limit is less than our target. + if cur < TARGET_NOFILE_LIMIT { + let err_suffix = format!( "To avoid 'too many open file handle' errors, we recommend a limit of at least {TARGET_NOFILE_LIMIT}: \ please see https://www.pantsbuild.org/docs/troubleshooting#too-many-open-files-error \ for more information." ); - // If we might be able to increase the soft limit, try to. - if cur < max { - let target_soft_limit = std::cmp::min(max, TARGET_NOFILE_LIMIT); - rlimit::Resource::NOFILE + // If we might be able to increase the soft limit, try to. + if cur < max { + let target_soft_limit = std::cmp::min(max, TARGET_NOFILE_LIMIT); + rlimit::Resource::NOFILE .set(target_soft_limit, max) .map_err(|e| { format!("Could not raise soft file handle limit above {cur}: `{e}`. {err_suffix}") })?; - } else { - return Err(format!( - "File handle limit is capped to: {cur}. {err_suffix}" - )); - } - } else { - return Ok(format!("File handle limit is: {cur}")); - }; - } + } else { + return Err(format!( + "File handle limit is capped to: {cur}. {err_suffix}" + )); + } + } else { + return Ok(format!("File handle limit is: {cur}")); + }; + } } #[cfg(test)] diff --git a/src/rust/engine/fs/src/posixfs_tests.rs b/src/rust/engine/fs/src/posixfs_tests.rs index 1ed96eef9da..e1fc3736eed 100644 --- a/src/rust/engine/fs/src/posixfs_tests.rs +++ b/src/rust/engine/fs/src/posixfs_tests.rs @@ -7,386 +7,385 @@ use hashing::EMPTY_DIGEST; use testutil::make_file; use crate::{ - DigestTrie, Dir, DirectoryListing, File, GitignoreStyleExcludes, GlobExpansionConjunction, - GlobMatching, Link, PathGlobs, PathStat, PosixFS, Stat, StrictGlobMatching, SymlinkBehavior, - TypedPath, + DigestTrie, Dir, DirectoryListing, File, GitignoreStyleExcludes, GlobExpansionConjunction, + GlobMatching, Link, PathGlobs, PathStat, PosixFS, Stat, StrictGlobMatching, SymlinkBehavior, + TypedPath, }; #[tokio::test] async fn is_executable_false() { - let dir = tempfile::TempDir::new().unwrap(); - make_file(&dir.path().join("marmosets"), &[], 0o611); - assert_only_file_is_executable(dir.path(), false).await; + let dir = tempfile::TempDir::new().unwrap(); + make_file(&dir.path().join("marmosets"), &[], 0o611); + assert_only_file_is_executable(dir.path(), false).await; } #[tokio::test] async fn is_executable_true() { - let dir = tempfile::TempDir::new().unwrap(); - make_file(&dir.path().join("photograph_marmosets"), &[], 0o700); - assert_only_file_is_executable(dir.path(), true).await; + let dir = tempfile::TempDir::new().unwrap(); + make_file(&dir.path().join("photograph_marmosets"), &[], 0o700); + assert_only_file_is_executable(dir.path(), true).await; } #[tokio::test] async fn file_path() { - let dir = tempfile::TempDir::new().unwrap(); - let path = PathBuf::from("marmosets"); - let fs = new_posixfs(dir.path()); - let expected_path = std::fs::canonicalize(dir.path()).unwrap().join(&path); - let actual_path = fs.file_path(&File { - path: path, - is_executable: false, - }); - assert_eq!(actual_path, expected_path); + let dir = tempfile::TempDir::new().unwrap(); + let path = PathBuf::from("marmosets"); + let fs = new_posixfs(dir.path()); + let 
expected_path = std::fs::canonicalize(dir.path()).unwrap().join(&path); + let actual_path = fs.file_path(&File { + path: path, + is_executable: false, + }); + assert_eq!(actual_path, expected_path); } #[tokio::test] async fn stat_executable_file() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - let path = PathBuf::from("photograph_marmosets"); - make_file(&dir.path().join(&path), &[], 0o700); - assert_eq!( - posix_fs.stat_sync(&path).unwrap().unwrap(), - super::Stat::File(File { - path: path, - is_executable: true, - }) - ) + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + let path = PathBuf::from("photograph_marmosets"); + make_file(&dir.path().join(&path), &[], 0o700); + assert_eq!( + posix_fs.stat_sync(&path).unwrap().unwrap(), + super::Stat::File(File { + path: path, + is_executable: true, + }) + ) } #[tokio::test] async fn stat_nonexecutable_file() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - let path = PathBuf::from("marmosets"); - make_file(&dir.path().join(&path), &[], 0o600); - assert_eq!( - posix_fs.stat_sync(&path).unwrap().unwrap(), - super::Stat::File(File { - path: path, - is_executable: false, - }) - ) + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + let path = PathBuf::from("marmosets"); + make_file(&dir.path().join(&path), &[], 0o600); + assert_eq!( + posix_fs.stat_sync(&path).unwrap().unwrap(), + super::Stat::File(File { + path: path, + is_executable: false, + }) + ) } #[tokio::test] async fn stat_dir() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - let path = PathBuf::from("enclosure"); - std::fs::create_dir(dir.path().join(&path)).unwrap(); - assert_eq!( - posix_fs.stat_sync(&path).unwrap().unwrap(), - super::Stat::Dir(Dir(path)) - ) + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + let path = PathBuf::from("enclosure"); + std::fs::create_dir(dir.path().join(&path)).unwrap(); + assert_eq!( + posix_fs.stat_sync(&path).unwrap().unwrap(), + super::Stat::Dir(Dir(path)) + ) } #[tokio::test] async fn stat_symlink() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - let path = PathBuf::from("marmosets"); - make_file(&dir.path().join(&path), &[], 0o600); + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + let path = PathBuf::from("marmosets"); + make_file(&dir.path().join(&path), &[], 0o600); - let link_path = PathBuf::from("remarkably_similar_marmoset"); - std::os::unix::fs::symlink(dir.path().join(path.clone()), dir.path().join(&link_path)).unwrap(); - assert_eq!( - posix_fs.stat_sync(&link_path).unwrap().unwrap(), - super::Stat::Link(Link { - path: link_path, - target: dir.path().join(path) - }) - ) + let link_path = PathBuf::from("remarkably_similar_marmoset"); + std::os::unix::fs::symlink(dir.path().join(path.clone()), dir.path().join(&link_path)).unwrap(); + assert_eq!( + posix_fs.stat_sync(&link_path).unwrap().unwrap(), + super::Stat::Link(Link { + path: link_path, + target: dir.path().join(path) + }) + ) } #[tokio::test] async fn stat_symlink_oblivious() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs_symlink_oblivious(dir.path()); - let path = PathBuf::from("marmosets"); - make_file(&dir.path().join(&path), &[], 0o600); + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = 
new_posixfs_symlink_oblivious(dir.path()); + let path = PathBuf::from("marmosets"); + make_file(&dir.path().join(&path), &[], 0o600); - let link_path = PathBuf::from("remarkably_similar_marmoset"); - std::os::unix::fs::symlink(dir.path().join(path), dir.path().join(&link_path)).unwrap(); - // Symlink oblivious stat will give us the destination type. - assert_eq!( - posix_fs.stat_sync(&link_path).unwrap().unwrap(), - super::Stat::File(File { - path: link_path, - is_executable: false, - }) - ) + let link_path = PathBuf::from("remarkably_similar_marmoset"); + std::os::unix::fs::symlink(dir.path().join(path), dir.path().join(&link_path)).unwrap(); + // Symlink oblivious stat will give us the destination type. + assert_eq!( + posix_fs.stat_sync(&link_path).unwrap().unwrap(), + super::Stat::File(File { + path: link_path, + is_executable: false, + }) + ) } #[tokio::test] async fn stat_other() { - assert_eq!( - new_posixfs("/dev").stat_sync(Path::new("null")).unwrap(), - None, - ); + assert_eq!( + new_posixfs("/dev").stat_sync(Path::new("null")).unwrap(), + None, + ); } #[tokio::test] async fn stat_missing() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - assert_eq!(posix_fs.stat_sync(Path::new("no_marmosets")).unwrap(), None,); + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + assert_eq!(posix_fs.stat_sync(Path::new("no_marmosets")).unwrap(), None,); } #[tokio::test] async fn scandir_empty() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - let path = PathBuf::from("empty_enclosure"); - std::fs::create_dir(dir.path().join(&path)).unwrap(); - assert_eq!( - posix_fs.scandir(Dir(path)).await.unwrap(), - DirectoryListing(vec![]) - ); + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + let path = PathBuf::from("empty_enclosure"); + std::fs::create_dir(dir.path().join(&path)).unwrap(); + assert_eq!( + posix_fs.scandir(Dir(path)).await.unwrap(), + DirectoryListing(vec![]) + ); } #[tokio::test] async fn scandir() { - let dir = tempfile::TempDir::new().unwrap(); - let path = PathBuf::from("enclosure"); - std::fs::create_dir(dir.path().join(&path)).unwrap(); + let dir = tempfile::TempDir::new().unwrap(); + let path = PathBuf::from("enclosure"); + std::fs::create_dir(dir.path().join(&path)).unwrap(); - let a_marmoset = PathBuf::from("a_marmoset"); - let feed = PathBuf::from("feed"); - let hammock = PathBuf::from("hammock"); - let remarkably_similar_marmoset = PathBuf::from("remarkably_similar_marmoset"); - let sneaky_marmoset = PathBuf::from("sneaky_marmoset"); + let a_marmoset = PathBuf::from("a_marmoset"); + let feed = PathBuf::from("feed"); + let hammock = PathBuf::from("hammock"); + let remarkably_similar_marmoset = PathBuf::from("remarkably_similar_marmoset"); + let sneaky_marmoset = PathBuf::from("sneaky_marmoset"); - make_file(&dir.path().join(&path).join(&feed), &[], 0o700); - make_file(&dir.path().join(&path).join(&a_marmoset), &[], 0o600); - make_file(&dir.path().join(&path).join(&sneaky_marmoset), &[], 0o600); - std::os::unix::fs::symlink( - dir.path().join(&path).join(&a_marmoset), - dir.path().join(&path).join(&remarkably_similar_marmoset), - ) - .unwrap(); - std::fs::create_dir(dir.path().join(&path).join(&hammock)).unwrap(); - make_file( - &dir - .path() - .join(&path) - .join(&hammock) - .join("napping_marmoset"), - &[], - 0o600, - ); + make_file(&dir.path().join(&path).join(&feed), &[], 0o700); + 
make_file(&dir.path().join(&path).join(&a_marmoset), &[], 0o600); + make_file(&dir.path().join(&path).join(&sneaky_marmoset), &[], 0o600); + std::os::unix::fs::symlink( + dir.path().join(&path).join(&a_marmoset), + dir.path().join(&path).join(&remarkably_similar_marmoset), + ) + .unwrap(); + std::fs::create_dir(dir.path().join(&path).join(&hammock)).unwrap(); + make_file( + &dir.path() + .join(&path) + .join(&hammock) + .join("napping_marmoset"), + &[], + 0o600, + ); - // Symlink aware. - assert_eq!( - new_posixfs(dir.path()) - .scandir(Dir(path.clone())) - .await - .unwrap(), - DirectoryListing(vec![ - Stat::File(File { - path: a_marmoset.clone(), - is_executable: false, - }), - Stat::File(File { - path: feed.clone(), - is_executable: true, - }), - Stat::Dir(Dir(hammock.clone())), - Stat::Link(Link { - path: remarkably_similar_marmoset.clone(), - target: dir.path().join(&path).join(&a_marmoset) - }), - Stat::File(File { - path: sneaky_marmoset.clone(), - is_executable: false, - }), - ]) - ); + // Symlink aware. + assert_eq!( + new_posixfs(dir.path()) + .scandir(Dir(path.clone())) + .await + .unwrap(), + DirectoryListing(vec![ + Stat::File(File { + path: a_marmoset.clone(), + is_executable: false, + }), + Stat::File(File { + path: feed.clone(), + is_executable: true, + }), + Stat::Dir(Dir(hammock.clone())), + Stat::Link(Link { + path: remarkably_similar_marmoset.clone(), + target: dir.path().join(&path).join(&a_marmoset) + }), + Stat::File(File { + path: sneaky_marmoset.clone(), + is_executable: false, + }), + ]) + ); - // Symlink oblivious. - assert_eq!( - new_posixfs_symlink_oblivious(dir.path()) - .scandir(Dir(path)) - .await - .unwrap(), - DirectoryListing(vec![ - Stat::File(File { - path: a_marmoset, - is_executable: false, - }), - Stat::File(File { - path: feed, - is_executable: true, - }), - Stat::Dir(Dir(hammock)), - Stat::File(File { - path: remarkably_similar_marmoset, - is_executable: false, - }), - Stat::File(File { - path: sneaky_marmoset, - is_executable: false, - }), - ]) - ); + // Symlink oblivious. 
+ assert_eq!( + new_posixfs_symlink_oblivious(dir.path()) + .scandir(Dir(path)) + .await + .unwrap(), + DirectoryListing(vec![ + Stat::File(File { + path: a_marmoset, + is_executable: false, + }), + Stat::File(File { + path: feed, + is_executable: true, + }), + Stat::Dir(Dir(hammock)), + Stat::File(File { + path: remarkably_similar_marmoset, + is_executable: false, + }), + Stat::File(File { + path: sneaky_marmoset, + is_executable: false, + }), + ]) + ); } #[tokio::test] async fn scandir_missing() { - let dir = tempfile::TempDir::new().unwrap(); - let posix_fs = new_posixfs(dir.path()); - posix_fs - .scandir(Dir(PathBuf::from("no_marmosets_here"))) - .await - .expect_err("Want error"); + let dir = tempfile::TempDir::new().unwrap(); + let posix_fs = new_posixfs(dir.path()); + posix_fs + .scandir(Dir(PathBuf::from("no_marmosets_here"))) + .await + .expect_err("Want error"); } #[tokio::test] async fn stats_for_paths() { - let dir = tempfile::TempDir::new().unwrap(); - let root_path = dir.path(); + let dir = tempfile::TempDir::new().unwrap(); + let root_path = dir.path(); - // File tree: - // dir - // dir/recursive_symlink -> ../symlink -> executable_file - // dir_symlink -> dir - // executable_file - // regular_file - // symlink -> executable_file - // symlink_to_nothing -> doesnotexist + // File tree: + // dir + // dir/recursive_symlink -> ../symlink -> executable_file + // dir_symlink -> dir + // executable_file + // regular_file + // symlink -> executable_file + // symlink_to_nothing -> doesnotexist - make_file(&root_path.join("executable_file"), &[], 0o700); - make_file(&root_path.join("regular_file"), &[], 0o600); - std::fs::create_dir(root_path.join("dir")).unwrap(); - std::os::unix::fs::symlink("executable_file", root_path.join("symlink")).unwrap(); - std::os::unix::fs::symlink( - "../symlink", - root_path.join("dir").join("recursive_symlink"), - ) - .unwrap(); - std::os::unix::fs::symlink("dir", root_path.join("dir_symlink")).unwrap(); - std::os::unix::fs::symlink("doesnotexist", root_path.join("symlink_to_nothing")).unwrap(); + make_file(&root_path.join("executable_file"), &[], 0o700); + make_file(&root_path.join("regular_file"), &[], 0o600); + std::fs::create_dir(root_path.join("dir")).unwrap(); + std::os::unix::fs::symlink("executable_file", root_path.join("symlink")).unwrap(); + std::os::unix::fs::symlink( + "../symlink", + root_path.join("dir").join("recursive_symlink"), + ) + .unwrap(); + std::os::unix::fs::symlink("dir", root_path.join("dir_symlink")).unwrap(); + std::os::unix::fs::symlink("doesnotexist", root_path.join("symlink_to_nothing")).unwrap(); - let posix_fs = Arc::new(new_posixfs(root_path)); - let path_stats = vec![ - PathBuf::from("executable_file"), - PathBuf::from("regular_file"), - PathBuf::from("dir"), - PathBuf::from("symlink"), - PathBuf::from("dir").join("recursive_symlink"), - PathBuf::from("dir_symlink"), - PathBuf::from("symlink_to_nothing"), - PathBuf::from("doesnotexist"), - ] - .into_iter() - .map(|p| posix_fs.stat_sync(&p).unwrap()) - .collect::>(); - let v: Vec> = vec![ - Some(Stat::File(File { - path: PathBuf::from("executable_file"), - is_executable: true, - })), - Some(Stat::File(File { - path: PathBuf::from("regular_file"), - is_executable: false, - })), - Some(Stat::Dir(Dir(PathBuf::from("dir")))), - Some(Stat::Link(Link { - path: PathBuf::from("symlink"), - target: PathBuf::from("executable_file"), - })), - Some(Stat::Link(Link { - path: PathBuf::from("recursive_symlink"), - target: PathBuf::from("../symlink"), - })), - Some(Stat::Link(Link { - 
path: PathBuf::from("dir_symlink"), - target: PathBuf::from("dir"), - })), - Some(Stat::Link(Link { - path: PathBuf::from("symlink_to_nothing"), - target: PathBuf::from("doesnotexist"), - })), - None, - ]; - assert_eq!(v, path_stats); + let posix_fs = Arc::new(new_posixfs(root_path)); + let path_stats = vec![ + PathBuf::from("executable_file"), + PathBuf::from("regular_file"), + PathBuf::from("dir"), + PathBuf::from("symlink"), + PathBuf::from("dir").join("recursive_symlink"), + PathBuf::from("dir_symlink"), + PathBuf::from("symlink_to_nothing"), + PathBuf::from("doesnotexist"), + ] + .into_iter() + .map(|p| posix_fs.stat_sync(&p).unwrap()) + .collect::>(); + let v: Vec> = vec![ + Some(Stat::File(File { + path: PathBuf::from("executable_file"), + is_executable: true, + })), + Some(Stat::File(File { + path: PathBuf::from("regular_file"), + is_executable: false, + })), + Some(Stat::Dir(Dir(PathBuf::from("dir")))), + Some(Stat::Link(Link { + path: PathBuf::from("symlink"), + target: PathBuf::from("executable_file"), + })), + Some(Stat::Link(Link { + path: PathBuf::from("recursive_symlink"), + target: PathBuf::from("../symlink"), + })), + Some(Stat::Link(Link { + path: PathBuf::from("dir_symlink"), + target: PathBuf::from("dir"), + })), + Some(Stat::Link(Link { + path: PathBuf::from("symlink_to_nothing"), + target: PathBuf::from("doesnotexist"), + })), + None, + ]; + assert_eq!(v, path_stats); } #[tokio::test] async fn memfs_expand_basic() { - // Create two files, with the effect that there is a nested directory for the longer path. - let p1 = PathBuf::from("some/file"); - let p2 = PathBuf::from("some/other"); - let p3 = p2.join("file"); + // Create two files, with the effect that there is a nested directory for the longer path. + let p1 = PathBuf::from("some/file"); + let p2 = PathBuf::from("some/other"); + let p3 = p2.join("file"); - let fs = DigestTrie::from_unique_paths( - vec![ - TypedPath::File { - path: &p1, - is_executable: false, - }, - TypedPath::File { - path: &p3, - is_executable: false, - }, - ], - &vec![(p1.clone(), EMPTY_DIGEST), (p3.clone(), EMPTY_DIGEST)] - .into_iter() - .collect(), - ) - .unwrap(); - let globs = PathGlobs::new( - vec!["some/*".into()], - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AnyMatch, - ) - .parse() - .unwrap(); + let fs = DigestTrie::from_unique_paths( + vec![ + TypedPath::File { + path: &p1, + is_executable: false, + }, + TypedPath::File { + path: &p3, + is_executable: false, + }, + ], + &vec![(p1.clone(), EMPTY_DIGEST), (p3.clone(), EMPTY_DIGEST)] + .into_iter() + .collect(), + ) + .unwrap(); + let globs = PathGlobs::new( + vec!["some/*".into()], + StrictGlobMatching::Ignore, + GlobExpansionConjunction::AnyMatch, + ) + .parse() + .unwrap(); - assert_eq!( - fs.expand_globs(globs, SymlinkBehavior::Oblivious, None) - .await - .unwrap(), - vec![ - PathStat::file( - p1.clone(), - File { - path: p1, - is_executable: false, - }, - ), - PathStat::dir(p2.clone(), Dir(p2)), - ], - ); + assert_eq!( + fs.expand_globs(globs, SymlinkBehavior::Oblivious, None) + .await + .unwrap(), + vec![ + PathStat::file( + p1.clone(), + File { + path: p1, + is_executable: false, + }, + ), + PathStat::dir(p2.clone(), Dir(p2)), + ], + ); } async fn assert_only_file_is_executable(path: &Path, want_is_executable: bool) { - let fs = new_posixfs(path); - let stats = fs.scandir(Dir(PathBuf::from("."))).await.unwrap(); - assert_eq!(stats.0.len(), 1); - match stats.0.get(0).unwrap() { - &super::Stat::File(File { - is_executable: got, .. 
- }) => assert_eq!(want_is_executable, got), - other => panic!("Expected file, got {other:?}"), - } + let fs = new_posixfs(path); + let stats = fs.scandir(Dir(PathBuf::from("."))).await.unwrap(); + assert_eq!(stats.0.len(), 1); + match stats.0.get(0).unwrap() { + &super::Stat::File(File { + is_executable: got, .. + }) => assert_eq!(want_is_executable, got), + other => panic!("Expected file, got {other:?}"), + } } fn new_posixfs>(dir: P) -> PosixFS { - PosixFS::new( - dir.as_ref(), - GitignoreStyleExcludes::empty(), - task_executor::Executor::new(), - ) - .unwrap() + PosixFS::new( + dir.as_ref(), + GitignoreStyleExcludes::empty(), + task_executor::Executor::new(), + ) + .unwrap() } fn new_posixfs_symlink_oblivious>(dir: P) -> PosixFS { - PosixFS::new_with_symlink_behavior( - dir.as_ref(), - GitignoreStyleExcludes::empty(), - task_executor::Executor::new(), - SymlinkBehavior::Oblivious, - ) - .unwrap() + PosixFS::new_with_symlink_behavior( + dir.as_ref(), + GitignoreStyleExcludes::empty(), + task_executor::Executor::new(), + SymlinkBehavior::Oblivious, + ) + .unwrap() } diff --git a/src/rust/engine/fs/src/tests.rs b/src/rust/engine/fs/src/tests.rs index da5e9fecc8f..45dc1b6f782 100644 --- a/src/rust/engine/fs/src/tests.rs +++ b/src/rust/engine/fs/src/tests.rs @@ -4,22 +4,22 @@ use crate::RelativePath; #[test] fn relative_path_ok() { - assert_eq!(Some("a"), RelativePath::new("a").unwrap().to_str()); - assert_eq!(Some("a"), RelativePath::new("./a").unwrap().to_str()); - assert_eq!(Some("a"), RelativePath::new("b/../a").unwrap().to_str()); - assert_eq!( - Some("a/c"), - RelativePath::new("b/../a/././c").unwrap().to_str() - ); + assert_eq!(Some("a"), RelativePath::new("a").unwrap().to_str()); + assert_eq!(Some("a"), RelativePath::new("./a").unwrap().to_str()); + assert_eq!(Some("a"), RelativePath::new("b/../a").unwrap().to_str()); + assert_eq!( + Some("a/c"), + RelativePath::new("b/../a/././c").unwrap().to_str() + ); } #[test] fn relative_path_err() { - assert!(RelativePath::new("../a").is_err()); - assert!(RelativePath::new("/a").is_err()); + assert!(RelativePath::new("../a").is_err()); + assert!(RelativePath::new("/a").is_err()); } #[test] fn relative_path_normalize() { - assert_eq!(Some("a"), RelativePath::new("a/").unwrap().to_str()); + assert_eq!(Some("a"), RelativePath::new("a/").unwrap().to_str()); } diff --git a/src/rust/engine/fs/store/benches/store.rs b/src/rust/engine/fs/store/benches/store.rs index ef4b59d87c6..21531ea6af0 100644 --- a/src/rust/engine/fs/store/benches/store.rs +++ b/src/rust/engine/fs/store/benches/store.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. 
#![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -35,8 +35,8 @@ use std::sync::Arc; use std::time::Duration; use fs::{ - DirectoryDigest, File, GitignoreStyleExcludes, GlobExpansionConjunction, PathStat, Permissions, - PosixFS, PreparedPathGlobs, StrictGlobMatching, + DirectoryDigest, File, GitignoreStyleExcludes, GlobExpansionConjunction, PathStat, Permissions, + PosixFS, PreparedPathGlobs, StrictGlobMatching, }; use hashing::EMPTY_DIGEST; use protos::gen::build::bazel::remote::execution::v2 as remexec; @@ -46,49 +46,49 @@ use tempfile::TempDir; use store::{OneOffStoreFileByDigest, Snapshot, SnapshotOps, Store, SubsetParams}; fn executor() -> Executor { - Executor::new_owned(num_cpus::get(), num_cpus::get() * 4, || ()).unwrap() + Executor::new_owned(num_cpus::get(), num_cpus::get() * 4, || ()).unwrap() } pub fn criterion_benchmark_materialize(c: &mut Criterion) { - // Create an executor, store containing the stuff to materialize, and a digest for the stuff. - // To avoid benchmarking the deleting of things, we create a parent temporary directory (which - // will be deleted at the end of the benchmark) and then skip deletion of the per-run directories. - let executor = executor(); - - let mut cgroup = c.benchmark_group("materialize_directory"); - - for perms in vec![Permissions::ReadOnly, Permissions::Writable] { - for (count, size) in vec![(100, 100), (20, 10_000_000), (1, 200_000_000), (10000, 100)] { - let (store, _tempdir, digest) = snapshot(&executor, count, size); - let parent_dest = TempDir::new().unwrap(); - let parent_dest_path = parent_dest.path(); - - cgroup - .sample_size(10) - .measurement_time(Duration::from_secs(30)) - .bench_function( - format!("materialize_directory({:?}, {}, {})", perms, count, size), - |b| { - b.iter(|| { - // NB: We forget this child tempdir to avoid deleting things during the run. - let new_temp = TempDir::new_in(parent_dest_path).unwrap(); - let dest = new_temp.path().to_path_buf(); - std::mem::forget(new_temp); - let _ = executor - .block_on(store.materialize_directory( - dest, - parent_dest_path, - digest.clone(), - false, - &BTreeSet::new(), - perms, - )) - .unwrap(); - }) - }, - ); + // Create an executor, store containing the stuff to materialize, and a digest for the stuff. + // To avoid benchmarking the deleting of things, we create a parent temporary directory (which + // will be deleted at the end of the benchmark) and then skip deletion of the per-run directories. + let executor = executor(); + + let mut cgroup = c.benchmark_group("materialize_directory"); + + for perms in vec![Permissions::ReadOnly, Permissions::Writable] { + for (count, size) in vec![(100, 100), (20, 10_000_000), (1, 200_000_000), (10000, 100)] { + let (store, _tempdir, digest) = snapshot(&executor, count, size); + let parent_dest = TempDir::new().unwrap(); + let parent_dest_path = parent_dest.path(); + + cgroup + .sample_size(10) + .measurement_time(Duration::from_secs(30)) + .bench_function( + format!("materialize_directory({:?}, {}, {})", perms, count, size), + |b| { + b.iter(|| { + // NB: We forget this child tempdir to avoid deleting things during the run. 
+ let new_temp = TempDir::new_in(parent_dest_path).unwrap(); + let dest = new_temp.path().to_path_buf(); + std::mem::forget(new_temp); + let _ = executor + .block_on(store.materialize_directory( + dest, + parent_dest_path, + digest.clone(), + false, + &BTreeSet::new(), + perms, + )) + .unwrap(); + }) + }, + ); + } } - } } /// @@ -96,165 +96,169 @@ pub fn criterion_benchmark_materialize(c: &mut Criterion) { /// filesystem traversal overheads and focuses on digesting/capturing. /// pub fn criterion_benchmark_snapshot_capture(c: &mut Criterion) { - let executor = executor(); - - let mut cgroup = c.benchmark_group("snapshot_capture"); - - // The number of files, file size, whether the inputs should be assumed to be immutable, and the - // number of times to capture (only the first capture actually stores anything: the rest should - // ignore the duplicated data.) - for params in vec![ - (100, 100, false, 100), - (20, 10_000_000, true, 10), - (1, 200_000_000, true, 10), - ] { - let (count, size, immutable, captures) = params; - let storedir = TempDir::new().unwrap(); - let store = Store::local_only(executor.clone(), storedir.path()).unwrap(); - let (tempdir, path_stats) = tempdir_containing(count, size); - let posix_fs = Arc::new( - PosixFS::new( - tempdir.path(), - GitignoreStyleExcludes::empty(), - executor.clone(), - ) - .unwrap(), - ); - cgroup - .sample_size(10) - .measurement_time(Duration::from_secs(30)) - .bench_function(format!("snapshot_capture({:?})", params), |b| { - b.iter(|| { - for _ in 0..captures { - let _ = executor - .block_on(Snapshot::from_path_stats( - OneOffStoreFileByDigest::new(store.clone(), posix_fs.clone(), immutable), - path_stats.clone(), - )) - .unwrap(); - } - }) - }); - } -} - -pub fn criterion_benchmark_subset_wildcard(c: &mut Criterion) { - let executor = executor(); - // NB: We use a much larger snapshot size compared to the materialize benchmark! - let (store, _tempdir, digest) = snapshot(&executor, 1000, 100); - - let mut cgroup = c.benchmark_group("digest_subset"); - - cgroup - .sample_size(10) - .measurement_time(Duration::from_secs(80)) - .bench_function("wildcard", |b| { - b.iter(|| { - let get_subset = store.subset( - digest.clone(), - SubsetParams { - globs: PreparedPathGlobs::create( - vec!["**/*".to_string()], - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AllMatch, + let executor = executor(); + + let mut cgroup = c.benchmark_group("snapshot_capture"); + + // The number of files, file size, whether the inputs should be assumed to be immutable, and the + // number of times to capture (only the first capture actually stores anything: the rest should + // ignore the duplicated data.) 
+ for params in vec![ + (100, 100, false, 100), + (20, 10_000_000, true, 10), + (1, 200_000_000, true, 10), + ] { + let (count, size, immutable, captures) = params; + let storedir = TempDir::new().unwrap(); + let store = Store::local_only(executor.clone(), storedir.path()).unwrap(); + let (tempdir, path_stats) = tempdir_containing(count, size); + let posix_fs = Arc::new( + PosixFS::new( + tempdir.path(), + GitignoreStyleExcludes::empty(), + executor.clone(), ) .unwrap(), - }, ); - let _ = executor.block_on(get_subset).unwrap(); - }) - }); + cgroup + .sample_size(10) + .measurement_time(Duration::from_secs(30)) + .bench_function(format!("snapshot_capture({:?})", params), |b| { + b.iter(|| { + for _ in 0..captures { + let _ = executor + .block_on(Snapshot::from_path_stats( + OneOffStoreFileByDigest::new( + store.clone(), + posix_fs.clone(), + immutable, + ), + path_stats.clone(), + )) + .unwrap(); + } + }) + }); + } +} + +pub fn criterion_benchmark_subset_wildcard(c: &mut Criterion) { + let executor = executor(); + // NB: We use a much larger snapshot size compared to the materialize benchmark! + let (store, _tempdir, digest) = snapshot(&executor, 1000, 100); + + let mut cgroup = c.benchmark_group("digest_subset"); + + cgroup + .sample_size(10) + .measurement_time(Duration::from_secs(80)) + .bench_function("wildcard", |b| { + b.iter(|| { + let get_subset = store.subset( + digest.clone(), + SubsetParams { + globs: PreparedPathGlobs::create( + vec!["**/*".to_string()], + StrictGlobMatching::Ignore, + GlobExpansionConjunction::AllMatch, + ) + .unwrap(), + }, + ); + let _ = executor.block_on(get_subset).unwrap(); + }) + }); } pub fn criterion_benchmark_merge(c: &mut Criterion) { - let executor = executor(); - let num_files: usize = 4000; - let (store, _tempdir, digest) = snapshot(&executor, num_files, 100); - - // Modify half of the files in the top-level directory by setting them to have the empty - // fingerprint (zero content). - executor - .block_on(store.ensure_directory_digest_persisted(digest.clone())) - .unwrap(); - let directory = executor - .block_on(store.load_directory(digest.as_digest())) - .unwrap(); - let mut all_file_nodes = directory.files.to_vec(); - let mut file_nodes_to_modify = all_file_nodes.split_off(all_file_nodes.len() / 2); - for file_node in file_nodes_to_modify.iter_mut() { - file_node.digest = Some(remexec::Digest { - hash: EMPTY_DIGEST.hash.to_hex(), - size_bytes: 0, - }); - } - let modified_file_names: HashSet = file_nodes_to_modify - .iter() - .map(|file_node| file_node.name.to_string()) - .collect(); - - let bazel_modified_files_directory = remexec::Directory { - files: all_file_nodes - .iter() - .cloned() - .chain(file_nodes_to_modify.into_iter()) - .collect(), - directories: directory.directories.clone(), - ..remexec::Directory::default() - }; - - let modified_digest = executor - .block_on(store.record_directory(&bazel_modified_files_directory, true)) - .unwrap(); - - let bazel_removed_files_directory = remexec::Directory { - files: all_file_nodes - .into_iter() - .filter(|file_node| !modified_file_names.contains(&file_node.name)) - .collect(), - directories: directory.directories.clone(), - ..remexec::Directory::default() - }; - let removed_digest = executor - .block_on(store.record_directory(&bazel_removed_files_directory, true)) - .unwrap(); - - // NB: We benchmark with trees that are already held in memory, since that's the expected case in - // production. 
- let removed_digest = executor - .block_on(store.load_directory_digest(removed_digest)) - .unwrap(); - let modified_digest = executor - .block_on(store.load_directory_digest(modified_digest)) - .unwrap(); - - let mut cgroup = c.benchmark_group("snapshot_merge"); - - cgroup - .sample_size(10) - .measurement_time(Duration::from_secs(80)) - .bench_function("snapshot_merge", |b| { - b.iter(|| { - // Merge the old and the new snapshot together, allowing any file to be duplicated. - let old_first = executor - .block_on(store.merge(vec![removed_digest.clone(), modified_digest.clone()])) - .unwrap(); - - // Test the performance of either ordering of snapshots. - let new_first = executor - .block_on(store.merge(vec![modified_digest.clone(), removed_digest.clone()])) - .unwrap(); - - assert_eq!(old_first, new_first); - }) - }); + let executor = executor(); + let num_files: usize = 4000; + let (store, _tempdir, digest) = snapshot(&executor, num_files, 100); + + // Modify half of the files in the top-level directory by setting them to have the empty + // fingerprint (zero content). + executor + .block_on(store.ensure_directory_digest_persisted(digest.clone())) + .unwrap(); + let directory = executor + .block_on(store.load_directory(digest.as_digest())) + .unwrap(); + let mut all_file_nodes = directory.files.to_vec(); + let mut file_nodes_to_modify = all_file_nodes.split_off(all_file_nodes.len() / 2); + for file_node in file_nodes_to_modify.iter_mut() { + file_node.digest = Some(remexec::Digest { + hash: EMPTY_DIGEST.hash.to_hex(), + size_bytes: 0, + }); + } + let modified_file_names: HashSet = file_nodes_to_modify + .iter() + .map(|file_node| file_node.name.to_string()) + .collect(); + + let bazel_modified_files_directory = remexec::Directory { + files: all_file_nodes + .iter() + .cloned() + .chain(file_nodes_to_modify.into_iter()) + .collect(), + directories: directory.directories.clone(), + ..remexec::Directory::default() + }; + + let modified_digest = executor + .block_on(store.record_directory(&bazel_modified_files_directory, true)) + .unwrap(); + + let bazel_removed_files_directory = remexec::Directory { + files: all_file_nodes + .into_iter() + .filter(|file_node| !modified_file_names.contains(&file_node.name)) + .collect(), + directories: directory.directories.clone(), + ..remexec::Directory::default() + }; + let removed_digest = executor + .block_on(store.record_directory(&bazel_removed_files_directory, true)) + .unwrap(); + + // NB: We benchmark with trees that are already held in memory, since that's the expected case in + // production. + let removed_digest = executor + .block_on(store.load_directory_digest(removed_digest)) + .unwrap(); + let modified_digest = executor + .block_on(store.load_directory_digest(modified_digest)) + .unwrap(); + + let mut cgroup = c.benchmark_group("snapshot_merge"); + + cgroup + .sample_size(10) + .measurement_time(Duration::from_secs(80)) + .bench_function("snapshot_merge", |b| { + b.iter(|| { + // Merge the old and the new snapshot together, allowing any file to be duplicated. + let old_first = executor + .block_on(store.merge(vec![removed_digest.clone(), modified_digest.clone()])) + .unwrap(); + + // Test the performance of either ordering of snapshots. 
+ let new_first = executor + .block_on(store.merge(vec![modified_digest.clone(), removed_digest.clone()])) + .unwrap(); + + assert_eq!(old_first, new_first); + }) + }); } criterion_group!( - benches, - criterion_benchmark_materialize, - criterion_benchmark_snapshot_capture, - criterion_benchmark_subset_wildcard, - criterion_benchmark_merge + benches, + criterion_benchmark_materialize, + criterion_benchmark_snapshot_capture, + criterion_benchmark_subset_wildcard, + criterion_benchmark_merge ); criterion_main!(benches); @@ -263,79 +267,81 @@ criterion_main!(benches); /// file_target_size. /// fn tempdir_containing(max_files: usize, file_target_size: usize) -> (TempDir, Vec) { - let henries_lines = { - let f = std::fs::File::open( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("..") - .join("..") - .join("testutil") - .join("src") - .join("all_the_henries.txt"), - ) - .expect("Error opening all_the_henries"); - BufReader::new(f).lines() - }; - - let mut produced = HashSet::new(); - let paths = henries_lines - .filter_map(|line| { - // Clean up to lowercase ascii. - let clean_line = line - .expect("Failed to read from all_the_henries") - .trim() - .chars() - .filter_map(|c| { - if c.is_ascii_alphanumeric() { - Some(c.to_ascii_lowercase()) - } else if c.is_ascii_whitespace() { - Some(' ') - } else { - None - } + let henries_lines = { + let f = std::fs::File::open( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("testutil") + .join("src") + .join("all_the_henries.txt"), + ) + .expect("Error opening all_the_henries"); + BufReader::new(f).lines() + }; + + let mut produced = HashSet::new(); + let paths = henries_lines + .filter_map(|line| { + // Clean up to lowercase ascii. + let clean_line = line + .expect("Failed to read from all_the_henries") + .trim() + .chars() + .filter_map(|c| { + if c.is_ascii_alphanumeric() { + Some(c.to_ascii_lowercase()) + } else if c.is_ascii_whitespace() { + Some(' ') + } else { + None + } + }) + .collect::(); + + // NB: Split the line by whitespace, then accumulate a PathBuf using each word as a path + // component! + let mut path_buf = clean_line.split_whitespace().collect::(); + // Drop empty or too-long candidates. + let components_too_long = path_buf.components().any(|c| c.as_os_str().len() > 255); + if components_too_long + || path_buf.as_os_str().is_empty() + || path_buf.as_os_str().len() > 512 + { + None + } else { + // We add an extension to files to avoid collisions with directories (which are created + // implicitly based on leading components). + path_buf.set_extension("txt"); + Some(PathStat::file( + path_buf.clone(), + File { + path: path_buf, + is_executable: false, + }, + )) + } }) - .collect::(); - - // NB: Split the line by whitespace, then accumulate a PathBuf using each word as a path - // component! - let mut path_buf = clean_line.split_whitespace().collect::(); - // Drop empty or too-long candidates. - let components_too_long = path_buf.components().any(|c| c.as_os_str().len() > 255); - if components_too_long || path_buf.as_os_str().is_empty() || path_buf.as_os_str().len() > 512 - { - None - } else { - // We add an extension to files to avoid collisions with directories (which are created - // implicitly based on leading components). 
- path_buf.set_extension("txt"); - Some(PathStat::file( - path_buf.clone(), - File { - path: path_buf, - is_executable: false, - }, - )) - } - }) - .filter(move |path| produced.insert(path.clone())) - .take(max_files) - .collect::>(); - - let tempdir = TempDir::new().unwrap(); - for path in &paths { - // We use the (repeated) path as the content as well. - let abs_path = tempdir.path().join(path.path()); - if let Some(parent) = abs_path.parent() { - std::fs::create_dir_all(parent).unwrap(); - } - let mut f = BufWriter::new(std::fs::File::create(abs_path).unwrap()); - let bytes = path.path().as_os_str().as_bytes(); - let lines_to_write = file_target_size / bytes.len(); - for _ in 0..lines_to_write { - f.write_all(bytes).unwrap(); - f.write_all(b"\n").unwrap(); + .filter(move |path| produced.insert(path.clone())) + .take(max_files) + .collect::>(); + + let tempdir = TempDir::new().unwrap(); + for path in &paths { + // We use the (repeated) path as the content as well. + let abs_path = tempdir.path().join(path.path()); + if let Some(parent) = abs_path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + let mut f = BufWriter::new(std::fs::File::create(abs_path).unwrap()); + let bytes = path.path().as_os_str().as_bytes(); + let lines_to_write = file_target_size / bytes.len(); + for _ in 0..lines_to_write { + f.write_all(bytes).unwrap(); + f.write_all(b"\n").unwrap(); + } } - } - (tempdir, paths) + (tempdir, paths) } /// @@ -343,34 +349,34 @@ fn tempdir_containing(max_files: usize, file_target_size: usize) -> (TempDir, Ve /// containing the given number of files, each with roughly the given size. /// fn snapshot( - executor: &Executor, - max_files: usize, - file_target_size: usize, + executor: &Executor, + max_files: usize, + file_target_size: usize, ) -> (Store, TempDir, DirectoryDigest) { - // NB: We create the files in a tempdir rather than in memory in order to allow for more - // realistic benchmarking involving large files. The tempdir is dropped at the end of this method - // (after everything has been captured out of it). - let (tempdir, path_stats) = tempdir_containing(max_files, file_target_size); - let storedir = TempDir::new().unwrap(); - let store = Store::local_only(executor.clone(), storedir.path()).unwrap(); - - let store2 = store.clone(); - let digest = executor - .block_on(async move { - let posix_fs = PosixFS::new( - tempdir.path(), - GitignoreStyleExcludes::empty(), - executor.clone(), - ) - .unwrap(); - Snapshot::from_path_stats( - OneOffStoreFileByDigest::new(store2, Arc::new(posix_fs), true), - path_stats, - ) - .await - }) - .unwrap() - .into(); - - (store, storedir, digest) + // NB: We create the files in a tempdir rather than in memory in order to allow for more + // realistic benchmarking involving large files. The tempdir is dropped at the end of this method + // (after everything has been captured out of it). 
+ let (tempdir, path_stats) = tempdir_containing(max_files, file_target_size); + let storedir = TempDir::new().unwrap(); + let store = Store::local_only(executor.clone(), storedir.path()).unwrap(); + + let store2 = store.clone(); + let digest = executor + .block_on(async move { + let posix_fs = PosixFS::new( + tempdir.path(), + GitignoreStyleExcludes::empty(), + executor.clone(), + ) + .unwrap(); + Snapshot::from_path_stats( + OneOffStoreFileByDigest::new(store2, Arc::new(posix_fs), true), + path_stats, + ) + .await + }) + .unwrap() + .into(); + + (store, storedir, digest) } diff --git a/src/rust/engine/fs/store/src/immutable_inputs.rs b/src/rust/engine/fs/store/src/immutable_inputs.rs index 6e9a8c1960e..d17a73be008 100644 --- a/src/rust/engine/fs/store/src/immutable_inputs.rs +++ b/src/rust/engine/fs/store/src/immutable_inputs.rs @@ -16,17 +16,17 @@ use crate::{Store, StoreError}; /// A symlink from a relative src to an absolute dst (outside of the workdir). #[derive(Debug)] pub struct WorkdirSymlink { - pub src: RelativePath, - pub dst: PathBuf, + pub src: RelativePath, + pub dst: PathBuf, } struct Inner { - store: Store, - // The TempDir that digests are materialized in. - workdir: TempDir, - // A map from Digest to the location it has been materialized at. The OnceCell allows - // for cooperation between threads attempting to create Digests. - contents: Mutex>>>, + store: Store, + // The TempDir that digests are materialized in. + workdir: TempDir, + // A map from Digest to the location it has been materialized at. The OnceCell allows + // for cooperation between threads attempting to create Digests. + contents: Mutex>>>, } /// @@ -36,117 +36,116 @@ struct Inner { pub struct ImmutableInputs(Arc); impl ImmutableInputs { - pub fn new(store: Store, base: &Path) -> Result { - create_dir_all(base).map_err(|e| format!("Failed to create base for immutable inputs: {e}"))?; - let workdir = tempfile::Builder::new() - .prefix("immutable_inputs") - .tempdir_in(base) - .map_err(|e| format!("Failed to create temporary directory for immutable inputs: {e}"))?; - Ok(Self(Arc::new(Inner { - store, - workdir, - contents: Mutex::default(), - }))) - } + pub fn new(store: Store, base: &Path) -> Result { + create_dir_all(base) + .map_err(|e| format!("Failed to create base for immutable inputs: {e}"))?; + let workdir = tempfile::Builder::new() + .prefix("immutable_inputs") + .tempdir_in(base) + .map_err(|e| { + format!("Failed to create temporary directory for immutable inputs: {e}") + })?; + Ok(Self(Arc::new(Inner { + store, + workdir, + contents: Mutex::default(), + }))) + } - pub fn workdir(&self) -> &Path { - self.0.workdir.path() - } + pub fn workdir(&self) -> &Path { + self.0.workdir.path() + } - /// Returns an absolute Path to immutably consume the given Digest from. - pub(crate) async fn path_for_dir( - &self, - directory_digest: DirectoryDigest, - ) -> Result { - let digest = directory_digest.as_digest(); - let cell = self.0.contents.lock().entry(digest).or_default().clone(); + /// Returns an absolute Path to immutably consume the given Digest from. + pub(crate) async fn path_for_dir( + &self, + directory_digest: DirectoryDigest, + ) -> Result { + let digest = directory_digest.as_digest(); + let cell = self.0.contents.lock().entry(digest).or_default().clone(); - // We (might) need to initialize the value. 
- // - // Because this code executes a side-effect which could be observed elsewhere within this - // process (other threads can observe the contents of the temporary directory), we need to - // ensure that if this method is cancelled (via async Drop), whether the cell has been - // initialized or not stays in sync with whether the side-effect is visible. - // - // Making the initialization "cancellation safe", involves either: - // - // 1. Adding a Drop guard to "undo" the side-effect if we're dropped before we fully - // initialize the cell. - // * This is challenging to do correctly in this case, because the `Drop` guard cannot - // be created until after initialization begins, but cannot be cleared until after the - // cell has been initialized (i.e., after `get_or_try_init` returns). - // 2. Shielding ourselves from cancellation by `spawn`ing a new Task to guarantee that the - // cell initialization always runs to completion. - // * This would work, but would mean that we would finish initializing cells even when - // work was cancelled. Cancellation usually means that the work is no longer necessary, - // and so that could result in a lot of spurious IO (in e.g. warm cache cases which - // never end up actually needing any inputs). - // * An advanced variant of this approach would be to _pause_ work on materializing a - // Digest when demand for it disappeared, and resume the work if another caller - // requested that Digest. - // 3. Using anonymous destination paths, such that multiple attempts to initialize cannot - // collide. - // * This means that although the side-effect is visible, it can never collide. - // - // We take the final approach here currently (for simplicity's sake), but the advanced variant - // of approach 2 might eventually be worthwhile. - cell - .get_or_try_init(async { - let chroot = TempDir::new_in(self.0.workdir.path()).map_err(|e| { - format!( + // We (might) need to initialize the value. + // + // Because this code executes a side-effect which could be observed elsewhere within this + // process (other threads can observe the contents of the temporary directory), we need to + // ensure that if this method is cancelled (via async Drop), whether the cell has been + // initialized or not stays in sync with whether the side-effect is visible. + // + // Making the initialization "cancellation safe", involves either: + // + // 1. Adding a Drop guard to "undo" the side-effect if we're dropped before we fully + // initialize the cell. + // * This is challenging to do correctly in this case, because the `Drop` guard cannot + // be created until after initialization begins, but cannot be cleared until after the + // cell has been initialized (i.e., after `get_or_try_init` returns). + // 2. Shielding ourselves from cancellation by `spawn`ing a new Task to guarantee that the + // cell initialization always runs to completion. + // * This would work, but would mean that we would finish initializing cells even when + // work was cancelled. Cancellation usually means that the work is no longer necessary, + // and so that could result in a lot of spurious IO (in e.g. warm cache cases which + // never end up actually needing any inputs). + // * An advanced variant of this approach would be to _pause_ work on materializing a + // Digest when demand for it disappeared, and resume the work if another caller + // requested that Digest. + // 3. Using anonymous destination paths, such that multiple attempts to initialize cannot + // collide. 
+ // * This means that although the side-effect is visible, it can never collide. + // + // We take the final approach here currently (for simplicity's sake), but the advanced variant + // of approach 2 might eventually be worthwhile. + cell.get_or_try_init(async { + let chroot = TempDir::new_in(self.0.workdir.path()).map_err(|e| { + format!( "Failed to create a temporary directory for materialization of immutable input \ digest {digest:?}: {e}" ) - })?; + })?; - let dest = chroot.path().join(digest.hash.to_hex()); - self - .0 - .store - .materialize_directory( - dest.clone(), - self.0.workdir.path(), - directory_digest, - false, - &BTreeSet::new(), - Permissions::ReadOnly, - ) - .await?; + let dest = chroot.path().join(digest.hash.to_hex()); + self.0 + .store + .materialize_directory( + dest.clone(), + self.0.workdir.path(), + directory_digest, + false, + &BTreeSet::new(), + Permissions::ReadOnly, + ) + .await?; - // Now that we've successfully initialized the destination, forget the TempDir so that it - // is not cleaned up. - let _ = chroot.into_path(); + // Now that we've successfully initialized the destination, forget the TempDir so that it + // is not cleaned up. + let _ = chroot.into_path(); - Ok(dest) - }) - .await - .cloned() - } + Ok(dest) + }) + .await + .cloned() + } - /// - /// Returns symlinks to create for the given set of immutable cache paths. - /// - pub async fn local_paths( - &self, - immutable_inputs: &BTreeMap, - ) -> Result, StoreError> { - let dsts = futures::future::try_join_all( - immutable_inputs - .values() - .map(|d| self.path_for_dir(d.clone())) - .collect::>(), - ) - .await?; + /// + /// Returns symlinks to create for the given set of immutable cache paths. + /// + pub async fn local_paths( + &self, + immutable_inputs: &BTreeMap, + ) -> Result, StoreError> { + let dsts = futures::future::try_join_all( + immutable_inputs + .values() + .map(|d| self.path_for_dir(d.clone())) + .collect::>(), + ) + .await?; - Ok( - immutable_inputs - .keys() - .zip(dsts.into_iter()) - .map(|(src, dst)| WorkdirSymlink { - src: src.clone(), - dst, - }) - .collect(), - ) - } + Ok(immutable_inputs + .keys() + .zip(dsts.into_iter()) + .map(|(src, dst)| WorkdirSymlink { + src: src.clone(), + dst, + }) + .collect()) + } } diff --git a/src/rust/engine/fs/store/src/lib.rs b/src/rust/engine/fs/store/src/lib.rs index 4e80586a661..5570c666963 100644 --- a/src/rust/engine/fs/store/src/lib.rs +++ b/src/rust/engine/fs/store/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -52,9 +52,9 @@ use async_oncecell::OnceCell; use async_trait::async_trait; use bytes::Bytes; use fs::{ - default_cache_path, directory, DigestEntry, DigestTrie, Dir, DirectoryDigest, File, FileContent, - FileEntry, Link, PathStat, Permissions, RelativePath, SymlinkBehavior, SymlinkEntry, - EMPTY_DIRECTORY_DIGEST, + default_cache_path, directory, DigestEntry, DigestTrie, Dir, DirectoryDigest, File, + FileContent, FileEntry, Link, PathStat, Permissions, RelativePath, SymlinkBehavior, + SymlinkEntry, EMPTY_DIRECTORY_DIGEST, }; use futures::future::{self, BoxFuture, Either, FutureExt, TryFutureExt}; use grpc_util::prost::MessageExt; @@ -92,10 +92,10 @@ mod remote_tests; pub use remote_provider::RemoteOptions; pub struct LocalOptions { - pub files_max_size_bytes: usize, - pub directories_max_size_bytes: usize, - pub lease_time: Duration, - pub shard_count: u8, + pub files_max_size_bytes: usize, + pub directories_max_size_bytes: usize, + pub lease_time: Duration, + pub shard_count: u8, } /// @@ -103,48 +103,48 @@ pub struct LocalOptions { /// explicit settings in most cases. /// impl Default for LocalOptions { - fn default() -> Self { - Self { - files_max_size_bytes: 16 * 4 * GIGABYTES, - directories_max_size_bytes: 2 * 4 * GIGABYTES, - lease_time: DEFAULT_LEASE_TIME, - shard_count: 16, + fn default() -> Self { + Self { + files_max_size_bytes: 16 * 4 * GIGABYTES, + directories_max_size_bytes: 2 * 4 * GIGABYTES, + lease_time: DEFAULT_LEASE_TIME, + shard_count: 16, + } } - } } #[derive(Debug, PartialEq, Eq)] pub enum StoreError { - /// A Digest was not present in either of the local or remote Stores. - MissingDigest(String, Digest), - /// All other error types. - Unclassified(String), + /// A Digest was not present in either of the local or remote Stores. + MissingDigest(String, Digest), + /// All other error types. 
+ Unclassified(String), } impl StoreError { - pub fn enrich(self, prefix: &str) -> Self { - match self { - Self::MissingDigest(s, d) => Self::MissingDigest(format!("{prefix}: {s}"), d), - Self::Unclassified(s) => Self::Unclassified(format!("{prefix}: {s}")), + pub fn enrich(self, prefix: &str) -> Self { + match self { + Self::MissingDigest(s, d) => Self::MissingDigest(format!("{prefix}: {s}"), d), + Self::Unclassified(s) => Self::Unclassified(format!("{prefix}: {s}")), + } } - } } impl Display for StoreError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::MissingDigest(s, d) => { - write!(f, "{s}: {d:?}") - } - Self::Unclassified(s) => write!(f, "{s}"), + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::MissingDigest(s, d) => { + write!(f, "{s}: {d:?}") + } + Self::Unclassified(s) => write!(f, "{s}"), + } } - } } impl From for StoreError { - fn from(err: String) -> Self { - Self::Unclassified(err) - } + fn from(err: String) -> Self { + Self::Unclassified(err) + } } // Summary of the files and directories uploaded with an operation @@ -152,12 +152,12 @@ impl From for StoreError { // uploaded_file_{count, bytes}: Number and combined size of files uploaded to the remote #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize)] pub struct UploadSummary { - pub ingested_file_count: usize, - pub ingested_file_bytes: usize, - pub uploaded_file_count: usize, - pub uploaded_file_bytes: usize, - #[serde(skip)] - pub upload_wall_time: Duration, + pub ingested_file_count: usize, + pub ingested_file_bytes: usize, + pub uploaded_file_count: usize, + pub uploaded_file_bytes: usize, + #[serde(skip)] + pub upload_wall_time: Duration, } /// @@ -171,94 +171,94 @@ pub struct UploadSummary { /// #[derive(Clone, Debug)] struct RemoteStore { - store: remote::ByteStore, - in_flight_uploads: Arc>>>>, - in_flight_downloads: Arc>>>>, + store: remote::ByteStore, + in_flight_uploads: Arc>>>>, + in_flight_downloads: Arc>>>>, } impl RemoteStore { - fn new(store: remote::ByteStore) -> Self { - Self { - store, - in_flight_uploads: Arc::default(), - in_flight_downloads: Arc::default(), + fn new(store: remote::ByteStore) -> Self { + Self { + store, + in_flight_uploads: Arc::default(), + in_flight_downloads: Arc::default(), + } } - } - - /// - /// Returns a strongly held cell from a map of weakly held cells, creating it if necessary. - /// - fn cell_from( - cells: &Mutex>>>, - digest: Digest, - ) -> Arc> { - let mut cells = cells.lock(); - if let Some(cell) = cells.get(&digest).and_then(|weak_cell| weak_cell.upgrade()) { - cell - } else { - let cell = Arc::new(OnceCell::new()); - cells.insert(digest, Arc::downgrade(&cell)); - cell + + /// + /// Returns a strongly held cell from a map of weakly held cells, creating it if necessary. + /// + fn cell_from( + cells: &Mutex>>>, + digest: Digest, + ) -> Arc> { + let mut cells = cells.lock(); + if let Some(cell) = cells.get(&digest).and_then(|weak_cell| weak_cell.upgrade()) { + cell + } else { + let cell = Arc::new(OnceCell::new()); + cells.insert(digest, Arc::downgrade(&cell)); + cell + } } - } - - /// - /// Guards an attempt to upload the given `Digest`, skipping the upload if another attempt has - /// been successful. Will not return until either an attempt has succeed, or this attempt has - /// failed. 
- /// - async fn maybe_upload( - &self, - digest: Digest, - upload: impl Future>, - ) -> Result<(), E> { - Self::cell_from(&self.in_flight_uploads, digest) - .get_or_try_init(upload) - .await - .map(|&()| ()) - } - - /// - /// Guards an attempt to download the given `Digest`, skipping the download if another attempt - /// has been successful. Will not return until either an attempt has succeed, or this attempt has - /// failed. - /// - async fn maybe_download( - &self, - digest: Digest, - upload: impl Future>, - ) -> Result<(), E> { - Self::cell_from(&self.in_flight_downloads, digest) - .get_or_try_init(upload) - .await - .map(|&()| ()) - } - - async fn remote_writer( - remote_store: &remote::ByteStore, - digest: Digest, - file: tokio::fs::File, - ) -> Result { - remote_store.load_file(digest, file).await?.ok_or_else(|| { - StoreError::MissingDigest( - "Was not present in either the local or remote store".to_owned(), - digest, - ) - }) - } - - /// Download the digest to the local byte store from this remote store. The function `f_remote` - /// can be used to validate the bytes (NB. if provided, the whole value will be buffered into - /// memory to provide the `Bytes` argument, and thus `f_remote` should only be used for small digests). - async fn download_digest_to_local( - &self, - local_store: local::ByteStore, - digest: Digest, - entry_type: EntryType, - f_remote: Option<&(dyn Fn(Bytes) -> Result<(), String> + Send + Sync + 'static)>, - ) -> Result<(), StoreError> { - let remote_store = self.store.clone(); - self + + /// + /// Guards an attempt to upload the given `Digest`, skipping the upload if another attempt has + /// been successful. Will not return until either an attempt has succeed, or this attempt has + /// failed. + /// + async fn maybe_upload( + &self, + digest: Digest, + upload: impl Future>, + ) -> Result<(), E> { + Self::cell_from(&self.in_flight_uploads, digest) + .get_or_try_init(upload) + .await + .map(|&()| ()) + } + + /// + /// Guards an attempt to download the given `Digest`, skipping the download if another attempt + /// has been successful. Will not return until either an attempt has succeed, or this attempt has + /// failed. + /// + async fn maybe_download( + &self, + digest: Digest, + upload: impl Future>, + ) -> Result<(), E> { + Self::cell_from(&self.in_flight_downloads, digest) + .get_or_try_init(upload) + .await + .map(|&()| ()) + } + + async fn remote_writer( + remote_store: &remote::ByteStore, + digest: Digest, + file: tokio::fs::File, + ) -> Result { + remote_store.load_file(digest, file).await?.ok_or_else(|| { + StoreError::MissingDigest( + "Was not present in either the local or remote store".to_owned(), + digest, + ) + }) + } + + /// Download the digest to the local byte store from this remote store. The function `f_remote` + /// can be used to validate the bytes (NB. if provided, the whole value will be buffered into + /// memory to provide the `Bytes` argument, and thus `f_remote` should only be used for small digests). 
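RemoteStore's `cell_from`/`maybe_upload`/`maybe_download` above deduplicate concurrent transfers of the same digest: every caller for a given digest is handed the same once-cell, and the map holds the cells only weakly so entries disappear once the last caller drops its handle. Below is a condensed sketch of that pattern, not the crate's own code; it assumes tokio's `sync::OnceCell` in place of `async_oncecell`, and uses a fixed-size byte array as a hypothetical stand-in for `hashing::Digest`.

use std::collections::HashMap;
use std::future::Future;
use std::sync::{Arc, Mutex, Weak};

use tokio::sync::OnceCell;

/// Hypothetical stand-in for `hashing::Digest`.
type Digest = [u8; 32];

#[derive(Default)]
struct InFlight {
    cells: Mutex<HashMap<Digest, Weak<OnceCell<()>>>>,
}

impl InFlight {
    /// Return a strongly held cell from the map of weakly held cells, creating it if necessary.
    fn cell_for(&self, digest: Digest) -> Arc<OnceCell<()>> {
        let mut cells = self.cells.lock().unwrap();
        if let Some(cell) = cells.get(&digest).and_then(|weak| weak.upgrade()) {
            cell
        } else {
            let cell = Arc::new(OnceCell::new());
            cells.insert(digest, Arc::downgrade(&cell));
            cell
        }
    }

    /// Run `work` at most once per digest among concurrent callers; everyone else awaits the
    /// first attempt. A failed attempt leaves the cell empty, so a later caller will retry.
    async fn run_once<E, F, Fut>(&self, digest: Digest, work: F) -> Result<(), E>
    where
        F: FnOnce() -> Fut,
        Fut: Future<Output = Result<(), E>>,
    {
        self.cell_for(digest)
            .get_or_try_init(work)
            .await
            .map(|_| ())
    }
}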
+ async fn download_digest_to_local( + &self, + local_store: local::ByteStore, + digest: Digest, + entry_type: EntryType, + f_remote: Option<&(dyn Fn(Bytes) -> Result<(), String> + Send + Sync + 'static)>, + ) -> Result<(), StoreError> { + let remote_store = self.store.clone(); + self .maybe_download(digest, async move { let store_into_fsdb = ByteStore::should_use_fsdb(entry_type, digest.size_bytes); if store_into_fsdb { @@ -289,7 +289,7 @@ impl RemoteStore { Ok(()) }) .await - } + } } /// @@ -306,27 +306,27 @@ impl RemoteStore { /// #[derive(Debug, Clone)] pub struct Store { - local: local::ByteStore, - remote: Option, - immutable_inputs_base: Option, + local: local::ByteStore, + remote: Option, + immutable_inputs_base: Option, } #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ShrinkBehavior { - /// - /// Free up space in the store for future writes (marking pages as dirty), but don't proactively - /// free up the disk space that was used. This is fast and safe, but won't free up disk space. - /// - Fast, - - /// - /// As with Fast, but also free up disk space from no-longer-used data. This may use extra disk - /// space temporarily while compaction is happening. - /// - /// Note that any processes which have the Store open may need to re-open the Store after this - /// operation, as the underlying files may have been re-written. - /// - Compact, + /// + /// Free up space in the store for future writes (marking pages as dirty), but don't proactively + /// free up the disk space that was used. This is fast and safe, but won't free up disk space. + /// + Fast, + + /// + /// As with Fast, but also free up disk space from no-longer-used data. This may use extra disk + /// space temporarily while compaction is happening. + /// + /// Note that any processes which have the Store open may need to re-open the Store after this + /// operation, as the underlying files may have been re-written. + /// + Compact, } // Note that Store doesn't implement ByteStore because it operates at a higher level of abstraction, @@ -334,408 +334,410 @@ pub enum ShrinkBehavior { // This has the nice property that Directories can be trusted to be valid and canonical. // We may want to re-visit this if we end up wanting to handle local/remote/merged interchangeably. impl Store { - /// - /// Make a store which only uses its local storage. - /// - pub fn local_only>( - executor: task_executor::Executor, - path: P, - ) -> Result { - Ok(Store { - local: local::ByteStore::new(executor, path)?, - remote: None, - immutable_inputs_base: None, - }) - } - - pub fn local_only_with_options>( - executor: task_executor::Executor, - path: P, - immutable_inputs_base: &Path, - options: LocalOptions, - ) -> Result { - Ok(Store { - local: local::ByteStore::new_with_options(executor, path, options)?, - remote: None, - immutable_inputs_base: Some(immutable_inputs_base.to_path_buf()), - }) - } - - /// - /// Converts this (copy of) a Store to local only by dropping the remote half. - /// - /// Because both underlying stores are reference counted, this is cheap, and has no effect on - /// other clones of the Store. - /// - pub fn into_local_only(self) -> Store { - Store { - local: self.local, - remote: None, - immutable_inputs_base: self.immutable_inputs_base, + /// + /// Make a store which only uses its local storage. 
+ /// + pub fn local_only>( + executor: task_executor::Executor, + path: P, + ) -> Result { + Ok(Store { + local: local::ByteStore::new(executor, path)?, + remote: None, + immutable_inputs_base: None, + }) } - } - - /// - /// Add remote storage to a Store. If it is missing a value which it tries to load, it will - /// attempt to back-fill its local storage from the remote storage. - /// - pub async fn into_with_remote(self, remote_options: RemoteOptions) -> Result { - Ok(Store { - local: self.local, - remote: Some(RemoteStore::new( - remote::ByteStore::from_options(remote_options).await?, - )), - immutable_inputs_base: self.immutable_inputs_base, - }) - } - - // This default suffix is also hard-coded into the Python options code in global_options.py - pub fn default_path() -> PathBuf { - default_cache_path().join("lmdb_store") - } - - /// - /// Remove a file locally, returning true if it existed, or false otherwise. - /// - pub async fn remove_file(&self, digest: Digest) -> Result { - self.local.remove(EntryType::File, digest).await - } - - /// - /// A convenience method for storing small files. - /// - /// NB: This method should not be used for large blobs: prefer to stream them from their source - /// using `store_file`. - /// - pub async fn store_file_bytes( - &self, - bytes: Bytes, - initial_lease: bool, - ) -> Result { - let digest = Digest::of_bytes(&bytes); - self - .local - .store_bytes(EntryType::File, digest.hash, bytes, initial_lease) - .await?; - Ok(digest) - } - - /// - /// A convenience method for storing batches of small files. - /// - /// NB: This method should not be used for large blobs: prefer to stream them from their source - /// using `store_file`. - /// - pub async fn store_file_bytes_batch( - &self, - items: Vec<(Fingerprint, Bytes)>, - initial_lease: bool, - ) -> Result<(), String> { - self - .local - .store_bytes_batch(EntryType::File, items, initial_lease) - .await?; - Ok(()) - } - - /// - /// Store a file locally by streaming its contents. - /// - pub async fn store_file( - &self, - initial_lease: bool, - data_is_immutable: bool, - src: PathBuf, - ) -> Result { - self - .local - .store(EntryType::File, initial_lease, data_is_immutable, src) - .await - } - - /// Store a digest under a given file path, returning a Snapshot - pub async fn snapshot_of_one_file( - &self, - name: RelativePath, - digest: hashing::Digest, - is_executable: bool, - ) -> Result { - #[derive(Clone)] - struct Digester { - digest: hashing::Digest, + + pub fn local_only_with_options>( + executor: task_executor::Executor, + path: P, + immutable_inputs_base: &Path, + options: LocalOptions, + ) -> Result { + Ok(Store { + local: local::ByteStore::new_with_options(executor, path, options)?, + remote: None, + immutable_inputs_base: Some(immutable_inputs_base.to_path_buf()), + }) + } + + /// + /// Converts this (copy of) a Store to local only by dropping the remote half. + /// + /// Because both underlying stores are reference counted, this is cheap, and has no effect on + /// other clones of the Store. + /// + pub fn into_local_only(self) -> Store { + Store { + local: self.local, + remote: None, + immutable_inputs_base: self.immutable_inputs_base, + } + } + + /// + /// Add remote storage to a Store. If it is missing a value which it tries to load, it will + /// attempt to back-fill its local storage from the remote storage. 
+ /// + pub async fn into_with_remote(self, remote_options: RemoteOptions) -> Result { + Ok(Store { + local: self.local, + remote: Some(RemoteStore::new( + remote::ByteStore::from_options(remote_options).await?, + )), + immutable_inputs_base: self.immutable_inputs_base, + }) + } + + // This default suffix is also hard-coded into the Python options code in global_options.py + pub fn default_path() -> PathBuf { + default_cache_path().join("lmdb_store") + } + + /// + /// Remove a file locally, returning true if it existed, or false otherwise. + /// + pub async fn remove_file(&self, digest: Digest) -> Result { + self.local.remove(EntryType::File, digest).await } - impl StoreFileByDigest for Digester { - fn store_by_digest( + /// + /// A convenience method for storing small files. + /// + /// NB: This method should not be used for large blobs: prefer to stream them from their source + /// using `store_file`. + /// + pub async fn store_file_bytes( &self, - _: fs::File, - ) -> future::BoxFuture<'static, Result> { - future::ok(self.digest).boxed() - } + bytes: Bytes, + initial_lease: bool, + ) -> Result { + let digest = Digest::of_bytes(&bytes); + self.local + .store_bytes(EntryType::File, digest.hash, bytes, initial_lease) + .await?; + Ok(digest) } - Snapshot::from_path_stats( - Digester { digest }, - vec![fs::PathStat::File { - path: name.clone().into(), - stat: fs::File { - path: name.into(), - is_executable, - }, - }], - ) - .await - } - - /// - /// Loads the bytes of the file with the passed fingerprint from the local store and back-fill - /// from remote when necessary and possible (i.e. when remote is configured), and returns the - /// result of applying f to that value. - /// - pub async fn load_file_bytes_with< - T: Send + 'static, - F: Fn(&[u8]) -> T + Clone + Send + Sync + 'static, - >( - &self, - digest: Digest, - f: F, - ) -> Result { - // No transformation or verification (beyond verifying their digest, which the - // `remote::ByteStore` will do) is needed for files. - self - .load_bytes_with(EntryType::File, digest, move |v: &[u8]| Ok(f(v)), None) - .await - } - - /// - /// Ensure that the recursive contents of the given DigestTrie are persisted in the local Store. - /// - pub async fn record_digest_trie( - &self, - tree: DigestTrie, - initial_lease: bool, - ) -> Result { - // Collect all Directory structs in the trie. - let mut directories = Vec::new(); - tree.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { - directory::Entry::Directory(d) => { - let directory = d.as_remexec_directory(); + /// + /// A convenience method for storing batches of small files. + /// + /// NB: This method should not be used for large blobs: prefer to stream them from their source + /// using `store_file`. + /// + pub async fn store_file_bytes_batch( + &self, + items: Vec<(Fingerprint, Bytes)>, + initial_lease: bool, + ) -> Result<(), String> { + self.local + .store_bytes_batch(EntryType::File, items, initial_lease) + .await?; + Ok(()) + } + + /// + /// Store a file locally by streaming its contents. 
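`store_file_bytes` above shows the content-addressing contract of the store: the key is derived from the bytes themselves (`Digest::of_bytes`), so storing the same content twice is idempotent and the returned digest is what callers later use to load it. A minimal in-memory sketch of that contract follows; it assumes the `sha2` crate for hashing and a plain `HashMap` for storage, whereas the real store uses its own `hashing` crate and LMDB/file-backed storage.

use std::collections::HashMap;

use sha2::{Digest as _, Sha256};

/// Hypothetical in-memory content-addressed store keyed by SHA-256.
#[derive(Default)]
struct ContentStore {
    blobs: HashMap<[u8; 32], Vec<u8>>,
}

impl ContentStore {
    /// Store `bytes` under the digest of their content and return that digest.
    /// Storing identical bytes again is a no-op that returns the same key.
    fn store_bytes(&mut self, bytes: &[u8]) -> [u8; 32] {
        let mut digest = [0u8; 32];
        digest.copy_from_slice(&Sha256::digest(bytes));
        self.blobs.entry(digest).or_insert_with(|| bytes.to_vec());
        digest
    }

    /// Load previously stored bytes, if present.
    fn load_bytes(&self, digest: &[u8; 32]) -> Option<&[u8]> {
        self.blobs.get(digest).map(Vec::as_slice)
    }
}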
+ /// + pub async fn store_file( + &self, + initial_lease: bool, + data_is_immutable: bool, + src: PathBuf, + ) -> Result { + self.local + .store(EntryType::File, initial_lease, data_is_immutable, src) + .await + } + + /// Store a digest under a given file path, returning a Snapshot + pub async fn snapshot_of_one_file( + &self, + name: RelativePath, + digest: hashing::Digest, + is_executable: bool, + ) -> Result { + #[derive(Clone)] + struct Digester { + digest: hashing::Digest, + } + + impl StoreFileByDigest for Digester { + fn store_by_digest( + &self, + _: fs::File, + ) -> future::BoxFuture<'static, Result> { + future::ok(self.digest).boxed() + } + } + + Snapshot::from_path_stats( + Digester { digest }, + vec![fs::PathStat::File { + path: name.clone().into(), + stat: fs::File { + path: name.into(), + is_executable, + }, + }], + ) + .await + } + + /// + /// Loads the bytes of the file with the passed fingerprint from the local store and back-fill + /// from remote when necessary and possible (i.e. when remote is configured), and returns the + /// result of applying f to that value. + /// + pub async fn load_file_bytes_with< + T: Send + 'static, + F: Fn(&[u8]) -> T + Clone + Send + Sync + 'static, + >( + &self, + digest: Digest, + f: F, + ) -> Result { + // No transformation or verification (beyond verifying their digest, which the + // `remote::ByteStore` will do) is needed for files. + self.load_bytes_with(EntryType::File, digest, move |v: &[u8]| Ok(f(v)), None) + .await + } + + /// + /// Ensure that the recursive contents of the given DigestTrie are persisted in the local Store. + /// + pub async fn record_digest_trie( + &self, + tree: DigestTrie, + initial_lease: bool, + ) -> Result { + // Collect all Directory structs in the trie. + let mut directories = Vec::new(); + tree.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { + directory::Entry::Directory(d) => { + let directory = d.as_remexec_directory(); + if cfg!(debug_assertions) { + protos::verify_directory_canonical(d.digest(), &directory).unwrap(); + } + directories.push((d.digest().hash, directory.to_bytes())) + } + directory::Entry::File(_) => (), + directory::Entry::Symlink(_) => (), + }); + + // Then store them as a batch. + let local = self.local.clone(); + let root = &directories[0]; + let top_digest = Digest::new(root.0, root.1.len()); + local + .store_bytes_batch(EntryType::Directory, directories, initial_lease) + .await?; + + Ok(DirectoryDigest::new(top_digest, tree)) + } + + /// + /// Save the bytes of the Directory proto locally, without regard for any of the + /// contents of any FileNodes or DirectoryNodes therein (i.e. does not require that its + /// children are already stored). + /// + pub async fn record_directory( + &self, + directory: &remexec::Directory, + initial_lease: bool, + ) -> Result { + let local = self.local.clone(); + let bytes = directory.to_bytes(); + let digest = Digest::of_bytes(&bytes); + local + .store_bytes( + EntryType::Directory, + digest.hash, + directory.to_bytes(), + initial_lease, + ) + .await?; if cfg!(debug_assertions) { - protos::verify_directory_canonical(d.digest(), &directory).unwrap(); + protos::verify_directory_canonical(digest, directory)?; } - directories.push((d.digest().hash, directory.to_bytes())) - } - directory::Entry::File(_) => (), - directory::Entry::Symlink(_) => (), - }); - - // Then store them as a batch. 
- let local = self.local.clone(); - let root = &directories[0]; - let top_digest = Digest::new(root.0, root.1.len()); - local - .store_bytes_batch(EntryType::Directory, directories, initial_lease) - .await?; - - Ok(DirectoryDigest::new(top_digest, tree)) - } - - /// - /// Save the bytes of the Directory proto locally, without regard for any of the - /// contents of any FileNodes or DirectoryNodes therein (i.e. does not require that its - /// children are already stored). - /// - pub async fn record_directory( - &self, - directory: &remexec::Directory, - initial_lease: bool, - ) -> Result { - let local = self.local.clone(); - let bytes = directory.to_bytes(); - let digest = Digest::of_bytes(&bytes); - local - .store_bytes( - EntryType::Directory, - digest.hash, - directory.to_bytes(), - initial_lease, - ) - .await?; - if cfg!(debug_assertions) { - protos::verify_directory_canonical(digest, directory)?; + Ok(digest) } - Ok(digest) - } - - /// - /// Loads a DigestTree from the local store, back-filling from remote if necessary. - /// - /// TODO: Add a native implementation that skips creating PathStats and directly produces - /// a DigestTrie. - /// - pub async fn load_digest_trie(&self, digest: DirectoryDigest) -> Result { - if let Some(tree) = digest.tree { - // The DigestTrie is already loaded. - return Ok(tree); + + /// + /// Loads a DigestTree from the local store, back-filling from remote if necessary. + /// + /// TODO: Add a native implementation that skips creating PathStats and directly produces + /// a DigestTrie. + /// + pub async fn load_digest_trie( + &self, + digest: DirectoryDigest, + ) -> Result { + if let Some(tree) = digest.tree { + // The DigestTrie is already loaded. + return Ok(tree); + } + + // The DigestTrie needs to be loaded from the Store. + let path_stats_per_directory = self + .walk(digest.as_digest(), |_, path_so_far, _, directory| { + let mut path_stats = Vec::new(); + path_stats.extend(directory.directories.iter().map(move |dir_node| { + let path = path_so_far.join(&dir_node.name); + (PathStat::dir(path.clone(), Dir(path)), None) + })); + path_stats.extend(directory.files.iter().map(move |file_node| { + let path = path_so_far.join(&file_node.name); + ( + PathStat::file( + path.clone(), + File { + path: path.clone(), + is_executable: file_node.is_executable, + }, + ), + Some((path, file_node.digest.as_ref().unwrap().try_into().unwrap())), + ) + })); + path_stats.extend(directory.symlinks.iter().map(move |link_node| { + let path = path_so_far.join(&link_node.name); + ( + PathStat::link( + path.clone(), + Link { + path, + target: link_node.target.clone().into(), + }, + ), + None, + ) + })); + future::ok(path_stats).boxed() + }) + .await?; + + let (path_stats, maybe_digests): (Vec<_>, Vec<_>) = + Iterator::flatten(path_stats_per_directory.into_iter().map(Vec::into_iter)).unzip(); + let file_digests = maybe_digests.into_iter().flatten().collect(); + + let tree = DigestTrie::from_unique_paths( + path_stats.iter().map(|p| p.into()).collect(), + &file_digests, + )?; + let computed_digest = tree.compute_root_digest(); + if digest.as_digest() != computed_digest { + return Err(format!( + "Computed digest for Snapshot loaded from store mismatched: {:?} vs {:?}", + digest.as_digest(), + computed_digest + ) + .into()); + } + + Ok(tree) } - // The DigestTrie needs to be loaded from the Store. 
- let path_stats_per_directory = self - .walk(digest.as_digest(), |_, path_so_far, _, directory| { - let mut path_stats = Vec::new(); - path_stats.extend(directory.directories.iter().map(move |dir_node| { - let path = path_so_far.join(&dir_node.name); - (PathStat::dir(path.clone(), Dir(path)), None) - })); - path_stats.extend(directory.files.iter().map(move |file_node| { - let path = path_so_far.join(&file_node.name); - ( - PathStat::file( - path.clone(), - File { - path: path.clone(), - is_executable: file_node.is_executable, - }, - ), - Some((path, file_node.digest.as_ref().unwrap().try_into().unwrap())), - ) - })); - path_stats.extend(directory.symlinks.iter().map(move |link_node| { - let path = path_so_far.join(&link_node.name); - ( - PathStat::link( - path.clone(), - Link { - path, - target: link_node.target.clone().into(), - }, - ), - None, - ) - })); - future::ok(path_stats).boxed() - }) - .await?; - - let (path_stats, maybe_digests): (Vec<_>, Vec<_>) = - Iterator::flatten(path_stats_per_directory.into_iter().map(Vec::into_iter)).unzip(); - let file_digests = maybe_digests.into_iter().flatten().collect(); - - let tree = - DigestTrie::from_unique_paths(path_stats.iter().map(|p| p.into()).collect(), &file_digests)?; - let computed_digest = tree.compute_root_digest(); - if digest.as_digest() != computed_digest { - return Err( - format!( - "Computed digest for Snapshot loaded from store mismatched: {:?} vs {:?}", - digest.as_digest(), - computed_digest + /// + /// Loads the given directory Digest as a DirectoryDigest, eagerly fetching its tree from + /// storage. To convert non-eagerly, use `DirectoryDigest::from_persisted_digest`. + /// + /// In general, DirectoryDigests should be consumed lazily to avoid fetching from a remote + /// store unnecessarily, so this method is primarily useful for tests and benchmarks. + /// + pub async fn load_directory_digest( + &self, + digest: Digest, + ) -> Result { + Ok(DirectoryDigest::new( + digest, + self.load_digest_trie(DirectoryDigest::from_persisted_digest(digest)) + .await?, + )) + } + + /// + /// Loads a directory proto from the local store, back-filling from remote if necessary. + /// + /// Guarantees that if an Ok value is returned, it is valid, and canonical, and its fingerprint + /// exactly matches that which is requested. Will return an Err if it would return a non-canonical + /// Directory. + /// + pub async fn load_directory(&self, digest: Digest) -> Result { + self.load_bytes_with( + EntryType::Directory, + digest, + // Trust that locally stored values were canonical when they were written into the CAS + // and only verify in debug mode, as it's slightly expensive. + move |bytes: &[u8]| { + let directory = remexec::Directory::decode(bytes).map_err(|e| { + format!("LMDB corruption: Directory bytes for {digest:?} were not valid: {e:?}") + })?; + if cfg!(debug_assertions) { + protos::verify_directory_canonical(digest, &directory)?; + } + Ok(directory) + }, + // Eagerly verify that CAS-returned Directories are canonical, so that we don't write them + // into our local store. + Some(&move |bytes| { + let directory = remexec::Directory::decode(bytes).map_err(|e| { + format!( + "CAS returned Directory proto for {digest:?} which was not valid: {e:?}" + ) + })?; + protos::verify_directory_canonical(digest, &directory)?; + Ok(()) + }), ) - .into(), - ); + .await } - Ok(tree) - } - - /// - /// Loads the given directory Digest as a DirectoryDigest, eagerly fetching its tree from - /// storage. 
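The mismatch check in `load_digest_trie` above is the integrity guarantee of the scheme: after rebuilding the trie, its root digest is recomputed and compared against the digest that was requested, so a corrupted or mislabelled blob is rejected rather than silently returned. The same check in miniature, again assuming `sha2` as the hash and plain bytes as the loaded value rather than a directory trie:

use sha2::{Digest as _, Sha256};

/// Recompute the digest of `bytes` and reject the value if it does not match what was requested.
fn verify_loaded(expected: &[u8; 32], bytes: &[u8]) -> Result<(), String> {
    let mut computed = [0u8; 32];
    computed.copy_from_slice(&Sha256::digest(bytes));
    if &computed == expected {
        Ok(())
    } else {
        Err(format!(
            "Computed digest for value loaded from store mismatched: {computed:02x?} vs {expected:02x?}"
        ))
    }
}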
To convert non-eagerly, use `DirectoryDigest::from_persisted_digest`. - /// - /// In general, DirectoryDigests should be consumed lazily to avoid fetching from a remote - /// store unnecessarily, so this method is primarily useful for tests and benchmarks. - /// - pub async fn load_directory_digest(&self, digest: Digest) -> Result { - Ok(DirectoryDigest::new( - digest, - self - .load_digest_trie(DirectoryDigest::from_persisted_digest(digest)) - .await?, - )) - } - - /// - /// Loads a directory proto from the local store, back-filling from remote if necessary. - /// - /// Guarantees that if an Ok value is returned, it is valid, and canonical, and its fingerprint - /// exactly matches that which is requested. Will return an Err if it would return a non-canonical - /// Directory. - /// - pub async fn load_directory(&self, digest: Digest) -> Result { - self - .load_bytes_with( - EntryType::Directory, - digest, - // Trust that locally stored values were canonical when they were written into the CAS - // and only verify in debug mode, as it's slightly expensive. - move |bytes: &[u8]| { - let directory = remexec::Directory::decode(bytes).map_err(|e| { - format!("LMDB corruption: Directory bytes for {digest:?} were not valid: {e:?}") - })?; - if cfg!(debug_assertions) { - protos::verify_directory_canonical(digest, &directory)?; - } - Ok(directory) - }, - // Eagerly verify that CAS-returned Directories are canonical, so that we don't write them - // into our local store. - Some(&move |bytes| { - let directory = remexec::Directory::decode(bytes).map_err(|e| { - format!("CAS returned Directory proto for {digest:?} which was not valid: {e:?}") - })?; - protos::verify_directory_canonical(digest, &directory)?; - Ok(()) - }), - ) - .await - } - - /// - /// Ensures that the directory entries of the given DirectoryDigest is persisted to disk. - /// - /// TODO: By the end of #13112, usage of this method should be limited to the writing of cache - /// entries. - /// - pub async fn ensure_directory_digest_persisted( - &self, - digest: DirectoryDigest, - ) -> Result<(), StoreError> { - let tree = self.load_digest_trie(digest).await?; - let _ = self.record_digest_trie(tree, true).await?; - Ok(()) - } - - /// - /// Loads bytes from remote cas if required and possible (i.e. if remote is configured). Takes - /// two functions f_local and f_remote. These functions are any validation or transformations you - /// want to perform on the bytes received from the local and remote cas (if remote is configured). - /// - async fn load_bytes_with< - T: Send + 'static, - FLocal: Fn(&[u8]) -> Result + Clone + Send + Sync + 'static, - >( - &self, - entry_type: EntryType, - digest: Digest, - f_local: FLocal, - f_remote: Option<&(dyn Fn(Bytes) -> Result<(), String> + Send + Sync + 'static)>, - ) -> Result { - if let Some(bytes_res) = self - .local - .load_bytes_with(entry_type, digest, f_local.clone()) - .await? - { - return Ok(bytes_res?); + /// + /// Ensures that the directory entries of the given DirectoryDigest is persisted to disk. + /// + /// TODO: By the end of #13112, usage of this method should be limited to the writing of cache + /// entries. 
+ /// + pub async fn ensure_directory_digest_persisted( + &self, + digest: DirectoryDigest, + ) -> Result<(), StoreError> { + let tree = self.load_digest_trie(digest).await?; + let _ = self.record_digest_trie(tree, true).await?; + Ok(()) } - let remote = self.remote.clone().ok_or_else(|| { - StoreError::MissingDigest("Was not present in the local store".to_owned(), digest) - })?; - remote - .download_digest_to_local(self.local.clone(), digest, entry_type, f_remote) - .await?; + /// + /// Loads bytes from remote cas if required and possible (i.e. if remote is configured). Takes + /// two functions f_local and f_remote. These functions are any validation or transformations you + /// want to perform on the bytes received from the local and remote cas (if remote is configured). + /// + async fn load_bytes_with< + T: Send + 'static, + FLocal: Fn(&[u8]) -> Result + Clone + Send + Sync + 'static, + >( + &self, + entry_type: EntryType, + digest: Digest, + f_local: FLocal, + f_remote: Option<&(dyn Fn(Bytes) -> Result<(), String> + Send + Sync + 'static)>, + ) -> Result { + if let Some(bytes_res) = self + .local + .load_bytes_with(entry_type, digest, f_local.clone()) + .await? + { + return Ok(bytes_res?); + } - Ok( + let remote = self.remote.clone().ok_or_else(|| { + StoreError::MissingDigest("Was not present in the local store".to_owned(), digest) + })?; + remote + .download_digest_to_local(self.local.clone(), digest, entry_type, f_remote) + .await?; + + Ok( self .local .load_bytes_with(entry_type, digest, f_local) @@ -744,900 +746,921 @@ impl Store { format!("After downloading {digest:?}, the local store claimed that it was not present.") })??, ) - } - - /// - /// Ensures that the remote ByteStore has a copy of each passed Fingerprint, including any files - /// contained in any Directories in the list. - /// - /// This method starts by expanding all Digests locally to determine their types. If it cannot - /// find a Digest locally, it will check whether it exists remotely, without downloading it. - /// - /// TODO: This method is only aware of File and Directory typed blobs: in particular, that means - /// it will not expand Trees to upload the files that they refer to. See #13006. - /// - pub fn ensure_remote_has_recursive( - &self, - digests: Vec, - ) -> BoxFuture<'static, Result> { - let start_time = Instant::now(); - - let remote_store = if let Some(ref remote) = self.remote { - remote.clone() - } else { - return futures::future::err( - "Cannot ensure remote has blobs without a remote" - .to_owned() - .into(), - ) - .boxed(); - }; - - let store = self.clone(); - let remote = remote_store.store.clone(); - async move { - let ingested_digests = store.expand_local_digests(digests.iter()).await?; - let digests_to_upload = - if Store::upload_is_faster_than_checking_whether_to_upload(ingested_digests.iter()) { - ingested_digests.keys().cloned().collect() + } + + /// + /// Ensures that the remote ByteStore has a copy of each passed Fingerprint, including any files + /// contained in any Directories in the list. + /// + /// This method starts by expanding all Digests locally to determine their types. If it cannot + /// find a Digest locally, it will check whether it exists remotely, without downloading it. + /// + /// TODO: This method is only aware of File and Directory typed blobs: in particular, that means + /// it will not expand Trees to upload the files that they refer to. See #13006. 
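`load_bytes_with` above is a read-through cache: serve from the local store when possible, fail fast if no remote is configured, otherwise download into the local store and read the value back from there so the local store remains the single source the rest of the code loads from. A synchronous sketch of that control flow, with a `HashMap` standing in for the local store, a closure standing in for the remote, and a hypothetical integer `Digest` key:

use std::collections::HashMap;

/// Hypothetical stand-in key type.
type Digest = u64;

/// Serve `digest` from `local`, back-filling from `remote` when necessary and possible.
fn load_through(
    local: &mut HashMap<Digest, Vec<u8>>,
    remote: Option<&dyn Fn(Digest) -> Option<Vec<u8>>>,
    digest: Digest,
) -> Result<Vec<u8>, String> {
    if let Some(bytes) = local.get(&digest) {
        return Ok(bytes.clone());
    }
    // No remote configured: the digest is simply missing.
    let fetch = remote.ok_or_else(|| format!("{digest} was not present in the local store"))?;
    // Download, back-fill the local store, then serve from it.
    let bytes = fetch(digest)
        .ok_or_else(|| format!("{digest} was not present in either the local or remote store"))?;
    local.insert(digest, bytes);
    Ok(local[&digest].clone())
}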
+ /// + pub fn ensure_remote_has_recursive( + &self, + digests: Vec, + ) -> BoxFuture<'static, Result> { + let start_time = Instant::now(); + + let remote_store = if let Some(ref remote) = self.remote { + remote.clone() } else { - remote - .list_missing_digests(ingested_digests.keys().cloned()) - .await? + return futures::future::err( + "Cannot ensure remote has blobs without a remote" + .to_owned() + .into(), + ) + .boxed(); }; - future::try_join_all( - digests_to_upload - .iter() - .cloned() - .map(|digest| { - let local = store.local.clone(); - let remote_store = remote_store.clone(); - let maybe_entry_type: Option = ingested_digests[&digest]; - async move { - let entry_type = if let Some(et) = maybe_entry_type { - et - } else { - return Err(StoreError::MissingDigest( - "Did not exist either locally or remotely".to_owned(), - digest, - )); - }; - - remote_store - .clone() - .maybe_upload(digest, async move { - match local.load_from_fs(digest).await? { - Some(path) => { - Self::store_fsdb_blob_remote(remote_store.store, digest, path).await? - } - None => { - Self::store_lmdb_blob_remote(local, remote_store.store, entry_type, digest) + let store = self.clone(); + let remote = remote_store.store.clone(); + async move { + let ingested_digests = store.expand_local_digests(digests.iter()).await?; + let digests_to_upload = + if Store::upload_is_faster_than_checking_whether_to_upload(ingested_digests.iter()) + { + ingested_digests.keys().cloned().collect() + } else { + remote + .list_missing_digests(ingested_digests.keys().cloned()) .await? - } - }; - Ok(()) - }) - .await - } - }) - .collect::>(), - ) - .await?; - - let ingested_file_sizes = ingested_digests.keys().map(|digest| digest.size_bytes); - let uploaded_file_sizes = digests_to_upload.iter().map(|digest| digest.size_bytes); - - Ok(UploadSummary { - ingested_file_count: ingested_file_sizes.len(), - ingested_file_bytes: ingested_file_sizes.sum(), - uploaded_file_count: uploaded_file_sizes.len(), - uploaded_file_bytes: uploaded_file_sizes.sum(), - upload_wall_time: start_time.elapsed(), - }) + }; + + future::try_join_all( + digests_to_upload + .iter() + .cloned() + .map(|digest| { + let local = store.local.clone(); + let remote_store = remote_store.clone(); + let maybe_entry_type: Option = ingested_digests[&digest]; + async move { + let entry_type = if let Some(et) = maybe_entry_type { + et + } else { + return Err(StoreError::MissingDigest( + "Did not exist either locally or remotely".to_owned(), + digest, + )); + }; + + remote_store + .clone() + .maybe_upload(digest, async move { + match local.load_from_fs(digest).await? { + Some(path) => { + Self::store_fsdb_blob_remote( + remote_store.store, + digest, + path, + ) + .await? + } + None => { + Self::store_lmdb_blob_remote( + local, + remote_store.store, + entry_type, + digest, + ) + .await? 
+ } + }; + Ok(()) + }) + .await + } + }) + .collect::>(), + ) + .await?; + + let ingested_file_sizes = ingested_digests.keys().map(|digest| digest.size_bytes); + let uploaded_file_sizes = digests_to_upload.iter().map(|digest| digest.size_bytes); + + Ok(UploadSummary { + ingested_file_count: ingested_file_sizes.len(), + ingested_file_bytes: ingested_file_sizes.sum(), + uploaded_file_count: uploaded_file_sizes.len(), + uploaded_file_bytes: uploaded_file_sizes.sum(), + upload_wall_time: start_time.elapsed(), + }) + } + .boxed() } - .boxed() - } - async fn store_lmdb_blob_remote( - local: local::ByteStore, - remote: remote::ByteStore, - entry_type: EntryType, - digest: Digest, - ) -> Result<(), StoreError> { - // We need to copy the bytes into memory so that they may be used safely in an async - // future. While this unfortunately increases memory consumption, we prioritize - // being able to run `remote.store_bytes()` as async. In addition, this is only used - // for blobs in the LMDB store, most of which are small: large blobs end up in the - // FSDB store. - // - // See https://github.com/pantsbuild/pants/pull/9793 for an earlier implementation - // that used `Executor.block_on`, which avoided the clone but was blocking. - let maybe_bytes = local - .load_bytes_with(entry_type, digest, move |bytes| { - Bytes::copy_from_slice(bytes) - }) - .await?; - match maybe_bytes { - Some(bytes) => Ok(remote.store_bytes(bytes).await?), - None => Err(StoreError::MissingDigest( - format!("Failed to upload {entry_type:?}: Not found in local store",), - digest, - )), + async fn store_lmdb_blob_remote( + local: local::ByteStore, + remote: remote::ByteStore, + entry_type: EntryType, + digest: Digest, + ) -> Result<(), StoreError> { + // We need to copy the bytes into memory so that they may be used safely in an async + // future. While this unfortunately increases memory consumption, we prioritize + // being able to run `remote.store_bytes()` as async. In addition, this is only used + // for blobs in the LMDB store, most of which are small: large blobs end up in the + // FSDB store. + // + // See https://github.com/pantsbuild/pants/pull/9793 for an earlier implementation + // that used `Executor.block_on`, which avoided the clone but was blocking. + let maybe_bytes = local + .load_bytes_with(entry_type, digest, move |bytes| { + Bytes::copy_from_slice(bytes) + }) + .await?; + match maybe_bytes { + Some(bytes) => Ok(remote.store_bytes(bytes).await?), + None => Err(StoreError::MissingDigest( + format!("Failed to upload {entry_type:?}: Not found in local store",), + digest, + )), + } } - } - - async fn store_fsdb_blob_remote( - remote: remote::ByteStore, - digest: Digest, - path: PathBuf, - ) -> Result<(), StoreError> { - let file = tokio::fs::File::open(&path) - .await - .map_err(|e| format!("failed to read {digest:?} from {path:?}: {e}"))?; - remote.store_file(digest, file).await?; - Ok(()) - } - - /// - /// Return true if the given directory and file digests are loadable from either the local or remote - /// Store, without downloading any file content. - /// - /// The given directory digests will be recursively expanded, so it is not necessary to - /// explicitly list their file digests in the file digests list. - /// - pub async fn exists_recursive( - &self, - directory_digests: impl IntoIterator, - file_digests: impl IntoIterator, - ) -> Result { - // Load directories, which implicitly validates that they exist. 
- let digest_tries = future::try_join_all( - directory_digests - .into_iter() - .map(|dd| self.load_digest_trie(dd)), - ) - .await?; - - // Collect all file digests. - let mut file_digests = file_digests.into_iter().collect::>(); - for digest_trie in digest_tries { - digest_trie.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { - directory::Entry::File(f) => { - file_digests.insert(f.digest()); + + async fn store_fsdb_blob_remote( + remote: remote::ByteStore, + digest: Digest, + path: PathBuf, + ) -> Result<(), StoreError> { + let file = tokio::fs::File::open(&path) + .await + .map_err(|e| format!("failed to read {digest:?} from {path:?}: {e}"))?; + remote.store_file(digest, file).await?; + Ok(()) + } + + /// + /// Return true if the given directory and file digests are loadable from either the local or remote + /// Store, without downloading any file content. + /// + /// The given directory digests will be recursively expanded, so it is not necessary to + /// explicitly list their file digests in the file digests list. + /// + pub async fn exists_recursive( + &self, + directory_digests: impl IntoIterator, + file_digests: impl IntoIterator, + ) -> Result { + // Load directories, which implicitly validates that they exist. + let digest_tries = future::try_join_all( + directory_digests + .into_iter() + .map(|dd| self.load_digest_trie(dd)), + ) + .await?; + + // Collect all file digests. + let mut file_digests = file_digests.into_iter().collect::>(); + for digest_trie in digest_tries { + digest_trie.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { + directory::Entry::File(f) => { + file_digests.insert(f.digest()); + } + directory::Entry::Symlink(_) | directory::Entry::Directory(_) => (), + }); } - directory::Entry::Symlink(_) | directory::Entry::Directory(_) => (), - }); + + // Filter out file digests that exist locally. + let missing_locally = self + .local + .get_missing_digests(EntryType::File, file_digests) + .await?; + + // If there are any digests which don't exist locally, check remotely. + if missing_locally.is_empty() { + return Ok(true); + } + let remote = if let Some(remote) = self.remote.clone() { + remote + } else { + return Ok(false); + }; + let missing = remote.store.list_missing_digests(missing_locally).await?; + + Ok(missing.is_empty()) } - // Filter out file digests that exist locally. - let missing_locally = self - .local - .get_missing_digests(EntryType::File, file_digests) - .await?; + /// Ensure that the files are locally loadable. This will download them from the remote store as + /// a side effect, if one is configured. + pub async fn ensure_downloaded( + &self, + mut file_digests: HashSet, + directory_digests: HashSet, + ) -> Result<(), StoreError> { + let file_digests_from_directories = + future::try_join_all(directory_digests.into_iter().map(|dir_digest| async move { + let mut file_digests_for_dir = Vec::new(); + let trie = self.load_digest_trie(dir_digest).await?; + trie.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { + directory::Entry::File(f) => file_digests_for_dir.push(f.digest()), + directory::Entry::Symlink(_) | directory::Entry::Directory(_) => (), + }); + // Also ensure that the directory trie is persisted to disk, not only its file entries. 
+ self.record_digest_trie(trie, true).await?; + Ok::<_, StoreError>(file_digests_for_dir) + })) + .await?; + file_digests.extend(file_digests_from_directories.into_iter().flatten()); + + let missing_file_digests = self + .local + .get_missing_digests(EntryType::File, file_digests) + .await?; + if missing_file_digests.is_empty() { + return Ok(()); + } - // If there are any digests which don't exist locally, check remotely. - if missing_locally.is_empty() { - return Ok(true); + let remote = &self.remote.clone().ok_or_else(|| { + StoreError::MissingDigest( + "Was not present in the local store".to_owned(), + *missing_file_digests.iter().next().unwrap(), + ) + })?; + let _ = future::try_join_all(missing_file_digests.into_iter().map( + |file_digest| async move { + if let Err(e) = remote + .download_digest_to_local( + self.local.clone(), + file_digest, + EntryType::File, + None, + ) + .await + { + log::debug!("Missing file digest from remote store: {:?}", file_digest); + in_workunit!( + "missing_file_counter", + Level::Trace, + |workunit| async move { + workunit.increment_counter(Metric::RemoteStoreMissingDigest, 1); + }, + ) + .await; + return Err(e); + } + Ok(()) + }, + )) + .await?; + Ok(()) } - let remote = if let Some(remote) = self.remote.clone() { - remote - } else { - return Ok(false); - }; - let missing = remote.store.list_missing_digests(missing_locally).await?; - - Ok(missing.is_empty()) - } - - /// Ensure that the files are locally loadable. This will download them from the remote store as - /// a side effect, if one is configured. - pub async fn ensure_downloaded( - &self, - mut file_digests: HashSet, - directory_digests: HashSet, - ) -> Result<(), StoreError> { - let file_digests_from_directories = - future::try_join_all(directory_digests.into_iter().map(|dir_digest| async move { - let mut file_digests_for_dir = Vec::new(); - let trie = self.load_digest_trie(dir_digest).await?; - trie.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { - directory::Entry::File(f) => file_digests_for_dir.push(f.digest()), - directory::Entry::Symlink(_) | directory::Entry::Directory(_) => (), - }); - // Also ensure that the directory trie is persisted to disk, not only its file entries. - self.record_digest_trie(trie, true).await?; - Ok::<_, StoreError>(file_digests_for_dir) - })) - .await?; - file_digests.extend(file_digests_from_directories.into_iter().flatten()); - - let missing_file_digests = self - .local - .get_missing_digests(EntryType::File, file_digests) - .await?; - if missing_file_digests.is_empty() { - return Ok(()); + + /// Load a REv2 Tree from a remote CAS _without_ persisting the embedded Directory protos in + /// the local store. Tree is used by the REv2 protocol as an optimization for encoding the + /// the Directory protos that comprise the output directories from a remote execution + /// reported by an ActionResult. + /// + /// Returns an `Option` representing the `root` of the Tree (if it in fact + /// exists in the remote CAS). + /// + /// This method requires that this Store be configured with a remote CAS, and will return an + /// error if this is not the case. + pub async fn load_tree_from_remote( + &self, + tree_digest: Digest, + ) -> Result, String> { + let remote = if let Some(ref remote) = self.remote { + remote + } else { + return Err("Cannot load Trees from a remote without a remote".to_owned()); + }; + + match remote.store.load_bytes(tree_digest).await? 
{ + Some(b) => { + let tree = Tree::decode(b).map_err(|e| format!("protobuf decode error: {e:?}"))?; + let trie = DigestTrie::try_from(tree)?; + Ok(Some(trie.into())) + } + None => Ok(None), + } } - let remote = &self.remote.clone().ok_or_else(|| { - StoreError::MissingDigest( - "Was not present in the local store".to_owned(), - *missing_file_digests.iter().next().unwrap(), - ) - })?; - let _ = future::try_join_all( - missing_file_digests - .into_iter() - .map(|file_digest| async move { - if let Err(e) = remote - .download_digest_to_local(self.local.clone(), file_digest, EntryType::File, None) - .await - { - log::debug!("Missing file digest from remote store: {:?}", file_digest); - in_workunit!( - "missing_file_counter", - Level::Trace, - |workunit| async move { - workunit.increment_counter(Metric::RemoteStoreMissingDigest, 1); - }, + pub async fn lease_all_recursively<'a, Ds: Iterator>( + &self, + digests: Ds, + ) -> Result<(), StoreError> { + let reachable_digests_and_types = self.expand_local_digests(digests).await?; + // Lease all Digests which existed (ignoring any that didn't). + self.local + .lease_all( + reachable_digests_and_types + .into_iter() + .flat_map(|(digest, maybe_type)| maybe_type.map(|t| (digest, t))), ) - .await; - return Err(e); - } - Ok(()) - }), - ) - .await?; - Ok(()) - } - - /// Load a REv2 Tree from a remote CAS _without_ persisting the embedded Directory protos in - /// the local store. Tree is used by the REv2 protocol as an optimization for encoding the - /// the Directory protos that comprise the output directories from a remote execution - /// reported by an ActionResult. - /// - /// Returns an `Option` representing the `root` of the Tree (if it in fact - /// exists in the remote CAS). - /// - /// This method requires that this Store be configured with a remote CAS, and will return an - /// error if this is not the case. - pub async fn load_tree_from_remote( - &self, - tree_digest: Digest, - ) -> Result, String> { - let remote = if let Some(ref remote) = self.remote { - remote - } else { - return Err("Cannot load Trees from a remote without a remote".to_owned()); - }; - - match remote.store.load_bytes(tree_digest).await? { - Some(b) => { - let tree = Tree::decode(b).map_err(|e| format!("protobuf decode error: {e:?}"))?; - let trie = DigestTrie::try_from(tree)?; - Ok(Some(trie.into())) - } - None => Ok(None), + .await?; + Ok(()) } - } - - pub async fn lease_all_recursively<'a, Ds: Iterator>( - &self, - digests: Ds, - ) -> Result<(), StoreError> { - let reachable_digests_and_types = self.expand_local_digests(digests).await?; - // Lease all Digests which existed (ignoring any that didn't). 
- self - .local - .lease_all( - reachable_digests_and_types - .into_iter() - .flat_map(|(digest, maybe_type)| maybe_type.map(|t| (digest, t))), - ) - .await?; - Ok(()) - } - - pub async fn garbage_collect( - &self, - target_size_bytes: usize, - shrink_behavior: ShrinkBehavior, - ) -> Result<(), String> { - match self.local.shrink(target_size_bytes, shrink_behavior).await { - Ok(size) => { - if size > target_size_bytes { - log::warn!( + + pub async fn garbage_collect( + &self, + target_size_bytes: usize, + shrink_behavior: ShrinkBehavior, + ) -> Result<(), String> { + match self.local.shrink(target_size_bytes, shrink_behavior).await { + Ok(size) => { + if size > target_size_bytes { + log::warn!( "Garbage collection attempted to shrink the store to {} bytes but {} bytes \ are currently in use.", target_size_bytes, size ) + } + Ok(()) + } + Err(err) => Err(format!("Garbage collection failed: {err:?}")), } - Ok(()) - } - Err(err) => Err(format!("Garbage collection failed: {err:?}")), } - } - - /// - /// To check if it might be faster to upload the digests recursively - /// vs checking if the files are present first. - /// - /// The values are guesses, feel free to tweak them. - /// - fn upload_is_faster_than_checking_whether_to_upload<'a>( - digests: impl Iterator)>, - ) -> bool { - let mut num_digests = 0; - let mut num_bytes = 0; - for (digest, maybe_type) in digests { - if maybe_type.is_none() { - // We cannot upload this entry, because we don't have it locally. - return false; - } - num_digests += 1; - num_bytes += digest.size_bytes; - if num_digests >= 3 || num_bytes >= (1024 * 1024) { - return false; - } + + /// + /// To check if it might be faster to upload the digests recursively + /// vs checking if the files are present first. + /// + /// The values are guesses, feel free to tweak them. + /// + fn upload_is_faster_than_checking_whether_to_upload<'a>( + digests: impl Iterator)>, + ) -> bool { + let mut num_digests = 0; + let mut num_bytes = 0; + for (digest, maybe_type) in digests { + if maybe_type.is_none() { + // We cannot upload this entry, because we don't have it locally. + return false; + } + num_digests += 1; + num_bytes += digest.size_bytes; + if num_digests >= 3 || num_bytes >= (1024 * 1024) { + return false; + } + } + // There were fewer than 3 digests, and they were less than the threshold. + true } - // There were fewer than 3 digests, and they were less than the threshold. - true - } - - /// - /// Return all Digests locally reachable from the given root Digests (which may represent either - /// Files or Directories). - /// - /// This method will return `None` for either a root or inner Digest if it does not exist. - /// - async fn expand_local_digests<'a, Ds: Iterator>( - &self, - digests: Ds, - ) -> Result>, StoreError> { - // Expand each digest into either a single digest (Left), or a collection of recursive digests - // below a directory (Right). 
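The `upload_is_faster_than_checking_whether_to_upload` heuristic above skips the round trip to `list_missing_digests` only when the whole upload is trivially small: fewer than three blobs totalling under 1 MiB are pushed unconditionally, and everything else is checked first. (The real method also returns false as soon as it sees a digest that is missing locally.) Here is the threshold in isolation, with a couple of worked cases; as the original comment says, the values are guesses.

/// Skip the existence check only when the upload itself is cheap.
fn upload_without_checking(digest_sizes: &[usize]) -> bool {
    digest_sizes.len() < 3 && digest_sizes.iter().sum::<usize>() < 1024 * 1024
}

fn main() {
    // Two 4 KiB blobs: cheaper to just upload them than to ask what is missing.
    assert!(upload_without_checking(&[4096, 4096]));
    // Three blobs, or anything totalling 1 MiB or more, gets checked first.
    assert!(!upload_without_checking(&[4096, 4096, 4096]));
    assert!(!upload_without_checking(&[2 * 1024 * 1024]));
}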
- let expanded_digests: Vec> = future::try_join_all( - digests - .map(|digest| { - let store = self.clone(); - async move { - let entry_type = store - .local - .entry_type(digest.hash) - .await - .map_err(|e| format!("Failed to expand digest {digest:?}: {e}"))?; - match entry_type { - Some(EntryType::File) => Ok(Either::Left((*digest, Some(EntryType::File)))), - Some(EntryType::Directory) => match store.expand_directory(*digest).await { - Ok(entries) => Ok(Either::Right(entries)), - Err(StoreError::MissingDigest(_, digest)) => Ok(Either::Left((digest, None))), - Err(e) => Err(e), - }, - None => Ok(Either::Left((*digest, None))), + + /// + /// Return all Digests locally reachable from the given root Digests (which may represent either + /// Files or Directories). + /// + /// This method will return `None` for either a root or inner Digest if it does not exist. + /// + async fn expand_local_digests<'a, Ds: Iterator>( + &self, + digests: Ds, + ) -> Result>, StoreError> { + // Expand each digest into either a single digest (Left), or a collection of recursive digests + // below a directory (Right). + let expanded_digests: Vec> = future::try_join_all( + digests + .map(|digest| { + let store = self.clone(); + async move { + let entry_type = store + .local + .entry_type(digest.hash) + .await + .map_err(|e| format!("Failed to expand digest {digest:?}: {e}"))?; + match entry_type { + Some(EntryType::File) => { + Ok(Either::Left((*digest, Some(EntryType::File)))) + } + Some(EntryType::Directory) => { + match store.expand_directory(*digest).await { + Ok(entries) => Ok(Either::Right(entries)), + Err(StoreError::MissingDigest(_, digest)) => { + Ok(Either::Left((digest, None))) + } + Err(e) => Err(e), + } + } + None => Ok(Either::Left((*digest, None))), + } + } + }) + .collect::>(), + ) + .await?; + + let mut result = HashMap::with_capacity(expanded_digests.len()); + for e in expanded_digests { + match e { + Either::Left((digest, maybe_type)) => { + result.insert(digest, maybe_type); + } + Either::Right(reachable_digests) => { + result.extend( + reachable_digests + .into_iter() + .map(|(digest, t)| (digest, Some(t))), + ); + } } - } + } + Ok(result) + } + + fn expand_directory( + &self, + digest: Digest, + ) -> BoxFuture<'static, Result, StoreError>> { + self.walk(digest, |_, _, digest, directory| { + let mut digest_types = vec![(digest, EntryType::Directory)]; + for file in &directory.files { + let file_digest = try_future!(require_digest(file.digest.as_ref())); + digest_types.push((file_digest, EntryType::File)); + } + future::ok(digest_types).boxed() }) - .collect::>(), - ) - .await?; + .map(|digest_pairs_per_directory| { + digest_pairs_per_directory.map(|xs| { + xs.into_iter() + .flat_map(|x| x.into_iter()) + .collect::>() + }) + }) + .boxed() + } - let mut result = HashMap::with_capacity(expanded_digests.len()); - for e in expanded_digests { - match e { - Either::Left((digest, maybe_type)) => { - result.insert(digest, maybe_type); + /// + /// Lays out the directory and all of its contents (files and directories) on disk so that a + /// process which uses the directory structure can run. + /// + /// Although `Directory` has internally unique paths, `materialize_directory` can be used with + /// an existing destination directory, meaning that directory and file creation must be + /// idempotent. 
+ /// + /// If the destination (more specifically, the given parent directory of the destination, for + /// memoization purposes) is hardlinkable from the local store, and `!force_mutable`, hardlinks + /// may be used for large files which are not listed in `mutable_paths`. + /// + pub async fn materialize_directory( + &self, + destination: PathBuf, + destination_root: &Path, + digest: DirectoryDigest, + force_mutable: bool, + mutable_paths: &BTreeSet, + perms: Permissions, + ) -> Result<(), StoreError> { + debug_assert!( + destination.starts_with(destination_root), + "The destination root must be a parent directory of the destination." + ); + + // Load the DigestTrie for the digest, and convert it into a mapping between a fully qualified + // parent path and its children. + let mut parent_to_child = HashMap::new(); + let tree = self.load_digest_trie(digest).await?; + tree.walk(SymlinkBehavior::Aware, &mut |path, entry| { + if let Some(parent) = path.parent() { + parent_to_child + .entry(destination.join(parent)) + .or_insert_with(Vec::new) + .push(entry.clone()); + } + }); + + let mut mutable_path_ancestors = BTreeSet::new(); + for relpath in mutable_paths { + mutable_path_ancestors.extend(relpath.ancestors().map(|p| destination.join(p))); } - Either::Right(reachable_digests) => { - result.extend( - reachable_digests - .into_iter() - .map(|(digest, t)| (digest, Some(t))), - ); + + // Create the root, and determine what filesystem it and the store are on. + let destination_is_hardlinkable = { + let (_, destination_is_hardlinkable) = tokio::try_join!( + tokio::fs::create_dir_all(&destination).map_err(|e| format!( + "Failed to create directory {}: {e}", + destination.display() + )), + self.local.is_hardlinkable_destination(destination_root) + )?; + destination_is_hardlinkable + }; + + self.materialize_directory_children( + destination, + true, + force_mutable, + destination_is_hardlinkable, + &parent_to_child, + &mutable_path_ancestors, + perms, + ) + .await + } + + fn materialize_directory_children<'a>( + &self, + destination: PathBuf, + is_root: bool, + force_mutable: bool, + can_hardlink: bool, + parent_to_child: &'a HashMap>, + mutable_paths: &'a BTreeSet, + perms: Permissions, + ) -> BoxFuture<'a, Result<(), StoreError>> { + let store = self.clone(); + async move { + if !is_root { + // NB: Although we know that all parent directories already exist, we use `create_dir_all` + // because it succeeds even if _this_ directory already exists (which it might, if we're + // materializing atop an existing directory structure). 
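Materialization above first flattens the `DigestTrie` into a map from each fully qualified parent directory to its children, then creates directories with `create_dir_all` precisely because it succeeds when the directory already exists, keeping materialization on top of an existing destination idempotent. A std-only sketch of that grouping and creation, assuming the entries are already available as relative file paths rather than trie entries:

use std::collections::HashMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Group relative file paths by their destination-qualified parent directory, then create every
/// parent. `create_dir_all` is idempotent, so re-materializing over an existing tree is safe.
fn create_parent_dirs(
    destination: &Path,
    files: &[&Path],
) -> io::Result<HashMap<PathBuf, Vec<PathBuf>>> {
    let mut parent_to_children: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
    for file in files {
        if let Some(parent) = file.parent() {
            parent_to_children
                .entry(destination.join(parent))
                .or_insert_with(Vec::new)
                .push(destination.join(file));
        }
    }
    for parent in parent_to_children.keys() {
        fs::create_dir_all(parent)?;
    }
    Ok(parent_to_children)
}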
+ tokio::fs::create_dir_all(&destination).await.map_err(|e| { + format!("Failed to create directory {}: {e}", destination.display()) + })?; + } + + if let Some(children) = parent_to_child.get(&destination) { + let mut child_futures = Vec::new(); + for child in children { + let path = destination.join(child.name().as_ref()); + let store = store.clone(); + child_futures.push(async move { + let can_be_immutable = + !force_mutable && can_hardlink && !mutable_paths.contains(&path); + + match child { + directory::Entry::File(f) => { + store + .materialize_file_maybe_hardlink( + path, + f.digest(), + perms, + f.is_executable(), + can_be_immutable, + ) + .await + } + directory::Entry::Symlink(s) => { + store + .materialize_symlink( + path, + s.target().to_str().unwrap().to_string(), + ) + .await + } + directory::Entry::Directory(_) => { + store + .materialize_directory_children( + path.clone(), + false, + mutable_paths.contains(&path) || force_mutable, + can_hardlink, + parent_to_child, + mutable_paths, + perms, + ) + .await + } + } + }); + } + let _ = future::try_join_all(child_futures).await?; + } + + if perms == Permissions::ReadOnly { + tokio::fs::set_permissions(&destination, std::fs::Permissions::from_mode(0o555)) + .await + .map_err(|e| { + format!( + "Failed to set permissions for {}: {}", + destination.display(), + e + ) + })?; + } + Ok(()) } - } + .boxed() } - Ok(result) - } - - fn expand_directory( - &self, - digest: Digest, - ) -> BoxFuture<'static, Result, StoreError>> { - self - .walk(digest, |_, _, digest, directory| { - let mut digest_types = vec![(digest, EntryType::Directory)]; - for file in &directory.files { - let file_digest = try_future!(require_digest(file.digest.as_ref())); - digest_types.push((file_digest, EntryType::File)); + + async fn materialize_file_maybe_hardlink( + &self, + destination: PathBuf, + digest: Digest, + perms: Permissions, + is_executable: bool, + can_be_immutable: bool, + ) -> Result<(), StoreError> { + let hardlink_tgt = if can_be_immutable { + self.local.load_from_fs(digest).await? + } else { + None + }; + match hardlink_tgt { + Some(path) => { + self.materialize_hardlink(destination, path.to_str().unwrap().to_string()) + .await + } + None => { + self.materialize_file(destination, digest, perms, is_executable) + .await + } } - future::ok(digest_types).boxed() - }) - .map(|digest_pairs_per_directory| { - digest_pairs_per_directory.map(|xs| { - xs.into_iter() - .flat_map(|x| x.into_iter()) - .collect::>() - }) - }) - .boxed() - } - - /// - /// Lays out the directory and all of its contents (files and directories) on disk so that a - /// process which uses the directory structure can run. - /// - /// Although `Directory` has internally unique paths, `materialize_directory` can be used with - /// an existing destination directory, meaning that directory and file creation must be - /// idempotent. - /// - /// If the destination (more specifically, the given parent directory of the destination, for - /// memoization purposes) is hardlinkable from the local store, and `!force_mutable`, hardlinks - /// may be used for large files which are not listed in `mutable_paths`. - /// - pub async fn materialize_directory( - &self, - destination: PathBuf, - destination_root: &Path, - digest: DirectoryDigest, - force_mutable: bool, - mutable_paths: &BTreeSet, - perms: Permissions, - ) -> Result<(), StoreError> { - debug_assert!( - destination.starts_with(destination_root), - "The destination root must be a parent directory of the destination." 
- ); - - // Load the DigestTrie for the digest, and convert it into a mapping between a fully qualified - // parent path and its children. - let mut parent_to_child = HashMap::new(); - let tree = self.load_digest_trie(digest).await?; - tree.walk(SymlinkBehavior::Aware, &mut |path, entry| { - if let Some(parent) = path.parent() { - parent_to_child - .entry(destination.join(parent)) - .or_insert_with(Vec::new) - .push(entry.clone()); - } - }); - - let mut mutable_path_ancestors = BTreeSet::new(); - for relpath in mutable_paths { - mutable_path_ancestors.extend(relpath.ancestors().map(|p| destination.join(p))); } - // Create the root, and determine what filesystem it and the store are on. - let destination_is_hardlinkable = { - let (_, destination_is_hardlinkable) = tokio::try_join!( - tokio::fs::create_dir_all(&destination) - .map_err(|e| format!("Failed to create directory {}: {e}", destination.display())), - self.local.is_hardlinkable_destination(destination_root) - )?; - destination_is_hardlinkable - }; - - self - .materialize_directory_children( - destination, - true, - force_mutable, - destination_is_hardlinkable, - &parent_to_child, - &mutable_path_ancestors, - perms, - ) - .await - } - - fn materialize_directory_children<'a>( - &self, - destination: PathBuf, - is_root: bool, - force_mutable: bool, - can_hardlink: bool, - parent_to_child: &'a HashMap>, - mutable_paths: &'a BTreeSet, - perms: Permissions, - ) -> BoxFuture<'a, Result<(), StoreError>> { - let store = self.clone(); - async move { - if !is_root { - // NB: Although we know that all parent directories already exist, we use `create_dir_all` - // because it succeeds even if _this_ directory already exists (which it might, if we're - // materializing atop an existing directory structure). - tokio::fs::create_dir_all(&destination) - .await - .map_err(|e| format!("Failed to create directory {}: {e}", destination.display()))?; - } - - if let Some(children) = parent_to_child.get(&destination) { - let mut child_futures = Vec::new(); - for child in children { - let path = destination.join(child.name().as_ref()); - let store = store.clone(); - child_futures.push(async move { - let can_be_immutable = !force_mutable && can_hardlink && !mutable_paths.contains(&path); - - match child { - directory::Entry::File(f) => { - store - .materialize_file_maybe_hardlink( - path, - f.digest(), - perms, - f.is_executable(), - can_be_immutable, - ) - .await - } - directory::Entry::Symlink(s) => { - store - .materialize_symlink(path, s.target().to_str().unwrap().to_string()) - .await - } - directory::Entry::Directory(_) => { - store - .materialize_directory_children( - path.clone(), - false, - mutable_paths.contains(&path) || force_mutable, - can_hardlink, - parent_to_child, - mutable_paths, - perms, - ) - .await - } + async fn materialize_file( + &self, + destination: PathBuf, + digest: Digest, + perms: Permissions, + is_executable: bool, + ) -> Result<(), StoreError> { + let mode = match perms { + Permissions::ReadOnly if is_executable => 0o555, + Permissions::ReadOnly => 0o444, + Permissions::Writable if is_executable => 0o755, + Permissions::Writable => 0o644, + }; + match self.local.load_from_fs(digest).await? 
{ + Some(path) => { + tokio::fs::copy(&path, &destination).await.map_err(|e| { + format!( + "Error copying bytes from {} to {}: {e}", + path.display(), + destination.display() + ) + })?; + tokio::fs::set_permissions(destination, FSPermissions::from_mode(mode)) + .await + .map_err(|e| format!("Error setting permissions on {}: {e}", path.display()))?; + Ok(()) + } + None => { + self.load_file_bytes_with(digest, move |bytes| { + let mut f = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .mode(mode) + .open(&destination) + .map_err(|e| { + format!( + "Error opening file {} for writing: {:?}", + destination.display(), + e + ) + })?; + f.write_all(bytes).map_err(|e| { + format!("Error writing file {}: {:?}", destination.display(), e) + })?; + Ok(()) + }) + .await? } - }); } - let _ = future::try_join_all(child_futures).await?; - } + } - if perms == Permissions::ReadOnly { - tokio::fs::set_permissions(&destination, std::fs::Permissions::from_mode(0o555)) - .await - .map_err(|e| { - format!( - "Failed to set permissions for {}: {}", - destination.display(), - e + pub async fn materialize_symlink( + &self, + destination: PathBuf, + target: String, + ) -> Result<(), StoreError> { + // Overwriting a symlink, even with another symlink, fails if it exists. This can occur when + // materializing to a fixed directory like dist/. To avoid pessimising the more common case (no + // overwrite, e.g. materializing to a temp dir), only remove after noticing a failure. + // + // NB. #17758, #18849: this is a work-around for inaccurate management of the contents of dist/. + for first in [true, false] { + match symlink(&target, &destination).await { + Ok(()) => break, + Err(e) if first && e.kind() == std::io::ErrorKind::AlreadyExists => { + tokio::fs::remove_dir_all(&destination).await.map_err(|e| { + format!( + "Failed to remove existing item at {} when creating symlink to {target} there: {e}", + destination.display() ) - })?; - } - Ok(()) - } - .boxed() - } - - async fn materialize_file_maybe_hardlink( - &self, - destination: PathBuf, - digest: Digest, - perms: Permissions, - is_executable: bool, - can_be_immutable: bool, - ) -> Result<(), StoreError> { - let hardlink_tgt = if can_be_immutable { - self.local.load_from_fs(digest).await? - } else { - None - }; - match hardlink_tgt { - Some(path) => { - self - .materialize_hardlink(destination, path.to_str().unwrap().to_string()) - .await - } - None => { - self - .materialize_file(destination, digest, perms, is_executable) - .await - } - } - } - - async fn materialize_file( - &self, - destination: PathBuf, - digest: Digest, - perms: Permissions, - is_executable: bool, - ) -> Result<(), StoreError> { - let mode = match perms { - Permissions::ReadOnly if is_executable => 0o555, - Permissions::ReadOnly => 0o444, - Permissions::Writable if is_executable => 0o755, - Permissions::Writable => 0o644, - }; - match self.local.load_from_fs(digest).await? { - Some(path) => { - tokio::fs::copy(&path, &destination).await.map_err(|e| { - format!( - "Error copying bytes from {} to {}: {e}", - path.display(), - destination.display() - ) - })?; - tokio::fs::set_permissions(destination, FSPermissions::from_mode(mode)) - .await - .map_err(|e| format!("Error setting permissions on {}: {e}", path.display()))?; + })? 
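For reference, the permission table above maps (read-only, executable) onto conventional Unix modes; a condensed, synchronous sketch of the same mapping and the `OpenOptionsExt::mode` usage, assuming simplified parameters:

use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::OpenOptionsExt;
use std::path::Path;

fn write_with_mode(
    destination: &Path,
    bytes: &[u8],
    read_only: bool,
    is_executable: bool,
) -> std::io::Result<()> {
    // Same table as the `match perms` above: 0o555/0o444 when read-only,
    // 0o755/0o644 when writable, depending on executability.
    let mode = match (read_only, is_executable) {
        (true, true) => 0o555,
        (true, false) => 0o444,
        (false, true) => 0o755,
        (false, false) => 0o644,
    };
    let mut f = OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(true)
        .mode(mode)
        .open(destination)?;
    f.write_all(bytes)
}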
+ } + Err(e) => Err(format!( + "Failed to create symlink to {target} at {}: {e}", + destination.display() + ))?, + } + } Ok(()) - } - None => { - self - .load_file_bytes_with(digest, move |bytes| { - let mut f = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .mode(mode) - .open(&destination) - .map_err(|e| { - format!( - "Error opening file {} for writing: {:?}", - destination.display(), - e - ) - })?; - f.write_all(bytes) - .map_err(|e| format!("Error writing file {}: {:?}", destination.display(), e))?; - Ok(()) - }) - .await? - } } - } - - pub async fn materialize_symlink( - &self, - destination: PathBuf, - target: String, - ) -> Result<(), StoreError> { - // Overwriting a symlink, even with another symlink, fails if it exists. This can occur when - // materializing to a fixed directory like dist/. To avoid pessimising the more common case (no - // overwrite, e.g. materializing to a temp dir), only remove after noticing a failure. - // - // NB. #17758, #18849: this is a work-around for inaccurate management of the contents of dist/. - for first in [true, false] { - match symlink(&target, &destination).await { - Ok(()) => break, - Err(e) if first && e.kind() == std::io::ErrorKind::AlreadyExists => { - tokio::fs::remove_dir_all(&destination).await.map_err(|e| { + + pub async fn materialize_hardlink( + &self, + destination: PathBuf, + target: String, + ) -> Result<(), StoreError> { + // On macOS, copy uses a copy-on-write syscall (fclonefileat) which creates a disconnected + // clone. It is more defensive than a hardlink, but has the same requirement that the source + // and destination filesystem are the same. + // + // It also has the benefit of playing nicely with Docker for macOS file virtualization: see + // #18162. + #[cfg(target_os = "macos")] + copy(&target, &destination).await.map_err(|e| { format!( - "Failed to remove existing item at {} when creating symlink to {target} there: {e}", - destination.display() + "Failed to copy from {target} to {}: {e}", + destination.display() ) - })? - } - Err(e) => Err(format!( - "Failed to create symlink to {target} at {}: {e}", - destination.display() - ))?, - } + })?; + #[cfg(not(target_os = "macos"))] + hard_link(&target, &destination).await.map_err(|e| { + format!( + "Failed to create hardlink to {target} at {}: {e}", + destination.display() + ) + })?; + Ok(()) } - Ok(()) - } - - pub async fn materialize_hardlink( - &self, - destination: PathBuf, - target: String, - ) -> Result<(), StoreError> { - // On macOS, copy uses a copy-on-write syscall (fclonefileat) which creates a disconnected - // clone. It is more defensive than a hardlink, but has the same requirement that the source - // and destination filesystem are the same. - // - // It also has the benefit of playing nicely with Docker for macOS file virtualization: see - // #18162. - #[cfg(target_os = "macos")] - copy(&target, &destination).await.map_err(|e| { - format!( - "Failed to copy from {target} to {}: {e}", - destination.display() - ) - })?; - #[cfg(not(target_os = "macos"))] - hard_link(&target, &destination).await.map_err(|e| { - format!( - "Failed to create hardlink to {target} at {}: {e}", - destination.display() - ) - })?; - Ok(()) - } - - /// - /// Returns files sorted by their path. - /// - pub async fn contents_for_directory( - &self, - digest: DirectoryDigest, - ) -> Result, StoreError> { - let mut files = Vec::new(); - self - .load_digest_trie(digest) - .await? 
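The symlink workaround above (create, and only on an AlreadyExists failure remove whatever is there and retry once) can be sketched synchronously as follows; the real code uses tokio's async filesystem calls.

use std::io::ErrorKind;
use std::os::unix::fs::symlink;
use std::path::Path;

fn create_symlink_with_retry(target: &Path, destination: &Path) -> std::io::Result<()> {
    for first in [true, false] {
        match symlink(target, destination) {
            Ok(()) => return Ok(()),
            Err(e) if first && e.kind() == ErrorKind::AlreadyExists => {
                // Something already exists at the destination (e.g. from a
                // previous materialization into dist/): remove it, then retry.
                let meta = std::fs::symlink_metadata(destination)?;
                if meta.is_dir() {
                    std::fs::remove_dir_all(destination)?;
                } else {
                    std::fs::remove_file(destination)?;
                }
            }
            Err(e) => return Err(e),
        }
    }
    unreachable!("the second attempt always returns Ok or Err");
}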
- .walk(SymlinkBehavior::Oblivious, &mut |path, entry| match entry { - directory::Entry::File(f) => files.push((path.to_owned(), f.digest(), f.is_executable())), - directory::Entry::Symlink(_) => (), - directory::Entry::Directory(_) => (), - }); - - future::try_join_all(files.into_iter().map(|(path, digest, is_executable)| { - let store = self.clone(); - async move { - let content = store - .load_file_bytes_with(digest, Bytes::copy_from_slice) - .await - .map_err(|e| e.enrich(&format!("Couldn't find file contents for {path:?}")))?; - Ok(FileContent { - path, - content, - is_executable, - }) - } - })) - .await - } - - /// - /// Returns indirect references to files in a Digest sorted by their path. - /// - pub async fn entries_for_directory( - &self, - digest: DirectoryDigest, - ) -> Result, StoreError> { - if digest == *EMPTY_DIRECTORY_DIGEST { - return Ok(vec![]); + + /// + /// Returns files sorted by their path. + /// + pub async fn contents_for_directory( + &self, + digest: DirectoryDigest, + ) -> Result, StoreError> { + let mut files = Vec::new(); + self.load_digest_trie(digest).await?.walk( + SymlinkBehavior::Oblivious, + &mut |path, entry| match entry { + directory::Entry::File(f) => { + files.push((path.to_owned(), f.digest(), f.is_executable())) + } + directory::Entry::Symlink(_) => (), + directory::Entry::Directory(_) => (), + }, + ); + + future::try_join_all(files.into_iter().map(|(path, digest, is_executable)| { + let store = self.clone(); + async move { + let content = store + .load_file_bytes_with(digest, Bytes::copy_from_slice) + .await + .map_err(|e| e.enrich(&format!("Couldn't find file contents for {path:?}")))?; + Ok(FileContent { + path, + content, + is_executable, + }) + } + })) + .await } - let mut entries = Vec::new(); - self.load_digest_trie(digest).await?.walk( - SymlinkBehavior::Aware, - &mut |path, entry| match entry { - directory::Entry::File(f) => { - entries.push(DigestEntry::File(FileEntry { - path: path.to_owned(), - digest: f.digest(), - is_executable: f.is_executable(), - })); - } - directory::Entry::Symlink(s) => { - entries.push(DigestEntry::Symlink(SymlinkEntry { - path: path.to_owned(), - target: s.target().to_path_buf(), - })); - } - directory::Entry::Directory(d) => { - // Only report a directory if it is a leaf node. (The caller is expected to create parent - // directories for both files and empty leaf directories.) - if d.tree().entries().is_empty() { - entries.push(DigestEntry::EmptyDirectory(path.to_owned())); - } + /// + /// Returns indirect references to files in a Digest sorted by their path. + /// + pub async fn entries_for_directory( + &self, + digest: DirectoryDigest, + ) -> Result, StoreError> { + if digest == *EMPTY_DIRECTORY_DIGEST { + return Ok(vec![]); } - }, - ); - - Ok(entries) - } - - /// - /// Given the Digest for a Directory, recursively walk the Directory, calling the given function - /// with the path so far, and the new Directory. - /// - /// The recursive walk will proceed concurrently, so if order matters, a caller should sort the - /// output after the call. 
- /// - pub fn walk< - T: Send + 'static, - F: Fn( - &Store, - &PathBuf, - Digest, - &remexec::Directory, - ) -> future::BoxFuture<'static, Result> - + Send - + Sync - + 'static, - >( - &self, - digest: Digest, - f: F, - ) -> BoxFuture<'static, Result, StoreError>> { - let f = Arc::new(f); - let accumulator = Arc::new(Mutex::new(Vec::new())); - self - .walk_helper(digest, PathBuf::new(), f, accumulator.clone()) - .map(|r| { - r.map(|_| { - Arc::try_unwrap(accumulator) - .unwrap_or_else(|_| panic!("walk_helper violated its contract.")) - .into_inner() - }) - }) - .boxed() - } - - fn walk_helper< - T: Send + 'static, - F: Fn( - &Store, - &PathBuf, - Digest, - &remexec::Directory, - ) -> future::BoxFuture<'static, Result> - + Send - + Sync - + 'static, - >( - &self, - digest: Digest, - path_so_far: PathBuf, - f: Arc, - accumulator: Arc>>, - ) -> BoxFuture<'static, Result<(), StoreError>> { - let store = self.clone(); - let res = async move { - let directory = store.load_directory(digest).await.map_err(|e| { - e.enrich(&format!( - "Could not walk unknown directory at {path_so_far:?}" - )) - })?; - let result_for_directory = f(&store, &path_so_far, digest, &directory).await?; - { - let mut accumulator = accumulator.lock(); - accumulator.push(result_for_directory); - } - future::try_join_all( - directory - .directories - .iter() - .map(move |dir_node| { - let subdir_digest = try_future!(require_digest(dir_node.digest.as_ref())); - let path = path_so_far.join(dir_node.name.clone()); - store.walk_helper(subdir_digest, path, f.clone(), accumulator.clone()) - }) - .collect::>(), - ) - .await?; - Ok(()) - }; - res.boxed() - } - - pub async fn all_local_digests(&self, entry_type: EntryType) -> Result, String> { - self.local.all_digests(entry_type).await - } + + let mut entries = Vec::new(); + self.load_digest_trie(digest) + .await? + .walk(SymlinkBehavior::Aware, &mut |path, entry| match entry { + directory::Entry::File(f) => { + entries.push(DigestEntry::File(FileEntry { + path: path.to_owned(), + digest: f.digest(), + is_executable: f.is_executable(), + })); + } + directory::Entry::Symlink(s) => { + entries.push(DigestEntry::Symlink(SymlinkEntry { + path: path.to_owned(), + target: s.target().to_path_buf(), + })); + } + directory::Entry::Directory(d) => { + // Only report a directory if it is a leaf node. (The caller is expected to create parent + // directories for both files and empty leaf directories.) + if d.tree().entries().is_empty() { + entries.push(DigestEntry::EmptyDirectory(path.to_owned())); + } + } + }); + + Ok(entries) + } + + /// + /// Given the Digest for a Directory, recursively walk the Directory, calling the given function + /// with the path so far, and the new Directory. + /// + /// The recursive walk will proceed concurrently, so if order matters, a caller should sort the + /// output after the call. 
+ /// + pub fn walk< + T: Send + 'static, + F: Fn( + &Store, + &PathBuf, + Digest, + &remexec::Directory, + ) -> future::BoxFuture<'static, Result> + + Send + + Sync + + 'static, + >( + &self, + digest: Digest, + f: F, + ) -> BoxFuture<'static, Result, StoreError>> { + let f = Arc::new(f); + let accumulator = Arc::new(Mutex::new(Vec::new())); + self.walk_helper(digest, PathBuf::new(), f, accumulator.clone()) + .map(|r| { + r.map(|_| { + Arc::try_unwrap(accumulator) + .unwrap_or_else(|_| panic!("walk_helper violated its contract.")) + .into_inner() + }) + }) + .boxed() + } + + fn walk_helper< + T: Send + 'static, + F: Fn( + &Store, + &PathBuf, + Digest, + &remexec::Directory, + ) -> future::BoxFuture<'static, Result> + + Send + + Sync + + 'static, + >( + &self, + digest: Digest, + path_so_far: PathBuf, + f: Arc, + accumulator: Arc>>, + ) -> BoxFuture<'static, Result<(), StoreError>> { + let store = self.clone(); + let res = async move { + let directory = store.load_directory(digest).await.map_err(|e| { + e.enrich(&format!( + "Could not walk unknown directory at {path_so_far:?}" + )) + })?; + let result_for_directory = f(&store, &path_so_far, digest, &directory).await?; + { + let mut accumulator = accumulator.lock(); + accumulator.push(result_for_directory); + } + future::try_join_all( + directory + .directories + .iter() + .map(move |dir_node| { + let subdir_digest = try_future!(require_digest(dir_node.digest.as_ref())); + let path = path_so_far.join(dir_node.name.clone()); + store.walk_helper(subdir_digest, path, f.clone(), accumulator.clone()) + }) + .collect::>(), + ) + .await?; + Ok(()) + }; + res.boxed() + } + + pub async fn all_local_digests(&self, entry_type: EntryType) -> Result, String> { + self.local.all_digests(entry_type).await + } } #[async_trait] impl SnapshotOps for Store { - type Error = StoreError; - - async fn load_file_bytes_with< - T: Send + 'static, - F: Fn(&[u8]) -> T + Clone + Send + Sync + 'static, - >( - &self, - digest: Digest, - f: F, - ) -> Result { - Store::load_file_bytes_with(self, digest, f).await - } - - async fn load_digest_trie(&self, digest: DirectoryDigest) -> Result { - Store::load_digest_trie(self, digest).await - } + type Error = StoreError; + + async fn load_file_bytes_with< + T: Send + 'static, + F: Fn(&[u8]) -> T + Clone + Send + Sync + 'static, + >( + &self, + digest: Digest, + f: F, + ) -> Result { + Store::load_file_bytes_with(self, digest, f).await + } + + async fn load_digest_trie(&self, digest: DirectoryDigest) -> Result { + Store::load_digest_trie(self, digest).await + } } // Only public for testing. #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ord, PartialOrd)] pub enum EntryType { - Directory, - File, + Directory, + File, } #[cfg(test)] diff --git a/src/rust/engine/fs/store/src/local.rs b/src/rust/engine/fs/store/src/local.rs index 8bec5494576..f8ebcb27afe 100644 --- a/src/rust/engine/fs/store/src/local.rs +++ b/src/rust/engine/fs/store/src/local.rs @@ -13,7 +13,7 @@ use async_trait::async_trait; use bytes::Bytes; use futures::future::{self, join_all, try_join, try_join_all}; use hashing::{ - async_copy_and_hash, async_verified_copy, AgedFingerprint, Digest, Fingerprint, EMPTY_DIGEST, + async_copy_and_hash, async_verified_copy, AgedFingerprint, Digest, Fingerprint, EMPTY_DIGEST, }; use parking_lot::Mutex; use sharded_lmdb::ShardedLmdb; @@ -33,114 +33,111 @@ const LARGE_FILE_SIZE_LIMIT: usize = 512 * 1024; /// Trait for the underlying storage, which is either a ShardedLMDB or a ShardedFS. 
#[async_trait] trait UnderlyingByteStore { - async fn exists_batch( - &self, - fingerprints: Vec, - ) -> Result, String>; - - async fn exists(&self, fingerprint: Fingerprint) -> Result { - let exists = self.exists_batch(vec![fingerprint]).await?; - Ok(exists.contains(&fingerprint)) - } - - async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String>; - - async fn remove(&self, fingerprint: Fingerprint) -> Result; - - async fn store_bytes_batch( - &self, - items: Vec<(Fingerprint, Bytes)>, - initial_lease: bool, - ) -> Result<(), String>; - - async fn store( - &self, - initial_lease: bool, - src_is_immutable: bool, - expected_digest: Digest, - src: PathBuf, - ) -> Result<(), String>; - - async fn load_bytes_with< - T: Send + 'static, - F: FnMut(&[u8]) -> Result + Send + Sync + 'static, - >( - &self, - fingerprint: Fingerprint, - mut f: F, - ) -> Result, String>; - - async fn aged_fingerprints(&self) -> Result, String>; - - async fn all_digests(&self) -> Result, String> { - let fingerprints = self.aged_fingerprints().await?; - Ok( - fingerprints - .into_iter() - .map(|fingerprint| Digest { - hash: fingerprint.fingerprint, - size_bytes: fingerprint.size_bytes, - }) - .collect(), - ) - } + async fn exists_batch( + &self, + fingerprints: Vec, + ) -> Result, String>; + + async fn exists(&self, fingerprint: Fingerprint) -> Result { + let exists = self.exists_batch(vec![fingerprint]).await?; + Ok(exists.contains(&fingerprint)) + } + + async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String>; + + async fn remove(&self, fingerprint: Fingerprint) -> Result; + + async fn store_bytes_batch( + &self, + items: Vec<(Fingerprint, Bytes)>, + initial_lease: bool, + ) -> Result<(), String>; + + async fn store( + &self, + initial_lease: bool, + src_is_immutable: bool, + expected_digest: Digest, + src: PathBuf, + ) -> Result<(), String>; + + async fn load_bytes_with< + T: Send + 'static, + F: FnMut(&[u8]) -> Result + Send + Sync + 'static, + >( + &self, + fingerprint: Fingerprint, + mut f: F, + ) -> Result, String>; + + async fn aged_fingerprints(&self) -> Result, String>; + + async fn all_digests(&self) -> Result, String> { + let fingerprints = self.aged_fingerprints().await?; + Ok(fingerprints + .into_iter() + .map(|fingerprint| Digest { + hash: fingerprint.fingerprint, + size_bytes: fingerprint.size_bytes, + }) + .collect()) + } } #[async_trait] impl UnderlyingByteStore for ShardedLmdb { - async fn exists_batch( - &self, - fingerprints: Vec, - ) -> Result, String> { - self.exists_batch(fingerprints).await - } - - async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String> { - self.lease(fingerprint).await - } - - async fn remove(&self, fingerprint: Fingerprint) -> Result { - self.remove(fingerprint).await - } - - async fn store_bytes_batch( - &self, - items: Vec<(Fingerprint, Bytes)>, - initial_lease: bool, - ) -> Result<(), String> { - self.store_bytes_batch(items, initial_lease).await - } - async fn store( - &self, - initial_lease: bool, - src_is_immutable: bool, - expected_digest: Digest, - src: PathBuf, - ) -> Result<(), String> { - self - .store( - initial_lease, - src_is_immutable, - expected_digest, - move || std::fs::File::open(&src), - ) - .await - } - - async fn load_bytes_with< - T: Send + 'static, - F: FnMut(&[u8]) -> Result + Send + Sync + 'static, - >( - &self, - fingerprint: Fingerprint, - f: F, - ) -> Result, String> { - self.load_bytes_with(fingerprint, f).await - } - - async fn aged_fingerprints(&self) -> Result, String> { - self.all_fingerprints().await - } 
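The `UnderlyingByteStore` trait above follows a common pattern: `exists` is a provided method built on the single required `exists_batch`, so each backend (ShardedLmdb, ShardedFSDB) only implements the batch form. A simplified, synchronous sketch of that shape, using String keys instead of Fingerprints:

use std::collections::HashSet;

trait ByteStoreSketch {
    // Required: answer existence for a whole batch at once.
    fn exists_batch(&self, fingerprints: Vec<String>) -> Result<HashSet<String>, String>;

    // Provided: a single lookup expressed in terms of the batch call.
    fn exists(&self, fingerprint: String) -> Result<bool, String> {
        let existing = self.exists_batch(vec![fingerprint.clone()])?;
        Ok(existing.contains(&fingerprint))
    }
}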
+ async fn exists_batch( + &self, + fingerprints: Vec, + ) -> Result, String> { + self.exists_batch(fingerprints).await + } + + async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String> { + self.lease(fingerprint).await + } + + async fn remove(&self, fingerprint: Fingerprint) -> Result { + self.remove(fingerprint).await + } + + async fn store_bytes_batch( + &self, + items: Vec<(Fingerprint, Bytes)>, + initial_lease: bool, + ) -> Result<(), String> { + self.store_bytes_batch(items, initial_lease).await + } + async fn store( + &self, + initial_lease: bool, + src_is_immutable: bool, + expected_digest: Digest, + src: PathBuf, + ) -> Result<(), String> { + self.store( + initial_lease, + src_is_immutable, + expected_digest, + move || std::fs::File::open(&src), + ) + .await + } + + async fn load_bytes_with< + T: Send + 'static, + F: FnMut(&[u8]) -> Result + Send + Sync + 'static, + >( + &self, + fingerprint: Fingerprint, + f: F, + ) -> Result, String> { + self.load_bytes_with(fingerprint, f).await + } + + async fn aged_fingerprints(&self) -> Result, String> { + self.all_fingerprints().await + } } // We shard so there isn't a plethora of entries in one single dir. @@ -148,764 +145,758 @@ impl UnderlyingByteStore for ShardedLmdb { // TODO: Add Arc'd inner struct to reduce clone costs. #[derive(Debug, Clone)] pub(crate) struct ShardedFSDB { - root: PathBuf, - executor: Executor, - lease_time: Duration, - dest_initializer: Arc>>>>, - // A cache of whether destination root directories are hardlinkable from the fsdb. - hardlinkable_destinations: Arc>>>>, + root: PathBuf, + executor: Executor, + lease_time: Duration, + dest_initializer: Arc>>>>, + // A cache of whether destination root directories are hardlinkable from the fsdb. + hardlinkable_destinations: Arc>>>>, } enum VerifiedCopyError { - CopyFailure(String), - DoesntMatch, + CopyFailure(String), + DoesntMatch, } impl From for VerifiedCopyError { - fn from(err: String) -> Self { - Self::CopyFailure(err) - } + fn from(err: String) -> Self { + Self::CopyFailure(err) + } } impl ShardedFSDB { - pub(crate) fn get_path(&self, fingerprint: Fingerprint) -> PathBuf { - let hex = fingerprint.to_hex(); - self.root.join(hex.get(0..2).unwrap()).join(hex) - } - - async fn is_hardlinkable_destination(&self, destination: &Path) -> Result { - let cell = { - let mut cells = self.hardlinkable_destinations.lock(); - if let Some(cell) = cells.get(destination) { - cell.clone() - } else { - let cell = Arc::new(OnceCell::new()); - cells.insert(destination.to_owned(), cell.clone()); - cell - } - }; - - if let Some(res) = cell.get() { - return Ok(*res); - } - - let fsdb = self.clone(); - let dst_parent_dir = destination.to_owned(); - cell - .get_or_try_init(async move { - let src_display = fsdb.root.display().to_string(); - let dst_display = dst_parent_dir.display().to_string(); - tokio::fs::create_dir_all(&dst_parent_dir) - .await - .map_err(|e| format!("Failed to create directory: {e}"))?; - let (src_file, dst_dir) = fsdb - .executor - .spawn_blocking( - move || { - let src_file = Builder::new() - .suffix(".hardlink_canary") - .tempfile_in(&fsdb.root) - .map_err(|e| format!("Failed to create hardlink canary file: {e}"))?; - let dst_dir = Builder::new() - .suffix(".hardlink_canary") - .tempdir_in(dst_parent_dir) - .map_err(|e| format!("Failed to create hardlink canary dir: {e}"))?; - Ok((src_file, dst_dir)) - }, - |e| Err(format!("hardlink canary temp files task failed: {e}")), - ) - .await?; - let dst_file = dst_dir.path().join("hard_link"); - let 
is_hardlinkable = hard_link(src_file, dst_file).await.is_ok(); - log::debug!("{src_display} -> {dst_display} hardlinkable: {is_hardlinkable}"); - Ok(is_hardlinkable) - }) - .await - .copied() - } - - async fn bytes_writer( - mut file: tokio::fs::File, - bytes: &Bytes, - ) -> Result { - file - .write_all(bytes) - .await - .map_err(|e| format!("Failed to write bytes: {e}"))?; - Ok(file) - } - - async fn verified_copier( - mut file: tokio::fs::File, - expected_digest: Digest, - src_is_immutable: bool, - mut reader: R, - ) -> Result - where - R: AsyncRead + Unpin, - { - let matches = async_verified_copy(expected_digest, src_is_immutable, &mut reader, &mut file) - .await - .map_err(|e| VerifiedCopyError::CopyFailure(format!("Failed to copy bytes: {e}")))?; - if matches { - Ok(file) - } else { - Err(VerifiedCopyError::DoesntMatch) - } - } - - pub(crate) async fn write_using( - &self, - fingerprint: Fingerprint, - writer_func: F, - ) -> Result<(), E> - where - F: FnOnce(tokio::fs::File) -> Fut, - Fut: Future>, - // NB: The error type must be convertible from a string - E: std::convert::From, - { - let cell = self - .dest_initializer - .lock() - .entry(fingerprint) - .or_default() - .clone(); - cell - .get_or_try_init(async { - let dest_path = self.get_path(fingerprint); - tokio::fs::create_dir_all(dest_path.parent().unwrap()) - .await - .map_err(|e| format! {"Failed to create local store subdirectory {dest_path:?}: {e}"})?; - - let dest_path2 = dest_path.clone(); - // Make the tempfile in the same dir as the final file so that materializing the final file doesn't - // have to worry about parent dirs. - let named_temp_file = self - .executor - .spawn_blocking( - move || { - Builder::new() - .suffix(".tmp") - .tempfile_in(dest_path2.parent().unwrap()) - .map_err(|e| format!("Failed to create temp file: {e}")) - }, - |e| Err(format!("temp file creation task failed: {e}")), - ) - .await?; - let (std_file, tmp_path) = named_temp_file - .keep() - .map_err(|e| format!("Failed to keep temp file: {e}"))?; - - match writer_func(std_file.into()).await { - Ok(mut tokio_file) => { - tokio_file - .shutdown() - .await - .map_err(|e| format!("Failed to shutdown {tmp_path:?}: {e}"))?; - tokio::fs::set_permissions(&tmp_path, std::fs::Permissions::from_mode(0o555)) - .await - .map_err(|e| format!("Failed to set permissions on {:?}: {e}", tmp_path))?; - // NB: Syncing metadata to disk ensures the `hard_link` we do later has the opportunity - // to succeed. Otherwise, if later when we try to `hard_link` the metadata isn't - // persisted to disk, we'll get `No such file or directory`. 
- // See https://github.com/pantsbuild/pants/pull/18768 - tokio_file - .sync_all() - .await - .map_err(|e| format!("Failed to sync {tmp_path:?}: {e}"))?; - tokio::fs::rename(tmp_path.clone(), dest_path.clone()) - .await - .map_err(|e| format!("Error while renaming: {e}."))?; - Ok(()) - } - Err(e) => { - let _ = tokio::fs::remove_file(tmp_path).await; - Err(e) - } + pub(crate) fn get_path(&self, fingerprint: Fingerprint) -> PathBuf { + let hex = fingerprint.to_hex(); + self.root.join(hex.get(0..2).unwrap()).join(hex) + } + + async fn is_hardlinkable_destination(&self, destination: &Path) -> Result { + let cell = { + let mut cells = self.hardlinkable_destinations.lock(); + if let Some(cell) = cells.get(destination) { + cell.clone() + } else { + let cell = Arc::new(OnceCell::new()); + cells.insert(destination.to_owned(), cell.clone()); + cell + } + }; + + if let Some(res) = cell.get() { + return Ok(*res); } - }) - .await - .cloned() - } -} -#[async_trait] -impl UnderlyingByteStore for ShardedFSDB { - async fn exists_batch( - &self, - fingerprints: Vec, - ) -> Result, String> { - let results = join_all( - fingerprints - .iter() - .map(|fingerprint| tokio::fs::metadata(self.get_path(*fingerprint))), - ) - .await; - let existing = results - .iter() - .zip(fingerprints) - .filter_map(|(result, fingerprint)| { - if result.is_ok() { - Some(fingerprint) + let fsdb = self.clone(); + let dst_parent_dir = destination.to_owned(); + cell.get_or_try_init(async move { + let src_display = fsdb.root.display().to_string(); + let dst_display = dst_parent_dir.display().to_string(); + tokio::fs::create_dir_all(&dst_parent_dir) + .await + .map_err(|e| format!("Failed to create directory: {e}"))?; + let (src_file, dst_dir) = fsdb + .executor + .spawn_blocking( + move || { + let src_file = Builder::new() + .suffix(".hardlink_canary") + .tempfile_in(&fsdb.root) + .map_err(|e| format!("Failed to create hardlink canary file: {e}"))?; + let dst_dir = Builder::new() + .suffix(".hardlink_canary") + .tempdir_in(dst_parent_dir) + .map_err(|e| format!("Failed to create hardlink canary dir: {e}"))?; + Ok((src_file, dst_dir)) + }, + |e| Err(format!("hardlink canary temp files task failed: {e}")), + ) + .await?; + let dst_file = dst_dir.path().join("hard_link"); + let is_hardlinkable = hard_link(src_file, dst_file).await.is_ok(); + log::debug!("{src_display} -> {dst_display} hardlinkable: {is_hardlinkable}"); + Ok(is_hardlinkable) + }) + .await + .copied() + } + + async fn bytes_writer( + mut file: tokio::fs::File, + bytes: &Bytes, + ) -> Result { + file.write_all(bytes) + .await + .map_err(|e| format!("Failed to write bytes: {e}"))?; + Ok(file) + } + + async fn verified_copier( + mut file: tokio::fs::File, + expected_digest: Digest, + src_is_immutable: bool, + mut reader: R, + ) -> Result + where + R: AsyncRead + Unpin, + { + let matches = + async_verified_copy(expected_digest, src_is_immutable, &mut reader, &mut file) + .await + .map_err(|e| { + VerifiedCopyError::CopyFailure(format!("Failed to copy bytes: {e}")) + })?; + if matches { + Ok(file) } else { - None + Err(VerifiedCopyError::DoesntMatch) } - }) - .collect::>(); - - Ok(HashSet::from_iter(existing)) - } - - async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String> { - let path = self.get_path(fingerprint); - self - .executor - .spawn_blocking( - move || { - fs_set_times::set_mtime(&path, fs_set_times::SystemTimeSpec::SymbolicNow) - .map_err(|e| format!("Failed to extend mtime of {path:?}: {e}")) - }, - |e| Err(format!("`lease` task failed: {e}")), - ) - 
.await - } - - async fn remove(&self, fingerprint: Fingerprint) -> Result { - let _ = self.dest_initializer.lock().remove(&fingerprint); - Ok( - tokio::fs::remove_file(self.get_path(fingerprint)) - .await - .is_ok(), - ) - } - - async fn store_bytes_batch( - &self, - items: Vec<(Fingerprint, Bytes)>, - _initial_lease: bool, - ) -> Result<(), String> { - try_join_all(items.iter().map(|(fingerprint, bytes)| async move { - self - .write_using(*fingerprint, |file| Self::bytes_writer(file, bytes)) - .await?; - Ok::<(), String>(()) - })) - .await?; - - Ok(()) - } - - async fn store( - &self, - _initial_lease: bool, - src_is_immutable: bool, - expected_digest: Digest, - src: PathBuf, - ) -> Result<(), String> { - let mut attempts = 0; - loop { - let reader = tokio::fs::File::open(src.clone()) - .await - .map_err(|e| format!("Failed to open {src:?}: {e}"))?; - - // TODO: Consider using `fclonefileat` on macOS or checking for same filesystem+rename on Linux, - // which would skip actual copying (read+write), and instead just require verifying the - // resulting content after the syscall (read only). - let copy_result = self - .write_using(expected_digest.hash, |file| { - Self::verified_copier(file, expected_digest, src_is_immutable, reader) + } + + pub(crate) async fn write_using( + &self, + fingerprint: Fingerprint, + writer_func: F, + ) -> Result<(), E> + where + F: FnOnce(tokio::fs::File) -> Fut, + Fut: Future>, + // NB: The error type must be convertible from a string + E: std::convert::From, + { + let cell = self + .dest_initializer + .lock() + .entry(fingerprint) + .or_default() + .clone(); + cell.get_or_try_init(async { + let dest_path = self.get_path(fingerprint); + tokio::fs::create_dir_all(dest_path.parent().unwrap()) + .await + .map_err( + |e| format! {"Failed to create local store subdirectory {dest_path:?}: {e}"}, + )?; + + let dest_path2 = dest_path.clone(); + // Make the tempfile in the same dir as the final file so that materializing the final file doesn't + // have to worry about parent dirs. + let named_temp_file = self + .executor + .spawn_blocking( + move || { + Builder::new() + .suffix(".tmp") + .tempfile_in(dest_path2.parent().unwrap()) + .map_err(|e| format!("Failed to create temp file: {e}")) + }, + |e| Err(format!("temp file creation task failed: {e}")), + ) + .await?; + let (std_file, tmp_path) = named_temp_file + .keep() + .map_err(|e| format!("Failed to keep temp file: {e}"))?; + + match writer_func(std_file.into()).await { + Ok(mut tokio_file) => { + tokio_file + .shutdown() + .await + .map_err(|e| format!("Failed to shutdown {tmp_path:?}: {e}"))?; + tokio::fs::set_permissions(&tmp_path, std::fs::Permissions::from_mode(0o555)) + .await + .map_err(|e| format!("Failed to set permissions on {:?}: {e}", tmp_path))?; + // NB: Syncing metadata to disk ensures the `hard_link` we do later has the opportunity + // to succeed. Otherwise, if later when we try to `hard_link` the metadata isn't + // persisted to disk, we'll get `No such file or directory`. 
+ // See https://github.com/pantsbuild/pants/pull/18768 + tokio_file + .sync_all() + .await + .map_err(|e| format!("Failed to sync {tmp_path:?}: {e}"))?; + tokio::fs::rename(tmp_path.clone(), dest_path.clone()) + .await + .map_err(|e| format!("Error while renaming: {e}."))?; + Ok(()) + } + Err(e) => { + let _ = tokio::fs::remove_file(tmp_path).await; + Err(e) + } + } }) - .await; - let should_retry = match copy_result { - Ok(()) => Ok(false), - Err(VerifiedCopyError::CopyFailure(s)) => Err(s), - Err(VerifiedCopyError::DoesntMatch) => Ok(true), - }; - - if should_retry? { - attempts += 1; - let msg = format!("Input {src:?} changed while reading."); - log::debug!("{}", msg); - if attempts > 10 { - return Err(format!("Failed to store {src:?}.")); - } - } else { - break; - } - } - - Ok(()) - } - - async fn load_bytes_with< - T: Send + 'static, - F: FnMut(&[u8]) -> Result + Send + Sync + 'static, - >( - &self, - fingerprint: Fingerprint, - mut f: F, - ) -> Result, String> { - if let Ok(mut file) = tokio::fs::File::open(self.get_path(fingerprint)).await { - // TODO: Use mmap instead of copying into user-space. - let mut contents: Vec = vec![]; - file - .read_to_end(&mut contents) .await - .map_err(|e| format!("Failed to load large file into memory: {e}"))?; - Ok(Some(f(&contents[..])?)) - } else { - Ok(None) - } - } - - async fn aged_fingerprints(&self) -> Result, String> { - // NB: The ShardLmdb implementation stores a lease time in the future, and then compares the - // current time to the stored lease time for a fingerprint to determine how long ago it - // expired. Rather than setting `mtimes` in the future, this implementation instead considers a - // file to be expired if its mtime is outside of the lease time window. - let root = self.root.clone(); - let expiration_time = SystemTime::now() - self.lease_time; - self - .executor - .spawn_blocking( - move || { - let maybe_shards = std::fs::read_dir(&root); - let mut fingerprints = vec![]; - if let Ok(shards) = maybe_shards { - for entry in shards { - let shard = entry.map_err(|e| format!("Error iterating dir {root:?}: {e}."))?; - let large_files = std::fs::read_dir(shard.path()) - .map_err(|e| format!("Failed to read shard directory: {e}."))?; - for entry in large_files { - let large_file = entry.map_err(|e| { - format!("Error iterating dir {:?}: {e}", shard.path().file_name()) - })?; - let path = large_file.path(); - if path.extension().is_some() { - continue; // NB: This is a tempfile + .cloned() + } +} + +#[async_trait] +impl UnderlyingByteStore for ShardedFSDB { + async fn exists_batch( + &self, + fingerprints: Vec, + ) -> Result, String> { + let results = join_all( + fingerprints + .iter() + .map(|fingerprint| tokio::fs::metadata(self.get_path(*fingerprint))), + ) + .await; + let existing = results + .iter() + .zip(fingerprints) + .filter_map(|(result, fingerprint)| { + if result.is_ok() { + Some(fingerprint) + } else { + None } + }) + .collect::>(); + + Ok(HashSet::from_iter(existing)) + } - let hash = path.file_name().unwrap().to_str().unwrap(); - let (length, mtime) = large_file - .metadata() - .and_then(|metadata| { - let length = metadata.len(); - let mtime = metadata.modified()?; - Ok((length, mtime)) - }) - .map_err(|e| format!("Could not access metadata for {path:?}: {e}"))?; - - let expired_seconds_ago = expiration_time - .duration_since(mtime) - .map(|t| t.as_secs()) - // 0 indicates unexpired. 
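The `write_using` flow above (temp file in the destination's own directory, write, set permissions, sync, then rename into place) is the classic atomic-write recipe. A minimal synchronous sketch, omitting the permission and initializer-cell details:

use std::fs;
use std::io::Write;
use std::path::Path;

fn write_atomically(dest: &Path, bytes: &[u8]) -> std::io::Result<()> {
    // A sibling temp path keeps the final rename on the same filesystem and
    // ensures readers never observe a partially written file.
    let tmp = dest.with_extension("tmp");
    let mut f = fs::File::create(&tmp)?;
    f.write_all(bytes)?;
    // Persist data and metadata before renaming so that a later hard_link of
    // the final path cannot race an inode that is not yet on disk.
    f.sync_all()?;
    drop(f);
    fs::rename(&tmp, dest)?;
    Ok(())
}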
- .unwrap_or(0); - - fingerprints.push(AgedFingerprint { - expired_seconds_ago, - fingerprint: Fingerprint::from_hex_string(hash) - .map_err(|e| format!("Invalid file store entry at {path:?}: {e}"))?, - size_bytes: length as usize, - }); - } + async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String> { + let path = self.get_path(fingerprint); + self.executor + .spawn_blocking( + move || { + fs_set_times::set_mtime(&path, fs_set_times::SystemTimeSpec::SymbolicNow) + .map_err(|e| format!("Failed to extend mtime of {path:?}: {e}")) + }, + |e| Err(format!("`lease` task failed: {e}")), + ) + .await + } + + async fn remove(&self, fingerprint: Fingerprint) -> Result { + let _ = self.dest_initializer.lock().remove(&fingerprint); + Ok(tokio::fs::remove_file(self.get_path(fingerprint)) + .await + .is_ok()) + } + + async fn store_bytes_batch( + &self, + items: Vec<(Fingerprint, Bytes)>, + _initial_lease: bool, + ) -> Result<(), String> { + try_join_all(items.iter().map(|(fingerprint, bytes)| async move { + self.write_using(*fingerprint, |file| Self::bytes_writer(file, bytes)) + .await?; + Ok::<(), String>(()) + })) + .await?; + + Ok(()) + } + + async fn store( + &self, + _initial_lease: bool, + src_is_immutable: bool, + expected_digest: Digest, + src: PathBuf, + ) -> Result<(), String> { + let mut attempts = 0; + loop { + let reader = tokio::fs::File::open(src.clone()) + .await + .map_err(|e| format!("Failed to open {src:?}: {e}"))?; + + // TODO: Consider using `fclonefileat` on macOS or checking for same filesystem+rename on Linux, + // which would skip actual copying (read+write), and instead just require verifying the + // resulting content after the syscall (read only). + let copy_result = self + .write_using(expected_digest.hash, |file| { + Self::verified_copier(file, expected_digest, src_is_immutable, reader) + }) + .await; + let should_retry = match copy_result { + Ok(()) => Ok(false), + Err(VerifiedCopyError::CopyFailure(s)) => Err(s), + Err(VerifiedCopyError::DoesntMatch) => Ok(true), + }; + + if should_retry? { + attempts += 1; + let msg = format!("Input {src:?} changed while reading."); + log::debug!("{}", msg); + if attempts > 10 { + return Err(format!("Failed to store {src:?}.")); + } + } else { + break; } - } - Ok(fingerprints) - }, - |e| Err(format!("`aged_fingerprints` task failed: {e}")), - ) - .await - } + } + + Ok(()) + } + + async fn load_bytes_with< + T: Send + 'static, + F: FnMut(&[u8]) -> Result + Send + Sync + 'static, + >( + &self, + fingerprint: Fingerprint, + mut f: F, + ) -> Result, String> { + if let Ok(mut file) = tokio::fs::File::open(self.get_path(fingerprint)).await { + // TODO: Use mmap instead of copying into user-space. + let mut contents: Vec = vec![]; + file.read_to_end(&mut contents) + .await + .map_err(|e| format!("Failed to load large file into memory: {e}"))?; + Ok(Some(f(&contents[..])?)) + } else { + Ok(None) + } + } + + async fn aged_fingerprints(&self) -> Result, String> { + // NB: The ShardLmdb implementation stores a lease time in the future, and then compares the + // current time to the stored lease time for a fingerprint to determine how long ago it + // expired. Rather than setting `mtimes` in the future, this implementation instead considers a + // file to be expired if its mtime is outside of the lease time window. 
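Concretely, the mtime-based expiry described above reduces to: an entry is expired by however many seconds its mtime lies before `now - lease_time`, and unexpired (0) otherwise. A small sketch of that computation:

use std::time::{Duration, SystemTime};

fn expired_seconds_ago(mtime: SystemTime, lease_time: Duration) -> u64 {
    let expiration_time = SystemTime::now() - lease_time;
    expiration_time
        .duration_since(mtime)
        .map(|t| t.as_secs())
        // Err means the mtime is newer than the expiration boundary, i.e. the
        // entry is still inside its lease window; report 0 (unexpired).
        .unwrap_or(0)
}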
+ let root = self.root.clone(); + let expiration_time = SystemTime::now() - self.lease_time; + self.executor + .spawn_blocking( + move || { + let maybe_shards = std::fs::read_dir(&root); + let mut fingerprints = vec![]; + if let Ok(shards) = maybe_shards { + for entry in shards { + let shard = + entry.map_err(|e| format!("Error iterating dir {root:?}: {e}."))?; + let large_files = std::fs::read_dir(shard.path()) + .map_err(|e| format!("Failed to read shard directory: {e}."))?; + for entry in large_files { + let large_file = entry.map_err(|e| { + format!( + "Error iterating dir {:?}: {e}", + shard.path().file_name() + ) + })?; + let path = large_file.path(); + if path.extension().is_some() { + continue; // NB: This is a tempfile + } + + let hash = path.file_name().unwrap().to_str().unwrap(); + let (length, mtime) = large_file + .metadata() + .and_then(|metadata| { + let length = metadata.len(); + let mtime = metadata.modified()?; + Ok((length, mtime)) + }) + .map_err(|e| { + format!("Could not access metadata for {path:?}: {e}") + })?; + + let expired_seconds_ago = expiration_time + .duration_since(mtime) + .map(|t| t.as_secs()) + // 0 indicates unexpired. + .unwrap_or(0); + + fingerprints.push(AgedFingerprint { + expired_seconds_ago, + fingerprint: Fingerprint::from_hex_string(hash).map_err( + |e| format!("Invalid file store entry at {path:?}: {e}"), + )?, + size_bytes: length as usize, + }); + } + } + } + Ok(fingerprints) + }, + |e| Err(format!("`aged_fingerprints` task failed: {e}")), + ) + .await + } } #[derive(Debug, Clone)] pub struct ByteStore { - inner: Arc, + inner: Arc, } #[derive(Debug)] struct InnerStore { - // Store directories separately from files because: - // 1. They may have different lifetimes. - // 2. It's nice to know whether we should be able to parse something as a proto. - file_lmdb: Result, String>, - directory_lmdb: Result, String>, - file_fsdb: ShardedFSDB, + // Store directories separately from files because: + // 1. They may have different lifetimes. + // 2. It's nice to know whether we should be able to parse something as a proto. 
+ file_lmdb: Result, String>, + directory_lmdb: Result, String>, + file_fsdb: ShardedFSDB, } impl ByteStore { - pub fn new>( - executor: task_executor::Executor, - path: P, - ) -> Result { - Self::new_with_options(executor, path, super::LocalOptions::default()) - } - - pub fn new_with_options>( - executor: task_executor::Executor, - path: P, - options: super::LocalOptions, - ) -> Result { - let root = path.as_ref(); - let lmdb_files_root = root.join("files"); - let lmdb_directories_root = root.join("directories"); - let fsdb_files_root = root.join("immutable").join("files"); - - std::fs::create_dir_all(root) - .map_err(|e| format!("Failed to create {}: {e}", root.display()))?; - std::fs::create_dir_all(&fsdb_files_root) - .map_err(|e| format!("Failed to create {}: {e}", fsdb_files_root.display()))?; - - Ok(ByteStore { - inner: Arc::new(InnerStore { - file_lmdb: ShardedLmdb::new( - lmdb_files_root, - options.files_max_size_bytes, - executor.clone(), - options.lease_time, - options.shard_count, - ) - .map(Arc::new), - directory_lmdb: ShardedLmdb::new( - lmdb_directories_root, - options.directories_max_size_bytes, - executor.clone(), - options.lease_time, - options.shard_count, - ) - .map(Arc::new), - file_fsdb: ShardedFSDB { - executor: executor, - root: fsdb_files_root, - lease_time: options.lease_time, - dest_initializer: Arc::new(Mutex::default()), - hardlinkable_destinations: Arc::new(Mutex::default()), - }, - }), - }) - } - - pub async fn is_hardlinkable_destination(&self, destination: &Path) -> Result { - self - .inner - .file_fsdb - .is_hardlinkable_destination(destination) - .await - } - - pub async fn entry_type(&self, fingerprint: Fingerprint) -> Result, String> { - if fingerprint == EMPTY_DIGEST.hash { - // Technically this is valid as both; choose Directory in case a caller is checking whether - // it _can_ be a Directory. - return Ok(Some(EntryType::Directory)); - } - - // In parallel, check for the given fingerprint in all databases. - let directory_lmdb = self.inner.directory_lmdb.clone()?; - let is_lmdb_dir = directory_lmdb.exists(fingerprint); - let file_lmdb = self.inner.file_lmdb.clone()?; - let is_lmdb_file = file_lmdb.exists(fingerprint); - let is_fsdb_file = self.inner.file_fsdb.exists(fingerprint); - - // TODO: Could technically use select to return slightly more quickly with the first - // affirmative answer, but this is simpler. - match future::try_join3(is_lmdb_dir, is_lmdb_file, is_fsdb_file).await? { - (true, _, _) => Ok(Some(EntryType::Directory)), - (_, true, _) => Ok(Some(EntryType::File)), - (_, _, true) => Ok(Some(EntryType::File)), - (false, false, false) => Ok(None), - } - } - - pub async fn lease_all( - &self, - digests: impl Iterator, - ) -> Result<(), String> { - // NB: Lease extension happens periodically in the background, so this code needn't be parallel. 
- for (digest, entry_type) in digests { - if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { - self.inner.file_fsdb.lease(digest.hash).await?; - } else { - let dbs = match entry_type { - EntryType::File => self.inner.file_lmdb.clone(), - EntryType::Directory => self.inner.directory_lmdb.clone(), + pub fn new>( + executor: task_executor::Executor, + path: P, + ) -> Result { + Self::new_with_options(executor, path, super::LocalOptions::default()) + } + + pub fn new_with_options>( + executor: task_executor::Executor, + path: P, + options: super::LocalOptions, + ) -> Result { + let root = path.as_ref(); + let lmdb_files_root = root.join("files"); + let lmdb_directories_root = root.join("directories"); + let fsdb_files_root = root.join("immutable").join("files"); + + std::fs::create_dir_all(root) + .map_err(|e| format!("Failed to create {}: {e}", root.display()))?; + std::fs::create_dir_all(&fsdb_files_root) + .map_err(|e| format!("Failed to create {}: {e}", fsdb_files_root.display()))?; + + Ok(ByteStore { + inner: Arc::new(InnerStore { + file_lmdb: ShardedLmdb::new( + lmdb_files_root, + options.files_max_size_bytes, + executor.clone(), + options.lease_time, + options.shard_count, + ) + .map(Arc::new), + directory_lmdb: ShardedLmdb::new( + lmdb_directories_root, + options.directories_max_size_bytes, + executor.clone(), + options.lease_time, + options.shard_count, + ) + .map(Arc::new), + file_fsdb: ShardedFSDB { + executor: executor, + root: fsdb_files_root, + lease_time: options.lease_time, + dest_initializer: Arc::new(Mutex::default()), + hardlinkable_destinations: Arc::new(Mutex::default()), + }, + }), + }) + } + + pub async fn is_hardlinkable_destination(&self, destination: &Path) -> Result { + self.inner + .file_fsdb + .is_hardlinkable_destination(destination) + .await + } + + pub async fn entry_type(&self, fingerprint: Fingerprint) -> Result, String> { + if fingerprint == EMPTY_DIGEST.hash { + // Technically this is valid as both; choose Directory in case a caller is checking whether + // it _can_ be a Directory. + return Ok(Some(EntryType::Directory)); + } + + // In parallel, check for the given fingerprint in all databases. + let directory_lmdb = self.inner.directory_lmdb.clone()?; + let is_lmdb_dir = directory_lmdb.exists(fingerprint); + let file_lmdb = self.inner.file_lmdb.clone()?; + let is_lmdb_file = file_lmdb.exists(fingerprint); + let is_fsdb_file = self.inner.file_fsdb.exists(fingerprint); + + // TODO: Could technically use select to return slightly more quickly with the first + // affirmative answer, but this is simpler. + match future::try_join3(is_lmdb_dir, is_lmdb_file, is_fsdb_file).await? { + (true, _, _) => Ok(Some(EntryType::Directory)), + (_, true, _) => Ok(Some(EntryType::File)), + (_, _, true) => Ok(Some(EntryType::File)), + (false, false, false) => Ok(None), + } + } + + pub async fn lease_all( + &self, + digests: impl Iterator, + ) -> Result<(), String> { + // NB: Lease extension happens periodically in the background, so this code needn't be parallel. 
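`should_use_fsdb` is not shown in this hunk; the sketch below is an assumption about its shape, inferred from `LARGE_FILE_SIZE_LIMIT` earlier in this file: file blobs at or above the threshold are routed to the filesystem-backed store, while directories and small files stay in LMDB.

// Assumed routing predicate; the real definition lives elsewhere in local.rs.
const LARGE_FILE_SIZE_LIMIT: usize = 512 * 1024;

fn should_use_fsdb_sketch(is_file: bool, size_bytes: usize) -> bool {
    is_file && size_bytes >= LARGE_FILE_SIZE_LIMIT
}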
+ for (digest, entry_type) in digests { + if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { + self.inner.file_fsdb.lease(digest.hash).await?; + } else { + let dbs = match entry_type { + EntryType::File => self.inner.file_lmdb.clone(), + EntryType::Directory => self.inner.directory_lmdb.clone(), + }; + dbs?.lease(digest.hash) + .await + .map_err(|err| format!("Error leasing digest {digest:?}: {err}"))?; + } + } + Ok(()) + } + + /// + /// Attempts to shrink the stored files to be no bigger than target_bytes + /// (excluding lmdb overhead). + /// + /// Returns the size it was shrunk to, which may be larger than target_bytes. + /// + /// TODO: Use LMDB database statistics when lmdb-rs exposes them. + /// + pub async fn shrink( + &self, + target_bytes: usize, + shrink_behavior: ShrinkBehavior, + ) -> Result { + let mut used_bytes: usize = 0; + let mut fingerprints_by_expired_ago = BinaryHeap::new(); + + fingerprints_by_expired_ago.extend( + self.inner + .file_lmdb + .clone()? + .aged_fingerprints() + .await? + .into_iter() + .map(|fingerprint| { + used_bytes += fingerprint.size_bytes; + (fingerprint, EntryType::File) + }), + ); + fingerprints_by_expired_ago.extend( + self.inner + .directory_lmdb + .clone()? + .aged_fingerprints() + .await? + .into_iter() + .map(|fingerprint| { + used_bytes += fingerprint.size_bytes; + (fingerprint, EntryType::Directory) + }), + ); + fingerprints_by_expired_ago.extend( + self.inner + .file_fsdb + .aged_fingerprints() + .await? + .into_iter() + .map(|fingerprint| { + used_bytes += fingerprint.size_bytes; + (fingerprint, EntryType::File) + }), + ); + + while used_bytes > target_bytes { + let (aged_fingerprint, entry_type) = fingerprints_by_expired_ago + .pop() + .expect("lmdb corruption detected, sum of size of blobs exceeded stored blobs"); + if aged_fingerprint.expired_seconds_ago == 0 { + // Ran out of expired blobs - everything remaining is leased and cannot be collected. + return Ok(used_bytes); + } + self.remove( + entry_type, + Digest { + hash: aged_fingerprint.fingerprint, + size_bytes: aged_fingerprint.size_bytes, + }, + ) + .await?; + used_bytes -= aged_fingerprint.size_bytes; + } + + if shrink_behavior == ShrinkBehavior::Compact { + self.inner.file_lmdb.clone()?.compact()?; + } + + Ok(used_bytes) + } + + pub async fn remove(&self, entry_type: EntryType, digest: Digest) -> Result { + match entry_type { + EntryType::Directory => self.inner.directory_lmdb.clone()?.remove(digest.hash).await, + EntryType::File if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) => { + self.inner.file_fsdb.remove(digest.hash).await + } + EntryType::File => self.inner.file_lmdb.clone()?.remove(digest.hash).await, + } + } + + /// + /// Store the given data in a single pass, using the given Fingerprint. Prefer `Self::store` + /// for values which should not be pulled into memory, and `Self::store_bytes_batch` when storing + /// multiple values at a time. + /// + pub async fn store_bytes( + &self, + entry_type: EntryType, + fingerprint: Fingerprint, + bytes: Bytes, + initial_lease: bool, + ) -> Result<(), String> { + self.store_bytes_batch(entry_type, vec![(fingerprint, bytes)], initial_lease) + .await + } + + /// + /// Store the given items in a single pass, optionally using the given Digests. Prefer `Self::store` + /// for values which should not be pulled into memory. + /// + /// See also: `Self::store_bytes`. 
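The shrink loop above can be read as: sum the sizes of all aged fingerprints, push them onto a max-heap keyed by how long ago they expired, and evict from the most-expired end until the total fits, stopping early once only unexpired (still leased) entries remain. A condensed sketch using (expired_seconds_ago, size_bytes) pairs:

use std::collections::BinaryHeap;

fn shrink_to(target_bytes: usize, entries: Vec<(u64, usize)>) -> usize {
    let mut used_bytes: usize = entries.iter().map(|(_, size)| *size).sum();
    // Tuple ordering makes the max-heap yield the longest-expired entry first.
    let mut by_expiry: BinaryHeap<(u64, usize)> = entries.into_iter().collect();
    while used_bytes > target_bytes {
        match by_expiry.pop() {
            // Only genuinely expired entries may be evicted.
            Some((expired_seconds_ago, size)) if expired_seconds_ago > 0 => {
                used_bytes -= size;
            }
            // Everything left is still leased (or the heap is empty): stop.
            _ => break,
        }
    }
    used_bytes
}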
+ /// + pub async fn store_bytes_batch( + &self, + entry_type: EntryType, + items: Vec<(Fingerprint, Bytes)>, + initial_lease: bool, + ) -> Result<(), String> { + let mut fsdb_items = vec![]; + let mut lmdb_items = vec![]; + for (fingerprint, bytes) in items { + if ByteStore::should_use_fsdb(entry_type, bytes.len()) { + fsdb_items.push((fingerprint, bytes)); + } else { + lmdb_items.push((fingerprint, bytes)); + } + } + + let lmdb_dbs = match entry_type { + EntryType::Directory => self.inner.directory_lmdb.clone(), + EntryType::File => self.inner.file_lmdb.clone(), }; - dbs? - .lease(digest.hash) - .await - .map_err(|err| format!("Error leasing digest {digest:?}: {err}"))?; - } - } - Ok(()) - } - - /// - /// Attempts to shrink the stored files to be no bigger than target_bytes - /// (excluding lmdb overhead). - /// - /// Returns the size it was shrunk to, which may be larger than target_bytes. - /// - /// TODO: Use LMDB database statistics when lmdb-rs exposes them. - /// - pub async fn shrink( - &self, - target_bytes: usize, - shrink_behavior: ShrinkBehavior, - ) -> Result { - let mut used_bytes: usize = 0; - let mut fingerprints_by_expired_ago = BinaryHeap::new(); - - fingerprints_by_expired_ago.extend( - self - .inner - .file_lmdb - .clone()? - .aged_fingerprints() - .await? - .into_iter() - .map(|fingerprint| { - used_bytes += fingerprint.size_bytes; - (fingerprint, EntryType::File) - }), - ); - fingerprints_by_expired_ago.extend( - self - .inner - .directory_lmdb - .clone()? - .aged_fingerprints() - .await? - .into_iter() - .map(|fingerprint| { - used_bytes += fingerprint.size_bytes; - (fingerprint, EntryType::Directory) - }), - ); - fingerprints_by_expired_ago.extend( - self - .inner - .file_fsdb - .aged_fingerprints() - .await? - .into_iter() - .map(|fingerprint| { - used_bytes += fingerprint.size_bytes; - (fingerprint, EntryType::File) - }), - ); - - while used_bytes > target_bytes { - let (aged_fingerprint, entry_type) = fingerprints_by_expired_ago - .pop() - .expect("lmdb corruption detected, sum of size of blobs exceeded stored blobs"); - if aged_fingerprint.expired_seconds_ago == 0 { - // Ran out of expired blobs - everything remaining is leased and cannot be collected. - return Ok(used_bytes); - } - self - .remove( - entry_type, - Digest { - hash: aged_fingerprint.fingerprint, - size_bytes: aged_fingerprint.size_bytes, - }, + try_join( + self.inner + .file_fsdb + .store_bytes_batch(fsdb_items, initial_lease), + lmdb_dbs?.store_bytes_batch(lmdb_items, initial_lease), ) .await?; - used_bytes -= aged_fingerprint.size_bytes; - } - - if shrink_behavior == ShrinkBehavior::Compact { - self.inner.file_lmdb.clone()?.compact()?; - } - - Ok(used_bytes) - } - - pub async fn remove(&self, entry_type: EntryType, digest: Digest) -> Result { - match entry_type { - EntryType::Directory => self.inner.directory_lmdb.clone()?.remove(digest.hash).await, - EntryType::File if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) => { - self.inner.file_fsdb.remove(digest.hash).await - } - EntryType::File => self.inner.file_lmdb.clone()?.remove(digest.hash).await, - } - } - - /// - /// Store the given data in a single pass, using the given Fingerprint. Prefer `Self::store` - /// for values which should not be pulled into memory, and `Self::store_bytes_batch` when storing - /// multiple values at a time. 
- /// - pub async fn store_bytes( - &self, - entry_type: EntryType, - fingerprint: Fingerprint, - bytes: Bytes, - initial_lease: bool, - ) -> Result<(), String> { - self - .store_bytes_batch(entry_type, vec![(fingerprint, bytes)], initial_lease) - .await - } - - /// - /// Store the given items in a single pass, optionally using the given Digests. Prefer `Self::store` - /// for values which should not be pulled into memory. - /// - /// See also: `Self::store_bytes`. - /// - pub async fn store_bytes_batch( - &self, - entry_type: EntryType, - items: Vec<(Fingerprint, Bytes)>, - initial_lease: bool, - ) -> Result<(), String> { - let mut fsdb_items = vec![]; - let mut lmdb_items = vec![]; - for (fingerprint, bytes) in items { - if ByteStore::should_use_fsdb(entry_type, bytes.len()) { - fsdb_items.push((fingerprint, bytes)); - } else { - lmdb_items.push((fingerprint, bytes)); - } - } - - let lmdb_dbs = match entry_type { - EntryType::Directory => self.inner.directory_lmdb.clone(), - EntryType::File => self.inner.file_lmdb.clone(), - }; - try_join( - self - .inner - .file_fsdb - .store_bytes_batch(fsdb_items, initial_lease), - lmdb_dbs?.store_bytes_batch(lmdb_items, initial_lease), - ) - .await?; - - Ok(()) - } - - /// - /// Store data in two passes, without buffering it entirely into memory. Prefer - /// `Self::store_bytes` for small values which fit comfortably in memory. - /// - pub async fn store( - &self, - entry_type: EntryType, - initial_lease: bool, - src_is_immutable: bool, - src: PathBuf, - ) -> Result { - let mut file = tokio::fs::File::open(src.clone()) - .await - .map_err(|e| format!("Failed to open {src:?}: {e}"))?; - let digest = async_copy_and_hash(&mut file, &mut tokio::io::sink()) - .await - .map_err(|e| format!("Failed to hash {src:?}: {e}"))?; - - if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { - self - .inner - .file_fsdb - .store(initial_lease, src_is_immutable, digest, src) + + Ok(()) + } + + /// + /// Store data in two passes, without buffering it entirely into memory. Prefer + /// `Self::store_bytes` for small values which fit comfortably in memory. + /// + pub async fn store( + &self, + entry_type: EntryType, + initial_lease: bool, + src_is_immutable: bool, + src: PathBuf, + ) -> Result { + let mut file = tokio::fs::File::open(src.clone()) + .await + .map_err(|e| format!("Failed to open {src:?}: {e}"))?; + let digest = async_copy_and_hash(&mut file, &mut tokio::io::sink()) + .await + .map_err(|e| format!("Failed to hash {src:?}: {e}"))?; + + if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { + self.inner + .file_fsdb + .store(initial_lease, src_is_immutable, digest, src) + .await?; + } else { + let dbs = match entry_type { + EntryType::Directory => self.inner.directory_lmdb.clone()?, + EntryType::File => self.inner.file_lmdb.clone()?, + }; + let _ = dbs + .store(initial_lease, src_is_immutable, digest, move || { + std::fs::File::open(&src) + }) + .await; + } + + Ok(digest) + } + + /// + /// Given a collection of Digests (digests), + /// returns the set of digests from that collection not present in the + /// underlying LMDB store. + /// + pub async fn get_missing_digests( + &self, + entry_type: EntryType, + digests: HashSet, + ) -> Result, String> { + let mut fsdb_digests = vec![]; + let mut lmdb_digests = vec![]; + for digest in digests.iter() { + if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { + fsdb_digests.push(digest); + } + // Avoid I/O for this case. 
This allows some client-provided operations (like + // merging snapshots) to work without needing to first store the empty snapshot. + else if *digest != EMPTY_DIGEST { + lmdb_digests.push(digest); + } + } + + let lmdb = match entry_type { + EntryType::Directory => self.inner.directory_lmdb.clone(), + EntryType::File => self.inner.file_lmdb.clone(), + }?; + let (mut existing, existing_lmdb_digests) = try_join( + self.inner + .file_fsdb + .exists_batch(fsdb_digests.iter().map(|digest| digest.hash).collect()), + lmdb.exists_batch(lmdb_digests.iter().map(|digest| digest.hash).collect()), + ) .await?; - } else { - let dbs = match entry_type { - EntryType::Directory => self.inner.directory_lmdb.clone()?, - EntryType::File => self.inner.file_lmdb.clone()?, - }; - let _ = dbs - .store(initial_lease, src_is_immutable, digest, move || { - std::fs::File::open(&src) - }) - .await; + + existing.extend(existing_lmdb_digests); + + Ok(digests + .into_iter() + .filter(|digest| *digest != EMPTY_DIGEST && !existing.contains(&digest.hash)) + .collect()) + } + + /// + /// Return the path this digest is persistent on the filesystem at, or None. + /// + pub async fn load_from_fs(&self, digest: Digest) -> Result, String> { + if self.inner.file_fsdb.exists(digest.hash).await? { + return Ok(Some(self.inner.file_fsdb.get_path(digest.hash))); + } + Ok(None) } - Ok(digest) - } - - /// - /// Given a collection of Digests (digests), - /// returns the set of digests from that collection not present in the - /// underlying LMDB store. - /// - pub async fn get_missing_digests( - &self, - entry_type: EntryType, - digests: HashSet, - ) -> Result, String> { - let mut fsdb_digests = vec![]; - let mut lmdb_digests = vec![]; - for digest in digests.iter() { - if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { - fsdb_digests.push(digest); - } - // Avoid I/O for this case. This allows some client-provided operations (like - // merging snapshots) to work without needing to first store the empty snapshot. - else if *digest != EMPTY_DIGEST { - lmdb_digests.push(digest); - } - } - - let lmdb = match entry_type { - EntryType::Directory => self.inner.directory_lmdb.clone(), - EntryType::File => self.inner.file_lmdb.clone(), - }?; - let (mut existing, existing_lmdb_digests) = try_join( - self - .inner - .file_fsdb - .exists_batch(fsdb_digests.iter().map(|digest| digest.hash).collect()), - lmdb.exists_batch(lmdb_digests.iter().map(|digest| digest.hash).collect()), - ) - .await?; - - existing.extend(existing_lmdb_digests); - - Ok( - digests - .into_iter() - .filter(|digest| *digest != EMPTY_DIGEST && !existing.contains(&digest.hash)) - .collect(), - ) - } - - /// - /// Return the path this digest is persistent on the filesystem at, or None. - /// - pub async fn load_from_fs(&self, digest: Digest) -> Result, String> { - if self.inner.file_fsdb.exists(digest.hash).await? { - return Ok(Some(self.inner.file_fsdb.get_path(digest.hash))); - } - Ok(None) - } - - /// - /// Loads bytes from the underlying store using the given function. - /// In the case of the LMDB store, because the database is blocking, this accepts a function that - /// views a slice rather than returning a clone of the data. - /// The upshot is that the database is able to provide slices directly into shared memory. - /// - pub async fn load_bytes_with T + Send + Sync + 'static>( - &self, - entry_type: EntryType, - digest: Digest, - mut f: F, - ) -> Result, String> { - let start = Instant::now(); - if digest == EMPTY_DIGEST { - // Avoid I/O for this case. 
This allows some client-provided operations (like merging - // snapshots) to work without needing to first store the empty snapshot. - return Ok(Some(f(&[]))); - } - - let len_checked_f = move |bytes: &[u8]| { - if bytes.len() == digest.size_bytes { - Ok(f(bytes)) - } else { - Err(format!( + /// + /// Loads bytes from the underlying store using the given function. + /// In the case of the LMDB store, because the database is blocking, this accepts a function that + /// views a slice rather than returning a clone of the data. + /// The upshot is that the database is able to provide slices directly into shared memory. + /// + pub async fn load_bytes_with< + T: Send + 'static, + F: FnMut(&[u8]) -> T + Send + Sync + 'static, + >( + &self, + entry_type: EntryType, + digest: Digest, + mut f: F, + ) -> Result, String> { + let start = Instant::now(); + if digest == EMPTY_DIGEST { + // Avoid I/O for this case. This allows some client-provided operations (like merging + // snapshots) to work without needing to first store the empty snapshot. + return Ok(Some(f(&[]))); + } + + let len_checked_f = move |bytes: &[u8]| { + if bytes.len() == digest.size_bytes { + Ok(f(bytes)) + } else { + Err(format!( "Got hash collision reading from store - digest {:?} was requested, but retrieved \ bytes with that fingerprint had length {}. Congratulations, you may have broken \ sha256! Underlying bytes: {:?}", @@ -913,53 +904,52 @@ impl ByteStore { bytes.len(), bytes )) - } - }; - - let result = if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { - self - .inner - .file_fsdb - .load_bytes_with(digest.hash, len_checked_f) - .await? - } else { - let dbs = match entry_type { - EntryType::Directory => self.inner.directory_lmdb.clone(), - EntryType::File => self.inner.file_lmdb.clone(), - }?; - dbs.load_bytes_with(digest.hash, len_checked_f).await? - }; - - if let Some(workunit_store_handle) = workunit_store::get_workunit_store_handle() { - workunit_store_handle.store.record_observation( - ObservationMetric::LocalStoreReadBlobSize, - digest.size_bytes as u64, - ); - workunit_store_handle.store.record_observation( - ObservationMetric::LocalStoreReadBlobTimeMicros, - start.elapsed().as_micros() as u64, - ); - } - - Ok(result) - } - - pub async fn all_digests(&self, entry_type: EntryType) -> Result, String> { - let lmdb = match entry_type { - EntryType::File => self.inner.file_lmdb.clone(), - EntryType::Directory => self.inner.directory_lmdb.clone(), - }?; - let mut digests = vec![]; - digests.extend(lmdb.all_digests().await?); - digests.extend(self.inner.file_fsdb.all_digests().await?); - Ok(digests) - } - - pub(crate) fn should_use_fsdb(entry_type: EntryType, len: usize) -> bool { - entry_type == EntryType::File && len >= LARGE_FILE_SIZE_LIMIT - } - - pub(crate) fn get_file_fsdb(&self) -> ShardedFSDB { - self.inner.file_fsdb.clone() - } + } + }; + + let result = if ByteStore::should_use_fsdb(entry_type, digest.size_bytes) { + self.inner + .file_fsdb + .load_bytes_with(digest.hash, len_checked_f) + .await? + } else { + let dbs = match entry_type { + EntryType::Directory => self.inner.directory_lmdb.clone(), + EntryType::File => self.inner.file_lmdb.clone(), + }?; + dbs.load_bytes_with(digest.hash, len_checked_f).await? 
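For reference (an illustrative sketch, not code from this patch): because the LMDB branch above hands the callback a borrowed slice into the store's memory map, a caller can inspect a blob without copying it out:

    // Hypothetical caller: measure a stored file without cloning its bytes.
    let maybe_len = store
        .load_bytes_with(EntryType::File, digest, |bytes: &[u8]| bytes.len())
        .await
        .expect("Error loading");
    // `maybe_len` is `None` when the digest is not present locally.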
+ }; + + if let Some(workunit_store_handle) = workunit_store::get_workunit_store_handle() { + workunit_store_handle.store.record_observation( + ObservationMetric::LocalStoreReadBlobSize, + digest.size_bytes as u64, + ); + workunit_store_handle.store.record_observation( + ObservationMetric::LocalStoreReadBlobTimeMicros, + start.elapsed().as_micros() as u64, + ); + } + + Ok(result) + } + + pub async fn all_digests(&self, entry_type: EntryType) -> Result, String> { + let lmdb = match entry_type { + EntryType::File => self.inner.file_lmdb.clone(), + EntryType::Directory => self.inner.directory_lmdb.clone(), + }?; + let mut digests = vec![]; + digests.extend(lmdb.all_digests().await?); + digests.extend(self.inner.file_fsdb.all_digests().await?); + Ok(digests) + } + + pub(crate) fn should_use_fsdb(entry_type: EntryType, len: usize) -> bool { + entry_type == EntryType::File && len >= LARGE_FILE_SIZE_LIMIT + } + + pub(crate) fn get_file_fsdb(&self) -> ShardedFSDB { + self.inner.file_fsdb.clone() + } } diff --git a/src/rust/engine/fs/store/src/local_tests.rs b/src/rust/engine/fs/store/src/local_tests.rs index 967d8f8d962..4e1224fd21c 100644 --- a/src/rust/engine/fs/store/src/local_tests.rs +++ b/src/rust/engine/fs/store/src/local_tests.rs @@ -16,786 +16,786 @@ use tokio::time::sleep; use walkdir::WalkDir; async fn assert_store_bytes( - store: ByteStore, - entry_type: EntryType, - bytes: Bytes, - expected_digest: Digest, + store: ByteStore, + entry_type: EntryType, + bytes: Bytes, + expected_digest: Digest, ) { - let mut file = NamedTempFile::new().unwrap(); - file.write_all(&bytes).unwrap(); - file.flush().unwrap(); + let mut file = NamedTempFile::new().unwrap(); + file.write_all(&bytes).unwrap(); + file.flush().unwrap(); - let digest = store - .store(entry_type, false, true, file.path().to_owned()) - .await - .unwrap(); + let digest = store + .store(entry_type, false, true, file.path().to_owned()) + .await + .unwrap(); - assert_eq!(expected_digest, digest); + assert_eq!(expected_digest, digest); } #[tokio::test] async fn save_file() { - let dir = TempDir::new().unwrap(); - - let testdata = TestData::roland(); - assert_store_bytes( - new_store(dir.path()), - EntryType::File, - testdata.bytes(), - testdata.digest(), - ) - .await; + let dir = TempDir::new().unwrap(); + + let testdata = TestData::roland(); + assert_store_bytes( + new_store(dir.path()), + EntryType::File, + testdata.bytes(), + testdata.digest(), + ) + .await; } #[tokio::test] async fn save_file_is_idempotent() { - let dir = TempDir::new().unwrap(); - - let testdata = TestData::roland(); - assert_store_bytes( - new_store(dir.path()), - EntryType::File, - testdata.bytes(), - testdata.digest(), - ) - .await; - assert_store_bytes( - new_store(dir.path()), - EntryType::File, - testdata.bytes(), - testdata.digest(), - ) - .await; + let dir = TempDir::new().unwrap(); + + let testdata = TestData::roland(); + assert_store_bytes( + new_store(dir.path()), + EntryType::File, + testdata.bytes(), + testdata.digest(), + ) + .await; + assert_store_bytes( + new_store(dir.path()), + EntryType::File, + testdata.bytes(), + testdata.digest(), + ) + .await; } #[tokio::test] async fn roundtrip_file() { - let testdata = TestData::roland(); - let dir = TempDir::new().unwrap(); + let testdata = TestData::roland(); + let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let hash = prime_store_with_file_bytes(&store, testdata.bytes()).await; - assert_eq!( - load_file_bytes(&store, hash).await, - Ok(Some(testdata.bytes())) - ); + let store = 
new_store(dir.path()); + let hash = prime_store_with_file_bytes(&store, testdata.bytes()).await; + assert_eq!( + load_file_bytes(&store, hash).await, + Ok(Some(testdata.bytes())) + ); } #[tokio::test] async fn missing_file() { - let dir = TempDir::new().unwrap(); - assert_eq!( - load_file_bytes(&new_store(dir.path()), TestData::roland().digest()).await, - Ok(None) - ); + let dir = TempDir::new().unwrap(); + assert_eq!( + load_file_bytes(&new_store(dir.path()), TestData::roland().digest()).await, + Ok(None) + ); } #[tokio::test] async fn record_and_load_directory_proto() { - let dir = TempDir::new().unwrap(); - let testdir = TestDirectory::containing_roland(); - - assert_store_bytes( - new_store(dir.path()), - EntryType::Directory, - testdir.bytes(), - testdir.digest(), - ) - .await; + let dir = TempDir::new().unwrap(); + let testdir = TestDirectory::containing_roland(); + + assert_store_bytes( + new_store(dir.path()), + EntryType::Directory, + testdir.bytes(), + testdir.digest(), + ) + .await; - assert_eq!( - load_directory_proto_bytes(&new_store(dir.path()), testdir.digest()).await, - Ok(Some(testdir.bytes())) - ); + assert_eq!( + load_directory_proto_bytes(&new_store(dir.path()), testdir.digest()).await, + Ok(Some(testdir.bytes())) + ); } #[tokio::test] async fn missing_directory() { - let dir = TempDir::new().unwrap(); - let testdir = TestDirectory::containing_roland(); + let dir = TempDir::new().unwrap(); + let testdir = TestDirectory::containing_roland(); - assert_eq!( - load_directory_proto_bytes(&new_store(dir.path()), testdir.digest()).await, - Ok(None) - ); + assert_eq!( + load_directory_proto_bytes(&new_store(dir.path()), testdir.digest()).await, + Ok(None) + ); } #[tokio::test] async fn file_is_not_directory_proto() { - let dir = TempDir::new().unwrap(); - let testdata = TestData::roland(); - - assert_store_bytes( - new_store(dir.path()), - EntryType::File, - testdata.bytes(), - testdata.digest(), - ) - .await; + let dir = TempDir::new().unwrap(); + let testdata = TestData::roland(); + + assert_store_bytes( + new_store(dir.path()), + EntryType::File, + testdata.bytes(), + testdata.digest(), + ) + .await; - assert_eq!( - load_directory_proto_bytes(&new_store(dir.path()), testdata.digest()).await, - Ok(None) - ); + assert_eq!( + load_directory_proto_bytes(&new_store(dir.path()), testdata.digest()).await, + Ok(None) + ); } #[tokio::test] async fn garbage_collect_nothing_to_do() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let bytes = Bytes::from("0123456789"); - let fingerprint = Fingerprint::from_hex_string( - "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", - ) - .unwrap(); - let digest = Digest::new(fingerprint, 10); - - store - .store_bytes(EntryType::File, fingerprint, bytes.clone(), false) - .await - .expect("Error storing"); - store - .shrink(10, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - assert_eq!( - load_bytes(&store, EntryType::File, digest).await, - Ok(Some(bytes)) - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let bytes = Bytes::from("0123456789"); + let fingerprint = Fingerprint::from_hex_string( + "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", + ) + .unwrap(); + let digest = Digest::new(fingerprint, 10); + + store + .store_bytes(EntryType::File, fingerprint, bytes.clone(), false) + .await + .expect("Error storing"); + store + .shrink(10, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + assert_eq!( + load_bytes(&store, 
EntryType::File, digest).await, + Ok(Some(bytes)) + ); } #[tokio::test] async fn garbage_collect_nothing_to_do_with_lease() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let bytes = Bytes::from("0123456789"); - let file_fingerprint = Fingerprint::from_hex_string( - "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", - ) - .unwrap(); - let file_digest = Digest::new(file_fingerprint, 10); - store - .store_bytes(EntryType::File, file_fingerprint, bytes.clone(), false) - .await - .expect("Error storing"); - store - .lease_all(vec![(file_digest, EntryType::File)].into_iter()) - .await - .expect("Error leasing"); - store - .shrink(10, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - assert_eq!( - load_bytes(&store, EntryType::File, file_digest).await, - Ok(Some(bytes)) - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let bytes = Bytes::from("0123456789"); + let file_fingerprint = Fingerprint::from_hex_string( + "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", + ) + .unwrap(); + let file_digest = Digest::new(file_fingerprint, 10); + store + .store_bytes(EntryType::File, file_fingerprint, bytes.clone(), false) + .await + .expect("Error storing"); + store + .lease_all(vec![(file_digest, EntryType::File)].into_iter()) + .await + .expect("Error leasing"); + store + .shrink(10, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + assert_eq!( + load_bytes(&store, EntryType::File, file_digest).await, + Ok(Some(bytes)) + ); } #[tokio::test] async fn garbage_collect_expired() { - let lease_time = Duration::from_secs(1); - let dir = TempDir::new().unwrap(); - let store = new_store_with_lease_time(dir.path(), lease_time); - let bytes = Bytes::from("0123456789"); - let file_fingerprint = Fingerprint::from_hex_string( - "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", - ) - .unwrap(); - let file_len = 10; - let file_digest = Digest::new(file_fingerprint, file_len); - - // Store something (in a store with a shortened lease). Confirm that it hasn't immediately - // expired, and then wait for it to expire. - store - .store_bytes(EntryType::File, file_fingerprint, bytes.clone(), true) - .await - .expect("Error storing"); - assert_eq!( - file_len, - store - .shrink(0, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"), - ); - assert_eq!( - load_bytes(&store, EntryType::File, file_digest).await, - Ok(Some(bytes)) - ); - - // Wait for it to expire. - sleep(lease_time * 2).await; - assert_eq!( - 0, - store - .shrink(0, ShrinkBehavior::Fast) - .await - .expect("Should have cleared expired lease") - ); + let lease_time = Duration::from_secs(1); + let dir = TempDir::new().unwrap(); + let store = new_store_with_lease_time(dir.path(), lease_time); + let bytes = Bytes::from("0123456789"); + let file_fingerprint = Fingerprint::from_hex_string( + "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", + ) + .unwrap(); + let file_len = 10; + let file_digest = Digest::new(file_fingerprint, file_len); + + // Store something (in a store with a shortened lease). Confirm that it hasn't immediately + // expired, and then wait for it to expire. 
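    // Rough timeline this test relies on (illustrative):
    //   t = 0:              store_bytes(..., initial_lease = true) -> blob leased for `lease_time`
    //   t < lease_time:     shrink(0, Fast) returns file_len       -> still leased, kept
    //   t > 2 * lease_time: shrink(0, Fast) returns 0              -> lease expired, collected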
+ store + .store_bytes(EntryType::File, file_fingerprint, bytes.clone(), true) + .await + .expect("Error storing"); + assert_eq!( + file_len, + store + .shrink(0, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"), + ); + assert_eq!( + load_bytes(&store, EntryType::File, file_digest).await, + Ok(Some(bytes)) + ); + + // Wait for it to expire. + sleep(lease_time * 2).await; + assert_eq!( + 0, + store + .shrink(0, ShrinkBehavior::Fast) + .await + .expect("Should have cleared expired lease") + ); } #[tokio::test] async fn garbage_collect_remove_one_of_two_files_no_leases() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let bytes_1 = Bytes::from("0123456789"); - let fingerprint_1 = Fingerprint::from_hex_string( - "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", - ) - .unwrap(); - let digest_1 = Digest::new(fingerprint_1, 10); - let bytes_2 = Bytes::from("9876543210"); - let fingerprint_2 = Fingerprint::from_hex_string( - "7619ee8cea49187f309616e30ecf54be072259b43760f1f550a644945d5572f2", - ) - .unwrap(); - let digest_2 = Digest::new(fingerprint_2, 10); - store - .store_bytes(EntryType::File, fingerprint_1, bytes_1.clone(), false) - .await - .expect("Error storing"); - store - .store_bytes(EntryType::File, fingerprint_2, bytes_2.clone(), false) - .await - .expect("Error storing"); - store - .shrink(10, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - let mut entries = Vec::new(); - entries.push( - load_bytes(&store, EntryType::File, digest_1) - .await - .expect("Error loading bytes"), - ); - entries.push( - load_bytes(&store, EntryType::File, digest_2) - .await - .expect("Error loading bytes"), - ); - assert_eq!( - 1, - entries.iter().filter(|maybe| maybe.is_some()).count(), - "Want one Some but got: {entries:?}" - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let bytes_1 = Bytes::from("0123456789"); + let fingerprint_1 = Fingerprint::from_hex_string( + "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", + ) + .unwrap(); + let digest_1 = Digest::new(fingerprint_1, 10); + let bytes_2 = Bytes::from("9876543210"); + let fingerprint_2 = Fingerprint::from_hex_string( + "7619ee8cea49187f309616e30ecf54be072259b43760f1f550a644945d5572f2", + ) + .unwrap(); + let digest_2 = Digest::new(fingerprint_2, 10); + store + .store_bytes(EntryType::File, fingerprint_1, bytes_1.clone(), false) + .await + .expect("Error storing"); + store + .store_bytes(EntryType::File, fingerprint_2, bytes_2.clone(), false) + .await + .expect("Error storing"); + store + .shrink(10, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + let mut entries = Vec::new(); + entries.push( + load_bytes(&store, EntryType::File, digest_1) + .await + .expect("Error loading bytes"), + ); + entries.push( + load_bytes(&store, EntryType::File, digest_2) + .await + .expect("Error loading bytes"), + ); + assert_eq!( + 1, + entries.iter().filter(|maybe| maybe.is_some()).count(), + "Want one Some but got: {entries:?}" + ); } #[tokio::test] async fn garbage_collect_remove_both_files_no_leases() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let bytes_1 = Bytes::from("0123456789"); - let fingerprint_1 = Fingerprint::from_hex_string( - "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", - ) - .unwrap(); - let digest_1 = Digest::new(fingerprint_1, 10); - let bytes_2 = Bytes::from("9876543210"); - let fingerprint_2 = Fingerprint::from_hex_string( - 
"7619ee8cea49187f309616e30ecf54be072259b43760f1f550a644945d5572f2", - ) - .unwrap(); - let digest_2 = Digest::new(fingerprint_2, 10); - store - .store_bytes(EntryType::File, fingerprint_1, bytes_1.clone(), false) - .await - .expect("Error storing"); - store - .store_bytes(EntryType::File, fingerprint_2, bytes_2.clone(), false) - .await - .expect("Error storing"); - store - .shrink(1, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - assert_eq!( - load_bytes(&store, EntryType::File, digest_1).await, - Ok(None), - "Should have garbage collected {fingerprint_1:?}" - ); - assert_eq!( - load_bytes(&store, EntryType::File, digest_2).await, - Ok(None), - "Should have garbage collected {fingerprint_2:?}" - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let bytes_1 = Bytes::from("0123456789"); + let fingerprint_1 = Fingerprint::from_hex_string( + "84d89877f0d4041efb6bf91a16f0248f2fd573e6af05c19f96bedb9f882f7882", + ) + .unwrap(); + let digest_1 = Digest::new(fingerprint_1, 10); + let bytes_2 = Bytes::from("9876543210"); + let fingerprint_2 = Fingerprint::from_hex_string( + "7619ee8cea49187f309616e30ecf54be072259b43760f1f550a644945d5572f2", + ) + .unwrap(); + let digest_2 = Digest::new(fingerprint_2, 10); + store + .store_bytes(EntryType::File, fingerprint_1, bytes_1.clone(), false) + .await + .expect("Error storing"); + store + .store_bytes(EntryType::File, fingerprint_2, bytes_2.clone(), false) + .await + .expect("Error storing"); + store + .shrink(1, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + assert_eq!( + load_bytes(&store, EntryType::File, digest_1).await, + Ok(None), + "Should have garbage collected {fingerprint_1:?}" + ); + assert_eq!( + load_bytes(&store, EntryType::File, digest_2).await, + Ok(None), + "Should have garbage collected {fingerprint_2:?}" + ); } #[tokio::test] async fn garbage_collect_remove_one_of_two_directories_no_leases() { - let dir = TempDir::new().unwrap(); - - let testdir = TestDirectory::containing_roland(); - let other_testdir = TestDirectory::containing_dnalor(); - - let store = new_store(dir.path()); - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - false, - ) - .await - .expect("Error storing"); - store - .store_bytes( - EntryType::Directory, - other_testdir.fingerprint(), - other_testdir.bytes(), - false, - ) - .await - .expect("Error storing"); - store - .shrink(84, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - let mut entries = Vec::new(); - entries.push( - load_bytes(&store, EntryType::Directory, testdir.digest()) - .await - .expect("Error loading bytes"), - ); - entries.push( - load_bytes(&store, EntryType::Directory, other_testdir.digest()) - .await - .expect("Error loading bytes"), - ); - assert_eq!( - 1, - entries.iter().filter(|maybe| maybe.is_some()).count(), - "Want one Some but got: {entries:?}" - ); + let dir = TempDir::new().unwrap(); + + let testdir = TestDirectory::containing_roland(); + let other_testdir = TestDirectory::containing_dnalor(); + + let store = new_store(dir.path()); + store + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + false, + ) + .await + .expect("Error storing"); + store + .store_bytes( + EntryType::Directory, + other_testdir.fingerprint(), + other_testdir.bytes(), + false, + ) + .await + .expect("Error storing"); + store + .shrink(84, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + let mut entries = Vec::new(); + entries.push( + load_bytes(&store, 
EntryType::Directory, testdir.digest()) + .await + .expect("Error loading bytes"), + ); + entries.push( + load_bytes(&store, EntryType::Directory, other_testdir.digest()) + .await + .expect("Error loading bytes"), + ); + assert_eq!( + 1, + entries.iter().filter(|maybe| maybe.is_some()).count(), + "Want one Some but got: {entries:?}" + ); } #[tokio::test] async fn garbage_collect_remove_file_with_leased_directory() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - - let testdir = TestDirectory::containing_roland(); - let testdata = TestData::forty_chars(); - - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - true, - ) - .await - .expect("Error storing"); - - store - .store_bytes( - EntryType::File, - testdata.fingerprint(), - testdata.bytes(), - false, - ) - .await - .expect("Error storing"); - - store - .shrink(80, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - - assert_eq!( - load_bytes(&store, EntryType::File, testdata.digest()).await, - Ok(None), - "File was present when it should've been garbage collected" - ); - assert_eq!( - load_bytes(&store, EntryType::Directory, testdir.digest()).await, - Ok(Some(testdir.bytes())), - "Directory was missing despite lease" - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + + let testdir = TestDirectory::containing_roland(); + let testdata = TestData::forty_chars(); + + store + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + true, + ) + .await + .expect("Error storing"); + + store + .store_bytes( + EntryType::File, + testdata.fingerprint(), + testdata.bytes(), + false, + ) + .await + .expect("Error storing"); + + store + .shrink(80, ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + + assert_eq!( + load_bytes(&store, EntryType::File, testdata.digest()).await, + Ok(None), + "File was present when it should've been garbage collected" + ); + assert_eq!( + load_bytes(&store, EntryType::Directory, testdir.digest()).await, + Ok(Some(testdir.bytes())), + "Directory was missing despite lease" + ); } #[tokio::test] async fn garbage_collect_remove_file_while_leased_file() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); - let testdir = TestDirectory::containing_roland(); + let testdir = TestDirectory::containing_roland(); - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - false, - ) - .await - .expect("Error storing"); - let forty_chars = TestData::forty_chars(); - store - .store_bytes( - EntryType::File, - forty_chars.fingerprint(), - forty_chars.bytes(), - true, - ) - .await - .expect("Error storing"); - - store - .shrink(80, ShrinkBehavior::Fast) - .await - .expect("Error shrinking"); - - assert_eq!( - load_bytes(&store, EntryType::File, forty_chars.digest()).await, - Ok(Some(forty_chars.bytes())), - "File was missing despite lease" - ); - assert_eq!( - load_bytes(&store, EntryType::Directory, testdir.digest()).await, - Ok(None), - "Directory was present when it should've been garbage collected" - ); + store + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + false, + ) + .await + .expect("Error storing"); + let forty_chars = TestData::forty_chars(); + store + .store_bytes( + EntryType::File, + forty_chars.fingerprint(), + forty_chars.bytes(), + true, + ) + .await + .expect("Error storing"); + + store + .shrink(80, 
ShrinkBehavior::Fast) + .await + .expect("Error shrinking"); + + assert_eq!( + load_bytes(&store, EntryType::File, forty_chars.digest()).await, + Ok(Some(forty_chars.bytes())), + "File was missing despite lease" + ); + assert_eq!( + load_bytes(&store, EntryType::Directory, testdir.digest()).await, + Ok(None), + "Directory was present when it should've been garbage collected" + ); } #[tokio::test] async fn garbage_collect_fail_because_too_many_leases() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - - let testdir = TestDirectory::containing_roland(); - let forty_chars = TestData::forty_chars(); - let roland = TestData::roland(); - - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - true, - ) - .await - .expect("Error storing"); - store - .store_bytes( - EntryType::File, - forty_chars.fingerprint(), - forty_chars.bytes(), - true, - ) - .await - .expect("Error storing"); - store - .store_bytes(EntryType::File, roland.fingerprint(), roland.bytes(), false) - .await - .expect("Error storing"); - - assert_eq!(store.shrink(80, ShrinkBehavior::Fast).await, Ok(164)); - - assert_eq!( - load_bytes(&store, EntryType::File, forty_chars.digest()).await, - Ok(Some(forty_chars.bytes())), - "Leased file should still be present" - ); - assert_eq!( - load_bytes(&store, EntryType::Directory, testdir.digest()).await, - Ok(Some(testdir.bytes())), - "Leased directory should still be present" - ); - // Whether the unleased file is present is undefined. + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + + let testdir = TestDirectory::containing_roland(); + let forty_chars = TestData::forty_chars(); + let roland = TestData::roland(); + + store + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + true, + ) + .await + .expect("Error storing"); + store + .store_bytes( + EntryType::File, + forty_chars.fingerprint(), + forty_chars.bytes(), + true, + ) + .await + .expect("Error storing"); + store + .store_bytes(EntryType::File, roland.fingerprint(), roland.bytes(), false) + .await + .expect("Error storing"); + + assert_eq!(store.shrink(80, ShrinkBehavior::Fast).await, Ok(164)); + + assert_eq!( + load_bytes(&store, EntryType::File, forty_chars.digest()).await, + Ok(Some(forty_chars.bytes())), + "Leased file should still be present" + ); + assert_eq!( + load_bytes(&store, EntryType::Directory, testdir.digest()).await, + Ok(Some(testdir.bytes())), + "Leased directory should still be present" + ); + // Whether the unleased file is present is undefined. 
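    // The `Ok(164)` assertion above is the footprint `shrink` reports once it runs out of
    // unleased entries: the leased directory and the leased 40-character file together exceed
    // the 80-byte target, so only the unleased `roland` blob was eligible for collection.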
} async fn write_256kb(store: &ByteStore, byte: u8) { - let mut bytes = BytesMut::with_capacity(256 * 1024); - for _ in 0..256 * 1024 { - bytes.put_u8(byte); - } - let fingerprint = Digest::of_bytes(&bytes).hash; - store - .store_bytes(EntryType::File, fingerprint, bytes.freeze(), false) - .await - .expect("Error storing"); + let mut bytes = BytesMut::with_capacity(256 * 1024); + for _ in 0..256 * 1024 { + bytes.put_u8(byte); + } + let fingerprint = Digest::of_bytes(&bytes).hash; + store + .store_bytes(EntryType::File, fingerprint, bytes.freeze(), false) + .await + .expect("Error storing"); } #[tokio::test] async fn garbage_collect_and_compact() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - - write_256kb(&store, b'0').await; - write_256kb(&store, b'1').await; - write_256kb(&store, b'2').await; - write_256kb(&store, b'3').await; - write_256kb(&store, b'4').await; - write_256kb(&store, b'5').await; - write_256kb(&store, b'6').await; - write_256kb(&store, b'7').await; - - let size = get_directory_size(dir.path()); - assert!( - size >= 2 * 1024 * 1024, - "Expect size to be at least 2MB but was {size}" - ); - - store - .shrink(1024 * 1024, ShrinkBehavior::Compact) - .await - .expect("Error shrinking"); - - let size = get_directory_size(dir.path()); - assert!( - size < 2 * 1024 * 1024, - "Expect size to be less than 2MB but was {size}" - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + + write_256kb(&store, b'0').await; + write_256kb(&store, b'1').await; + write_256kb(&store, b'2').await; + write_256kb(&store, b'3').await; + write_256kb(&store, b'4').await; + write_256kb(&store, b'5').await; + write_256kb(&store, b'6').await; + write_256kb(&store, b'7').await; + + let size = get_directory_size(dir.path()); + assert!( + size >= 2 * 1024 * 1024, + "Expect size to be at least 2MB but was {size}" + ); + + store + .shrink(1024 * 1024, ShrinkBehavior::Compact) + .await + .expect("Error shrinking"); + + let size = get_directory_size(dir.path()); + assert!( + size < 2 * 1024 * 1024, + "Expect size to be less than 2MB but was {size}" + ); } async fn write_1mb(store: &ByteStore, byte: u8) -> Digest { - let mut bytes = BytesMut::with_capacity(1024 * 1024); - for _ in 0..1024 * 1024 { - bytes.put_u8(byte); - } - let digest = Digest::of_bytes(&bytes); - store - .store_bytes(EntryType::File, digest.hash, bytes.freeze(), false) - .await - .expect("Error storing"); - digest + let mut bytes = BytesMut::with_capacity(1024 * 1024); + for _ in 0..1024 * 1024 { + bytes.put_u8(byte); + } + let digest = Digest::of_bytes(&bytes); + store + .store_bytes(EntryType::File, digest.hash, bytes.freeze(), false) + .await + .expect("Error storing"); + digest } #[tokio::test] async fn remove_big_file_and_store_again() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - - let digest1 = write_1mb(&store, b'0').await; - let digest2 = write_1mb(&store, b'1').await; - - let size = get_directory_size(dir.path()); - assert!( - size >= 2 * 1024 * 1024, - "Expect size to be at least 2MB but was {size}" - ); - - store - .remove(EntryType::File, digest1) - .await - .expect("Error removing"); - store - .remove(EntryType::File, digest2) - .await - .expect("Error removing"); - - let size = get_directory_size(dir.path()); - assert!( - size < 2 * 1024 * 1024, - "Expect size to be less than 2MB but was {size}" - ); - - write_1mb(&store, b'0').await; - write_1mb(&store, b'1').await; - let size = get_directory_size(dir.path()); - assert!( - size >= 2 * 1024 * 
1024, - "Expect size to be at least 2MB but was {size}" - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + + let digest1 = write_1mb(&store, b'0').await; + let digest2 = write_1mb(&store, b'1').await; + + let size = get_directory_size(dir.path()); + assert!( + size >= 2 * 1024 * 1024, + "Expect size to be at least 2MB but was {size}" + ); + + store + .remove(EntryType::File, digest1) + .await + .expect("Error removing"); + store + .remove(EntryType::File, digest2) + .await + .expect("Error removing"); + + let size = get_directory_size(dir.path()); + assert!( + size < 2 * 1024 * 1024, + "Expect size to be less than 2MB but was {size}" + ); + + write_1mb(&store, b'0').await; + write_1mb(&store, b'1').await; + let size = get_directory_size(dir.path()); + assert!( + size >= 2 * 1024 * 1024, + "Expect size to be at least 2MB but was {size}" + ); } #[tokio::test] async fn entry_type_for_file() { - let testdata = TestData::roland(); - let testdir = TestDirectory::containing_roland(); - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - false, + let testdata = TestData::roland(); + let testdir = TestDirectory::containing_roland(); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + store + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + false, + ) + .await + .expect("Error storing"); + prime_store_with_file_bytes(&store, testdata.bytes()).await; + assert_eq!( + store.entry_type(testdata.fingerprint()).await, + Ok(Some(EntryType::File)) ) - .await - .expect("Error storing"); - prime_store_with_file_bytes(&store, testdata.bytes()).await; - assert_eq!( - store.entry_type(testdata.fingerprint()).await, - Ok(Some(EntryType::File)) - ) } #[tokio::test] async fn entry_type_for_directory() { - let testdata = TestData::roland(); - let testdir = TestDirectory::containing_roland(); - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - false, + let testdata = TestData::roland(); + let testdir = TestDirectory::containing_roland(); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + store + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + false, + ) + .await + .expect("Error storing"); + prime_store_with_file_bytes(&store, testdata.bytes()).await; + assert_eq!( + store.entry_type(testdir.fingerprint()).await, + Ok(Some(EntryType::Directory)) ) - .await - .expect("Error storing"); - prime_store_with_file_bytes(&store, testdata.bytes()).await; - assert_eq!( - store.entry_type(testdir.fingerprint()).await, - Ok(Some(EntryType::Directory)) - ) } #[tokio::test] async fn entry_type_for_missing() { - let testdata = TestData::roland(); - let testdir = TestDirectory::containing_roland(); - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - store - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - false, - ) - .await - .expect("Error storing"); - prime_store_with_file_bytes(&store, testdata.bytes()).await; - assert_eq!( + let testdata = TestData::roland(); + let testdir = TestDirectory::containing_roland(); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); store - .entry_type(TestDirectory::recursive().fingerprint()) - .await, - Ok(None) - ) + .store_bytes( + 
EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + false, + ) + .await + .expect("Error storing"); + prime_store_with_file_bytes(&store, testdata.bytes()).await; + assert_eq!( + store + .entry_type(TestDirectory::recursive().fingerprint()) + .await, + Ok(None) + ) } #[tokio::test] async fn empty_file_is_known() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let empty_file = TestData::empty(); - assert_eq!( - store - .load_bytes_with(EntryType::File, empty_file.digest(), |b| { - Bytes::copy_from_slice(b) - }) - .await, - Ok(Some(empty_file.bytes())), - ) + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let empty_file = TestData::empty(); + assert_eq!( + store + .load_bytes_with(EntryType::File, empty_file.digest(), |b| { + Bytes::copy_from_slice(b) + }) + .await, + Ok(Some(empty_file.bytes())), + ) } #[tokio::test] async fn empty_directory_is_known() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let empty_dir = TestDirectory::empty(); - assert_eq!( - store - .load_bytes_with(EntryType::Directory, empty_dir.digest(), |b| { - Bytes::copy_from_slice(b) - }) - .await, - Ok(Some(empty_dir.bytes())), - ) + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let empty_dir = TestDirectory::empty(); + assert_eq!( + store + .load_bytes_with(EntryType::Directory, empty_dir.digest(), |b| { + Bytes::copy_from_slice(b) + }) + .await, + Ok(Some(empty_dir.bytes())), + ) } #[tokio::test] async fn all_digests() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let digest1 = prime_store_with_file_bytes(&store, TestData::roland().bytes()).await; - assert_eq!(Ok(vec![digest1]), store.all_digests(EntryType::File).await); - let large_testdata = TestData::new("123456789".repeat(1000 * 512).as_str()); - let digest2 = prime_store_with_file_bytes(&store, large_testdata.bytes()).await; - assert_eq!( - vec![digest1, digest2].into_iter().collect::>(), - store - .all_digests(EntryType::File) - .await - .unwrap() - .into_iter() - .collect::>(), - ); + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let digest1 = prime_store_with_file_bytes(&store, TestData::roland().bytes()).await; + assert_eq!(Ok(vec![digest1]), store.all_digests(EntryType::File).await); + let large_testdata = TestData::new("123456789".repeat(1000 * 512).as_str()); + let digest2 = prime_store_with_file_bytes(&store, large_testdata.bytes()).await; + assert_eq!( + vec![digest1, digest2].into_iter().collect::>(), + store + .all_digests(EntryType::File) + .await + .unwrap() + .into_iter() + .collect::>(), + ); } #[tokio::test] async fn get_missing_digests() { - let dir = TempDir::new().unwrap(); - let store = new_store(dir.path()); - let small_testdata = TestData::roland(); - let large_testdata = TestData::new("123456789".repeat(1000 * 512).as_str()); - - prime_store_with_file_bytes(&store, small_testdata.bytes()).await; - prime_store_with_file_bytes(&store, large_testdata.bytes()).await; - let missing = store - .get_missing_digests( - EntryType::File, - HashSet::from([ - small_testdata.digest(), - large_testdata.digest(), - hashing::Digest::of_bytes("1".as_bytes()), - ]), + let dir = TempDir::new().unwrap(); + let store = new_store(dir.path()); + let small_testdata = TestData::roland(); + let large_testdata = TestData::new("123456789".repeat(1000 * 512).as_str()); + + prime_store_with_file_bytes(&store, small_testdata.bytes()).await; + prime_store_with_file_bytes(&store, 
large_testdata.bytes()).await; + let missing = store + .get_missing_digests( + EntryType::File, + HashSet::from([ + small_testdata.digest(), + large_testdata.digest(), + hashing::Digest::of_bytes("1".as_bytes()), + ]), + ) + .await + .unwrap(); + assert_eq!( + missing, + HashSet::from([hashing::Digest::of_bytes("1".as_bytes())]) ) - .await - .unwrap(); - assert_eq!( - missing, - HashSet::from([hashing::Digest::of_bytes("1".as_bytes())]) - ) } pub fn new_store>(dir: P) -> ByteStore { - ByteStore::new(task_executor::Executor::new(), dir).unwrap() + ByteStore::new(task_executor::Executor::new(), dir).unwrap() } pub fn new_store_with_lease_time>(dir: P, lease_time: Duration) -> ByteStore { - ByteStore::new_with_options( - task_executor::Executor::new(), - dir, - LocalOptions { - lease_time, - ..LocalOptions::default() - }, - ) - .unwrap() + ByteStore::new_with_options( + task_executor::Executor::new(), + dir, + LocalOptions { + lease_time, + ..LocalOptions::default() + }, + ) + .unwrap() } pub async fn load_file_bytes(store: &ByteStore, digest: Digest) -> Result, String> { - load_bytes(store, EntryType::File, digest).await + load_bytes(store, EntryType::File, digest).await } pub async fn load_directory_proto_bytes( - store: &ByteStore, - digest: Digest, + store: &ByteStore, + digest: Digest, ) -> Result, String> { - load_bytes(store, EntryType::Directory, digest).await + load_bytes(store, EntryType::Directory, digest).await } pub async fn load_bytes( - store: &ByteStore, - entry_type: EntryType, - digest: Digest, + store: &ByteStore, + entry_type: EntryType, + digest: Digest, ) -> Result, String> { - store - .load_bytes_with(entry_type, digest, Bytes::copy_from_slice) - .await + store + .load_bytes_with(entry_type, digest, Bytes::copy_from_slice) + .await } async fn prime_store_with_file_bytes(store: &ByteStore, bytes: Bytes) -> Digest { - let digest = Digest::of_bytes(&bytes); - store - .store_bytes(EntryType::File, digest.hash, bytes, false) - .await - .expect("Error storing file bytes"); - digest + let digest = Digest::of_bytes(&bytes); + store + .store_bytes(EntryType::File, digest.hash, bytes, false) + .await + .expect("Error storing file bytes"); + digest } fn get_directory_size(path: &Path) -> usize { - let mut len: usize = 0; - for entry in WalkDir::new(path) { - len += entry - .expect("Error walking directory") - .metadata() - .expect("Error reading metadata") - .len() as usize; - } - len + let mut len: usize = 0; + for entry in WalkDir::new(path) { + len += entry + .expect("Error walking directory") + .metadata() + .expect("Error reading metadata") + .len() as usize; + } + len } diff --git a/src/rust/engine/fs/store/src/remote.rs b/src/rust/engine/fs/store/src/remote.rs index 26edd23ea68..6a0090d078e 100644 --- a/src/rust/engine/fs/store/src/remote.rs +++ b/src/rust/engine/fs/store/src/remote.rs @@ -10,167 +10,165 @@ use futures::Future; use hashing::Digest; use log::Level; use remote_provider::{ - choose_byte_store_provider, ByteStoreProvider, LoadDestination, RemoteOptions, + choose_byte_store_provider, ByteStoreProvider, LoadDestination, RemoteOptions, }; use tokio::fs::File; use workunit_store::{in_workunit, ObservationMetric}; #[derive(Clone)] pub struct ByteStore { - instance_name: Option, - provider: Arc, + instance_name: Option, + provider: Arc, } impl fmt::Debug for ByteStore { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ByteStore(name={:?})", self.instance_name) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, 
"ByteStore(name={:?})", self.instance_name) + } } impl ByteStore { - pub fn new( - instance_name: Option, - provider: Arc, - ) -> ByteStore { - ByteStore { - instance_name, - provider, - } - } - - pub async fn from_options(options: RemoteOptions) -> Result { - let instance_name = options.instance_name.clone(); - let provider = choose_byte_store_provider(options).await?; - Ok(ByteStore::new(instance_name, provider)) - } - - /// Store the bytes readable from `file` into the remote store - pub async fn store_file(&self, digest: Digest, file: File) -> Result<(), String> { - self - .store_tracking("store", digest, || self.provider.store_file(digest, file)) - .await - } - - /// Store the bytes in `bytes` into the remote store, as an optimisation of `store_file` when the - /// bytes are already in memory - pub async fn store_bytes(&self, bytes: Bytes) -> Result<(), String> { - let digest = Digest::of_bytes(&bytes); - self - .store_tracking("store_bytes", digest, || { - self.provider.store_bytes(digest, bytes) - }) - .await - } - - async fn store_tracking( - &self, - workunit: &'static str, - digest: Digest, - do_store: DoStore, - ) -> Result<(), String> - where - DoStore: FnOnce() -> Fut + Send, - Fut: Future> + Send, - { - in_workunit!( - workunit, - Level::Trace, - desc = Some(format!("Storing {digest:?}")), - |workunit| async move { - let result = do_store().await; - - if result.is_ok() { - workunit.record_observation( - ObservationMetric::RemoteStoreBlobBytesUploaded, - digest.size_bytes as u64, - ); + pub fn new( + instance_name: Option, + provider: Arc, + ) -> ByteStore { + ByteStore { + instance_name, + provider, } + } + + pub async fn from_options(options: RemoteOptions) -> Result { + let instance_name = options.instance_name.clone(); + let provider = choose_byte_store_provider(options).await?; + Ok(ByteStore::new(instance_name, provider)) + } + + /// Store the bytes readable from `file` into the remote store + pub async fn store_file(&self, digest: Digest, file: File) -> Result<(), String> { + self.store_tracking("store", digest, || self.provider.store_file(digest, file)) + .await + } + + /// Store the bytes in `bytes` into the remote store, as an optimisation of `store_file` when the + /// bytes are already in memory + pub async fn store_bytes(&self, bytes: Bytes) -> Result<(), String> { + let digest = Digest::of_bytes(&bytes); + self.store_tracking("store_bytes", digest, || { + self.provider.store_bytes(digest, bytes) + }) + .await + } - result - } - ) - .await - } - - async fn load_monomorphic( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - ) -> Result { - let start = Instant::now(); - let workunit_desc = format!( - "Loading bytes at: {} {} ({} bytes)", - self.instance_name.as_ref().map_or("", |s| s), - digest.hash, - digest.size_bytes - ); - - in_workunit!( - "load", - Level::Trace, - desc = Some(workunit_desc), - |workunit| async move { - let result = self.provider.load(digest, destination).await; - workunit.record_observation( - ObservationMetric::RemoteStoreReadBlobTimeMicros, - start.elapsed().as_micros() as u64, + async fn store_tracking( + &self, + workunit: &'static str, + digest: Digest, + do_store: DoStore, + ) -> Result<(), String> + where + DoStore: FnOnce() -> Fut + Send, + Fut: Future> + Send, + { + in_workunit!( + workunit, + Level::Trace, + desc = Some(format!("Storing {digest:?}")), + |workunit| async move { + let result = do_store().await; + + if result.is_ok() { + workunit.record_observation( + ObservationMetric::RemoteStoreBlobBytesUploaded, 
+ digest.size_bytes as u64, + ); + } + + result + } + ) + .await + } + + async fn load_monomorphic( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + ) -> Result { + let start = Instant::now(); + let workunit_desc = format!( + "Loading bytes at: {} {} ({} bytes)", + self.instance_name.as_ref().map_or("", |s| s), + digest.hash, + digest.size_bytes ); - if result.is_ok() { - workunit.record_observation( - ObservationMetric::RemoteStoreBlobBytesDownloaded, - digest.size_bytes as u64, - ); + + in_workunit!( + "load", + Level::Trace, + desc = Some(workunit_desc), + |workunit| async move { + let result = self.provider.load(digest, destination).await; + workunit.record_observation( + ObservationMetric::RemoteStoreReadBlobTimeMicros, + start.elapsed().as_micros() as u64, + ); + if result.is_ok() { + workunit.record_observation( + ObservationMetric::RemoteStoreBlobBytesDownloaded, + digest.size_bytes as u64, + ); + } + result + }, + ) + .await + } + + async fn load( + &self, + digest: Digest, + mut destination: W, + ) -> Result, String> { + if self.load_monomorphic(digest, &mut destination).await? { + Ok(Some(destination)) + } else { + Ok(None) } - result - }, - ) - .await - } - - async fn load( - &self, - digest: Digest, - mut destination: W, - ) -> Result, String> { - if self.load_monomorphic(digest, &mut destination).await? { - Ok(Some(destination)) - } else { - Ok(None) } - } - - /// Load the data for `digest` (if it exists in the remote store) into memory. - pub async fn load_bytes(&self, digest: Digest) -> Result, String> { - let result = self - .load(digest, Vec::with_capacity(digest.size_bytes)) - .await?; - Ok(result.map(Bytes::from)) - } - - /// Write the data for `digest` (if it exists in the remote store) into `file`. - pub async fn load_file( - &self, - digest: Digest, - file: tokio::fs::File, - ) -> Result, String> { - self.load(digest, file).await - } - - /// - /// Given a collection of Digests (digests), - /// returns the set of digests from that collection not present in the CAS. - /// - pub async fn list_missing_digests(&self, digests: I) -> Result, String> - where - I: IntoIterator, - I::IntoIter: Send, - { - let mut iter = digests.into_iter(); - in_workunit!( - "list_missing_digests", - Level::Trace, - |_workunit| async move { self.provider.list_missing_digests(&mut iter).await } - ) - .await - } + + /// Load the data for `digest` (if it exists in the remote store) into memory. + pub async fn load_bytes(&self, digest: Digest) -> Result, String> { + let result = self + .load(digest, Vec::with_capacity(digest.size_bytes)) + .await?; + Ok(result.map(Bytes::from)) + } + + /// Write the data for `digest` (if it exists in the remote store) into `file`. + pub async fn load_file( + &self, + digest: Digest, + file: tokio::fs::File, + ) -> Result, String> { + self.load(digest, file).await + } + + /// + /// Given a collection of Digests (digests), + /// returns the set of digests from that collection not present in the CAS. 
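For illustration (a hypothetical caller inside an async fn returning `Result<(), String>`, not code from this patch): pairing `list_missing_digests` with `store_bytes` avoids re-uploading blobs the CAS already has:

    // `remote` is assumed to be this remote ByteStore; `digest`/`bytes` are a blob to upload.
    let missing = remote.list_missing_digests(vec![digest]).await?;
    if missing.contains(&digest) {
        remote.store_bytes(bytes).await?;
    }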
+ /// + pub async fn list_missing_digests(&self, digests: I) -> Result, String> + where + I: IntoIterator, + I::IntoIter: Send, + { + let mut iter = digests.into_iter(); + in_workunit!( + "list_missing_digests", + Level::Trace, + |_workunit| async move { self.provider.list_missing_digests(&mut iter).await } + ) + .await + } } diff --git a/src/rust/engine/fs/store/src/remote_tests.rs b/src/rust/engine/fs/store/src/remote_tests.rs index 677de395b54..dc177ba88ac 100644 --- a/src/rust/engine/fs/store/src/remote_tests.rs +++ b/src/rust/engine/fs/store/src/remote_tests.rs @@ -22,400 +22,399 @@ use crate::MEGABYTES; #[tokio::test] async fn smoke_test_from_options_reapi_provider() { - // This runs through the various methods using the 'real' REAPI provider (talking to a stubbed - // CAS), as a double-check that the test provider is plausible and test provider selection works. - let roland = TestData::roland(); - let empty = TestData::empty(); - - let cas = new_cas(10); - - let store = ByteStore::from_options(RemoteOptions { - cas_address: cas.address(), - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes: 10 * MEGABYTES, - rpc_timeout: Duration::from_secs(5), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit: crate::tests::STORE_BATCH_API_SIZE_LIMIT, - }) - .await - .unwrap(); - - let mut missing_set = HashSet::new(); - missing_set.insert(empty.digest()); - - // Only roland is in the CAS: - assert_eq!( - store.load_bytes(roland.digest()).await, - Ok(Some(roland.bytes())) - ); - assert_eq!(store.load_bytes(empty.digest()).await, Ok(None)); - assert_eq!( - store - .list_missing_digests(vec![roland.digest(), empty.digest()]) - .await, - Ok(missing_set) - ); - - // Insert empty: - assert_eq!(store.store_bytes(empty.bytes()).await, Ok(())); - assert_eq!( - store.load_bytes(empty.digest()).await, - Ok(Some(empty.bytes())) - ); + // This runs through the various methods using the 'real' REAPI provider (talking to a stubbed + // CAS), as a double-check that the test provider is plausible and test provider selection works. + let roland = TestData::roland(); + let empty = TestData::empty(); + + let cas = new_cas(10); + + let store = ByteStore::from_options(RemoteOptions { + cas_address: cas.address(), + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes: 10 * MEGABYTES, + rpc_timeout: Duration::from_secs(5), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit: crate::tests::STORE_BATCH_API_SIZE_LIMIT, + }) + .await + .unwrap(); + + let mut missing_set = HashSet::new(); + missing_set.insert(empty.digest()); + + // Only roland is in the CAS: + assert_eq!( + store.load_bytes(roland.digest()).await, + Ok(Some(roland.bytes())) + ); + assert_eq!(store.load_bytes(empty.digest()).await, Ok(None)); + assert_eq!( + store + .list_missing_digests(vec![roland.digest(), empty.digest()]) + .await, + Ok(missing_set) + ); + + // Insert empty: + assert_eq!(store.store_bytes(empty.bytes()).await, Ok(())); + assert_eq!( + store.load_bytes(empty.digest()).await, + Ok(Some(empty.bytes())) + ); } #[tokio::test] async fn smoke_test_from_options_file_provider() { - // This runs through the various methods using the file:// provider, as a double-check that the - // test provider is plausible and test provider selection works. 
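A note stated as an assumption (inferred from `ByteStore::from_options` earlier in this patch, not something the diff itself spells out): `choose_byte_store_provider` picks the backend from the `cas_address` scheme, which is why the two smoke tests differ only in the address they construct:

    // Illustrative addresses only:
    let _grpc_address = "grpcs://remote-cas.example.com:443".to_owned(); // REAPI provider
    let _file_address = format!("file://{}", dir.path().display()); // local filesystem provider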
- let roland = TestData::roland(); - let catnip = TestData::catnip(); - - let _ = WorkunitStore::setup_for_tests(); - let dir = TempDir::new().unwrap(); - - let store = ByteStore::from_options(RemoteOptions { - cas_address: format!("file://{}", dir.path().display()), - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes: 10 * MEGABYTES, - rpc_timeout: Duration::from_secs(5), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit: crate::tests::STORE_BATCH_API_SIZE_LIMIT, - }) - .await - .unwrap(); - - let mut missing_set = HashSet::new(); - missing_set.insert(catnip.digest()); - - // Insert roland: - assert_eq!(store.store_bytes(roland.bytes()).await, Ok(())); - assert_eq!( - store.load_bytes(roland.digest()).await, - Ok(Some(roland.bytes())) - ); - // Only roland is stored: - assert_eq!(store.load_bytes(catnip.digest()).await, Ok(None)); - assert_eq!( - store - .list_missing_digests(vec![roland.digest(), catnip.digest()]) - .await, - Ok(missing_set) - ); - - // Insert catnip: - assert_eq!(store.store_bytes(catnip.bytes()).await, Ok(())); - assert_eq!( - store.load_bytes(catnip.digest()).await, - Ok(Some(catnip.bytes())) - ); + // This runs through the various methods using the file:// provider, as a double-check that the + // test provider is plausible and test provider selection works. + let roland = TestData::roland(); + let catnip = TestData::catnip(); + + let _ = WorkunitStore::setup_for_tests(); + let dir = TempDir::new().unwrap(); + + let store = ByteStore::from_options(RemoteOptions { + cas_address: format!("file://{}", dir.path().display()), + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes: 10 * MEGABYTES, + rpc_timeout: Duration::from_secs(5), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit: crate::tests::STORE_BATCH_API_SIZE_LIMIT, + }) + .await + .unwrap(); + + let mut missing_set = HashSet::new(); + missing_set.insert(catnip.digest()); + + // Insert roland: + assert_eq!(store.store_bytes(roland.bytes()).await, Ok(())); + assert_eq!( + store.load_bytes(roland.digest()).await, + Ok(Some(roland.bytes())) + ); + // Only roland is stored: + assert_eq!(store.load_bytes(catnip.digest()).await, Ok(None)); + assert_eq!( + store + .list_missing_digests(vec![roland.digest(), catnip.digest()]) + .await, + Ok(missing_set) + ); + + // Insert catnip: + assert_eq!(store.store_bytes(catnip.bytes()).await, Ok(())); + assert_eq!( + store.load_bytes(catnip.digest()).await, + Ok(Some(catnip.bytes())) + ); } #[tokio::test] async fn load_bytes_existing() { - let _ = WorkunitStore::setup_for_tests(); - let testdata = TestData::roland(); - let store = new_byte_store(&testdata); - - assert_eq!( - store.load_bytes(testdata.digest()).await, - Ok(Some(testdata.bytes())) - ); + let _ = WorkunitStore::setup_for_tests(); + let testdata = TestData::roland(); + let store = new_byte_store(&testdata); + + assert_eq!( + store.load_bytes(testdata.digest()).await, + Ok(Some(testdata.bytes())) + ); } #[tokio::test] async fn load_bytes_missing() { - let _ = WorkunitStore::setup_for_tests(); - let (store, _) = empty_byte_store(); + let _ = WorkunitStore::setup_for_tests(); + let (store, _) = empty_byte_store(); - assert_eq!( - store.load_bytes(TestData::roland().digest()).await, - Ok(None) - ); + assert_eq!( + store.load_bytes(TestData::roland().digest()).await, + Ok(None) + ); } #[tokio::test] async fn 
load_bytes_provider_error() { - let _ = WorkunitStore::setup_for_tests(); - let store = byte_store_always_error_provider(); + let _ = WorkunitStore::setup_for_tests(); + let store = byte_store_always_error_provider(); - assert_error(store.load_bytes(TestData::roland().digest()).await); + assert_error(store.load_bytes(TestData::roland().digest()).await); } #[tokio::test] async fn load_file_existing() { - // 5MB of data - let testdata = TestData::new(&"12345".repeat(MEGABYTES)); + // 5MB of data + let testdata = TestData::new(&"12345".repeat(MEGABYTES)); - let _ = WorkunitStore::setup_for_tests(); - let store = new_byte_store(&testdata); + let _ = WorkunitStore::setup_for_tests(); + let store = new_byte_store(&testdata); - let file = mk_tempfile(None).await; + let file = mk_tempfile(None).await; - let file = store - .load_file(testdata.digest(), file) - .await - .unwrap() - .unwrap(); + let file = store + .load_file(testdata.digest(), file) + .await + .unwrap() + .unwrap(); - assert_file_contents(file, &testdata.string()).await; + assert_file_contents(file, &testdata.string()).await; } #[tokio::test] async fn load_file_missing() { - let _ = WorkunitStore::setup_for_tests(); - let (store, _) = empty_byte_store(); + let _ = WorkunitStore::setup_for_tests(); + let (store, _) = empty_byte_store(); - let file = mk_tempfile(None).await; + let file = mk_tempfile(None).await; - let result = store.load_file(TestData::roland().digest(), file).await; - assert!(result.unwrap().is_none()); + let result = store.load_file(TestData::roland().digest(), file).await; + assert!(result.unwrap().is_none()); } #[tokio::test] async fn load_file_provider_error() { - let _ = WorkunitStore::setup_for_tests(); - let store = byte_store_always_error_provider(); + let _ = WorkunitStore::setup_for_tests(); + let store = byte_store_always_error_provider(); - let file = mk_tempfile(None).await; + let file = mk_tempfile(None).await; - assert_error(store.load_file(TestData::roland().digest(), file).await); + assert_error(store.load_file(TestData::roland().digest(), file).await); } #[tokio::test] async fn store_bytes() { - let _ = WorkunitStore::setup_for_tests(); - let testdata = TestData::roland(); + let _ = WorkunitStore::setup_for_tests(); + let testdata = TestData::roland(); - let (store, provider) = empty_byte_store(); - assert_eq!(store.store_bytes(testdata.bytes()).await, Ok(())); + let (store, provider) = empty_byte_store(); + assert_eq!(store.store_bytes(testdata.bytes()).await, Ok(())); - let blobs = provider.blobs.lock(); - assert_eq!(blobs.get(&testdata.fingerprint()), Some(&testdata.bytes())); + let blobs = provider.blobs.lock(); + assert_eq!(blobs.get(&testdata.fingerprint()), Some(&testdata.bytes())); } #[tokio::test] async fn store_bytes_provider_error() { - let _ = WorkunitStore::setup_for_tests(); - let store = byte_store_always_error_provider(); - assert_error(store.store_bytes(TestData::roland().bytes()).await) + let _ = WorkunitStore::setup_for_tests(); + let store = byte_store_always_error_provider(); + assert_error(store.store_bytes(TestData::roland().bytes()).await) } #[tokio::test] async fn store_file() { - let _ = WorkunitStore::setup_for_tests(); - let testdata = TestData::roland(); - - let (store, provider) = empty_byte_store(); - assert_eq!( - store - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await - ) - .await, - Ok(()) - ); - - let blobs = provider.blobs.lock(); - assert_eq!(blobs.get(&testdata.fingerprint()), Some(&testdata.bytes())); + let _ = 
WorkunitStore::setup_for_tests(); + let testdata = TestData::roland(); + + let (store, provider) = empty_byte_store(); + assert_eq!( + store + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await + ) + .await, + Ok(()) + ); + + let blobs = provider.blobs.lock(); + assert_eq!(blobs.get(&testdata.fingerprint()), Some(&testdata.bytes())); } #[tokio::test] async fn store_file_provider_error() { - let _ = WorkunitStore::setup_for_tests(); - let testdata = TestData::roland(); - let store = byte_store_always_error_provider(); - assert_error( - store - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await, - ); + let _ = WorkunitStore::setup_for_tests(); + let testdata = TestData::roland(); + let store = byte_store_always_error_provider(); + assert_error( + store + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await, + ); } #[tokio::test] async fn list_missing_digests_none_missing() { - let _ = WorkunitStore::setup_for_tests(); - let testdata = TestData::roland(); - let store = new_byte_store(&testdata); - - assert_eq!( - store.list_missing_digests(vec![testdata.digest()]).await, - Ok(HashSet::new()) - ); + let _ = WorkunitStore::setup_for_tests(); + let testdata = TestData::roland(); + let store = new_byte_store(&testdata); + + assert_eq!( + store.list_missing_digests(vec![testdata.digest()]).await, + Ok(HashSet::new()) + ); } #[tokio::test] async fn list_missing_digests_some_missing() { - let _ = WorkunitStore::setup_for_tests(); - let (store, _) = empty_byte_store(); + let _ = WorkunitStore::setup_for_tests(); + let (store, _) = empty_byte_store(); - let digest = TestData::roland().digest(); + let digest = TestData::roland().digest(); - let mut digest_set = HashSet::new(); - digest_set.insert(digest); + let mut digest_set = HashSet::new(); + digest_set.insert(digest); - assert_eq!( - store.list_missing_digests(vec![digest]).await, - Ok(digest_set) - ); + assert_eq!( + store.list_missing_digests(vec![digest]).await, + Ok(digest_set) + ); } #[tokio::test] async fn list_missing_digests_provider_error() { - let _ = WorkunitStore::setup_for_tests(); - let store = byte_store_always_error_provider(); - - assert_error( - store - .list_missing_digests(vec![TestData::roland().digest()]) - .await, - ) + let _ = WorkunitStore::setup_for_tests(); + let store = byte_store_always_error_provider(); + + assert_error( + store + .list_missing_digests(vec![TestData::roland().digest()]) + .await, + ) } #[tokio::test] async fn file_as_load_destination_reset() { - let mut file = mk_tempfile(Some(b"initial")).await; + let mut file = mk_tempfile(Some(b"initial")).await; - file.reset().await.unwrap(); - assert_file_contents(file, "").await; + file.reset().await.unwrap(); + assert_file_contents(file, "").await; } #[tokio::test] async fn vec_as_load_destination_reset() { - let mut vec: Vec = b"initial".to_vec(); + let mut vec: Vec = b"initial".to_vec(); - vec.reset().await.unwrap(); - assert!(vec.is_empty()); + vec.reset().await.unwrap(); + assert!(vec.is_empty()); } fn new_byte_store(data: &TestData) -> ByteStore { - let provider = TestProvider::new(); - provider.add(data.bytes()); - ByteStore::new(None, provider) + let provider = TestProvider::new(); + provider.add(data.bytes()); + ByteStore::new(None, provider) } fn empty_byte_store() -> (ByteStore, Arc) { - let provider = TestProvider::new(); - (ByteStore::new(None, provider.clone()), provider) + let provider = TestProvider::new(); + (ByteStore::new(None, 
provider.clone()), provider) } fn byte_store_always_error_provider() -> ByteStore { - ByteStore::new(None, AlwaysErrorProvider::new()) + ByteStore::new(None, AlwaysErrorProvider::new()) } async fn assert_file_contents(mut file: tokio::fs::File, expected: &str) { - file.rewind().await.unwrap(); + file.rewind().await.unwrap(); - let mut buf = String::new(); - file.read_to_string(&mut buf).await.unwrap(); - assert_eq!(buf.len(), expected.len()); - // (assert_eq! means failures unhelpfully print a potentially-huge string) - assert!(buf == expected); + let mut buf = String::new(); + file.read_to_string(&mut buf).await.unwrap(); + assert_eq!(buf.len(), expected.len()); + // (assert_eq! means failures unhelpfully print a potentially-huge string) + assert!(buf == expected); } fn assert_error(result: Result) { - let error = result.expect_err("Want error"); - assert!( - error.contains("AlwaysErrorProvider always fails"), - "Bad error message, got: {error}" - ); + let error = result.expect_err("Want error"); + assert!( + error.contains("AlwaysErrorProvider always fails"), + "Bad error message, got: {error}" + ); } struct TestProvider { - blobs: Mutex>, + blobs: Mutex>, } impl TestProvider { - #[allow(dead_code)] - fn new() -> Arc { - Arc::new(TestProvider { - blobs: Mutex::new(HashMap::new()), - }) - } - - #[allow(dead_code)] - fn add(&self, bytes: Bytes) { - self - .blobs - .lock() - .insert(Digest::of_bytes(&bytes).hash, bytes); - } + #[allow(dead_code)] + fn new() -> Arc { + Arc::new(TestProvider { + blobs: Mutex::new(HashMap::new()), + }) + } + + #[allow(dead_code)] + fn add(&self, bytes: Bytes) { + self.blobs + .lock() + .insert(Digest::of_bytes(&bytes).hash, bytes); + } } #[async_trait::async_trait] impl ByteStoreProvider for TestProvider { - async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String> { - self.blobs.lock().insert(digest.hash, bytes); - Ok(()) - } - - async fn store_file(&self, digest: Digest, mut file: File) -> Result<(), String> { - // just pull it all into memory - let mut bytes = Vec::new(); - file.read_to_end(&mut bytes).await.unwrap(); - self.blobs.lock().insert(digest.hash, Bytes::from(bytes)); - Ok(()) - } - - async fn load( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - ) -> Result { - let bytes = self.blobs.lock().get(&digest.hash).cloned(); - match bytes { - None => Ok(false), - Some(bytes) => { - destination.write_all(&bytes).await.unwrap(); - Ok(true) - } + async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String> { + self.blobs.lock().insert(digest.hash, bytes); + Ok(()) + } + + async fn store_file(&self, digest: Digest, mut file: File) -> Result<(), String> { + // just pull it all into memory + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes).await.unwrap(); + self.blobs.lock().insert(digest.hash, Bytes::from(bytes)); + Ok(()) + } + + async fn load( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + ) -> Result { + let bytes = self.blobs.lock().get(&digest.hash).cloned(); + match bytes { + None => Ok(false), + Some(bytes) => { + destination.write_all(&bytes).await.unwrap(); + Ok(true) + } + } + } + + async fn list_missing_digests( + &self, + digests: &mut (dyn Iterator + Send), + ) -> Result, String> { + let blobs = self.blobs.lock(); + Ok(digests.filter(|d| !blobs.contains_key(&d.hash)).collect()) } - } - - async fn list_missing_digests( - &self, - digests: &mut (dyn Iterator + Send), - ) -> Result, String> { - let blobs = self.blobs.lock(); - Ok(digests.filter(|d| 
!blobs.contains_key(&d.hash)).collect()) - } } struct AlwaysErrorProvider; impl AlwaysErrorProvider { - fn new() -> Arc { - Arc::new(AlwaysErrorProvider) - } + fn new() -> Arc { + Arc::new(AlwaysErrorProvider) + } } #[async_trait::async_trait] impl ByteStoreProvider for AlwaysErrorProvider { - async fn store_bytes(&self, _: Digest, _: Bytes) -> Result<(), String> { - Err("AlwaysErrorProvider always fails".to_owned()) - } - - async fn store_file(&self, _: Digest, _: File) -> Result<(), String> { - Err("AlwaysErrorProvider always fails".to_owned()) - } - - async fn load(&self, _: Digest, _: &mut dyn LoadDestination) -> Result { - Err("AlwaysErrorProvider always fails".to_owned()) - } - - async fn list_missing_digests( - &self, - _: &mut (dyn Iterator + Send), - ) -> Result, String> { - Err("AlwaysErrorProvider always fails".to_owned()) - } + async fn store_bytes(&self, _: Digest, _: Bytes) -> Result<(), String> { + Err("AlwaysErrorProvider always fails".to_owned()) + } + + async fn store_file(&self, _: Digest, _: File) -> Result<(), String> { + Err("AlwaysErrorProvider always fails".to_owned()) + } + + async fn load(&self, _: Digest, _: &mut dyn LoadDestination) -> Result { + Err("AlwaysErrorProvider always fails".to_owned()) + } + + async fn list_missing_digests( + &self, + _: &mut (dyn Iterator + Send), + ) -> Result, String> { + Err("AlwaysErrorProvider always fails".to_owned()) + } } diff --git a/src/rust/engine/fs/store/src/snapshot.rs b/src/rust/engine/fs/store/src/snapshot.rs index cd48b614d93..d487c037dcd 100644 --- a/src/rust/engine/fs/store/src/snapshot.rs +++ b/src/rust/engine/fs/store/src/snapshot.rs @@ -13,8 +13,8 @@ use futures::future; use futures::FutureExt; use fs::{ - DigestTrie, Dir, DirectoryDigest, Entry, File, GitignoreStyleExcludes, GlobMatching, PathStat, - PosixFS, PreparedPathGlobs, SymlinkBehavior, EMPTY_DIGEST_TREE, + DigestTrie, Dir, DirectoryDigest, Entry, File, GitignoreStyleExcludes, GlobMatching, PathStat, + PosixFS, PreparedPathGlobs, SymlinkBehavior, EMPTY_DIGEST_TREE, }; use hashing::{Digest, EMPTY_DIGEST}; @@ -26,211 +26,212 @@ use crate::{Store, StoreError}; /// the contents of the Digest have been persisted to the Store. See that struct's docs. 
#[derive(Clone, DeepSizeOf)] pub struct Snapshot { - pub digest: Digest, - pub tree: DigestTrie, + pub digest: Digest, + pub tree: DigestTrie, } impl Eq for Snapshot {} impl PartialEq for Snapshot { - fn eq(&self, other: &Self) -> bool { - self.digest == other.digest - } + fn eq(&self, other: &Self) -> bool { + self.digest == other.digest + } } impl hash::Hash for Snapshot { - fn hash(&self, state: &mut H) { - self.digest.hash(state); - } + fn hash(&self, state: &mut H) { + self.digest.hash(state); + } } impl Snapshot { - pub fn empty() -> Self { - Self { - digest: EMPTY_DIGEST, - tree: EMPTY_DIGEST_TREE.clone(), + pub fn empty() -> Self { + Self { + digest: EMPTY_DIGEST, + tree: EMPTY_DIGEST_TREE.clone(), + } } - } - pub fn files(&self) -> Vec { - let mut files = Vec::new(); - self - .tree - .walk(SymlinkBehavior::Oblivious, &mut |path, entry| { - if let Entry::File(_) = entry { - files.push(path.to_owned()) - } - }); - files - } + pub fn files(&self) -> Vec { + let mut files = Vec::new(); + self.tree + .walk(SymlinkBehavior::Oblivious, &mut |path, entry| { + if let Entry::File(_) = entry { + files.push(path.to_owned()) + } + }); + files + } - pub fn directories(&self) -> Vec { - let mut directories = Vec::new(); - self - .tree - .walk(SymlinkBehavior::Oblivious, &mut |path, entry| { - match entry { - Entry::Directory(d) if d.name().is_empty() => { - // Is the root directory, which is not emitted here. - } - Entry::Directory(_) => directories.push(path.to_owned()), - _ => (), - } - }); - directories - } + pub fn directories(&self) -> Vec { + let mut directories = Vec::new(); + self.tree + .walk(SymlinkBehavior::Oblivious, &mut |path, entry| { + match entry { + Entry::Directory(d) if d.name().is_empty() => { + // Is the root directory, which is not emitted here. 
+ } + Entry::Directory(_) => directories.push(path.to_owned()), + _ => (), + } + }); + directories + } - pub async fn from_path_stats< - S: StoreFileByDigest + Sized + Clone + Send + 'static, - Error: fmt::Debug + 'static + Send, - >( - file_digester: S, - path_stats: Vec, - ) -> Result { - let (paths, files): (Vec<_>, Vec<_>) = path_stats - .iter() - .filter_map(|ps| match ps { - PathStat::File { path, stat } => Some((path.clone(), stat.clone())), - _ => None, - }) - .unzip(); - let file_digests = future::try_join_all( - files - .into_iter() - .map(|file| file_digester.store_by_digest(file)) - .collect::>(), - ) - .await - .map_err(|e| format!("Failed to digest inputs: {e:?}"))?; + pub async fn from_path_stats< + S: StoreFileByDigest + Sized + Clone + Send + 'static, + Error: fmt::Debug + 'static + Send, + >( + file_digester: S, + path_stats: Vec, + ) -> Result { + let (paths, files): (Vec<_>, Vec<_>) = path_stats + .iter() + .filter_map(|ps| match ps { + PathStat::File { path, stat } => Some((path.clone(), stat.clone())), + _ => None, + }) + .unzip(); + let file_digests = future::try_join_all( + files + .into_iter() + .map(|file| file_digester.store_by_digest(file)) + .collect::>(), + ) + .await + .map_err(|e| format!("Failed to digest inputs: {e:?}"))?; - let file_digests_map = paths - .into_iter() - .zip(file_digests) - .collect::>(); + let file_digests_map = paths + .into_iter() + .zip(file_digests) + .collect::>(); - let tree = DigestTrie::from_unique_paths( - path_stats.iter().map(|p| p.into()).collect(), - &file_digests_map, - )?; - Ok(Self { - digest: tree.compute_root_digest(), - tree, - }) - } + let tree = DigestTrie::from_unique_paths( + path_stats.iter().map(|p| p.into()).collect(), + &file_digests_map, + )?; + Ok(Self { + digest: tree.compute_root_digest(), + tree, + }) + } - pub async fn from_digest(store: Store, digest: DirectoryDigest) -> Result { - Ok(Self { - digest: digest.as_digest(), - tree: store.load_digest_trie(digest).await?, - }) - } + pub async fn from_digest( + store: Store, + digest: DirectoryDigest, + ) -> Result { + Ok(Self { + digest: digest.as_digest(), + tree: store.load_digest_trie(digest).await?, + }) + } - /// - /// Capture a Snapshot of a presumed-immutable piece of the filesystem. - /// - /// Note that we don't use a Graph here, and don't cache any intermediate steps, we just place - /// the resultant Snapshot into the store and return it. This is important, because we're reading - /// things from arbitrary filepaths which we don't want to cache in the graph, as we don't watch - /// them for changes. Because we're not caching things, we can safely configure the virtual - /// filesystem to be symlink-oblivious. - /// - /// If the `digest_hint` is given, first attempt to load the Snapshot using that Digest, and only - /// fall back to actually walking the filesystem if we don't have it (either due to garbage - /// collection or Digest-oblivious legacy caching). - /// - pub async fn capture_snapshot_from_arbitrary_root + Send + 'static>( - store: Store, - executor: task_executor::Executor, - root_path: P, - path_globs: PreparedPathGlobs, - digest_hint: Option, - ) -> Result { - // Attempt to use the digest hint to load a Snapshot without expanding the globs; otherwise, - // expand the globs to capture a Snapshot. 
- let snapshot_result = if let Some(digest) = digest_hint { - Snapshot::from_digest(store.clone(), digest) - .await - .map_err(|e| e.to_string()) - } else { - Err("No digest hint provided.".to_string()) - }; + /// + /// Capture a Snapshot of a presumed-immutable piece of the filesystem. + /// + /// Note that we don't use a Graph here, and don't cache any intermediate steps, we just place + /// the resultant Snapshot into the store and return it. This is important, because we're reading + /// things from arbitrary filepaths which we don't want to cache in the graph, as we don't watch + /// them for changes. Because we're not caching things, we can safely configure the virtual + /// filesystem to be symlink-oblivious. + /// + /// If the `digest_hint` is given, first attempt to load the Snapshot using that Digest, and only + /// fall back to actually walking the filesystem if we don't have it (either due to garbage + /// collection or Digest-oblivious legacy caching). + /// + pub async fn capture_snapshot_from_arbitrary_root + Send + 'static>( + store: Store, + executor: task_executor::Executor, + root_path: P, + path_globs: PreparedPathGlobs, + digest_hint: Option, + ) -> Result { + // Attempt to use the digest hint to load a Snapshot without expanding the globs; otherwise, + // expand the globs to capture a Snapshot. + let snapshot_result = if let Some(digest) = digest_hint { + Snapshot::from_digest(store.clone(), digest) + .await + .map_err(|e| e.to_string()) + } else { + Err("No digest hint provided.".to_string()) + }; - if let Ok(snapshot) = snapshot_result { - Ok(snapshot) - } else { - let posix_fs = Arc::new(PosixFS::new_with_symlink_behavior( - root_path, - GitignoreStyleExcludes::create(vec![])?, - executor, - SymlinkBehavior::Oblivious, - )?); + if let Ok(snapshot) = snapshot_result { + Ok(snapshot) + } else { + let posix_fs = Arc::new(PosixFS::new_with_symlink_behavior( + root_path, + GitignoreStyleExcludes::create(vec![])?, + executor, + SymlinkBehavior::Oblivious, + )?); - let path_stats = posix_fs - .expand_globs(path_globs, SymlinkBehavior::Oblivious, None) - .await - .map_err(|err| format!("Error expanding globs: {err}"))?; - Snapshot::from_path_stats( - OneOffStoreFileByDigest::new(store, posix_fs, true), - path_stats, - ) - .await + let path_stats = posix_fs + .expand_globs(path_globs, SymlinkBehavior::Oblivious, None) + .await + .map_err(|err| format!("Error expanding globs: {err}"))?; + Snapshot::from_path_stats( + OneOffStoreFileByDigest::new(store, posix_fs, true), + path_stats, + ) + .await + } } - } - /// Creates a snapshot containing empty Files for testing purposes. - pub fn create_for_testing(files: Vec, dirs: Vec) -> Result { - // NB: All files receive the EMPTY_DIGEST. - let file_digests = files - .iter() - .map(|s| (PathBuf::from(&s), EMPTY_DIGEST)) - .collect(); - let file_path_stats: Vec = files - .into_iter() - .map(|s| { - PathStat::file( - PathBuf::from(s.clone()), - File { - path: PathBuf::from(s), - is_executable: false, - }, - ) - }) - .collect(); - let dir_path_stats: Vec = dirs - .into_iter() - .map(|s| PathStat::dir(PathBuf::from(&s), Dir(PathBuf::from(s)))) - .collect(); + /// Creates a snapshot containing empty Files for testing purposes. + pub fn create_for_testing(files: Vec, dirs: Vec) -> Result { + // NB: All files receive the EMPTY_DIGEST. 
+ let file_digests = files + .iter() + .map(|s| (PathBuf::from(&s), EMPTY_DIGEST)) + .collect(); + let file_path_stats: Vec = files + .into_iter() + .map(|s| { + PathStat::file( + PathBuf::from(s.clone()), + File { + path: PathBuf::from(s), + is_executable: false, + }, + ) + }) + .collect(); + let dir_path_stats: Vec = dirs + .into_iter() + .map(|s| PathStat::dir(PathBuf::from(&s), Dir(PathBuf::from(s)))) + .collect(); - let tree = DigestTrie::from_unique_paths( - [file_path_stats, dir_path_stats] - .concat() - .iter() - .map(|p| p.into()) - .collect(), - &file_digests, - )?; - Ok(Self { - digest: tree.compute_root_digest(), - tree, - }) - } + let tree = DigestTrie::from_unique_paths( + [file_path_stats, dir_path_stats] + .concat() + .iter() + .map(|p| p.into()) + .collect(), + &file_digests, + )?; + Ok(Self { + digest: tree.compute_root_digest(), + tree, + }) + } } impl fmt::Debug for Snapshot { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "Snapshot(digest={:?}, entries={})", - self.digest, - self.tree.digests().len() - ) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Snapshot(digest={:?}, entries={})", + self.digest, + self.tree.digests().len() + ) + } } impl From for DirectoryDigest { - fn from(s: Snapshot) -> Self { - Self::new(s.digest, s.tree) - } + fn from(s: Snapshot) -> Self { + Self::new(s.digest, s.tree) + } } // StoreFileByDigest allows a File to be saved to an underlying Store, in such a way that it can be @@ -238,7 +239,7 @@ impl From for DirectoryDigest { // It is a separate trait so that caching implementations can be written which wrap the Store (used // to store the bytes) and Vfs (used to read the files off disk if needed). pub trait StoreFileByDigest { - fn store_by_digest(&self, file: File) -> future::BoxFuture<'static, Result>; + fn store_by_digest(&self, file: File) -> future::BoxFuture<'static, Result>; } /// @@ -247,30 +248,30 @@ pub trait StoreFileByDigest { /// #[derive(Clone)] pub struct OneOffStoreFileByDigest { - store: Store, - posix_fs: Arc, - immutable: bool, + store: Store, + posix_fs: Arc, + immutable: bool, } impl OneOffStoreFileByDigest { - pub fn new(store: Store, posix_fs: Arc, immutable: bool) -> OneOffStoreFileByDigest { - OneOffStoreFileByDigest { - store, - posix_fs, - immutable, + pub fn new(store: Store, posix_fs: Arc, immutable: bool) -> OneOffStoreFileByDigest { + OneOffStoreFileByDigest { + store, + posix_fs, + immutable, + } } - } } impl StoreFileByDigest for OneOffStoreFileByDigest { - fn store_by_digest(&self, file: File) -> future::BoxFuture<'static, Result> { - let store = self.store.clone(); - let posix_fs = self.posix_fs.clone(); - let immutable = self.immutable; - let res = async move { - let path = posix_fs.file_path(&file); - store.store_file(true, immutable, path).await - }; - res.boxed() - } + fn store_by_digest(&self, file: File) -> future::BoxFuture<'static, Result> { + let store = self.store.clone(); + let posix_fs = self.posix_fs.clone(); + let immutable = self.immutable; + let res = async move { + let path = posix_fs.file_path(&file); + store.store_file(true, immutable, path).await + }; + res.boxed() + } } diff --git a/src/rust/engine/fs/store/src/snapshot_ops.rs b/src/rust/engine/fs/store/src/snapshot_ops.rs index 0f6d3113ec9..b040534f1e8 100644 --- a/src/rust/engine/fs/store/src/snapshot_ops.rs +++ b/src/rust/engine/fs/store/src/snapshot_ops.rs @@ -9,8 +9,8 @@ use std::iter::Iterator; use async_trait::async_trait; use bytes::BytesMut; use fs::{ - 
directory, DigestTrie, DirectoryDigest, GlobMatching, PreparedPathGlobs, RelativePath, - SymlinkBehavior, EMPTY_DIRECTORY_DIGEST, + directory, DigestTrie, DirectoryDigest, GlobMatching, PreparedPathGlobs, RelativePath, + SymlinkBehavior, EMPTY_DIRECTORY_DIGEST, }; use futures::future::{self, FutureExt}; use hashing::Digest; @@ -22,7 +22,7 @@ use log::log_enabled; /// #[derive(Debug, Clone)] pub struct SubsetParams { - pub globs: PreparedPathGlobs, + pub globs: PreparedPathGlobs, } /// @@ -33,30 +33,30 @@ pub struct SubsetParams { /// If a file is present with the same name, but different contents, an error will be returned. /// async fn merge_directories( - store: T, - dir_digests: Vec, + store: T, + dir_digests: Vec, ) -> Result { - let trees = future::try_join_all( - dir_digests - .into_iter() - .map(|dd| store.load_digest_trie(dd)) - .collect::>(), - ) - .await?; - - let tree = match DigestTrie::merge(trees) { - Ok(tree) => tree, - Err(merge_error) => { - // TODO: Use https://doc.rust-lang.org/nightly/std/result/enum.Result.html#method.into_ok_or_err - // once it is stable. - let err_string = match render_merge_error(&store, merge_error).await { - Ok(e) | Err(e) => e, - }; - return Err(err_string.into()); - } - }; + let trees = future::try_join_all( + dir_digests + .into_iter() + .map(|dd| store.load_digest_trie(dd)) + .collect::>(), + ) + .await?; + + let tree = match DigestTrie::merge(trees) { + Ok(tree) => tree, + Err(merge_error) => { + // TODO: Use https://doc.rust-lang.org/nightly/std/result/enum.Result.html#method.into_ok_or_err + // once it is stable. + let err_string = match render_merge_error(&store, merge_error).await { + Ok(e) | Err(e) => e, + }; + return Err(err_string.into()); + } + }; - Ok(tree.into()) + Ok(tree.into()) } /// @@ -64,105 +64,103 @@ async fn merge_directories( /// loaded). /// async fn render_merge_error( - store: &T, - err: directory::MergeError, + store: &T, + err: directory::MergeError, ) -> Result { - let directory::MergeError::Duplicates { - parent_path, - files, - directories, - symlinks, - } = err; - let file_details_by_name = files - .iter() - .map(|file| async move { - let digest: Digest = file.digest(); - let header = format!( - "file digest={} size={}:\n\n", - digest.hash, digest.size_bytes - ); - - let contents = store - .load_file_bytes_with(digest, |bytes| { - const MAX_LENGTH: usize = 1024; - let content_length = bytes.len(); - let mut bytes = BytesMut::from(&bytes[0..std::cmp::min(content_length, MAX_LENGTH)]); - if content_length > MAX_LENGTH && !log_enabled!(log::Level::Debug) { - bytes.extend_from_slice( - format!( - "\n... TRUNCATED contents from {content_length}B to {MAX_LENGTH}B \ - (Pass -ldebug to see full contents)." 
- ) - .as_bytes(), + let directory::MergeError::Duplicates { + parent_path, + files, + directories, + symlinks, + } = err; + let file_details_by_name = files + .iter() + .map(|file| async move { + let digest: Digest = file.digest(); + let header = format!( + "file digest={} size={}:\n\n", + digest.hash, digest.size_bytes ); - } - String::from_utf8_lossy(bytes.to_vec().as_slice()).to_string() - }) - .await - .unwrap_or_else(|_| "".to_string()); - let detail = format!("{header}{contents}"); - let res: Result<_, String> = Ok((file.name().to_owned(), detail)); - res - }) - .map(|f| f.boxed()); - let symlink_details_by_name = symlinks - .iter() - .map(|symlink| async move { - let target = symlink.target(); - let detail = format!("symlink target={}:\n\n", target.to_str().unwrap()); - let res: Result<_, String> = Ok((symlink.name(), detail)); - res - }) - .map(|f| f.boxed()); - let dir_details_by_name = directories - .iter() - .map(|dir| async move { - let digest = dir.digest(); - let detail = format!("dir digest={} size={}:\n\n", digest.hash, digest.size_bytes); - let res: Result<_, String> = Ok((dir.name().to_owned(), detail)); - res - }) - .map(|f| f.boxed()); - - let duplicate_details = async move { - let details_by_name = future::try_join_all( - file_details_by_name - .chain(symlink_details_by_name) - .chain(dir_details_by_name) - .collect::>(), - ) - .await? - .into_iter() - .into_group_map(); - - let enumerated_details = - std::iter::Iterator::flatten(details_by_name.iter().filter_map(|(name, details)| { - if details.len() > 1 { - Some( - details - .iter() - .enumerate() - .map(move |(index, detail)| format!("`{}`: {}.) {}", name, index + 1, detail)), - ) - } else { - None - } - })) - .collect(); - let res: Result, T::Error> = Ok(enumerated_details); - res - } - .await - .unwrap_or_else(|err| vec![format!("Failed to load contents for comparison: {err}")]); + let contents = store + .load_file_bytes_with(digest, |bytes| { + const MAX_LENGTH: usize = 1024; + let content_length = bytes.len(); + let mut bytes = + BytesMut::from(&bytes[0..std::cmp::min(content_length, MAX_LENGTH)]); + if content_length > MAX_LENGTH && !log_enabled!(log::Level::Debug) { + bytes.extend_from_slice( + format!( + "\n... TRUNCATED contents from {content_length}B to {MAX_LENGTH}B \ + (Pass -ldebug to see full contents)." + ) + .as_bytes(), + ); + } + String::from_utf8_lossy(bytes.to_vec().as_slice()).to_string() + }) + .await + .unwrap_or_else(|_| "".to_string()); + let detail = format!("{header}{contents}"); + let res: Result<_, String> = Ok((file.name().to_owned(), detail)); + res + }) + .map(|f| f.boxed()); + let symlink_details_by_name = symlinks + .iter() + .map(|symlink| async move { + let target = symlink.target(); + let detail = format!("symlink target={}:\n\n", target.to_str().unwrap()); + let res: Result<_, String> = Ok((symlink.name(), detail)); + res + }) + .map(|f| f.boxed()); + let dir_details_by_name = directories + .iter() + .map(|dir| async move { + let digest = dir.digest(); + let detail = format!("dir digest={} size={}:\n\n", digest.hash, digest.size_bytes); + let res: Result<_, String> = Ok((dir.name().to_owned(), detail)); + res + }) + .map(|f| f.boxed()); + + let duplicate_details = async move { + let details_by_name = future::try_join_all( + file_details_by_name + .chain(symlink_details_by_name) + .chain(dir_details_by_name) + .collect::>(), + ) + .await? 
+ .into_iter() + .into_group_map(); + + let enumerated_details = + std::iter::Iterator::flatten(details_by_name.iter().filter_map(|(name, details)| { + if details.len() > 1 { + Some(details.iter().enumerate().map(move |(index, detail)| { + format!("`{}`: {}.) {}", name, index + 1, detail) + })) + } else { + None + } + })) + .collect(); + + let res: Result, T::Error> = Ok(enumerated_details); + res + } + .await + .unwrap_or_else(|err| vec![format!("Failed to load contents for comparison: {err}")]); - Ok(format!( - "Can only merge Directories with no duplicates, but found {} duplicate entries in {}:\ + Ok(format!( + "Can only merge Directories with no duplicates, but found {} duplicate entries in {}:\ \n\n{}", - duplicate_details.len(), - parent_path.display(), - duplicate_details.join("\n\n") - )) + duplicate_details.len(), + parent_path.display(), + duplicate_details.join("\n\n") + )) } /// @@ -173,78 +171,77 @@ async fn render_merge_error( /// #[async_trait] pub trait SnapshotOps: Clone + Send + Sync + 'static { - type Error: Send + Debug + Display + From; - - async fn load_file_bytes_with< - T: Send + 'static, - F: Fn(&[u8]) -> T + Clone + Send + Sync + 'static, - >( - &self, - digest: Digest, - f: F, - ) -> Result; - - async fn load_digest_trie(&self, digest: DirectoryDigest) -> Result; - - /// - /// Given N Snapshots, returns a new Snapshot that merges them. - /// - async fn merge(&self, digests: Vec) -> Result { - merge_directories(self.clone(), digests).await - } - - async fn add_prefix( - &self, - digest: DirectoryDigest, - prefix: &RelativePath, - ) -> Result { - Ok( - self - .load_digest_trie(digest) - .await? - .add_prefix(prefix)? - .into(), - ) - } - - async fn strip_prefix( - &self, - digest: DirectoryDigest, - prefix: &RelativePath, - ) -> Result { - Ok( - self - .load_digest_trie(digest) - .await? - .remove_prefix(prefix)? - .into(), - ) - } - - async fn subset( - &self, - directory_digest: DirectoryDigest, - params: SubsetParams, - ) -> Result { - let input_tree = self.load_digest_trie(directory_digest.clone()).await?; - let path_stats = input_tree - .expand_globs(params.globs, SymlinkBehavior::Aware, None) - .await - .map_err(|err| format!("Error matching globs against {directory_digest:?}: {err}"))?; - - let mut files = HashMap::new(); - input_tree.walk(SymlinkBehavior::Oblivious, &mut |path, entry| match entry { - directory::Entry::File(f) => { - files.insert(path.to_owned(), f.digest()); - } - directory::Entry::Symlink(_) => panic!("Unexpected symlink"), - directory::Entry::Directory(_) => (), - }); - - Ok(DigestTrie::from_unique_paths(path_stats.iter().map(|p| p.into()).collect(), &files)?.into()) - } - - async fn create_empty_dir(&self, path: &RelativePath) -> Result { - self.add_prefix(EMPTY_DIRECTORY_DIGEST.clone(), path).await - } + type Error: Send + Debug + Display + From; + + async fn load_file_bytes_with< + T: Send + 'static, + F: Fn(&[u8]) -> T + Clone + Send + Sync + 'static, + >( + &self, + digest: Digest, + f: F, + ) -> Result; + + async fn load_digest_trie(&self, digest: DirectoryDigest) -> Result; + + /// + /// Given N Snapshots, returns a new Snapshot that merges them. + /// + async fn merge(&self, digests: Vec) -> Result { + merge_directories(self.clone(), digests).await + } + + async fn add_prefix( + &self, + digest: DirectoryDigest, + prefix: &RelativePath, + ) -> Result { + Ok(self + .load_digest_trie(digest) + .await? + .add_prefix(prefix)? 
+ .into()) + } + + async fn strip_prefix( + &self, + digest: DirectoryDigest, + prefix: &RelativePath, + ) -> Result { + Ok(self + .load_digest_trie(digest) + .await? + .remove_prefix(prefix)? + .into()) + } + + async fn subset( + &self, + directory_digest: DirectoryDigest, + params: SubsetParams, + ) -> Result { + let input_tree = self.load_digest_trie(directory_digest.clone()).await?; + let path_stats = input_tree + .expand_globs(params.globs, SymlinkBehavior::Aware, None) + .await + .map_err(|err| format!("Error matching globs against {directory_digest:?}: {err}"))?; + + let mut files = HashMap::new(); + input_tree.walk(SymlinkBehavior::Oblivious, &mut |path, entry| match entry { + directory::Entry::File(f) => { + files.insert(path.to_owned(), f.digest()); + } + directory::Entry::Symlink(_) => panic!("Unexpected symlink"), + directory::Entry::Directory(_) => (), + }); + + Ok( + DigestTrie::from_unique_paths(path_stats.iter().map(|p| p.into()).collect(), &files)? + .into(), + ) + } + + async fn create_empty_dir(&self, path: &RelativePath) -> Result { + self.add_prefix(EMPTY_DIRECTORY_DIGEST.clone(), path).await + } } diff --git a/src/rust/engine/fs/store/src/snapshot_ops_tests.rs b/src/rust/engine/fs/store/src/snapshot_ops_tests.rs index afd1e55821d..d7e6b8870ad 100644 --- a/src/rust/engine/fs/store/src/snapshot_ops_tests.rs +++ b/src/rust/engine/fs/store/src/snapshot_ops_tests.rs @@ -6,164 +6,164 @@ use std::path::Path; use std::sync::Arc; use fs::{ - DirectoryDigest, GlobExpansionConjunction, PosixFS, PreparedPathGlobs, StrictGlobMatching, + DirectoryDigest, GlobExpansionConjunction, PosixFS, PreparedPathGlobs, StrictGlobMatching, }; use testutil::make_file; use crate::{ - snapshot_tests::{expand_all_sorted, setup, STR, STR2}, - OneOffStoreFileByDigest, Snapshot, SnapshotOps, SubsetParams, + snapshot_tests::{expand_all_sorted, setup, STR, STR2}, + OneOffStoreFileByDigest, Snapshot, SnapshotOps, SubsetParams, }; async fn get_duplicate_rolands( - store_wrapper: T, - base_path: &Path, - posix_fs: Arc, - digester: OneOffStoreFileByDigest, + store_wrapper: T, + base_path: &Path, + posix_fs: Arc, + digester: OneOffStoreFileByDigest, ) -> (DirectoryDigest, Snapshot, Snapshot) { - create_dir_all(base_path.join("subdir")).unwrap(); - - make_file(&base_path.join("subdir/roland1"), STR.as_bytes(), 0o600); - let path_stats1 = expand_all_sorted(posix_fs).await; - let snapshot1 = Snapshot::from_path_stats(digester.clone(), path_stats1) - .await - .unwrap(); - - let (_store2, tempdir2, posix_fs2, digester2) = setup(); - create_dir_all(tempdir2.path().join("subdir")).unwrap(); - make_file( - &tempdir2.path().join("subdir/roland2"), - STR2.as_bytes(), - 0o600, - ); - let path_stats2 = expand_all_sorted(posix_fs2).await; - let snapshot2 = Snapshot::from_path_stats(digester2, path_stats2) - .await - .unwrap(); - - let merged_digest = store_wrapper - .merge(vec![snapshot1.clone().into(), snapshot2.clone().into()]) - .await - .unwrap(); - - (merged_digest, snapshot1, snapshot2) + create_dir_all(base_path.join("subdir")).unwrap(); + + make_file(&base_path.join("subdir/roland1"), STR.as_bytes(), 0o600); + let path_stats1 = expand_all_sorted(posix_fs).await; + let snapshot1 = Snapshot::from_path_stats(digester.clone(), path_stats1) + .await + .unwrap(); + + let (_store2, tempdir2, posix_fs2, digester2) = setup(); + create_dir_all(tempdir2.path().join("subdir")).unwrap(); + make_file( + &tempdir2.path().join("subdir/roland2"), + STR2.as_bytes(), + 0o600, + ); + let path_stats2 = 
expand_all_sorted(posix_fs2).await; + let snapshot2 = Snapshot::from_path_stats(digester2, path_stats2) + .await + .unwrap(); + + let merged_digest = store_wrapper + .merge(vec![snapshot1.clone().into(), snapshot2.clone().into()]) + .await + .unwrap(); + + (merged_digest, snapshot1, snapshot2) } fn make_subset_params(globs: &[&str]) -> SubsetParams { - let globs = PreparedPathGlobs::create( - globs.iter().map(|s| s.to_string()).collect(), - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AllMatch, - ) - .unwrap(); - SubsetParams { globs } + let globs = PreparedPathGlobs::create( + globs.iter().map(|s| s.to_string()).collect(), + StrictGlobMatching::Ignore, + GlobExpansionConjunction::AllMatch, + ) + .unwrap(); + SubsetParams { globs } } #[tokio::test] async fn subset_single_files() { - let (store, tempdir, posix_fs, digester) = setup(); - - let (merged_digest, snapshot1, snapshot2) = - get_duplicate_rolands(store.clone(), tempdir.path(), posix_fs.clone(), digester).await; - - let subset_params1 = make_subset_params(&["subdir/roland1"]); - let subset_roland1 = store - .clone() - .subset(merged_digest.clone(), subset_params1) - .await - .unwrap(); - assert_eq!(subset_roland1, snapshot1.into()); - - let subset_params2 = make_subset_params(&["subdir/roland2"]); - let subset_roland2 = store - .clone() - .subset(merged_digest, subset_params2) - .await - .unwrap(); - assert_eq!(subset_roland2, snapshot2.into()); + let (store, tempdir, posix_fs, digester) = setup(); + + let (merged_digest, snapshot1, snapshot2) = + get_duplicate_rolands(store.clone(), tempdir.path(), posix_fs.clone(), digester).await; + + let subset_params1 = make_subset_params(&["subdir/roland1"]); + let subset_roland1 = store + .clone() + .subset(merged_digest.clone(), subset_params1) + .await + .unwrap(); + assert_eq!(subset_roland1, snapshot1.into()); + + let subset_params2 = make_subset_params(&["subdir/roland2"]); + let subset_roland2 = store + .clone() + .subset(merged_digest, subset_params2) + .await + .unwrap(); + assert_eq!(subset_roland2, snapshot2.into()); } #[tokio::test] async fn subset_symlink() { - // Make the first snapshot with a file - let (store, tempdir1, posix_fs1, digester1) = setup(); - create_dir_all(tempdir1.path().join("subdir")).unwrap(); - make_file( - &tempdir1.path().join("subdir/roland1"), - STR.as_bytes(), - 0o600, - ); - let snapshot_with_real_file = - Snapshot::from_path_stats(digester1.clone(), expand_all_sorted(posix_fs1).await) - .await - .unwrap(); - - // Make the second snapshot with a symlink pointing to the file in the first snapshot. - let (_store2, tempdir2, posix_fs2, digester2) = setup(); - create_dir_all(tempdir2.path().join("subdir")).unwrap(); - symlink("./roland1", &tempdir2.path().join("subdir/roland2")).unwrap(); - let snapshot_with_symlink = - Snapshot::from_path_stats(digester2, expand_all_sorted(posix_fs2).await) - .await - .unwrap(); - - let merged_digest = store - .merge(vec![ - snapshot_with_real_file.clone().into(), - snapshot_with_symlink.clone().into(), - ]) - .await - .unwrap(); - - let subset_params = make_subset_params(&["subdir/roland2"]); - let subset_symlink = store - .clone() - .subset(merged_digest.clone(), subset_params) - .await - .unwrap(); - // NB: The digest subset should still be the symlink. 
- assert_eq!(subset_symlink, snapshot_with_symlink.into()); + // Make the first snapshot with a file + let (store, tempdir1, posix_fs1, digester1) = setup(); + create_dir_all(tempdir1.path().join("subdir")).unwrap(); + make_file( + &tempdir1.path().join("subdir/roland1"), + STR.as_bytes(), + 0o600, + ); + let snapshot_with_real_file = + Snapshot::from_path_stats(digester1.clone(), expand_all_sorted(posix_fs1).await) + .await + .unwrap(); + + // Make the second snapshot with a symlink pointing to the file in the first snapshot. + let (_store2, tempdir2, posix_fs2, digester2) = setup(); + create_dir_all(tempdir2.path().join("subdir")).unwrap(); + symlink("./roland1", &tempdir2.path().join("subdir/roland2")).unwrap(); + let snapshot_with_symlink = + Snapshot::from_path_stats(digester2, expand_all_sorted(posix_fs2).await) + .await + .unwrap(); + + let merged_digest = store + .merge(vec![ + snapshot_with_real_file.clone().into(), + snapshot_with_symlink.clone().into(), + ]) + .await + .unwrap(); + + let subset_params = make_subset_params(&["subdir/roland2"]); + let subset_symlink = store + .clone() + .subset(merged_digest.clone(), subset_params) + .await + .unwrap(); + // NB: The digest subset should still be the symlink. + assert_eq!(subset_symlink, snapshot_with_symlink.into()); } #[tokio::test] async fn subset_recursive_wildcard() { - let (store, tempdir, posix_fs, digester) = setup(); - - let (merged_digest, snapshot1, _) = - get_duplicate_rolands(store.clone(), tempdir.path(), posix_fs.clone(), digester).await; - - let subset_params1 = make_subset_params(&["subdir/**"]); - let subset_roland1 = store - .clone() - .subset(merged_digest.clone(), subset_params1) - .await - .unwrap(); - assert_eq!(merged_digest, subset_roland1); - - // **/* is a commonly-used alias for **. - let subset_params2 = make_subset_params(&["subdir/**/*"]); - let subset_roland2 = store - .clone() - .subset(merged_digest.clone(), subset_params2) - .await - .unwrap(); - assert_eq!(merged_digest, subset_roland2); - - // ** should not include explicitly excluded files - let subset_params3 = make_subset_params(&["!subdir/roland2", "subdir/**"]); - let subset_roland3 = store - .clone() - .subset(merged_digest.clone(), subset_params3) - .await - .unwrap(); - assert_eq!(subset_roland3, snapshot1.clone().into()); - - // ** should not include explicitly excluded files - let subset_params4 = make_subset_params(&["!subdir/roland2", "**"]); - let subset_roland4 = store - .clone() - .subset(merged_digest, subset_params4) - .await - .unwrap(); - assert_eq!(subset_roland4, snapshot1.into()); + let (store, tempdir, posix_fs, digester) = setup(); + + let (merged_digest, snapshot1, _) = + get_duplicate_rolands(store.clone(), tempdir.path(), posix_fs.clone(), digester).await; + + let subset_params1 = make_subset_params(&["subdir/**"]); + let subset_roland1 = store + .clone() + .subset(merged_digest.clone(), subset_params1) + .await + .unwrap(); + assert_eq!(merged_digest, subset_roland1); + + // **/* is a commonly-used alias for **. 
+ let subset_params2 = make_subset_params(&["subdir/**/*"]); + let subset_roland2 = store + .clone() + .subset(merged_digest.clone(), subset_params2) + .await + .unwrap(); + assert_eq!(merged_digest, subset_roland2); + + // ** should not include explicitly excluded files + let subset_params3 = make_subset_params(&["!subdir/roland2", "subdir/**"]); + let subset_roland3 = store + .clone() + .subset(merged_digest.clone(), subset_params3) + .await + .unwrap(); + assert_eq!(subset_roland3, snapshot1.clone().into()); + + // ** should not include explicitly excluded files + let subset_params4 = make_subset_params(&["!subdir/roland2", "**"]); + let subset_roland4 = store + .clone() + .subset(merged_digest, subset_params4) + .await + .unwrap(); + assert_eq!(subset_roland4, snapshot1.into()); } diff --git a/src/rust/engine/fs/store/src/snapshot_tests.rs b/src/rust/engine/fs/store/src/snapshot_tests.rs index 7636165262c..525697bcab5 100644 --- a/src/rust/engine/fs/store/src/snapshot_tests.rs +++ b/src/rust/engine/fs/store/src/snapshot_tests.rs @@ -11,552 +11,546 @@ use testutil::make_file; use crate::{OneOffStoreFileByDigest, RelativePath, Snapshot, SnapshotOps, Store, StoreError}; use fs::{ - Dir, DirectoryDigest, File, GitignoreStyleExcludes, GlobExpansionConjunction, GlobMatching, - PathGlobs, PathStat, PosixFS, StrictGlobMatching, SymlinkBehavior, + Dir, DirectoryDigest, File, GitignoreStyleExcludes, GlobExpansionConjunction, GlobMatching, + PathGlobs, PathStat, PosixFS, StrictGlobMatching, SymlinkBehavior, }; pub const STR: &str = "European Burmese"; pub const STR2: &str = "asdf"; pub fn setup() -> ( - Store, - tempfile::TempDir, - Arc, - OneOffStoreFileByDigest, + Store, + tempfile::TempDir, + Arc, + OneOffStoreFileByDigest, ) { - let executor = task_executor::Executor::new(); - // TODO: Pass a remote CAS address through. - let store = Store::local_only( - executor.clone(), - tempfile::Builder::new() - .prefix("lmdb_store") - .tempdir() - .unwrap(), - ) - .unwrap(); - let dir = tempfile::Builder::new().prefix("root").tempdir().unwrap(); - let ignorer = GitignoreStyleExcludes::create(vec![]).unwrap(); - let posix_fs = Arc::new(PosixFS::new(dir.path(), ignorer, executor).unwrap()); - let file_saver = OneOffStoreFileByDigest::new(store.clone(), posix_fs.clone(), true); - (store, dir, posix_fs, file_saver) + let executor = task_executor::Executor::new(); + // TODO: Pass a remote CAS address through. 
+ let store = Store::local_only( + executor.clone(), + tempfile::Builder::new() + .prefix("lmdb_store") + .tempdir() + .unwrap(), + ) + .unwrap(); + let dir = tempfile::Builder::new().prefix("root").tempdir().unwrap(); + let ignorer = GitignoreStyleExcludes::create(vec![]).unwrap(); + let posix_fs = Arc::new(PosixFS::new(dir.path(), ignorer, executor).unwrap()); + let file_saver = OneOffStoreFileByDigest::new(store.clone(), posix_fs.clone(), true); + (store, dir, posix_fs, file_saver) } #[tokio::test] async fn snapshot_one_file() { - let (_, dir, posix_fs, digester) = setup(); - - let file_name = PathBuf::from("roland"); - make_file(&dir.path().join(&file_name), STR.as_bytes(), 0o600); - - let path_stats = expand_all_sorted(posix_fs).await; - let snapshot = Snapshot::from_path_stats(digester, path_stats) - .await - .unwrap(); - assert_eq!( - snapshot.digest, - Digest::new( - Fingerprint::from_hex_string( - "63949aa823baf765eff07b946050d76ec0033144c785a94d3ebd82baa931cd16", - ) - .unwrap(), - 80, - ) - ); - assert_eq!(snapshot.files(), vec![PathBuf::from("roland")]); - assert_eq!(snapshot.directories(), Vec::::new()); + let (_, dir, posix_fs, digester) = setup(); + + let file_name = PathBuf::from("roland"); + make_file(&dir.path().join(&file_name), STR.as_bytes(), 0o600); + + let path_stats = expand_all_sorted(posix_fs).await; + let snapshot = Snapshot::from_path_stats(digester, path_stats) + .await + .unwrap(); + assert_eq!( + snapshot.digest, + Digest::new( + Fingerprint::from_hex_string( + "63949aa823baf765eff07b946050d76ec0033144c785a94d3ebd82baa931cd16", + ) + .unwrap(), + 80, + ) + ); + assert_eq!(snapshot.files(), vec![PathBuf::from("roland")]); + assert_eq!(snapshot.directories(), Vec::::new()); } #[tokio::test] async fn snapshot_recursive_directories() { - let (_, dir, posix_fs, digester) = setup(); - - let cats = PathBuf::from("cats"); - let roland = cats.join("roland"); - std::fs::create_dir_all(dir.path().join(cats)).unwrap(); - make_file(&dir.path().join(&roland), STR.as_bytes(), 0o600); - - let path_stats = expand_all_sorted(posix_fs).await; - let snapshot = Snapshot::from_path_stats(digester, path_stats) - .await - .unwrap(); - assert_eq!( - snapshot.digest, - Digest::new( - Fingerprint::from_hex_string( - "8b1a7ea04eaa2527b35683edac088bc826117b53b7ec6601740b55e20bce3deb", - ) - .unwrap(), - 78, - ) - ); - assert_eq!(snapshot.files(), vec![PathBuf::from("cats/roland")]); - assert_eq!(snapshot.directories(), vec![PathBuf::from("cats")]); + let (_, dir, posix_fs, digester) = setup(); + + let cats = PathBuf::from("cats"); + let roland = cats.join("roland"); + std::fs::create_dir_all(dir.path().join(cats)).unwrap(); + make_file(&dir.path().join(&roland), STR.as_bytes(), 0o600); + + let path_stats = expand_all_sorted(posix_fs).await; + let snapshot = Snapshot::from_path_stats(digester, path_stats) + .await + .unwrap(); + assert_eq!( + snapshot.digest, + Digest::new( + Fingerprint::from_hex_string( + "8b1a7ea04eaa2527b35683edac088bc826117b53b7ec6601740b55e20bce3deb", + ) + .unwrap(), + 78, + ) + ); + assert_eq!(snapshot.files(), vec![PathBuf::from("cats/roland")]); + assert_eq!(snapshot.directories(), vec![PathBuf::from("cats")]); } #[tokio::test] async fn snapshot_from_digest() { - let (store, dir, posix_fs, digester) = setup(); - - let cats = PathBuf::from("cats"); - let roland = cats.join("roland"); - std::fs::create_dir_all(dir.path().join(cats)).unwrap(); - make_file(&dir.path().join(&roland), STR.as_bytes(), 0o600); - - let path_stats = expand_all_sorted(posix_fs).await; - 
let expected_snapshot = Snapshot::from_path_stats(digester, path_stats) - .await - .unwrap(); - - // Confirm that the digest can be loaded either from memory (using a DirectoryDigest with a - // tree attached), or from disk (using one without). - - // From memory. - assert_eq!( - expected_snapshot, - Snapshot::from_digest(store.clone(), expected_snapshot.clone().into()) - .await - .unwrap() - ); - - // From disk. - store - .ensure_directory_digest_persisted(expected_snapshot.clone().into()) - .await - .unwrap(); - assert_eq!( - expected_snapshot, - Snapshot::from_digest( - store, - DirectoryDigest::from_persisted_digest(expected_snapshot.digest) - ) - .await - .unwrap() - ); + let (store, dir, posix_fs, digester) = setup(); + + let cats = PathBuf::from("cats"); + let roland = cats.join("roland"); + std::fs::create_dir_all(dir.path().join(cats)).unwrap(); + make_file(&dir.path().join(&roland), STR.as_bytes(), 0o600); + + let path_stats = expand_all_sorted(posix_fs).await; + let expected_snapshot = Snapshot::from_path_stats(digester, path_stats) + .await + .unwrap(); + + // Confirm that the digest can be loaded either from memory (using a DirectoryDigest with a + // tree attached), or from disk (using one without). + + // From memory. + assert_eq!( + expected_snapshot, + Snapshot::from_digest(store.clone(), expected_snapshot.clone().into()) + .await + .unwrap() + ); + + // From disk. + store + .ensure_directory_digest_persisted(expected_snapshot.clone().into()) + .await + .unwrap(); + assert_eq!( + expected_snapshot, + Snapshot::from_digest( + store, + DirectoryDigest::from_persisted_digest(expected_snapshot.digest) + ) + .await + .unwrap() + ); } #[tokio::test] async fn snapshot_recursive_directories_including_empty() { - let (_, dir, posix_fs, digester) = setup(); - - let cats = PathBuf::from("cats"); - let roland = cats.join("roland"); - let dogs = PathBuf::from("dogs"); - let llamas = PathBuf::from("llamas"); - std::fs::create_dir_all(dir.path().join(&cats)).unwrap(); - std::fs::create_dir_all(dir.path().join(&dogs)).unwrap(); - std::fs::create_dir_all(dir.path().join(&llamas)).unwrap(); - make_file(&dir.path().join(&roland), STR.as_bytes(), 0o600); - - let sorted_path_stats = expand_all_sorted(posix_fs).await; - let mut unsorted_path_stats = sorted_path_stats.clone(); - unsorted_path_stats.reverse(); - let snapshot = Snapshot::from_path_stats(digester, unsorted_path_stats) - .await - .unwrap(); - assert_eq!( - snapshot.digest, - Digest::new( - Fingerprint::from_hex_string( - "fbff703bdaac62accf2ea5083bcfed89292073bf710ef9ad14d9298c637e777b", - ) - .unwrap(), - 232, - ), - ); - assert_eq!(snapshot.files(), vec![PathBuf::from("cats/roland")]); - assert_eq!( - snapshot.directories(), - vec![ - PathBuf::from("cats"), - PathBuf::from("dogs"), - PathBuf::from("llamas") - ] - ); + let (_, dir, posix_fs, digester) = setup(); + + let cats = PathBuf::from("cats"); + let roland = cats.join("roland"); + let dogs = PathBuf::from("dogs"); + let llamas = PathBuf::from("llamas"); + std::fs::create_dir_all(dir.path().join(&cats)).unwrap(); + std::fs::create_dir_all(dir.path().join(&dogs)).unwrap(); + std::fs::create_dir_all(dir.path().join(&llamas)).unwrap(); + make_file(&dir.path().join(&roland), STR.as_bytes(), 0o600); + + let sorted_path_stats = expand_all_sorted(posix_fs).await; + let mut unsorted_path_stats = sorted_path_stats.clone(); + unsorted_path_stats.reverse(); + let snapshot = Snapshot::from_path_stats(digester, unsorted_path_stats) + .await + .unwrap(); + assert_eq!( + snapshot.digest, + 
Digest::new( + Fingerprint::from_hex_string( + "fbff703bdaac62accf2ea5083bcfed89292073bf710ef9ad14d9298c637e777b", + ) + .unwrap(), + 232, + ), + ); + assert_eq!(snapshot.files(), vec![PathBuf::from("cats/roland")]); + assert_eq!( + snapshot.directories(), + vec![ + PathBuf::from("cats"), + PathBuf::from("dogs"), + PathBuf::from("llamas") + ] + ); } #[tokio::test] async fn merge_directories_two_files() { - let (store, _, _, _) = setup(); - - let containing_roland = TestDirectory::containing_roland(); - let containing_treats = TestDirectory::containing_treats(); - - store - .record_directory(&containing_roland.directory(), false) - .await - .expect("Storing roland directory"); - store - .record_directory(&containing_treats.directory(), false) - .await - .expect("Storing treats directory"); - - let result = store - .merge(vec![ - containing_treats.directory_digest(), - containing_roland.directory_digest(), - ]) - .await; - - assert_eq!( - result, - Ok(TestDirectory::containing_roland_and_treats().directory_digest()) - ); + let (store, _, _, _) = setup(); + + let containing_roland = TestDirectory::containing_roland(); + let containing_treats = TestDirectory::containing_treats(); + + store + .record_directory(&containing_roland.directory(), false) + .await + .expect("Storing roland directory"); + store + .record_directory(&containing_treats.directory(), false) + .await + .expect("Storing treats directory"); + + let result = store + .merge(vec![ + containing_treats.directory_digest(), + containing_roland.directory_digest(), + ]) + .await; + + assert_eq!( + result, + Ok(TestDirectory::containing_roland_and_treats().directory_digest()) + ); } #[tokio::test] async fn merge_directories_clashing_files() { - let (store, _, _, _) = setup(); - - let containing_roland = TestDirectory::containing_roland(); - let containing_wrong_roland = TestDirectory::containing_wrong_roland(); - - store - .record_directory(&containing_roland.directory(), false) - .await - .expect("Storing roland directory"); - store - .record_directory(&containing_wrong_roland.directory(), false) - .await - .expect("Storing wrong roland directory"); - - let err = store - .merge(vec![ - containing_roland.directory_digest(), - containing_wrong_roland.directory_digest(), - ]) - .await - .expect_err("Want error merging"); - - assert!( - format!("{err:?}").contains("roland"), - "Want error message to contain roland but was: {err:?}" - ); + let (store, _, _, _) = setup(); + + let containing_roland = TestDirectory::containing_roland(); + let containing_wrong_roland = TestDirectory::containing_wrong_roland(); + + store + .record_directory(&containing_roland.directory(), false) + .await + .expect("Storing roland directory"); + store + .record_directory(&containing_wrong_roland.directory(), false) + .await + .expect("Storing wrong roland directory"); + + let err = store + .merge(vec![ + containing_roland.directory_digest(), + containing_wrong_roland.directory_digest(), + ]) + .await + .expect_err("Want error merging"); + + assert!( + format!("{err:?}").contains("roland"), + "Want error message to contain roland but was: {err:?}" + ); } #[tokio::test] async fn merge_directories_same_files() { - let (store, _, _, _) = setup(); - - let containing_roland = TestDirectory::containing_roland(); - let containing_roland_and_treats = TestDirectory::containing_roland_and_treats(); - - store - .record_directory(&containing_roland.directory(), false) - .await - .expect("Storing roland directory"); - store - 
.record_directory(&containing_roland_and_treats.directory(), false) - .await - .expect("Storing treats directory"); - - let result = store - .merge(vec![ - containing_roland.directory_digest(), - containing_roland_and_treats.directory_digest(), - ]) - .await; - - assert_eq!( - result, - Ok(TestDirectory::containing_roland_and_treats().directory_digest()) - ); + let (store, _, _, _) = setup(); + + let containing_roland = TestDirectory::containing_roland(); + let containing_roland_and_treats = TestDirectory::containing_roland_and_treats(); + + store + .record_directory(&containing_roland.directory(), false) + .await + .expect("Storing roland directory"); + store + .record_directory(&containing_roland_and_treats.directory(), false) + .await + .expect("Storing treats directory"); + + let result = store + .merge(vec![ + containing_roland.directory_digest(), + containing_roland_and_treats.directory_digest(), + ]) + .await; + + assert_eq!( + result, + Ok(TestDirectory::containing_roland_and_treats().directory_digest()) + ); } #[tokio::test] async fn snapshot_merge_two_files() { - let (store, tempdir, _, digester) = setup(); - - let common_dir_name = "tower"; - let common_dir = PathBuf::from(common_dir_name); - - let dir = make_dir_stat(tempdir.path(), &common_dir); - let file1 = make_file_stat( - tempdir.path(), - &common_dir.join("roland"), - STR.as_bytes(), - false, - ); - let file2 = make_file_stat( - tempdir.path(), - &common_dir.join("susannah"), - STR.as_bytes(), - true, - ); - - let snapshot1 = Snapshot::from_path_stats(digester.clone(), vec![dir.clone(), file1.clone()]) - .await - .unwrap(); - - let snapshot2 = Snapshot::from_path_stats(digester, vec![dir.clone(), file2.clone()]) - .await - .unwrap(); - - let merged = store - .merge(vec![snapshot1.into(), snapshot2.into()]) - .await - .unwrap(); - store - .ensure_directory_digest_persisted(merged.clone()) - .await - .unwrap(); - let merged_root_directory = store.load_directory(merged.as_digest()).await.unwrap(); - - assert_eq!(merged_root_directory.files.len(), 0); - assert_eq!(merged_root_directory.directories.len(), 1); - - let merged_child_dirnode = merged_root_directory.directories[0].clone(); - let merged_child_dirnode_digest: Result = merged_child_dirnode - .digest - .map(|d| d.try_into()) - .unwrap_or(Ok(EMPTY_DIGEST)); - let merged_child_directory = store - .load_directory(merged_child_dirnode_digest.unwrap()) - .await - .unwrap(); - - assert_eq!(merged_child_dirnode.name, common_dir_name); - assert_eq!( - merged_child_directory - .files - .iter() - .map(|filenode| filenode.name.clone()) - .collect::>(), - vec!["roland".to_string(), "susannah".to_string()], - ); + let (store, tempdir, _, digester) = setup(); + + let common_dir_name = "tower"; + let common_dir = PathBuf::from(common_dir_name); + + let dir = make_dir_stat(tempdir.path(), &common_dir); + let file1 = make_file_stat( + tempdir.path(), + &common_dir.join("roland"), + STR.as_bytes(), + false, + ); + let file2 = make_file_stat( + tempdir.path(), + &common_dir.join("susannah"), + STR.as_bytes(), + true, + ); + + let snapshot1 = Snapshot::from_path_stats(digester.clone(), vec![dir.clone(), file1.clone()]) + .await + .unwrap(); + + let snapshot2 = Snapshot::from_path_stats(digester, vec![dir.clone(), file2.clone()]) + .await + .unwrap(); + + let merged = store + .merge(vec![snapshot1.into(), snapshot2.into()]) + .await + .unwrap(); + store + .ensure_directory_digest_persisted(merged.clone()) + .await + .unwrap(); + let merged_root_directory = 
store.load_directory(merged.as_digest()).await.unwrap(); + + assert_eq!(merged_root_directory.files.len(), 0); + assert_eq!(merged_root_directory.directories.len(), 1); + + let merged_child_dirnode = merged_root_directory.directories[0].clone(); + let merged_child_dirnode_digest: Result = merged_child_dirnode + .digest + .map(|d| d.try_into()) + .unwrap_or(Ok(EMPTY_DIGEST)); + let merged_child_directory = store + .load_directory(merged_child_dirnode_digest.unwrap()) + .await + .unwrap(); + + assert_eq!(merged_child_dirnode.name, common_dir_name); + assert_eq!( + merged_child_directory + .files + .iter() + .map(|filenode| filenode.name.clone()) + .collect::>(), + vec!["roland".to_string(), "susannah".to_string()], + ); } #[tokio::test] async fn snapshot_merge_same_file() { - let (store, tempdir, _, digester) = setup(); - - let file = make_file_stat( - tempdir.path(), - &PathBuf::from("roland"), - STR.as_bytes(), - false, - ); - - // When the file is the exact same, merging should succeed. - let snapshot1 = Snapshot::from_path_stats(digester.clone(), vec![file.clone()]) - .await - .unwrap(); - let snapshot1_cloned = Snapshot::from_path_stats(digester.clone(), vec![file]) - .await - .unwrap(); - - let merged_res = store - .merge(vec![snapshot1.clone().into(), snapshot1_cloned.into()]) - .await; - - assert_eq!(merged_res, Ok(snapshot1.into())); + let (store, tempdir, _, digester) = setup(); + + let file = make_file_stat( + tempdir.path(), + &PathBuf::from("roland"), + STR.as_bytes(), + false, + ); + + // When the file is the exact same, merging should succeed. + let snapshot1 = Snapshot::from_path_stats(digester.clone(), vec![file.clone()]) + .await + .unwrap(); + let snapshot1_cloned = Snapshot::from_path_stats(digester.clone(), vec![file]) + .await + .unwrap(); + + let merged_res = store + .merge(vec![snapshot1.clone().into(), snapshot1_cloned.into()]) + .await; + + assert_eq!(merged_res, Ok(snapshot1.into())); } #[tokio::test] async fn snapshot_merge_colliding() { - let (store, tempdir, posix_fs, digester) = setup(); - - make_file(&tempdir.path().join("roland"), STR.as_bytes(), 0o600); - let path_stats1 = expand_all_sorted(posix_fs).await; - let snapshot1 = Snapshot::from_path_stats(digester.clone(), path_stats1) - .await - .unwrap(); - - // When the file is *not* the same, error out. - let (_store2, tempdir2, posix_fs2, digester2) = setup(); - make_file(&tempdir2.path().join("roland"), STR2.as_bytes(), 0o600); - let path_stats2 = expand_all_sorted(posix_fs2).await; - let snapshot2 = Snapshot::from_path_stats(digester2, path_stats2) - .await - .unwrap(); - - let merged_res = store.merge(vec![snapshot1.into(), snapshot2.into()]).await; - - match merged_res { - Err(ref msg) - if format!("{msg:?}").contains("found 2 duplicate entries") - && format!("{msg:?}").contains("roland") => {} - x => panic!("Snapshot::merge should have failed with a useful message; got: {x:?}"), - } + let (store, tempdir, posix_fs, digester) = setup(); + + make_file(&tempdir.path().join("roland"), STR.as_bytes(), 0o600); + let path_stats1 = expand_all_sorted(posix_fs).await; + let snapshot1 = Snapshot::from_path_stats(digester.clone(), path_stats1) + .await + .unwrap(); + + // When the file is *not* the same, error out. 
+ let (_store2, tempdir2, posix_fs2, digester2) = setup(); + make_file(&tempdir2.path().join("roland"), STR2.as_bytes(), 0o600); + let path_stats2 = expand_all_sorted(posix_fs2).await; + let snapshot2 = Snapshot::from_path_stats(digester2, path_stats2) + .await + .unwrap(); + + let merged_res = store.merge(vec![snapshot1.into(), snapshot2.into()]).await; + + match merged_res { + Err(ref msg) + if format!("{msg:?}").contains("found 2 duplicate entries") + && format!("{msg:?}").contains("roland") => {} + x => panic!("Snapshot::merge should have failed with a useful message; got: {x:?}"), + } } #[tokio::test] async fn strip_empty_and_non_empty_prefix() { - let (store, _, _, _) = setup(); - - let dir = TestDirectory::nested(); - store - .record_directory(&dir.directory(), false) - .await - .expect("Error storing directory"); - store - .record_directory(&TestDirectory::containing_roland().directory(), false) - .await - .expect("Error storing directory"); - - // Empty. - let prefix = RelativePath::new(PathBuf::from("")).unwrap(); - let result = store.strip_prefix(dir.directory_digest(), &prefix).await; - assert_eq!(result, Ok(dir.directory_digest())); - - // Non-empty. - let prefix = RelativePath::new(PathBuf::from("cats")).unwrap(); - let result = store.strip_prefix(dir.directory_digest(), &prefix).await; - assert_eq!( - result, - Ok(TestDirectory::containing_roland().directory_digest()) - ); + let (store, _, _, _) = setup(); + + let dir = TestDirectory::nested(); + store + .record_directory(&dir.directory(), false) + .await + .expect("Error storing directory"); + store + .record_directory(&TestDirectory::containing_roland().directory(), false) + .await + .expect("Error storing directory"); + + // Empty. + let prefix = RelativePath::new(PathBuf::from("")).unwrap(); + let result = store.strip_prefix(dir.directory_digest(), &prefix).await; + assert_eq!(result, Ok(dir.directory_digest())); + + // Non-empty. + let prefix = RelativePath::new(PathBuf::from("cats")).unwrap(); + let result = store.strip_prefix(dir.directory_digest(), &prefix).await; + assert_eq!( + result, + Ok(TestDirectory::containing_roland().directory_digest()) + ); } #[tokio::test] async fn strip_prefix_empty_subdir() { - let (store, _, _, _) = setup(); + let (store, _, _, _) = setup(); - let dir = TestDirectory::containing_falcons_dir(); - store - .record_directory(&dir.directory(), false) - .await - .expect("Error storing directory"); + let dir = TestDirectory::containing_falcons_dir(); + store + .record_directory(&dir.directory(), false) + .await + .expect("Error storing directory"); - let prefix = RelativePath::new(PathBuf::from("falcons/peregrine")).unwrap(); - let result = store.strip_prefix(dir.directory_digest(), &prefix).await; - assert_eq!(result, Ok(TestDirectory::empty().directory_digest())); + let prefix = RelativePath::new(PathBuf::from("falcons/peregrine")).unwrap(); + let result = store.strip_prefix(dir.directory_digest(), &prefix).await; + assert_eq!(result, Ok(TestDirectory::empty().directory_digest())); } #[tokio::test] async fn strip_dir_not_in_store() { - let (store, _, _, _) = setup(); - let digest = TestDirectory::nested().directory_digest(); - let prefix = RelativePath::new(PathBuf::from("cats")).unwrap(); - let result = store.strip_prefix(digest.clone(), &prefix).await; - assert!(matches!(result, Err(StoreError::MissingDigest { .. 
})),); + let (store, _, _, _) = setup(); + let digest = TestDirectory::nested().directory_digest(); + let prefix = RelativePath::new(PathBuf::from("cats")).unwrap(); + let result = store.strip_prefix(digest.clone(), &prefix).await; + assert!(matches!(result, Err(StoreError::MissingDigest { .. })),); } #[tokio::test] async fn strip_prefix_non_matching_file() { - let (store, _, _, _) = setup(); - let dir = TestDirectory::recursive(); - let child_dir = TestDirectory::containing_roland(); - store - .record_directory(&dir.directory(), false) - .await - .expect("Error storing directory"); - store - .record_directory(&child_dir.directory(), false) - .await - .expect("Error storing directory"); - let prefix = RelativePath::new(PathBuf::from("cats")).unwrap(); - let result = store.strip_prefix(dir.directory_digest(), &prefix).await; - - assert_eq!( - result, - Err( - format!( - "Cannot strip prefix cats from root directory (Digest with hash {:?}) - \ + let (store, _, _, _) = setup(); + let dir = TestDirectory::recursive(); + let child_dir = TestDirectory::containing_roland(); + store + .record_directory(&dir.directory(), false) + .await + .expect("Error storing directory"); + store + .record_directory(&child_dir.directory(), false) + .await + .expect("Error storing directory"); + let prefix = RelativePath::new(PathBuf::from("cats")).unwrap(); + let result = store.strip_prefix(dir.directory_digest(), &prefix).await; + + assert_eq!( + result, + Err(format!( + "Cannot strip prefix cats from root directory (Digest with hash {:?}) - \ root directory contained non-matching file named: treats.ext", - dir.digest().hash - ) - .into() - ) - ); + dir.digest().hash + ) + .into()) + ); } #[tokio::test] async fn strip_prefix_non_matching_dir() { - let (store, _, _, _) = setup(); - let dir = TestDirectory::double_nested_dir_and_file(); - store - .record_directory(&dir.directory(), false) - .await - .expect("Error storing directory"); - store - .record_directory(&TestDirectory::nested_dir_and_file().directory(), false) - .await - .expect("Error storing directory"); - store - .record_directory(&TestDirectory::containing_falcons_dir().directory(), false) - .await - .expect("Error storing directory"); - store - .record_directory(&TestDirectory::containing_roland().directory(), false) - .await - .expect("Error storing directory"); - let prefix = RelativePath::new(PathBuf::from("animals/cats")).unwrap(); - let result = store.strip_prefix(dir.directory_digest(), &prefix).await; - - assert_eq!( - result, - Err( - format!( - "Cannot strip prefix animals/cats from root directory (Digest with hash {:?}) - \ + let (store, _, _, _) = setup(); + let dir = TestDirectory::double_nested_dir_and_file(); + store + .record_directory(&dir.directory(), false) + .await + .expect("Error storing directory"); + store + .record_directory(&TestDirectory::nested_dir_and_file().directory(), false) + .await + .expect("Error storing directory"); + store + .record_directory(&TestDirectory::containing_falcons_dir().directory(), false) + .await + .expect("Error storing directory"); + store + .record_directory(&TestDirectory::containing_roland().directory(), false) + .await + .expect("Error storing directory"); + let prefix = RelativePath::new(PathBuf::from("animals/cats")).unwrap(); + let result = store.strip_prefix(dir.directory_digest(), &prefix).await; + + assert_eq!( + result, + Err(format!( + "Cannot strip prefix animals/cats from root directory (Digest with hash {:?}) - \ subdirectory animals contained non-matching directory named: birds", - 
dir.digest().hash - ) - .into() - ) - ); + dir.digest().hash + ) + .into()) + ); } #[tokio::test] async fn strip_subdir_not_in_dir() { - let (store, _, _, _) = setup(); - let dir = TestDirectory::nested(); - store - .record_directory(&dir.directory(), false) - .await - .expect("Error storing directory"); - store - .record_directory(&TestDirectory::containing_roland().directory(), false) - .await - .expect("Error storing directory"); - let prefix = RelativePath::new(PathBuf::from("cats/ugly")).unwrap(); - let result = store.strip_prefix(dir.directory_digest(), &prefix).await; - assert_eq!( - result, - Err( - format!( - "Cannot strip prefix cats/ugly from root directory (Digest with hash {:?}) - \ + let (store, _, _, _) = setup(); + let dir = TestDirectory::nested(); + store + .record_directory(&dir.directory(), false) + .await + .expect("Error storing directory"); + store + .record_directory(&TestDirectory::containing_roland().directory(), false) + .await + .expect("Error storing directory"); + let prefix = RelativePath::new(PathBuf::from("cats/ugly")).unwrap(); + let result = store.strip_prefix(dir.directory_digest(), &prefix).await; + assert_eq!( + result, + Err(format!( + "Cannot strip prefix cats/ugly from root directory (Digest with hash {:?}) - \ subdirectory cats didn't contain a directory named ugly \ but did contain file named: roland.ext", - dir.digest().hash - ) - .into() - ) - ); + dir.digest().hash + ) + .into()) + ); } fn make_dir_stat(root: &Path, relpath: &Path) -> PathStat { - std::fs::create_dir(root.join(relpath)).unwrap(); - PathStat::dir(relpath.to_owned(), Dir(relpath.to_owned())) + std::fs::create_dir(root.join(relpath)).unwrap(); + PathStat::dir(relpath.to_owned(), Dir(relpath.to_owned())) } fn make_file_stat(root: &Path, relpath: &Path, contents: &[u8], is_executable: bool) -> PathStat { - make_file( - &root.join(relpath), - contents, - if is_executable { 0o555 } else { 0o444 }, - ); - PathStat::file( - relpath.to_owned(), - File { - path: relpath.to_owned(), - is_executable, - }, - ) + make_file( + &root.join(relpath), + contents, + if is_executable { 0o555 } else { 0o444 }, + ); + PathStat::file( + relpath.to_owned(), + File { + path: relpath.to_owned(), + is_executable, + }, + ) } pub async fn expand_all_sorted(posix_fs: Arc) -> Vec { - let path_globs = PathGlobs::new( - vec!["**".to_owned()], - // Don't error or warn if there are no paths matched -- that is a valid state. - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AllMatch, - ) - .parse() - .unwrap(); - let mut v = posix_fs - .expand_globs(path_globs, SymlinkBehavior::Aware, None) - .await + let path_globs = PathGlobs::new( + vec!["**".to_owned()], + // Don't error or warn if there are no paths matched -- that is a valid state. 
+ StrictGlobMatching::Ignore, + GlobExpansionConjunction::AllMatch, + ) + .parse() .unwrap(); - v.sort_by(|a, b| a.path().cmp(b.path())); - v + let mut v = posix_fs + .expand_globs(path_globs, SymlinkBehavior::Aware, None) + .await + .unwrap(); + v.sort_by(|a, b| a.path().cmp(b.path())); + v } diff --git a/src/rust/engine/fs/store/src/tests.rs b/src/rust/engine/fs/store/src/tests.rs index 0b25c842082..15e5d5d5c71 100644 --- a/src/rust/engine/fs/store/src/tests.rs +++ b/src/rust/engine/fs/store/src/tests.rs @@ -10,8 +10,8 @@ use testutil::data::{TestData, TestDirectory}; use bytes::Bytes; use fs::{ - DigestEntry, DirectoryDigest, FileEntry, Link, PathStat, Permissions, RelativePath, - EMPTY_DIRECTORY_DIGEST, + DigestEntry, DirectoryDigest, FileEntry, Link, PathStat, Permissions, RelativePath, + EMPTY_DIRECTORY_DIGEST, }; use grpc_util::prost::MessageExt; use grpc_util::tls; @@ -22,1736 +22,1736 @@ use workunit_store::WorkunitStore; use crate::local::ByteStore; use crate::{ - EntryType, FileContent, RemoteOptions, Snapshot, Store, StoreError, StoreFileByDigest, - UploadSummary, MEGABYTES, + EntryType, FileContent, RemoteOptions, Snapshot, Store, StoreError, StoreFileByDigest, + UploadSummary, MEGABYTES, }; pub(crate) const STORE_BATCH_API_SIZE_LIMIT: usize = 4 * 1024 * 1024; pub async fn load_file_bytes(store: &Store, digest: Digest) -> Result { - store - .load_file_bytes_with(digest, Bytes::copy_from_slice) - .await + store + .load_file_bytes_with(digest, Bytes::copy_from_slice) + .await } /// /// Create a StubCas with a file and a directory inside. /// pub fn new_cas(chunk_size_bytes: usize) -> StubCAS { - let _ = WorkunitStore::setup_for_tests(); - StubCAS::builder() - .chunk_size_bytes(chunk_size_bytes) - .file(&TestData::roland()) - .directory(&TestDirectory::containing_roland()) - .build() + let _ = WorkunitStore::setup_for_tests(); + StubCAS::builder() + .chunk_size_bytes(chunk_size_bytes) + .file(&TestData::roland()) + .directory(&TestDirectory::containing_roland()) + .build() } pub fn new_empty_cas() -> StubCAS { - let _ = WorkunitStore::setup_for_tests(); - StubCAS::empty() + let _ = WorkunitStore::setup_for_tests(); + StubCAS::empty() } /// /// Create a new local store with whatever was already serialized in dir. /// fn new_local_store>(dir: P) -> Store { - Store::local_only(task_executor::Executor::new(), dir).expect("Error creating local store") + Store::local_only(task_executor::Executor::new(), dir).expect("Error creating local store") } fn remote_options( - cas_address: String, - instance_name: Option, - headers: BTreeMap, + cas_address: String, + instance_name: Option, + headers: BTreeMap, ) -> RemoteOptions { - RemoteOptions { - cas_address, - instance_name, - tls_config: tls::Config::default(), - headers, - chunk_size_bytes: 10 * MEGABYTES, - rpc_timeout: Duration::from_secs(1), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit: STORE_BATCH_API_SIZE_LIMIT, - } + RemoteOptions { + cas_address, + instance_name, + tls_config: tls::Config::default(), + headers, + chunk_size_bytes: 10 * MEGABYTES, + rpc_timeout: Duration::from_secs(1), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit: STORE_BATCH_API_SIZE_LIMIT, + } } /// /// Create a new store with a remote CAS. 
/// async fn new_store>(dir: P, cas_address: &str) -> Store { - Store::local_only(task_executor::Executor::new(), dir) - .unwrap() - .into_with_remote(remote_options( - cas_address.to_owned(), - None, - BTreeMap::new(), - )) - .await - .unwrap() + Store::local_only(task_executor::Executor::new(), dir) + .unwrap() + .into_with_remote(remote_options( + cas_address.to_owned(), + None, + BTreeMap::new(), + )) + .await + .unwrap() } #[tokio::test] async fn load_file_prefers_local() { - let dir = TempDir::new().unwrap(); - - let testdata = TestData::roland(); - - crate::local_tests::new_store(dir.path()) - .store_bytes( - EntryType::File, - testdata.fingerprint(), - testdata.bytes(), - false, - ) - .await - .expect("Store failed"); - - let cas = new_cas(1024); - assert_eq!( - load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - testdata.digest() - ) - .await, - Ok(testdata.bytes()) - ); - assert_eq!(0, cas.request_count(RequestType::BSRead)); + let dir = TempDir::new().unwrap(); + + let testdata = TestData::roland(); + + crate::local_tests::new_store(dir.path()) + .store_bytes( + EntryType::File, + testdata.fingerprint(), + testdata.bytes(), + false, + ) + .await + .expect("Store failed"); + + let cas = new_cas(1024); + assert_eq!( + load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + testdata.digest() + ) + .await, + Ok(testdata.bytes()) + ); + assert_eq!(0, cas.request_count(RequestType::BSRead)); } #[tokio::test] async fn load_directory_prefers_local() { - let dir = TempDir::new().unwrap(); - - let testdir = TestDirectory::containing_roland(); - - crate::local_tests::new_store(dir.path()) - .store_bytes( - EntryType::Directory, - testdir.fingerprint(), - testdir.bytes(), - false, - ) - .await - .expect("Store failed"); - - let cas = new_cas(1024); - assert_eq!( - new_store(dir.path(), &cas.address()) - .await - .load_directory(testdir.digest(),) - .await - .unwrap(), - testdir.directory() - ); - assert_eq!(0, cas.request_count(RequestType::BSRead)); + let dir = TempDir::new().unwrap(); + + let testdir = TestDirectory::containing_roland(); + + crate::local_tests::new_store(dir.path()) + .store_bytes( + EntryType::Directory, + testdir.fingerprint(), + testdir.bytes(), + false, + ) + .await + .expect("Store failed"); + + let cas = new_cas(1024); + assert_eq!( + new_store(dir.path(), &cas.address()) + .await + .load_directory(testdir.digest(),) + .await + .unwrap(), + testdir.directory() + ); + assert_eq!(0, cas.request_count(RequestType::BSRead)); } #[tokio::test] async fn load_file_falls_back_and_backfills() { - let dir = TempDir::new().unwrap(); - - let testdata = TestData::roland(); - - let cas = new_cas(1024); - assert_eq!( - load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - testdata.digest() - ) - .await, - Ok(testdata.bytes()), - "Read from CAS" - ); - assert_eq!(1, cas.request_count(RequestType::BSRead)); - assert_eq!( - crate::local_tests::load_file_bytes( - &crate::local_tests::new_store(dir.path()), - testdata.digest(), - ) - .await, - Ok(Some(testdata.bytes())), - "Read from local cache" - ); + let dir = TempDir::new().unwrap(); + + let testdata = TestData::roland(); + + let cas = new_cas(1024); + assert_eq!( + load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + testdata.digest() + ) + .await, + Ok(testdata.bytes()), + "Read from CAS" + ); + assert_eq!(1, cas.request_count(RequestType::BSRead)); + assert_eq!( + crate::local_tests::load_file_bytes( + &crate::local_tests::new_store(dir.path()), + testdata.digest(), + ) + 
.await, + Ok(Some(testdata.bytes())), + "Read from local cache" + ); } #[tokio::test] async fn load_file_falls_back_and_backfills_for_huge_file() { - let dir = TempDir::new().unwrap(); - - // 5MB of data - let testdata = TestData::new(&"12345".repeat(MEGABYTES)); - - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .chunk_size_bytes(MEGABYTES) - .file(&testdata) - .build(); - - assert_eq!( - load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - testdata.digest() - ) - .await - .unwrap(), - testdata.bytes() - ); - assert_eq!(1, cas.request_count(RequestType::BSRead)); - assert!( - crate::local_tests::load_file_bytes( - &crate::local_tests::new_store(dir.path()), - testdata.digest(), - ) - .await - == Ok(Some(testdata.bytes())), - "Read from local cache" - ); + let dir = TempDir::new().unwrap(); + + // 5MB of data + let testdata = TestData::new(&"12345".repeat(MEGABYTES)); + + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .chunk_size_bytes(MEGABYTES) + .file(&testdata) + .build(); + + assert_eq!( + load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + testdata.digest() + ) + .await + .unwrap(), + testdata.bytes() + ); + assert_eq!(1, cas.request_count(RequestType::BSRead)); + assert!( + crate::local_tests::load_file_bytes( + &crate::local_tests::new_store(dir.path()), + testdata.digest(), + ) + .await + == Ok(Some(testdata.bytes())), + "Read from local cache" + ); } #[tokio::test] async fn load_directory_small_falls_back_and_backfills() { - let dir = TempDir::new().unwrap(); - - let cas = new_cas(1024); - - let testdir = TestDirectory::containing_roland(); - - assert_eq!( - new_store(dir.path(), &cas.address()) - .await - .load_directory(testdir.digest(),) - .await - .unwrap(), - testdir.directory() - ); - assert_eq!(1, cas.request_count(RequestType::BSRead)); - assert_eq!( - crate::local_tests::load_directory_proto_bytes( - &crate::local_tests::new_store(dir.path()), - testdir.digest(), - ) - .await, - Ok(Some(testdir.bytes())) - ); + let dir = TempDir::new().unwrap(); + + let cas = new_cas(1024); + + let testdir = TestDirectory::containing_roland(); + + assert_eq!( + new_store(dir.path(), &cas.address()) + .await + .load_directory(testdir.digest(),) + .await + .unwrap(), + testdir.directory() + ); + assert_eq!(1, cas.request_count(RequestType::BSRead)); + assert_eq!( + crate::local_tests::load_directory_proto_bytes( + &crate::local_tests::new_store(dir.path()), + testdir.digest(), + ) + .await, + Ok(Some(testdir.bytes())) + ); } #[tokio::test] async fn load_directory_huge_falls_back_and_backfills() { - let dir = TempDir::new().unwrap(); - - let testdir = TestDirectory::many_files(); - let digest = testdir.digest(); - // this test is ensuring that "huge" directories don't fall into the FSDB code paths, so let's - // ensure we're actually testing that, by validating that a _file_ of this size would use FSDB - assert!(ByteStore::should_use_fsdb( - EntryType::File, - digest.size_bytes - )); - - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .directory(&testdir) - .file(&TestData::empty()) - .build(); - - assert_eq!( - new_store(dir.path(), &cas.address()) - .await - .load_directory(digest) - .await - .unwrap(), - testdir.directory() - ); - assert_eq!(1, cas.request_count(RequestType::BSRead)); - assert_eq!( - crate::local_tests::load_directory_proto_bytes( - &crate::local_tests::new_store(dir.path()), - testdir.digest(), - ) - .await, - Ok(Some(testdir.bytes())) - ); + let dir = 
TempDir::new().unwrap(); + + let testdir = TestDirectory::many_files(); + let digest = testdir.digest(); + // this test is ensuring that "huge" directories don't fall into the FSDB code paths, so let's + // ensure we're actually testing that, by validating that a _file_ of this size would use FSDB + assert!(ByteStore::should_use_fsdb( + EntryType::File, + digest.size_bytes + )); + + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .directory(&testdir) + .file(&TestData::empty()) + .build(); + + assert_eq!( + new_store(dir.path(), &cas.address()) + .await + .load_directory(digest) + .await + .unwrap(), + testdir.directory() + ); + assert_eq!(1, cas.request_count(RequestType::BSRead)); + assert_eq!( + crate::local_tests::load_directory_proto_bytes( + &crate::local_tests::new_store(dir.path()), + testdir.digest(), + ) + .await, + Ok(Some(testdir.bytes())) + ); } #[tokio::test] async fn load_recursive_directory() { - let dir = TempDir::new().unwrap(); - - let roland = TestData::roland(); - let catnip = TestData::catnip(); - let testdir = TestDirectory::containing_roland(); - let testdir_digest = testdir.digest(); - let testdir_directory = testdir.directory(); - let recursive_testdir = TestDirectory::recursive(); - let recursive_testdir_directory = recursive_testdir.directory(); - let recursive_testdir_digest = recursive_testdir.directory_digest(); - - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .file(&roland) - .file(&catnip) - .directory(&testdir) - .directory(&recursive_testdir) - .build(); - - new_store(dir.path(), &cas.address()) - .await - .ensure_downloaded( - HashSet::new(), - HashSet::from([recursive_testdir_digest.clone()]), - ) - .await - .expect("Downloading recursive directory should have succeeded."); - - assert_eq!( - load_file_bytes(&new_local_store(dir.path()), roland.digest()).await, - Ok(roland.bytes()) - ); - assert_eq!( - load_file_bytes(&new_local_store(dir.path()), catnip.digest()).await, - Ok(catnip.bytes()) - ); - assert_eq!( - new_local_store(dir.path()) - .load_directory(testdir_digest,) - .await - .unwrap(), - testdir_directory - ); - assert_eq!( - new_local_store(dir.path()) - .load_directory(recursive_testdir_digest.as_digest()) - .await - .unwrap(), - recursive_testdir_directory - ); + let dir = TempDir::new().unwrap(); + + let roland = TestData::roland(); + let catnip = TestData::catnip(); + let testdir = TestDirectory::containing_roland(); + let testdir_digest = testdir.digest(); + let testdir_directory = testdir.directory(); + let recursive_testdir = TestDirectory::recursive(); + let recursive_testdir_directory = recursive_testdir.directory(); + let recursive_testdir_digest = recursive_testdir.directory_digest(); + + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .file(&roland) + .file(&catnip) + .directory(&testdir) + .directory(&recursive_testdir) + .build(); + + new_store(dir.path(), &cas.address()) + .await + .ensure_downloaded( + HashSet::new(), + HashSet::from([recursive_testdir_digest.clone()]), + ) + .await + .expect("Downloading recursive directory should have succeeded."); + + assert_eq!( + load_file_bytes(&new_local_store(dir.path()), roland.digest()).await, + Ok(roland.bytes()) + ); + assert_eq!( + load_file_bytes(&new_local_store(dir.path()), catnip.digest()).await, + Ok(catnip.bytes()) + ); + assert_eq!( + new_local_store(dir.path()) + .load_directory(testdir_digest,) + .await + .unwrap(), + testdir_directory + ); + assert_eq!( + new_local_store(dir.path()) + 
.load_directory(recursive_testdir_digest.as_digest()) + .await + .unwrap(), + recursive_testdir_directory + ); } #[tokio::test] async fn load_file_missing_is_none() { - let dir = TempDir::new().unwrap(); - - let cas = new_empty_cas(); - let result = load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - TestData::roland().digest(), - ) - .await; - assert!(matches!(result, Err(StoreError::MissingDigest { .. })),); - assert_eq!(1, cas.request_count(RequestType::BSRead)); + let dir = TempDir::new().unwrap(); + + let cas = new_empty_cas(); + let result = load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + TestData::roland().digest(), + ) + .await; + assert!(matches!(result, Err(StoreError::MissingDigest { .. })),); + assert_eq!(1, cas.request_count(RequestType::BSRead)); } #[tokio::test] async fn load_directory_missing_errors() { - let dir = TempDir::new().unwrap(); - - let cas = new_empty_cas(); - let result = new_store(dir.path(), &cas.address()) - .await - .load_directory(TestDirectory::containing_roland().digest()) - .await; - assert!(matches!(result, Err(StoreError::MissingDigest { .. })),); - assert_eq!(1, cas.request_count(RequestType::BSRead)); + let dir = TempDir::new().unwrap(); + + let cas = new_empty_cas(); + let result = new_store(dir.path(), &cas.address()) + .await + .load_directory(TestDirectory::containing_roland().digest()) + .await; + assert!(matches!(result, Err(StoreError::MissingDigest { .. })),); + assert_eq!(1, cas.request_count(RequestType::BSRead)); } #[tokio::test] async fn load_file_remote_error_is_error() { - let dir = TempDir::new().unwrap(); - - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::cas_always_errors(); - let error = load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - TestData::roland().digest(), - ) - .await - .expect_err("Want error"); - assert!( - cas.request_count(RequestType::BSRead) > 0, - "Want read_request_count > 0 but got {}", - cas.request_count(RequestType::BSRead) - ); - assert!( - error - .to_string() - .contains("StubCAS is configured to always fail"), - "Bad error message" - ); + let dir = TempDir::new().unwrap(); + + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::cas_always_errors(); + let error = load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + TestData::roland().digest(), + ) + .await + .expect_err("Want error"); + assert!( + cas.request_count(RequestType::BSRead) > 0, + "Want read_request_count > 0 but got {}", + cas.request_count(RequestType::BSRead) + ); + assert!( + error + .to_string() + .contains("StubCAS is configured to always fail"), + "Bad error message" + ); } #[tokio::test] async fn load_directory_remote_error_is_error() { - let dir = TempDir::new().unwrap(); - - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::cas_always_errors(); - let error = new_store(dir.path(), &cas.address()) - .await - .load_directory(TestData::roland().digest()) - .await - .expect_err("Want error"); - assert!( - cas.request_count(RequestType::BSRead) > 0, - "Want read_request_count > 0 but got {}", - cas.request_count(RequestType::BSRead) - ); - assert!( - error - .to_string() - .contains("StubCAS is configured to always fail"), - "Bad error message" - ); + let dir = TempDir::new().unwrap(); + + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::cas_always_errors(); + let error = new_store(dir.path(), &cas.address()) + .await + .load_directory(TestData::roland().digest()) + .await + .expect_err("Want error"); + assert!( + 
cas.request_count(RequestType::BSRead) > 0, + "Want read_request_count > 0 but got {}", + cas.request_count(RequestType::BSRead) + ); + assert!( + error + .to_string() + .contains("StubCAS is configured to always fail"), + "Bad error message" + ); } #[tokio::test] async fn roundtrip_symlink() { - let _ = WorkunitStore::setup_for_tests(); - let dir = TempDir::new().unwrap(); - - #[derive(Clone)] - struct NoopDigester; - - impl StoreFileByDigest for NoopDigester { - fn store_by_digest( - &self, - _: fs::File, - ) -> futures::future::BoxFuture<'static, Result> { - unimplemented!(); + let _ = WorkunitStore::setup_for_tests(); + let dir = TempDir::new().unwrap(); + + #[derive(Clone)] + struct NoopDigester; + + impl StoreFileByDigest for NoopDigester { + fn store_by_digest( + &self, + _: fs::File, + ) -> futures::future::BoxFuture<'static, Result> { + unimplemented!(); + } } - } - - let input_digest: DirectoryDigest = Snapshot::from_path_stats( - NoopDigester, - vec![PathStat::link( - "x".into(), - Link { - path: "x".into(), - target: "y".into(), - }, - )], - ) - .await - .unwrap() - .into(); - - let store = new_local_store(dir.path()); - - store - .ensure_directory_digest_persisted(input_digest.clone()) + + let input_digest: DirectoryDigest = Snapshot::from_path_stats( + NoopDigester, + vec![PathStat::link( + "x".into(), + Link { + path: "x".into(), + target: "y".into(), + }, + )], + ) .await - .unwrap(); + .unwrap() + .into(); - // Discard the DigestTrie to force it to be reloaded from disk. - let digest = DirectoryDigest::from_persisted_digest(input_digest.as_digest()); - assert!(digest.tree.is_none()); + let store = new_local_store(dir.path()); - let output_digest: DirectoryDigest = store.load_digest_trie(digest).await.unwrap().into(); - assert_eq!(input_digest.as_digest(), output_digest.as_digest()); + store + .ensure_directory_digest_persisted(input_digest.clone()) + .await + .unwrap(); + + // Discard the DigestTrie to force it to be reloaded from disk. 
+ let digest = DirectoryDigest::from_persisted_digest(input_digest.as_digest()); + assert!(digest.tree.is_none()); + + let output_digest: DirectoryDigest = store.load_digest_trie(digest).await.unwrap().into(); + assert_eq!(input_digest.as_digest(), output_digest.as_digest()); } #[tokio::test] async fn malformed_remote_directory_is_error() { - let dir = TempDir::new().unwrap(); + let dir = TempDir::new().unwrap(); - let testdata = TestData::roland(); + let testdata = TestData::roland(); - let cas = new_cas(1024); - new_store(dir.path(), &cas.address()) - .await - .load_directory(testdata.digest()) - .await - .expect_err("Want error"); - - assert_eq!( - crate::local_tests::load_directory_proto_bytes( - &crate::local_tests::new_store(dir.path()), - testdata.digest() - ) - .await, - Ok(None) - ); + let cas = new_cas(1024); + new_store(dir.path(), &cas.address()) + .await + .load_directory(testdata.digest()) + .await + .expect_err("Want error"); + + assert_eq!( + crate::local_tests::load_directory_proto_bytes( + &crate::local_tests::new_store(dir.path()), + testdata.digest() + ) + .await, + Ok(None) + ); } #[tokio::test] async fn non_canonical_remote_directory_is_error() { - let mut non_canonical_directory = TestDirectory::containing_roland().directory(); - non_canonical_directory.files.push({ - remexec::FileNode { - name: "roland".to_string(), - digest: Some((&TestData::roland().digest()).into()), - ..Default::default() - } - }); - let non_canonical_directory_bytes = non_canonical_directory.to_bytes(); - let directory_digest = Digest::of_bytes(&non_canonical_directory_bytes); - let non_canonical_directory_fingerprint = directory_digest.hash; - - let dir = TempDir::new().unwrap(); - - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .unverified_content( - non_canonical_directory_fingerprint, - non_canonical_directory_bytes, - ) - .build(); - new_store(dir.path(), &cas.address()) - .await - .load_directory(directory_digest) - .await - .expect_err("Want error"); - - assert_eq!( - crate::local_tests::load_directory_proto_bytes( - &crate::local_tests::new_store(dir.path()), - directory_digest, - ) - .await, - Ok(None) - ); + let mut non_canonical_directory = TestDirectory::containing_roland().directory(); + non_canonical_directory.files.push({ + remexec::FileNode { + name: "roland".to_string(), + digest: Some((&TestData::roland().digest()).into()), + ..Default::default() + } + }); + let non_canonical_directory_bytes = non_canonical_directory.to_bytes(); + let directory_digest = Digest::of_bytes(&non_canonical_directory_bytes); + let non_canonical_directory_fingerprint = directory_digest.hash; + + let dir = TempDir::new().unwrap(); + + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .unverified_content( + non_canonical_directory_fingerprint, + non_canonical_directory_bytes, + ) + .build(); + new_store(dir.path(), &cas.address()) + .await + .load_directory(directory_digest) + .await + .expect_err("Want error"); + + assert_eq!( + crate::local_tests::load_directory_proto_bytes( + &crate::local_tests::new_store(dir.path()), + directory_digest, + ) + .await, + Ok(None) + ); } #[tokio::test] async fn wrong_remote_file_bytes_is_error() { - let dir = TempDir::new().unwrap(); + let dir = TempDir::new().unwrap(); - let testdata = TestData::roland(); + let testdata = TestData::roland(); - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .unverified_content( - testdata.fingerprint(), - TestDirectory::containing_roland().bytes(), - ) 
- .build(); - load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - testdata.digest(), - ) - .await - .expect_err("Want error"); - - assert_eq!( - crate::local_tests::load_file_bytes( - &crate::local_tests::new_store(dir.path()), - testdata.digest() + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .unverified_content( + testdata.fingerprint(), + TestDirectory::containing_roland().bytes(), + ) + .build(); + load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + testdata.digest(), ) - .await, - Ok(None) - ); + .await + .expect_err("Want error"); + + assert_eq!( + crate::local_tests::load_file_bytes( + &crate::local_tests::new_store(dir.path()), + testdata.digest() + ) + .await, + Ok(None) + ); } #[tokio::test] async fn wrong_remote_directory_bytes_is_error() { - let dir = TempDir::new().unwrap(); + let dir = TempDir::new().unwrap(); - let testdir = TestDirectory::containing_dnalor(); + let testdir = TestDirectory::containing_dnalor(); - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .unverified_content( - testdir.fingerprint(), - TestDirectory::containing_roland().bytes(), - ) - .build(); - load_file_bytes( - &new_store(dir.path(), &cas.address()).await, - testdir.digest(), - ) - .await - .expect_err("Want error"); - - assert_eq!( - crate::local_tests::load_file_bytes( - &crate::local_tests::new_store(dir.path()), - testdir.digest() + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .unverified_content( + testdir.fingerprint(), + TestDirectory::containing_roland().bytes(), + ) + .build(); + load_file_bytes( + &new_store(dir.path(), &cas.address()).await, + testdir.digest(), ) - .await, - Ok(None) - ); + .await + .expect_err("Want error"); + + assert_eq!( + crate::local_tests::load_file_bytes( + &crate::local_tests::new_store(dir.path()), + testdir.digest() + ) + .await, + Ok(None) + ); } #[tokio::test] async fn expand_empty_directory() { - let dir = TempDir::new().unwrap(); - - let empty_dir = TestDirectory::empty(); - - let expanded = new_local_store(dir.path()) - .expand_directory(empty_dir.digest()) - .await - .expect("Error expanding directory"); - let want: HashMap = vec![(empty_dir.digest(), EntryType::Directory)] - .into_iter() - .collect(); - assert_eq!(expanded, want); + let dir = TempDir::new().unwrap(); + + let empty_dir = TestDirectory::empty(); + + let expanded = new_local_store(dir.path()) + .expand_directory(empty_dir.digest()) + .await + .expect("Error expanding directory"); + let want: HashMap = vec![(empty_dir.digest(), EntryType::Directory)] + .into_iter() + .collect(); + assert_eq!(expanded, want); } #[tokio::test] async fn expand_flat_directory() { - let dir = TempDir::new().unwrap(); - - let roland = TestData::roland(); - let testdir = TestDirectory::containing_roland(); + let dir = TempDir::new().unwrap(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); + let roland = TestData::roland(); + let testdir = TestDirectory::containing_roland(); - let expanded = new_local_store(dir.path()) - .expand_directory(testdir.digest()) - .await - .expect("Error expanding directory"); - let want: HashMap = vec![ - (testdir.digest(), EntryType::Directory), - (roland.digest(), EntryType::File), - ] - .into_iter() - .collect(); - assert_eq!(expanded, want); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + + let 
expanded = new_local_store(dir.path()) + .expand_directory(testdir.digest()) + .await + .expect("Error expanding directory"); + let want: HashMap = vec![ + (testdir.digest(), EntryType::Directory), + (roland.digest(), EntryType::File), + ] + .into_iter() + .collect(); + assert_eq!(expanded, want); } #[tokio::test] async fn expand_recursive_directory() { - let dir = TempDir::new().unwrap(); + let dir = TempDir::new().unwrap(); - let roland = TestData::roland(); - let catnip = TestData::catnip(); - let testdir = TestDirectory::containing_roland(); - let recursive_testdir = TestDirectory::recursive(); + let roland = TestData::roland(); + let catnip = TestData::catnip(); + let testdir = TestDirectory::containing_roland(); + let recursive_testdir = TestDirectory::recursive(); - new_local_store(dir.path()) - .record_directory(&recursive_testdir.directory(), false) - .await - .expect("Error storing directory locally"); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - - let expanded = new_local_store(dir.path()) - .expand_directory(recursive_testdir.digest()) - .await - .expect("Error expanding directory"); - let want: HashMap = vec![ - (recursive_testdir.digest(), EntryType::Directory), - (testdir.digest(), EntryType::Directory), - (roland.digest(), EntryType::File), - (catnip.digest(), EntryType::File), - ] - .into_iter() - .collect(); - assert_eq!(expanded, want); + new_local_store(dir.path()) + .record_directory(&recursive_testdir.directory(), false) + .await + .expect("Error storing directory locally"); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + + let expanded = new_local_store(dir.path()) + .expand_directory(recursive_testdir.digest()) + .await + .expect("Error expanding directory"); + let want: HashMap = vec![ + (recursive_testdir.digest(), EntryType::Directory), + (testdir.digest(), EntryType::Directory), + (roland.digest(), EntryType::File), + (catnip.digest(), EntryType::File), + ] + .into_iter() + .collect(); + assert_eq!(expanded, want); } #[tokio::test] async fn expand_missing_directory() { - let dir = TempDir::new().unwrap(); - let digest = TestDirectory::containing_roland().digest(); - let error = new_local_store(dir.path()) - .expand_directory(digest) - .await - .expect_err("Want error"); - assert!( - matches!(error, StoreError::MissingDigest { .. }), - "Bad error: {error}" - ); + let dir = TempDir::new().unwrap(); + let digest = TestDirectory::containing_roland().digest(); + let error = new_local_store(dir.path()) + .expand_directory(digest) + .await + .expect_err("Want error"); + assert!( + matches!(error, StoreError::MissingDigest { .. }), + "Bad error: {error}" + ); } #[tokio::test] async fn expand_directory_missing_subdir() { - let dir = TempDir::new().unwrap(); + let dir = TempDir::new().unwrap(); - let recursive_testdir = TestDirectory::recursive(); + let recursive_testdir = TestDirectory::recursive(); - new_local_store(dir.path()) - .record_directory(&recursive_testdir.directory(), false) - .await - .expect("Error storing directory locally"); - - let error = new_local_store(dir.path()) - .expand_directory(recursive_testdir.digest()) - .await - .expect_err("Want error"); - assert!( - matches!(error, StoreError::MissingDigest { .. 
}), - "Bad error message: {error}" - ); + new_local_store(dir.path()) + .record_directory(&recursive_testdir.directory(), false) + .await + .expect("Error storing directory locally"); + + let error = new_local_store(dir.path()) + .expand_directory(recursive_testdir.digest()) + .await + .expect_err("Want error"); + assert!( + matches!(error, StoreError::MissingDigest { .. }), + "Bad error message: {error}" + ); } #[tokio::test] async fn uploads_files() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); - - let testdata = TestData::roland(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - new_local_store(dir.path()) - .store_file_bytes(testdata.bytes(), false) - .await - .expect("Error storing file locally"); + let testdata = TestData::roland(); - assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); + new_local_store(dir.path()) + .store_file_bytes(testdata.bytes(), false) + .await + .expect("Error storing file locally"); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdata.digest()]) - .await - .expect("Error uploading file"); + assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); - assert_eq!( - cas.blobs.lock().get(&testdata.fingerprint()), - Some(&testdata.bytes()) - ); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdata.digest()]) + .await + .expect("Error uploading file"); + + assert_eq!( + cas.blobs.lock().get(&testdata.fingerprint()), + Some(&testdata.bytes()) + ); } #[tokio::test] async fn uploads_directories_recursively() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - let testdata = TestData::roland(); - let testdir = TestDirectory::containing_roland(); + let testdata = TestData::roland(); + let testdir = TestDirectory::containing_roland(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - new_local_store(dir.path()) - .store_file_bytes(testdata.bytes(), false) - .await - .expect("Error storing file locally"); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + new_local_store(dir.path()) + .store_file_bytes(testdata.bytes(), false) + .await + .expect("Error storing file locally"); - assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); - assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); + assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); + assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect("Error uploading directory"); - - assert_eq!( - cas.blobs.lock().get(&testdir.fingerprint()), - Some(&testdir.bytes()) - ); - assert_eq!( - cas.blobs.lock().get(&testdata.fingerprint()), - Some(&testdata.bytes()) - ); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect("Error uploading directory"); + + assert_eq!( + cas.blobs.lock().get(&testdir.fingerprint()), + Some(&testdir.bytes()) + ); + assert_eq!( + cas.blobs.lock().get(&testdata.fingerprint()), + Some(&testdata.bytes()) + ); } #[tokio::test] async fn uploads_files_recursively_when_under_three_digests_ignoring_items_already_in_cas() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); - 
- let testdata = TestData::roland(); - let testdir = TestDirectory::containing_roland(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - new_local_store(dir.path()) - .store_file_bytes(testdata.bytes(), false) - .await - .expect("Error storing file locally"); - - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdata.digest()]) - .await - .expect("Error uploading file"); + let testdata = TestData::roland(); + let testdir = TestDirectory::containing_roland(); - assert_eq!(cas.write_message_sizes.lock().len(), 1); - assert_eq!( - cas.blobs.lock().get(&testdata.fingerprint()), - Some(&testdata.bytes()) - ); - assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + new_local_store(dir.path()) + .store_file_bytes(testdata.bytes(), false) + .await + .expect("Error storing file locally"); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect("Error uploading directory"); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdata.digest()]) + .await + .expect("Error uploading file"); + + assert_eq!(cas.write_message_sizes.lock().len(), 1); + assert_eq!( + cas.blobs.lock().get(&testdata.fingerprint()), + Some(&testdata.bytes()) + ); + assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); - assert_eq!(cas.write_message_sizes.lock().len(), 3); - assert_eq!( - cas.blobs.lock().get(&testdir.fingerprint()), - Some(&testdir.bytes()) - ); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect("Error uploading directory"); + + assert_eq!(cas.write_message_sizes.lock().len(), 3); + assert_eq!( + cas.blobs.lock().get(&testdir.fingerprint()), + Some(&testdir.bytes()) + ); } #[tokio::test] async fn does_not_reupload_file_already_in_cas_when_requested_with_three_other_digests() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - let catnip = TestData::catnip(); - let roland = TestData::roland(); - let testdir = TestDirectory::containing_roland(); + let catnip = TestData::catnip(); + let roland = TestData::roland(); + let testdir = TestDirectory::containing_roland(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - new_local_store(dir.path()) - .store_file_bytes(roland.bytes(), false) - .await - .expect("Error storing file locally"); - new_local_store(dir.path()) - .store_file_bytes(catnip.bytes(), false) - .await - .expect("Error storing file locally"); - - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![roland.digest()]) - .await - .expect("Error uploading big file"); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + new_local_store(dir.path()) + .store_file_bytes(roland.bytes(), false) + .await + .expect("Error storing file locally"); + new_local_store(dir.path()) + .store_file_bytes(catnip.bytes(), false) + .await + .expect("Error storing file locally"); - assert_eq!(cas.write_message_sizes.lock().len(), 1); - 
assert_eq!( - cas.blobs.lock().get(&roland.fingerprint()), - Some(&roland.bytes()) - ); - assert_eq!(cas.blobs.lock().get(&catnip.fingerprint()), None); - assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![roland.digest()]) + .await + .expect("Error uploading big file"); + + assert_eq!(cas.write_message_sizes.lock().len(), 1); + assert_eq!( + cas.blobs.lock().get(&roland.fingerprint()), + Some(&roland.bytes()) + ); + assert_eq!(cas.blobs.lock().get(&catnip.fingerprint()), None); + assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdir.digest(), catnip.digest()]) - .await - .expect("Error uploading directory"); - - assert_eq!(cas.write_message_sizes.lock().len(), 3); - assert_eq!( - cas.blobs.lock().get(&catnip.fingerprint()), - Some(&catnip.bytes()) - ); - assert_eq!( - cas.blobs.lock().get(&testdir.fingerprint()), - Some(&testdir.bytes()) - ); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdir.digest(), catnip.digest()]) + .await + .expect("Error uploading directory"); + + assert_eq!(cas.write_message_sizes.lock().len(), 3); + assert_eq!( + cas.blobs.lock().get(&catnip.fingerprint()), + Some(&catnip.bytes()) + ); + assert_eq!( + cas.blobs.lock().get(&testdir.fingerprint()), + Some(&testdir.bytes()) + ); } #[tokio::test] async fn does_not_reupload_big_file_already_in_cas() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); - - let testdata = TestData::double_all_the_henries(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - new_local_store(dir.path()) - .store_file_bytes(testdata.bytes(), false) - .await - .expect("Error storing file locally"); + let testdata = TestData::double_all_the_henries(); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdata.digest()]) - .await - .expect("Error uploading directory"); + new_local_store(dir.path()) + .store_file_bytes(testdata.bytes(), false) + .await + .expect("Error storing file locally"); - assert_eq!(cas.write_message_sizes.lock().len(), 1); - assert_eq!( - cas.blobs.lock().get(&testdata.fingerprint()), - Some(&testdata.bytes()) - ); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdata.digest()]) + .await + .expect("Error uploading directory"); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdata.digest()]) - .await - .expect("Error uploading directory"); + assert_eq!(cas.write_message_sizes.lock().len(), 1); + assert_eq!( + cas.blobs.lock().get(&testdata.fingerprint()), + Some(&testdata.bytes()) + ); - assert_eq!(cas.write_message_sizes.lock().len(), 1); - assert_eq!( - cas.blobs.lock().get(&testdata.fingerprint()), - Some(&testdata.bytes()) - ); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdata.digest()]) + .await + .expect("Error uploading directory"); + + assert_eq!(cas.write_message_sizes.lock().len(), 1); + assert_eq!( + cas.blobs.lock().get(&testdata.fingerprint()), + Some(&testdata.bytes()) + ); } #[tokio::test] async fn upload_missing_files() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); - - let testdata = TestData::roland(); - - assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); - - let error = new_store(dir.path(), &cas.address()) - .await - 
.ensure_remote_has_recursive(vec![testdata.digest()]) - .await - .expect_err("Want error"); - assert!( - matches!(error, StoreError::MissingDigest { .. }), - "Bad error: {error}" - ); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); + + let testdata = TestData::roland(); + + assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); + + let error = new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdata.digest()]) + .await + .expect_err("Want error"); + assert!( + matches!(error, StoreError::MissingDigest { .. }), + "Bad error: {error}" + ); } #[tokio::test] async fn upload_succeeds_for_digests_which_only_exist_remotely() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - let testdata = TestData::roland(); + let testdata = TestData::roland(); - cas - .blobs - .lock() - .insert(testdata.fingerprint(), testdata.bytes()); + cas.blobs + .lock() + .insert(testdata.fingerprint(), testdata.bytes()); - // The data does not exist locally, but already exists remotely: succeed. - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdata.digest()]) - .await - .unwrap(); + // The data does not exist locally, but already exists remotely: succeed. + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdata.digest()]) + .await + .unwrap(); } #[tokio::test] async fn upload_missing_file_in_directory() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - let testdir = TestDirectory::containing_roland(); + let testdir = TestDirectory::containing_roland(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - - assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); - assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); - - let error = new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect_err("Want error"); - assert!( - matches!(error, StoreError::MissingDigest { .. }), - "Bad error: {error}" - ); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + + assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); + assert_eq!(cas.blobs.lock().get(&testdir.fingerprint()), None); + + let error = new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect_err("Want error"); + assert!( + matches!(error, StoreError::MissingDigest { .. 
}), + "Bad error: {error}" + ); } #[tokio::test] async fn uploading_digest_with_wrong_size_is_error() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); - let testdata = TestData::roland(); + let testdata = TestData::roland(); - new_local_store(dir.path()) - .store_file_bytes(testdata.bytes(), false) - .await - .expect("Error storing file locally"); + new_local_store(dir.path()) + .store_file_bytes(testdata.bytes(), false) + .await + .expect("Error storing file locally"); - assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); + assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); - let wrong_digest = Digest::new(testdata.fingerprint(), testdata.len() + 1); + let wrong_digest = Digest::new(testdata.fingerprint(), testdata.len() + 1); - new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![wrong_digest]) - .await - .expect_err("Expect error uploading file"); + new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![wrong_digest]) + .await + .expect_err("Expect error uploading file"); - assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); + assert_eq!(cas.blobs.lock().get(&testdata.fingerprint()), None); } #[tokio::test] async fn instance_name_upload() { - let dir = TempDir::new().unwrap(); - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .instance_name("dark-tower".to_owned()) - .build(); - - // 3 is enough digests to trigger a FindMissingBlobs request - let testdir = TestDirectory::containing_roland_and_treats(); + let dir = TempDir::new().unwrap(); + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .instance_name("dark-tower".to_owned()) + .build(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - new_local_store(dir.path()) - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error storing roland locally"); - new_local_store(dir.path()) - .store_file_bytes(TestData::catnip().bytes(), false) - .await - .expect("Error storing catnip locally"); + // 3 is enough digests to trigger a FindMissingBlobs request + let testdir = TestDirectory::containing_roland_and_treats(); - let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) - .unwrap() - .into_with_remote(remote_options( - cas.address(), - Some("dark-tower".to_owned()), - BTreeMap::new(), - )) - .await - .unwrap(); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + new_local_store(dir.path()) + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error storing roland locally"); + new_local_store(dir.path()) + .store_file_bytes(TestData::catnip().bytes(), false) + .await + .expect("Error storing catnip locally"); + + let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) + .unwrap() + .into_with_remote(remote_options( + cas.address(), + Some("dark-tower".to_owned()), + BTreeMap::new(), + )) + .await + .unwrap(); - store_with_remote - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect("Error uploading"); + store_with_remote + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect("Error uploading"); } #[tokio::test] async fn instance_name_download() { - let dir = TempDir::new().unwrap(); - let _ = 
WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .instance_name("dark-tower".to_owned()) - .file(&TestData::roland()) - .build(); - - let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) - .unwrap() - .into_with_remote(remote_options( - cas.address(), - Some("dark-tower".to_owned()), - BTreeMap::new(), - )) - .await - .unwrap(); - - assert_eq!( - store_with_remote - .load_file_bytes_with(TestData::roland().digest(), Bytes::copy_from_slice) - .await - .unwrap(), - TestData::roland().bytes() - ) + let dir = TempDir::new().unwrap(); + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .instance_name("dark-tower".to_owned()) + .file(&TestData::roland()) + .build(); + + let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) + .unwrap() + .into_with_remote(remote_options( + cas.address(), + Some("dark-tower".to_owned()), + BTreeMap::new(), + )) + .await + .unwrap(); + + assert_eq!( + store_with_remote + .load_file_bytes_with(TestData::roland().digest(), Bytes::copy_from_slice) + .await + .unwrap(), + TestData::roland().bytes() + ) } #[tokio::test] async fn auth_upload() { - let dir = TempDir::new().unwrap(); - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .required_auth_token("Armory.Key".to_owned()) - .build(); + let dir = TempDir::new().unwrap(); + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .required_auth_token("Armory.Key".to_owned()) + .build(); - // 3 is enough digests to trigger a FindMissingBlobs request - let testdir = TestDirectory::containing_roland_and_treats(); + // 3 is enough digests to trigger a FindMissingBlobs request + let testdir = TestDirectory::containing_roland_and_treats(); - new_local_store(dir.path()) - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - new_local_store(dir.path()) - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error storing roland locally"); - new_local_store(dir.path()) - .store_file_bytes(TestData::catnip().bytes(), false) - .await - .expect("Error storing catnip locally"); - - let mut headers = BTreeMap::new(); - headers.insert("authorization".to_owned(), "Bearer Armory.Key".to_owned()); - let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) - .unwrap() - .into_with_remote(remote_options(cas.address(), None, headers)) - .await - .unwrap(); + new_local_store(dir.path()) + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + new_local_store(dir.path()) + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error storing roland locally"); + new_local_store(dir.path()) + .store_file_bytes(TestData::catnip().bytes(), false) + .await + .expect("Error storing catnip locally"); + + let mut headers = BTreeMap::new(); + headers.insert("authorization".to_owned(), "Bearer Armory.Key".to_owned()); + let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) + .unwrap() + .into_with_remote(remote_options(cas.address(), None, headers)) + .await + .unwrap(); - store_with_remote - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect("Error uploading"); + store_with_remote + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect("Error uploading"); } #[tokio::test] async fn auth_download() { - let dir = TempDir::new().unwrap(); - let _ = 
WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .required_auth_token("Armory.Key".to_owned()) - .file(&TestData::roland()) - .build(); - - let mut headers = BTreeMap::new(); - headers.insert("authorization".to_owned(), "Bearer Armory.Key".to_owned()); - let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) - .unwrap() - .into_with_remote(remote_options(cas.address(), None, headers)) - .await - .unwrap(); - - assert_eq!( - store_with_remote - .load_file_bytes_with(TestData::roland().digest(), Bytes::copy_from_slice) - .await - .unwrap(), - TestData::roland().bytes() - ) + let dir = TempDir::new().unwrap(); + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .required_auth_token("Armory.Key".to_owned()) + .file(&TestData::roland()) + .build(); + + let mut headers = BTreeMap::new(); + headers.insert("authorization".to_owned(), "Bearer Armory.Key".to_owned()); + let store_with_remote = Store::local_only(task_executor::Executor::new(), dir.path()) + .unwrap() + .into_with_remote(remote_options(cas.address(), None, headers)) + .await + .unwrap(); + + assert_eq!( + store_with_remote + .load_file_bytes_with(TestData::roland().digest(), Bytes::copy_from_slice) + .await + .unwrap(), + TestData::roland().bytes() + ) } #[tokio::test] async fn materialize_missing_file() { - let materialize_dir = TempDir::new().unwrap(); - let file = materialize_dir.path().join("file"); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .materialize_file( - file.clone(), - TestData::roland().digest(), - Permissions::ReadOnly, - false, - ) - .await - .expect_err("Want unknown digest error"); + let materialize_dir = TempDir::new().unwrap(); + let file = materialize_dir.path().join("file"); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .materialize_file( + file.clone(), + TestData::roland().digest(), + Permissions::ReadOnly, + false, + ) + .await + .expect_err("Want unknown digest error"); } #[tokio::test] async fn materialize_file() { - let materialize_dir = TempDir::new().unwrap(); - let file = materialize_dir.path().join("file"); - - let testdata = TestData::roland(); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .store_file_bytes(testdata.bytes(), false) - .await - .expect("Error saving bytes"); - store - .materialize_file( - file.clone(), - testdata.digest(), - Permissions::ReadOnly, - false, - ) - .await - .expect("Error materializing file"); - assert_eq!(file_contents(&file), testdata.bytes()); - assert!(!is_executable(&file)); + let materialize_dir = TempDir::new().unwrap(); + let file = materialize_dir.path().join("file"); + + let testdata = TestData::roland(); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .store_file_bytes(testdata.bytes(), false) + .await + .expect("Error saving bytes"); + store + .materialize_file( + file.clone(), + testdata.digest(), + Permissions::ReadOnly, + false, + ) + .await + .expect("Error materializing file"); + assert_eq!(file_contents(&file), testdata.bytes()); + assert!(!is_executable(&file)); } #[tokio::test] async fn materialize_missing_directory() { - let materialize_dir = TempDir::new().unwrap(); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .materialize_directory( - materialize_dir.path().to_owned(), - 
materialize_dir.path(), - TestDirectory::recursive().directory_digest(), - false, - &BTreeSet::new(), - Permissions::Writable, - ) - .await - .expect_err("Want unknown digest error"); + let materialize_dir = TempDir::new().unwrap(); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .materialize_directory( + materialize_dir.path().to_owned(), + materialize_dir.path(), + TestDirectory::recursive().directory_digest(), + false, + &BTreeSet::new(), + Permissions::Writable, + ) + .await + .expect_err("Want unknown digest error"); } async fn materialize_directory(perms: Permissions, executable_file: bool) { - let materialize_dir = TempDir::new().unwrap(); - - let catnip = TestData::catnip(); - let testdir = TestDirectory::with_maybe_executable_files(executable_file); - let recursive_testdir = TestDirectory::recursive_with(testdir.clone()); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .record_directory(&recursive_testdir.directory(), false) - .await - .expect("Error saving recursive Directory"); - store - .record_directory(&testdir.directory(), false) - .await - .expect("Error saving Directory"); - store - .store_file_bytes(catnip.bytes(), false) - .await - .expect("Error saving file bytes"); - - store - .materialize_directory( - materialize_dir.path().to_owned(), - materialize_dir.path(), - recursive_testdir.directory_digest(), - false, - &BTreeSet::new(), - perms, - ) - .await - .expect("Error materializing"); - - // Validate contents. - assert_eq!(list_dir(materialize_dir.path()), vec!["cats", "treats.ext"]); - assert_eq!( - file_contents(&materialize_dir.path().join("treats.ext")), - catnip.bytes() - ); - assert_eq!( - list_dir(&materialize_dir.path().join("cats")), - vec!["feed.ext", "food.ext"] - ); - assert_eq!( - file_contents(&materialize_dir.path().join("cats").join("feed.ext")), - catnip.bytes() - ); - - // Validate executability. - assert_eq!( - executable_file, - is_executable(&materialize_dir.path().join("cats").join("feed.ext")) - ); - assert!(!is_executable( - &materialize_dir.path().join("cats").join("food.ext") - )); - - // Validate read/write permissions for a file, a nested directory, and the root. - let readonly = perms == Permissions::ReadOnly; - assert_eq!( - readonly, - is_readonly(&materialize_dir.path().join("cats").join("feed.ext")) - ); - assert_eq!(readonly, is_readonly(&materialize_dir.path().join("cats"))); - assert_eq!(readonly, is_readonly(materialize_dir.path())); + let materialize_dir = TempDir::new().unwrap(); + + let catnip = TestData::catnip(); + let testdir = TestDirectory::with_maybe_executable_files(executable_file); + let recursive_testdir = TestDirectory::recursive_with(testdir.clone()); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .record_directory(&recursive_testdir.directory(), false) + .await + .expect("Error saving recursive Directory"); + store + .record_directory(&testdir.directory(), false) + .await + .expect("Error saving Directory"); + store + .store_file_bytes(catnip.bytes(), false) + .await + .expect("Error saving file bytes"); + + store + .materialize_directory( + materialize_dir.path().to_owned(), + materialize_dir.path(), + recursive_testdir.directory_digest(), + false, + &BTreeSet::new(), + perms, + ) + .await + .expect("Error materializing"); + + // Validate contents. 
+ assert_eq!(list_dir(materialize_dir.path()), vec!["cats", "treats.ext"]); + assert_eq!( + file_contents(&materialize_dir.path().join("treats.ext")), + catnip.bytes() + ); + assert_eq!( + list_dir(&materialize_dir.path().join("cats")), + vec!["feed.ext", "food.ext"] + ); + assert_eq!( + file_contents(&materialize_dir.path().join("cats").join("feed.ext")), + catnip.bytes() + ); + + // Validate executability. + assert_eq!( + executable_file, + is_executable(&materialize_dir.path().join("cats").join("feed.ext")) + ); + assert!(!is_executable( + &materialize_dir.path().join("cats").join("food.ext") + )); + + // Validate read/write permissions for a file, a nested directory, and the root. + let readonly = perms == Permissions::ReadOnly; + assert_eq!( + readonly, + is_readonly(&materialize_dir.path().join("cats").join("feed.ext")) + ); + assert_eq!(readonly, is_readonly(&materialize_dir.path().join("cats"))); + assert_eq!(readonly, is_readonly(materialize_dir.path())); } #[tokio::test] async fn materialize_directory_writable() { - materialize_directory(Permissions::Writable, false).await + materialize_directory(Permissions::Writable, false).await } #[tokio::test] async fn materialize_directory_writable_executable() { - materialize_directory(Permissions::Writable, true).await + materialize_directory(Permissions::Writable, true).await } #[tokio::test] async fn materialize_directory_readonly() { - materialize_directory(Permissions::ReadOnly, false).await + materialize_directory(Permissions::ReadOnly, false).await } #[tokio::test] async fn materialize_directory_readonly_executable() { - materialize_directory(Permissions::Writable, true).await + materialize_directory(Permissions::Writable, true).await } #[tokio::test] async fn contents_for_directory_empty() { - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); - let file_contents = store - .contents_for_directory(TestDirectory::empty().directory_digest()) - .await - .expect("Getting FileContents"); + let file_contents = store + .contents_for_directory(TestDirectory::empty().directory_digest()) + .await + .expect("Getting FileContents"); - assert_same_filecontents(file_contents, vec![]); + assert_same_filecontents(file_contents, vec![]); } #[tokio::test] async fn contents_for_directory() { - let roland = TestData::roland(); - let catnip = TestData::catnip(); - let testdir = TestDirectory::containing_roland(); - let recursive_testdir = TestDirectory::recursive(); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .record_directory(&recursive_testdir.directory(), false) - .await - .expect("Error saving recursive Directory"); - store - .record_directory(&testdir.directory(), false) - .await - .expect("Error saving Directory"); - store - .store_file_bytes(roland.bytes(), false) - .await - .expect("Error saving file bytes"); - store - .store_file_bytes(catnip.bytes(), false) - .await - .expect("Error saving catnip file bytes"); - - let file_contents = store - .contents_for_directory(recursive_testdir.directory_digest()) - .await - .expect("Getting FileContents"); - - assert_same_filecontents( - file_contents, - vec![ - FileContent { - path: PathBuf::from("cats").join("roland.ext"), - content: roland.bytes(), - is_executable: false, - }, - FileContent { - path: PathBuf::from("treats.ext"), - content: catnip.bytes(), - is_executable: false, - }, - ], - ); + let roland = 
TestData::roland(); + let catnip = TestData::catnip(); + let testdir = TestDirectory::containing_roland(); + let recursive_testdir = TestDirectory::recursive(); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .record_directory(&recursive_testdir.directory(), false) + .await + .expect("Error saving recursive Directory"); + store + .record_directory(&testdir.directory(), false) + .await + .expect("Error saving Directory"); + store + .store_file_bytes(roland.bytes(), false) + .await + .expect("Error saving file bytes"); + store + .store_file_bytes(catnip.bytes(), false) + .await + .expect("Error saving catnip file bytes"); + + let file_contents = store + .contents_for_directory(recursive_testdir.directory_digest()) + .await + .expect("Getting FileContents"); + + assert_same_filecontents( + file_contents, + vec![ + FileContent { + path: PathBuf::from("cats").join("roland.ext"), + content: roland.bytes(), + is_executable: false, + }, + FileContent { + path: PathBuf::from("treats.ext"), + content: catnip.bytes(), + is_executable: false, + }, + ], + ); } fn assert_same_filecontents(left: Vec, right: Vec) { - assert_eq!( - left.len(), - right.len(), - "FileContents did not match, different lengths: left: {left:?} right: {right:?}" - ); - - let mut success = true; - for (index, (l, r)) in left.iter().zip(right.iter()).enumerate() { - if l.path != r.path { - success = false; - eprintln!( - "Paths did not match for index {}: {:?}, {:?}", - index, l.path, r.path - ); - } - if l.content != r.content { - success = false; - eprintln!( - "Content did not match for index {}: {:?}, {:?}", - index, l.content, r.content - ); - } - if l.is_executable != r.is_executable { - success = false; - eprintln!( - "Executable bit did not match for index {}: {:?}, {:?}", - index, l.is_executable, r.is_executable - ); + assert_eq!( + left.len(), + right.len(), + "FileContents did not match, different lengths: left: {left:?} right: {right:?}" + ); + + let mut success = true; + for (index, (l, r)) in left.iter().zip(right.iter()).enumerate() { + if l.path != r.path { + success = false; + eprintln!( + "Paths did not match for index {}: {:?}, {:?}", + index, l.path, r.path + ); + } + if l.content != r.content { + success = false; + eprintln!( + "Content did not match for index {}: {:?}, {:?}", + index, l.content, r.content + ); + } + if l.is_executable != r.is_executable { + success = false; + eprintln!( + "Executable bit did not match for index {}: {:?}, {:?}", + index, l.is_executable, r.is_executable + ); + } } - } - assert!( - success, - "FileContents did not match: Left: {left:?}, Right: {right:?}" - ); + assert!( + success, + "FileContents did not match: Left: {left:?}, Right: {right:?}" + ); } #[tokio::test] async fn entries_for_directory() { - let roland = TestData::roland(); - let catnip = TestData::catnip(); - let testdir = TestDirectory::containing_roland(); - let recursive_testdir = TestDirectory::recursive(); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .record_directory(&recursive_testdir.directory(), false) - .await - .expect("Error saving recursive Directory"); - store - .record_directory(&testdir.directory(), false) - .await - .expect("Error saving Directory"); - store - .store_file_bytes(roland.bytes(), false) - .await - .expect("Error saving file bytes"); - store - .store_file_bytes(catnip.bytes(), false) - .await - .expect("Error saving catnip file bytes"); - - let digest_entries = store - 
.entries_for_directory(recursive_testdir.directory_digest()) - .await - .expect("Getting FileContents"); - - assert_same_digest_entries( - digest_entries, - vec![ - DigestEntry::File(FileEntry { - path: PathBuf::from("cats").join("roland.ext"), - digest: roland.digest(), - is_executable: false, - }), - DigestEntry::File(FileEntry { - path: PathBuf::from("treats.ext"), - digest: catnip.digest(), - is_executable: false, - }), - ], - ); - - let empty_digest_entries = store - .entries_for_directory(EMPTY_DIRECTORY_DIGEST.clone()) - .await - .expect("Getting EMTPY_DIGEST"); - - assert_same_digest_entries(empty_digest_entries, vec![]); + let roland = TestData::roland(); + let catnip = TestData::catnip(); + let testdir = TestDirectory::containing_roland(); + let recursive_testdir = TestDirectory::recursive(); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .record_directory(&recursive_testdir.directory(), false) + .await + .expect("Error saving recursive Directory"); + store + .record_directory(&testdir.directory(), false) + .await + .expect("Error saving Directory"); + store + .store_file_bytes(roland.bytes(), false) + .await + .expect("Error saving file bytes"); + store + .store_file_bytes(catnip.bytes(), false) + .await + .expect("Error saving catnip file bytes"); + + let digest_entries = store + .entries_for_directory(recursive_testdir.directory_digest()) + .await + .expect("Getting FileContents"); + + assert_same_digest_entries( + digest_entries, + vec![ + DigestEntry::File(FileEntry { + path: PathBuf::from("cats").join("roland.ext"), + digest: roland.digest(), + is_executable: false, + }), + DigestEntry::File(FileEntry { + path: PathBuf::from("treats.ext"), + digest: catnip.digest(), + is_executable: false, + }), + ], + ); + + let empty_digest_entries = store + .entries_for_directory(EMPTY_DIRECTORY_DIGEST.clone()) + .await + .expect("Getting EMTPY_DIGEST"); + + assert_same_digest_entries(empty_digest_entries, vec![]); } fn assert_same_digest_entries(left: Vec, right: Vec) { - assert_eq!( - left.len(), - right.len(), - "DigestEntry vectors did not match, different lengths: left: {left:?} right: {right:?}" - ); - - let mut success = true; - for (index, (l, r)) in left.iter().zip(right.iter()).enumerate() { - match (l, r) { - (DigestEntry::File(l), DigestEntry::File(r)) => { - if l.path != r.path { - success = false; - eprintln!( - "Paths did not match for index {}: {:?}, {:?}", - index, l.path, r.path - ); - } - if l.digest != r.digest { - success = false; - eprintln!( - "Digest did not match for index {}: {:?}, {:?}", - index, l.digest, r.digest - ); - } - if l.is_executable != r.is_executable { - success = false; - eprintln!( - "Executable bit did not match for index {}: {:?}, {:?}", - index, l.is_executable, r.is_executable - ); - } - } - (DigestEntry::EmptyDirectory(path_left), DigestEntry::EmptyDirectory(path_right)) => { - if path_left != path_right { - success = false; - eprintln!( + assert_eq!( + left.len(), + right.len(), + "DigestEntry vectors did not match, different lengths: left: {left:?} right: {right:?}" + ); + + let mut success = true; + for (index, (l, r)) in left.iter().zip(right.iter()).enumerate() { + match (l, r) { + (DigestEntry::File(l), DigestEntry::File(r)) => { + if l.path != r.path { + success = false; + eprintln!( + "Paths did not match for index {}: {:?}, {:?}", + index, l.path, r.path + ); + } + if l.digest != r.digest { + success = false; + eprintln!( + "Digest did not match for index {}: {:?}, {:?}", + 
index, l.digest, r.digest + ); + } + if l.is_executable != r.is_executable { + success = false; + eprintln!( + "Executable bit did not match for index {}: {:?}, {:?}", + index, l.is_executable, r.is_executable + ); + } + } + (DigestEntry::EmptyDirectory(path_left), DigestEntry::EmptyDirectory(path_right)) => { + if path_left != path_right { + success = false; + eprintln!( "Paths did not match for empty directory at index {index}: {path_left:?}, {path_right:?}" ); + } + } + (l, r) => { + success = false; + eprintln!("Differing types at index {index}: {l:?}, {r:?}") + } } - } - (l, r) => { - success = false; - eprintln!("Differing types at index {index}: {l:?}, {r:?}") - } } - } - assert!( - success, - "FileEntry vectors did not match: Left: {left:?}, Right: {right:?}" - ); + assert!( + success, + "FileEntry vectors did not match: Left: {left:?}, Right: {right:?}" + ); } fn list_dir(path: &Path) -> Vec { - let mut v: Vec<_> = std::fs::read_dir(path) - .expect("Listing dir") - .map(|entry| { - entry - .expect("Error reading entry") - .file_name() - .to_string_lossy() - .to_string() - }) - .collect(); - v.sort(); - v + let mut v: Vec<_> = std::fs::read_dir(path) + .expect("Listing dir") + .map(|entry| { + entry + .expect("Error reading entry") + .file_name() + .to_string_lossy() + .to_string() + }) + .collect(); + v.sort(); + v } fn file_contents(path: &Path) -> Bytes { - let mut contents = Vec::new(); - std::fs::File::open(path) - .and_then(|mut f| f.read_to_end(&mut contents)) - .expect("Error reading file"); - Bytes::from(contents) + let mut contents = Vec::new(); + std::fs::File::open(path) + .and_then(|mut f| f.read_to_end(&mut contents)) + .expect("Error reading file"); + Bytes::from(contents) } fn is_executable(path: &Path) -> bool { - let mode = std::fs::metadata(path) - .expect("Getting metadata") - .permissions() - .mode(); - - // NB: macOS's default umask is applied when we create files, and removes the executable bit - // for "all". There probably isn't a good reason to try to override that. - let executable_mask = if cfg!(target_os = "macos") { - 0o110 - } else { - 0o111 - }; - mode & executable_mask == executable_mask + let mode = std::fs::metadata(path) + .expect("Getting metadata") + .permissions() + .mode(); + + // NB: macOS's default umask is applied when we create files, and removes the executable bit + // for "all". There probably isn't a good reason to try to override that. 
+ let executable_mask = if cfg!(target_os = "macos") { + 0o110 + } else { + 0o111 + }; + mode & executable_mask == executable_mask } fn is_readonly(path: &Path) -> bool { - std::fs::metadata(path) - .expect("Getting metadata") - .permissions() - .readonly() + std::fs::metadata(path) + .expect("Getting metadata") + .permissions() + .readonly() } #[tokio::test] async fn returns_upload_summary_on_empty_cas() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); - - let testroland = TestData::roland(); - let testcatnip = TestData::catnip(); - let testdir = TestDirectory::containing_roland_and_treats(); - - let local_store = new_local_store(dir.path()); - local_store - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - local_store - .store_file_bytes(testroland.bytes(), false) - .await - .expect("Error storing file locally"); - local_store - .store_file_bytes(testcatnip.bytes(), false) - .await - .expect("Error storing file locally"); - let mut summary = new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect("Error uploading file"); - - // We store all 3 files, and so we must sum their digests - let test_data = vec![ - testdir.digest().size_bytes, - testroland.digest().size_bytes, - testcatnip.digest().size_bytes, - ]; - let test_bytes = test_data.iter().sum(); - summary.upload_wall_time = Duration::default(); - assert_eq!( - summary, - UploadSummary { - ingested_file_count: test_data.len(), - ingested_file_bytes: test_bytes, - uploaded_file_count: test_data.len(), - uploaded_file_bytes: test_bytes, - upload_wall_time: Duration::default(), - } - ); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); + + let testroland = TestData::roland(); + let testcatnip = TestData::catnip(); + let testdir = TestDirectory::containing_roland_and_treats(); + + let local_store = new_local_store(dir.path()); + local_store + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + local_store + .store_file_bytes(testroland.bytes(), false) + .await + .expect("Error storing file locally"); + local_store + .store_file_bytes(testcatnip.bytes(), false) + .await + .expect("Error storing file locally"); + let mut summary = new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect("Error uploading file"); + + // We store all 3 files, and so we must sum their digests + let test_data = vec![ + testdir.digest().size_bytes, + testroland.digest().size_bytes, + testcatnip.digest().size_bytes, + ]; + let test_bytes = test_data.iter().sum(); + summary.upload_wall_time = Duration::default(); + assert_eq!( + summary, + UploadSummary { + ingested_file_count: test_data.len(), + ingested_file_bytes: test_bytes, + uploaded_file_count: test_data.len(), + uploaded_file_bytes: test_bytes, + upload_wall_time: Duration::default(), + } + ); } #[tokio::test] async fn summary_does_not_count_things_in_cas() { - let dir = TempDir::new().unwrap(); - let cas = new_empty_cas(); - - let testroland = TestData::roland(); - let testcatnip = TestData::catnip(); - let testdir = TestDirectory::containing_roland_and_treats(); - - // Store everything locally - let local_store = new_local_store(dir.path()); - local_store - .record_directory(&testdir.directory(), false) - .await - .expect("Error storing directory locally"); - local_store - .store_file_bytes(testroland.bytes(), false) - .await - 
.expect("Error storing file locally"); - local_store - .store_file_bytes(testcatnip.bytes(), false) - .await - .expect("Error storing file locally"); - - // Store testroland first, which should return a summary of one file - let mut data_summary = new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testroland.digest()]) - .await - .expect("Error uploading file"); - data_summary.upload_wall_time = Duration::default(); - - assert_eq!( - data_summary, - UploadSummary { - ingested_file_count: 1, - ingested_file_bytes: testroland.digest().size_bytes, - uploaded_file_count: 1, - uploaded_file_bytes: testroland.digest().size_bytes, - upload_wall_time: Duration::default(), - } - ); - - // Store the directory and catnip. - // It should see the digest of testroland already in cas, - // and not report it in uploads. - let mut dir_summary = new_store(dir.path(), &cas.address()) - .await - .ensure_remote_has_recursive(vec![testdir.digest()]) - .await - .expect("Error uploading directory"); - - dir_summary.upload_wall_time = Duration::default(); - - assert_eq!( - dir_summary, - UploadSummary { - ingested_file_count: 3, - ingested_file_bytes: testdir.digest().size_bytes - + testroland.digest().size_bytes - + testcatnip.digest().size_bytes, - uploaded_file_count: 2, - uploaded_file_bytes: testdir.digest().size_bytes + testcatnip.digest().size_bytes, - upload_wall_time: Duration::default(), - } - ); + let dir = TempDir::new().unwrap(); + let cas = new_empty_cas(); + + let testroland = TestData::roland(); + let testcatnip = TestData::catnip(); + let testdir = TestDirectory::containing_roland_and_treats(); + + // Store everything locally + let local_store = new_local_store(dir.path()); + local_store + .record_directory(&testdir.directory(), false) + .await + .expect("Error storing directory locally"); + local_store + .store_file_bytes(testroland.bytes(), false) + .await + .expect("Error storing file locally"); + local_store + .store_file_bytes(testcatnip.bytes(), false) + .await + .expect("Error storing file locally"); + + // Store testroland first, which should return a summary of one file + let mut data_summary = new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testroland.digest()]) + .await + .expect("Error uploading file"); + data_summary.upload_wall_time = Duration::default(); + + assert_eq!( + data_summary, + UploadSummary { + ingested_file_count: 1, + ingested_file_bytes: testroland.digest().size_bytes, + uploaded_file_count: 1, + uploaded_file_bytes: testroland.digest().size_bytes, + upload_wall_time: Duration::default(), + } + ); + + // Store the directory and catnip. + // It should see the digest of testroland already in cas, + // and not report it in uploads. 
+ let mut dir_summary = new_store(dir.path(), &cas.address()) + .await + .ensure_remote_has_recursive(vec![testdir.digest()]) + .await + .expect("Error uploading directory"); + + dir_summary.upload_wall_time = Duration::default(); + + assert_eq!( + dir_summary, + UploadSummary { + ingested_file_count: 3, + ingested_file_bytes: testdir.digest().size_bytes + + testroland.digest().size_bytes + + testcatnip.digest().size_bytes, + uploaded_file_count: 2, + uploaded_file_bytes: testdir.digest().size_bytes + testcatnip.digest().size_bytes, + upload_wall_time: Duration::default(), + } + ); } #[tokio::test] async fn explicitly_overwrites_already_existing_file() { - fn test_file_with_arbitrary_content(filename: &str, content: &TestData) -> TestDirectory { - let digest = content.digest(); - let directory = remexec::Directory { - files: vec![remexec::FileNode { - name: filename.to_owned(), - digest: Some((&digest).into()), - is_executable: false, - ..Default::default() - }], - ..Default::default() - }; - TestDirectory { directory } - } - - let dir_to_write_to = tempfile::tempdir().unwrap(); - let file_path: PathBuf = [dir_to_write_to.path(), Path::new("some_filename.ext")] - .iter() - .collect(); - - std::fs::write(&file_path, "XXX").unwrap(); - - let file_contents = std::fs::read(&file_path).unwrap(); - assert_eq!(file_contents, b"XXX".to_vec()); - - let cas_file = TestData::new("abc123"); - let contents_dir = test_file_with_arbitrary_content("some_filename.ext", &cas_file); - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder() - .directory(&contents_dir) - .file(&cas_file) - .build(); - let store_dir = tempfile::tempdir().unwrap(); - let store = new_store(store_dir.path(), &cas.address()).await; - - store - .materialize_directory( - dir_to_write_to.path().to_owned(), - dir_to_write_to.path(), - contents_dir.directory_digest(), - false, - &BTreeSet::new(), - Permissions::Writable, - ) - .await - .unwrap(); + fn test_file_with_arbitrary_content(filename: &str, content: &TestData) -> TestDirectory { + let digest = content.digest(); + let directory = remexec::Directory { + files: vec![remexec::FileNode { + name: filename.to_owned(), + digest: Some((&digest).into()), + is_executable: false, + ..Default::default() + }], + ..Default::default() + }; + TestDirectory { directory } + } - let file_contents = std::fs::read(&file_path).unwrap(); - assert_eq!(file_contents, b"abc123".to_vec()); + let dir_to_write_to = tempfile::tempdir().unwrap(); + let file_path: PathBuf = [dir_to_write_to.path(), Path::new("some_filename.ext")] + .iter() + .collect(); + + std::fs::write(&file_path, "XXX").unwrap(); + + let file_contents = std::fs::read(&file_path).unwrap(); + assert_eq!(file_contents, b"XXX".to_vec()); + + let cas_file = TestData::new("abc123"); + let contents_dir = test_file_with_arbitrary_content("some_filename.ext", &cas_file); + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder() + .directory(&contents_dir) + .file(&cas_file) + .build(); + let store_dir = tempfile::tempdir().unwrap(); + let store = new_store(store_dir.path(), &cas.address()).await; + + store + .materialize_directory( + dir_to_write_to.path().to_owned(), + dir_to_write_to.path(), + contents_dir.directory_digest(), + false, + &BTreeSet::new(), + Permissions::Writable, + ) + .await + .unwrap(); + + let file_contents = std::fs::read(&file_path).unwrap(); + assert_eq!(file_contents, b"abc123".to_vec()); } #[tokio::test] async fn big_file_immutable_link() { - let materialize_dir = TempDir::new().unwrap(); 
- let input_file = materialize_dir.path().join("input_file"); - let output_file = materialize_dir.path().join("output_file"); - let output_dir = materialize_dir.path().join("output_dir"); - let nested_output_file = output_dir.join("file"); - - let testdata = TestData::double_all_the_henries(); - let file_bytes = testdata.bytes(); - let file_digest = testdata.digest(); - - let nested_directory = remexec::Directory { - files: vec![remexec::FileNode { - name: "file".to_owned(), - digest: Some(file_digest.into()), - is_executable: true, - ..remexec::FileNode::default() - }], - ..remexec::Directory::default() - }; - let directory = remexec::Directory { - files: vec![ - remexec::FileNode { - name: "input_file".to_owned(), - digest: Some(file_digest.into()), - is_executable: true, - ..remexec::FileNode::default() - }, - remexec::FileNode { - name: "output_file".to_owned(), - digest: Some(file_digest.into()), - is_executable: true, - ..remexec::FileNode::default() - }, - ], - directories: vec![remexec::DirectoryNode { - name: "output_dir".to_string(), - digest: Some(hashing::Digest::of_bytes(&nested_directory.to_bytes()).into()), - }], - ..remexec::Directory::default() - }; - let directory_digest = - fs::DirectoryDigest::from_persisted_digest(hashing::Digest::of_bytes(&directory.to_bytes())); - - let store_dir = TempDir::new().unwrap(); - let store = new_local_store(store_dir.path()); - store - .record_directory(&nested_directory, false) - .await - .expect("Error saving Directory"); - store - .record_directory(&directory, false) - .await - .expect("Error saving Directory"); - store - .store_file_bytes(file_bytes.clone(), false) - .await - .expect("Error saving bytes"); - - store - .materialize_directory( - materialize_dir.path().to_owned(), - materialize_dir.path(), - directory_digest, - false, - &BTreeSet::from([ - RelativePath::new("output_file").unwrap(), - RelativePath::new("output_dir").unwrap(), - ]), - Permissions::Writable, - ) - .await - .expect("Error materializing file"); - - let assert_is_linked = |path: &PathBuf, is_linked: bool| { - assert_eq!(file_contents(path), file_bytes); - assert!(is_executable(path)); - assert_eq!(path.metadata().unwrap().permissions().readonly(), is_linked); - }; + let materialize_dir = TempDir::new().unwrap(); + let input_file = materialize_dir.path().join("input_file"); + let output_file = materialize_dir.path().join("output_file"); + let output_dir = materialize_dir.path().join("output_dir"); + let nested_output_file = output_dir.join("file"); + + let testdata = TestData::double_all_the_henries(); + let file_bytes = testdata.bytes(); + let file_digest = testdata.digest(); + + let nested_directory = remexec::Directory { + files: vec![remexec::FileNode { + name: "file".to_owned(), + digest: Some(file_digest.into()), + is_executable: true, + ..remexec::FileNode::default() + }], + ..remexec::Directory::default() + }; + let directory = remexec::Directory { + files: vec![ + remexec::FileNode { + name: "input_file".to_owned(), + digest: Some(file_digest.into()), + is_executable: true, + ..remexec::FileNode::default() + }, + remexec::FileNode { + name: "output_file".to_owned(), + digest: Some(file_digest.into()), + is_executable: true, + ..remexec::FileNode::default() + }, + ], + directories: vec![remexec::DirectoryNode { + name: "output_dir".to_string(), + digest: Some(hashing::Digest::of_bytes(&nested_directory.to_bytes()).into()), + }], + ..remexec::Directory::default() + }; + let directory_digest = 
fs::DirectoryDigest::from_persisted_digest(hashing::Digest::of_bytes( + &directory.to_bytes(), + )); + + let store_dir = TempDir::new().unwrap(); + let store = new_local_store(store_dir.path()); + store + .record_directory(&nested_directory, false) + .await + .expect("Error saving Directory"); + store + .record_directory(&directory, false) + .await + .expect("Error saving Directory"); + store + .store_file_bytes(file_bytes.clone(), false) + .await + .expect("Error saving bytes"); + + store + .materialize_directory( + materialize_dir.path().to_owned(), + materialize_dir.path(), + directory_digest, + false, + &BTreeSet::from([ + RelativePath::new("output_file").unwrap(), + RelativePath::new("output_dir").unwrap(), + ]), + Permissions::Writable, + ) + .await + .expect("Error materializing file"); + + let assert_is_linked = |path: &PathBuf, is_linked: bool| { + assert_eq!(file_contents(path), file_bytes); + assert!(is_executable(path)); + assert_eq!(path.metadata().unwrap().permissions().readonly(), is_linked); + }; - assert_is_linked(&input_file, true); - assert_is_linked(&output_file, false); - assert_is_linked(&nested_output_file, false); + assert_is_linked(&input_file, true); + assert_is_linked(&output_file, false); + assert_is_linked(&nested_output_file, false); } diff --git a/src/rust/engine/graph/src/context.rs b/src/rust/engine/graph/src/context.rs index 696de1aace1..3b71b314902 100644 --- a/src/rust/engine/graph/src/context.rs +++ b/src/rust/engine/graph/src/context.rs @@ -13,16 +13,16 @@ use crate::node::{CompoundNode, EntryId, Node, NodeError}; use crate::Graph; struct InnerContext { - context: N::Context, - run_id: AtomicU32, - stats: Stats, - graph: Graph, + context: N::Context, + run_id: AtomicU32, + stats: Stats, + graph: Graph, } #[derive(Clone, Default)] pub(crate) struct DepState { - pub(crate) generations: Vec<(EntryId, Generation)>, - pub(crate) has_uncacheable_deps: bool, + pub(crate) generations: Vec<(EntryId, Generation)>, + pub(crate) has_uncacheable_deps: bool, } /// @@ -33,135 +33,134 @@ pub(crate) struct DepState { /// #[derive(Clone)] pub struct Context { - entry_id: Option, - dep_state: Arc>>, - inner: Arc>, + entry_id: Option, + dep_state: Arc>>, + inner: Arc>, } impl Context { - pub(crate) fn new(graph: Graph, context: N::Context, run_id: RunId) -> Self { - Self { - entry_id: None, - dep_state: Arc::default(), - inner: Arc::new(InnerContext { - context, - run_id: AtomicU32::new(run_id.0), - stats: Stats::default(), - graph, - }), + pub(crate) fn new(graph: Graph, context: N::Context, run_id: RunId) -> Self { + Self { + entry_id: None, + dep_state: Arc::default(), + inner: Arc::new(InnerContext { + context, + run_id: AtomicU32::new(run_id.0), + stats: Stats::default(), + graph, + }), + } } - } - - /// - /// Get the future value for the given Node implementation. 
- /// - pub async fn get>(&self, node: CN) -> Result { - let (node_result, _generation) = self - .inner - .graph - .get_inner(self.entry_id, self, node.into()) - .await; - - node_result?.try_into().map_err(|_| { - N::Error::generic(format!( - "The CompoundNode implementation for {} was ambiguous.", - std::any::type_name::() - )) - }) - } - - pub fn run_id(&self) -> RunId { - RunId(self.inner.run_id.load(atomic::Ordering::SeqCst)) - } - - pub fn new_run_id(&self) { - self.inner.run_id.store( - self.inner.graph.generate_run_id().0, - atomic::Ordering::SeqCst, - ); - } - - pub fn context(&self) -> &N::Context { - &self.inner.context - } - - pub fn graph(&self) -> &Graph { - &self.inner.graph - } - - pub(crate) fn stats(&self) -> &Stats { - &self.inner.stats - } - - pub(crate) fn dep_record( - &self, - dep_id: EntryId, - generation: Generation, - uncacheable: bool, - ) -> Result<(), N::Error> { - let mut maybe_dep_state = self.dep_state.lock(); - if let Some(dep_state) = maybe_dep_state.as_mut() { - dep_state.generations.push((dep_id, generation)); - dep_state.has_uncacheable_deps |= uncacheable; - Ok(()) - } else { - // This case can occur if a Node has spawned background work which continues to attempt - // to request dependencies in the background. - Err(N::Error::generic(format!( - "Could not request additional dependencies for {:?}: the Node has completed.", - self.entry_id - ))) + + /// + /// Get the future value for the given Node implementation. + /// + pub async fn get>(&self, node: CN) -> Result { + let (node_result, _generation) = self + .inner + .graph + .get_inner(self.entry_id, self, node.into()) + .await; + + node_result?.try_into().map_err(|_| { + N::Error::generic(format!( + "The CompoundNode implementation for {} was ambiguous.", + std::any::type_name::() + )) + }) + } + + pub fn run_id(&self) -> RunId { + RunId(self.inner.run_id.load(atomic::Ordering::SeqCst)) + } + + pub fn new_run_id(&self) { + self.inner.run_id.store( + self.inner.graph.generate_run_id().0, + atomic::Ordering::SeqCst, + ); } - } - - /// - /// Gets the dependency generations which have been computed for this Node so far. May not be - /// called after `complete` has been called for a node. - /// - pub(crate) fn dep_generations_so_far(&self, node: &N) -> Vec<(EntryId, Generation)> { - (*self.dep_state.lock()) - .clone() - .unwrap_or_else(|| panic!("Node {node} has already completed.")) - .generations - } - - /// - /// Completes the Context for this EntryId, returning the dependency generations that were - /// recorded while it was running. May only be called once. - /// - pub(crate) fn complete(&self, node: &N) -> DepState { - self - .dep_state - .lock() - .take() - .unwrap_or_else(|| panic!("Node {node} was completed multiple times.")) - } - - /// - /// Creates a clone of this Context to be used for a different Node. - /// - /// To clone a Context for use by the _same_ Node, `Clone` is used directly. 
- /// - pub(crate) fn clone_for(&self, entry_id: EntryId) -> Self { - Self { - entry_id: Some(entry_id), - dep_state: Arc::new(Mutex::new(Some(DepState::default()))), - inner: self.inner.clone(), + + pub fn context(&self) -> &N::Context { + &self.inner.context + } + + pub fn graph(&self) -> &Graph { + &self.inner.graph + } + + pub(crate) fn stats(&self) -> &Stats { + &self.inner.stats + } + + pub(crate) fn dep_record( + &self, + dep_id: EntryId, + generation: Generation, + uncacheable: bool, + ) -> Result<(), N::Error> { + let mut maybe_dep_state = self.dep_state.lock(); + if let Some(dep_state) = maybe_dep_state.as_mut() { + dep_state.generations.push((dep_id, generation)); + dep_state.has_uncacheable_deps |= uncacheable; + Ok(()) + } else { + // This case can occur if a Node has spawned background work which continues to attempt + // to request dependencies in the background. + Err(N::Error::generic(format!( + "Could not request additional dependencies for {:?}: the Node has completed.", + self.entry_id + ))) + } + } + + /// + /// Gets the dependency generations which have been computed for this Node so far. May not be + /// called after `complete` has been called for a node. + /// + pub(crate) fn dep_generations_so_far(&self, node: &N) -> Vec<(EntryId, Generation)> { + (*self.dep_state.lock()) + .clone() + .unwrap_or_else(|| panic!("Node {node} has already completed.")) + .generations + } + + /// + /// Completes the Context for this EntryId, returning the dependency generations that were + /// recorded while it was running. May only be called once. + /// + pub(crate) fn complete(&self, node: &N) -> DepState { + self.dep_state + .lock() + .take() + .unwrap_or_else(|| panic!("Node {node} was completed multiple times.")) + } + + /// + /// Creates a clone of this Context to be used for a different Node. + /// + /// To clone a Context for use by the _same_ Node, `Clone` is used directly. 
+ /// + pub(crate) fn clone_for(&self, entry_id: EntryId) -> Self { + Self { + entry_id: Some(entry_id), + dep_state: Arc::new(Mutex::new(Some(DepState::default()))), + inner: self.inner.clone(), + } } - } } impl Deref for Context { - type Target = N::Context; + type Target = N::Context; - fn deref(&self) -> &Self::Target { - &self.inner.context - } + fn deref(&self) -> &Self::Target { + &self.inner.context + } } #[derive(Default)] pub(crate) struct Stats { - pub ran: AtomicUsize, - pub cleaning_succeeded: AtomicUsize, - pub cleaning_failed: AtomicUsize, + pub ran: AtomicUsize, + pub cleaning_succeeded: AtomicUsize, + pub cleaning_failed: AtomicUsize, } diff --git a/src/rust/engine/graph/src/entry.rs b/src/rust/engine/graph/src/entry.rs index 77102633b03..ec58d8ac672 100644 --- a/src/rust/engine/graph/src/entry.rs +++ b/src/rust/engine/graph/src/entry.rs @@ -24,13 +24,13 @@ use workunit_store::RunId; pub struct RunToken(u32); impl RunToken { - pub fn initial() -> RunToken { - RunToken(0) - } + pub fn initial() -> RunToken { + RunToken(0) + } - fn next(self) -> RunToken { - RunToken(self.0 + 1) - } + fn next(self) -> RunToken { + RunToken(self.0 + 1) + } } /// @@ -46,19 +46,19 @@ impl RunToken { pub struct Generation(u32); impl Generation { - pub fn initial() -> Generation { - Generation(0) - } + pub fn initial() -> Generation { + Generation(0) + } - fn next(self) -> Generation { - Generation(self.0 + 1) - } + fn next(self) -> Generation { + Generation(self.0 + 1) + } } #[derive(Debug)] pub(crate) enum NodeInterrupt { - Dirtied, - Aborted(NodeResult), + Dirtied, + Aborted(NodeResult), } /// @@ -66,164 +66,164 @@ pub(crate) enum NodeInterrupt { /// #[derive(Clone, Debug)] pub enum EntryResult { - /// A value that is immediately readable by any consumer, with no constraints. - Clean(N::Item), - /// A consumer should check whether the dependencies of the Node have the same values as they - /// did when this Node was last run; if so, the value can be re-used (and can move to "Clean"). - Dirty(N::Item), - /// Similar to Clean, but the value may only be consumed in the same Run that produced it, and - /// _must_ (unlike UncacheableDependencies) be recomputed in a new Run. - Uncacheable(N::Item, RunId), - /// A value that was computed from an Uncacheable node, and is thus Run-specific. If the Run id - /// of a consumer matches, the value can be considered to be Clean: otherwise, is considered to - /// be Dirty. - UncacheableDependencies(N::Item, RunId), + /// A value that is immediately readable by any consumer, with no constraints. + Clean(N::Item), + /// A consumer should check whether the dependencies of the Node have the same values as they + /// did when this Node was last run; if so, the value can be re-used (and can move to "Clean"). + Dirty(N::Item), + /// Similar to Clean, but the value may only be consumed in the same Run that produced it, and + /// _must_ (unlike UncacheableDependencies) be recomputed in a new Run. + Uncacheable(N::Item, RunId), + /// A value that was computed from an Uncacheable node, and is thus Run-specific. If the Run id + /// of a consumer matches, the value can be considered to be Clean: otherwise, is considered to + /// be Dirty. 
+ UncacheableDependencies(N::Item, RunId), } impl EntryResult { - fn new( - item: N::Item, - context: &Context, - cacheable: bool, - has_uncacheable_deps: bool, - ) -> EntryResult { - if !cacheable { - EntryResult::Uncacheable(item, context.run_id()) - } else if has_uncacheable_deps { - EntryResult::UncacheableDependencies(item, context.run_id()) - } else { - EntryResult::Clean(item) + fn new( + item: N::Item, + context: &Context, + cacheable: bool, + has_uncacheable_deps: bool, + ) -> EntryResult { + if !cacheable { + EntryResult::Uncacheable(item, context.run_id()) + } else if has_uncacheable_deps { + EntryResult::UncacheableDependencies(item, context.run_id()) + } else { + EntryResult::Clean(item) + } + } + + fn is_clean(&self, context: &Context) -> bool { + match self { + EntryResult::Clean(..) => true, + EntryResult::Uncacheable(_, run_id) => context.run_id() == *run_id, + EntryResult::UncacheableDependencies(.., run_id) => context.run_id() == *run_id, + EntryResult::Dirty(..) => false, + } } - } - - fn is_clean(&self, context: &Context) -> bool { - match self { - EntryResult::Clean(..) => true, - EntryResult::Uncacheable(_, run_id) => context.run_id() == *run_id, - EntryResult::UncacheableDependencies(.., run_id) => context.run_id() == *run_id, - EntryResult::Dirty(..) => false, + + fn has_uncacheable_deps(&self) -> bool { + match self { + EntryResult::Uncacheable(_, _) | EntryResult::UncacheableDependencies(_, _) => true, + EntryResult::Clean(..) | EntryResult::Dirty(..) => false, + } } - } - fn has_uncacheable_deps(&self) -> bool { - match self { - EntryResult::Uncacheable(_, _) | EntryResult::UncacheableDependencies(_, _) => true, - EntryResult::Clean(..) | EntryResult::Dirty(..) => false, + /// Returns true if this result should block for polling (because there is no work to do + /// currently to clean it). + fn poll_should_wait(&self, context: &Context) -> bool { + match self { + EntryResult::Uncacheable(_, run_id) => context.run_id() == *run_id, + EntryResult::Dirty(..) => false, + EntryResult::Clean(..) | EntryResult::UncacheableDependencies(_, _) => true, + } } - } - - /// Returns true if this result should block for polling (because there is no work to do - /// currently to clean it). - fn poll_should_wait(&self, context: &Context) -> bool { - match self { - EntryResult::Uncacheable(_, run_id) => context.run_id() == *run_id, - EntryResult::Dirty(..) => false, - EntryResult::Clean(..) | EntryResult::UncacheableDependencies(_, _) => true, + + fn peek(&self, context: &Context) -> Option { + if self.is_clean(context) { + Some(self.as_ref().clone()) + } else { + None + } } - } - fn peek(&self, context: &Context) -> Option { - if self.is_clean(context) { - Some(self.as_ref().clone()) - } else { - None + /// If the value is in a Clean state, mark it Dirty. + fn dirty(&mut self) { + match self { + EntryResult::Clean(v) + | EntryResult::UncacheableDependencies(v, _) + | EntryResult::Uncacheable(v, _) => { + *self = EntryResult::Dirty(v.clone()); + } + EntryResult::Dirty(_) => {} + } } - } - - /// If the value is in a Clean state, mark it Dirty. - fn dirty(&mut self) { - match self { - EntryResult::Clean(v) - | EntryResult::UncacheableDependencies(v, _) - | EntryResult::Uncacheable(v, _) => { - *self = EntryResult::Dirty(v.clone()); - } - EntryResult::Dirty(_) => {} + + /// Assert that the value is in "a dirty state", and move it to a clean state. 
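The run-scoped staleness rule described in the variant comments above (a value produced by or under an uncacheable node is only reusable within the run that produced it) can be seen in isolation in the following sketch. It uses simplified stand-in types, a plain u32 RunId wrapper and String items rather than the engine's real Node machinery, and mirrors the shape of EntryResult::is_clean.

// Standalone sketch of run-scoped staleness; SketchResult and RunId are
// illustrative stand-ins, not the engine's real types.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct RunId(u32);

#[allow(dead_code)]
#[derive(Debug)]
enum SketchResult {
    // Reusable by any consumer.
    Clean(String),
    // Must be checked against its dependencies before reuse.
    Dirty(String),
    // Reusable only within the run that produced it; recomputed in a new run.
    Uncacheable(String, RunId),
    // Reusable only within the run that produced it; treated as Dirty afterwards.
    UncacheableDependencies(String, RunId),
}

impl SketchResult {
    // Mirrors the shape of `is_clean` above: the run-scoped variants compare the
    // consumer's run id against the one recorded when the value was completed.
    fn is_clean(&self, current_run: RunId) -> bool {
        match self {
            SketchResult::Clean(..) => true,
            SketchResult::Dirty(..) => false,
            SketchResult::Uncacheable(_, run)
            | SketchResult::UncacheableDependencies(_, run) => *run == current_run,
        }
    }
}

fn main() {
    let result = SketchResult::Uncacheable("value".to_string(), RunId(1));
    assert!(result.is_clean(RunId(1))); // same run: reusable
    assert!(!result.is_clean(RunId(2))); // later run: must be recomputed
}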
+ fn clean(&mut self, context: &Context, cacheable: bool, has_uncacheable_deps: bool) { + let value = match self { + EntryResult::Dirty(value) => value.clone(), + EntryResult::UncacheableDependencies(value, _) => value.clone(), + x => unreachable!("A node in state {:?} should not have been cleaned.", x), + }; + + *self = EntryResult::new(value, context, cacheable, has_uncacheable_deps); } - } - - /// Assert that the value is in "a dirty state", and move it to a clean state. - fn clean(&mut self, context: &Context, cacheable: bool, has_uncacheable_deps: bool) { - let value = match self { - EntryResult::Dirty(value) => value.clone(), - EntryResult::UncacheableDependencies(value, _) => value.clone(), - x => unreachable!("A node in state {:?} should not have been cleaned.", x), - }; - - *self = EntryResult::new(value, context, cacheable, has_uncacheable_deps); - } } impl AsRef for EntryResult { - fn as_ref(&self) -> &N::Item { - match self { - EntryResult::Clean(v) => v, - EntryResult::Dirty(v) => v, - EntryResult::Uncacheable(v, _) => v, - EntryResult::UncacheableDependencies(v, _) => v, + fn as_ref(&self) -> &N::Item { + match self { + EntryResult::Clean(v) => v, + EntryResult::Dirty(v) => v, + EntryResult::Uncacheable(v, _) => v, + EntryResult::UncacheableDependencies(v, _) => v, + } } - } } pub type NodeResult = ( - Result<::Item, ::Error>, - Generation, - bool, + Result<::Item, ::Error>, + Generation, + bool, ); #[derive(Debug)] pub(crate) enum EntryState { - // A node that has either been explicitly cleared, or has not yet started Running. In this state - // there is no need for a dirty bit because the RunToken is either in its initial state, or has - // been explicitly incremented when the node was cleared. - // - // The previous_result value is _not_ a valid value for this Entry: rather, it is preserved in - // order to compute the generation value for this Node by comparing it to the new result the next - // time the Node runs. - NotStarted { - run_token: RunToken, - generation: Generation, - pollers: Vec>, - previous_result: Option>, - }, - // A node that is running. A running node that has been marked dirty re-runs rather than - // completing. - // - // Holds an AsyncValue, which is canceled if either 1) all AsyncValueReceivers go away, 2) the - // AsyncValue itself is dropped. - // - // The `previous_result` value for a Running node is not a valid value. See NotStarted. - Running { - run_token: RunToken, - pending_value: AsyncValue, NodeInterrupt>, - generation: Generation, - previous_result: Option>, - is_cleaning: bool, - }, - // A node that has completed, and then possibly been marked dirty. Because marking a node - // dirty does not eagerly re-execute any logic, it will stay this way until a caller moves it - // back to Running. - // - // A Completed entry can have "pollers" whom are waiting for the Node to either be dirtied or - // otherwise invalidated. - Completed { - run_token: RunToken, - generation: Generation, - pollers: Vec>, - result: EntryResult, - dep_generations: Vec<(EntryId, Generation)>, - }, + // A node that has either been explicitly cleared, or has not yet started Running. In this state + // there is no need for a dirty bit because the RunToken is either in its initial state, or has + // been explicitly incremented when the node was cleared. 
+ // + // The previous_result value is _not_ a valid value for this Entry: rather, it is preserved in + // order to compute the generation value for this Node by comparing it to the new result the next + // time the Node runs. + NotStarted { + run_token: RunToken, + generation: Generation, + pollers: Vec>, + previous_result: Option>, + }, + // A node that is running. A running node that has been marked dirty re-runs rather than + // completing. + // + // Holds an AsyncValue, which is canceled if either 1) all AsyncValueReceivers go away, 2) the + // AsyncValue itself is dropped. + // + // The `previous_result` value for a Running node is not a valid value. See NotStarted. + Running { + run_token: RunToken, + pending_value: AsyncValue, NodeInterrupt>, + generation: Generation, + previous_result: Option>, + is_cleaning: bool, + }, + // A node that has completed, and then possibly been marked dirty. Because marking a node + // dirty does not eagerly re-execute any logic, it will stay this way until a caller moves it + // back to Running. + // + // A Completed entry can have "pollers" whom are waiting for the Node to either be dirtied or + // otherwise invalidated. + Completed { + run_token: RunToken, + generation: Generation, + pollers: Vec>, + result: EntryResult, + dep_generations: Vec<(EntryId, Generation)>, + }, } impl EntryState { - fn initial() -> EntryState { - EntryState::NotStarted { - run_token: RunToken::initial(), - generation: Generation::initial(), - pollers: Vec::new(), - previous_result: None, + fn initial() -> EntryState { + EntryState::NotStarted { + run_token: RunToken::initial(), + generation: Generation::initial(), + pollers: Vec::new(), + previous_result: None, + } } - } } /// @@ -231,168 +231,168 @@ impl EntryState { /// #[derive(Clone, Debug)] pub(crate) struct Entry { - node: Arc, + node: Arc, - state: Arc>>, + state: Arc>>, } impl Entry { - /// - /// Creates an Entry without starting it. This indirection exists because we cannot know - /// the EntryId of an Entry until after it is stored in the Graph, and we need the EntryId - /// in order to run the Entry. - /// - pub(crate) fn new(node: N) -> Entry { - Entry { - node: Arc::new(node), - state: Arc::new(Mutex::new(EntryState::initial())), - } - } - - pub fn node(&self) -> &N { - &self.node - } - - pub(crate) fn cacheable_with_output(&self, output: Option<&N::Item>) -> bool { - let output_cacheable = if let Some(item) = output { - self.node.cacheable_item(item) - } else { - false - }; - - output_cacheable && self.node.cacheable() - } - - /// - /// If this Node is currently complete and clean with the given Generation, then waits for it to - /// be changed in any way. If the node is not clean, or the generation mismatches, returns - /// immediately. - /// - pub async fn poll(&self, context: &Context, last_seen_generation: Generation) { - let recv = { - let mut state = self.state.lock(); - let pollers = match *state { - EntryState::Completed { - ref result, - generation, - ref mut pollers, - .. - } if generation == last_seen_generation && result.poll_should_wait(context) => { - // The Node is clean in this context, and the last seen generation matches. - pollers + /// + /// Creates an Entry without starting it. This indirection exists because we cannot know + /// the EntryId of an Entry until after it is stored in the Graph, and we need the EntryId + /// in order to run the Entry. 
+ /// + pub(crate) fn new(node: N) -> Entry { + Entry { + node: Arc::new(node), + state: Arc::new(Mutex::new(EntryState::initial())), } - EntryState::NotStarted { - generation, - ref mut pollers, - .. - } if generation == last_seen_generation => { - // The Node has not yet been started, but the last seen generation matches. This - // means that an error occurred on a previous run of the node, but it has already been - // observed by the caller. - pollers - } - _ => { - // The generation didn't match or the Node wasn't Completed. It should be requested - // without waiting. - return; - } - }; + } - // Add a poller on the node that will be notified when it is dirtied or dropped. If the Node - // moves to another state, the receiver will be notified that the sender was dropped. - let (send, recv) = oneshot::channel(); - pollers.push(send); - recv - }; - // Wait outside of the lock. - let _ = recv.await; - } - - /// - /// If the Future for this Node has already completed, returns a clone of its result. - /// - pub fn peek(&self, context: &Context) -> Option { - let state = self.state.lock(); - match *state { - EntryState::Completed { ref result, .. } => result.peek(context), - _ => None, + pub fn node(&self) -> &N { + &self.node } - } - - /// - /// Spawn the execution of the node on an Executor, which will cause it to execute outside of - /// the Graph and Entry locks and call back to the Entry to complete. - /// - pub(crate) fn spawn_node_execution( - context_factory: &Context, - entry: Entry, - entry_id: EntryId, - run_token: RunToken, - generation: Generation, - previous_dep_generations: Option>, - previous_result: Option>, - ) -> (EntryState, AsyncValueReceiver>, Generation) { - // Increment the RunToken to uniquely identify this work. - let run_token = run_token.next(); - let context = context_factory.clone_for(entry_id); - let context2 = context.clone(); - let entry2 = entry.clone(); - let (value, mut sender, receiver) = AsyncValue::, NodeInterrupt>::new(); - let is_cleaning = previous_dep_generations.is_some(); - - let run_or_clean = async move { - // If we have previous result generations, compare them to all current dependency - // generations (which, if they are dirty, will cause recursive cleaning). If they - // match, we can consider the previous result value to be clean for reuse. - let clean_with_cacheability = if let Some(previous_dep_generations) = previous_dep_generations - { - match context - .graph() - .attempt_cleaning(entry_id, run_token, &previous_dep_generations, &context) - .await - { - Err(()) => { - // If dependency generations mismatched, then the node's deps have already been - // cleared, and it should attempt to re-run. - context - .stats() - .cleaning_failed - .fetch_add(1, atomic::Ordering::SeqCst); - Err(()) - } - Ok(uncacheable) => { - // Dependencies have not changed: Node is clean. - context - .stats() - .cleaning_succeeded - .fetch_add(1, atomic::Ordering::SeqCst); - Ok(DepState { - generations: previous_dep_generations, - has_uncacheable_deps: uncacheable, - }) - } - } - } else { - Err(()) - }; - // If the Node was clean, complete it. Otherwise, re-run. - match clean_with_cacheability { - Ok(dep_state) => { - // No dependencies have changed: we can complete the Node without changing its - // previous_result or generation. - (None, dep_state) - } - Err(()) => { - // The Node needs to (re-)run! 
- let res = entry.node().clone().run(context.clone()).await; - context.stats().ran.fetch_add(1, atomic::Ordering::SeqCst); - (Some(res), context.complete(entry.node())) + pub(crate) fn cacheable_with_output(&self, output: Option<&N::Item>) -> bool { + let output_cacheable = if let Some(item) = output { + self.node.cacheable_item(item) + } else { + false + }; + + output_cacheable && self.node.cacheable() + } + + /// + /// If this Node is currently complete and clean with the given Generation, then waits for it to + /// be changed in any way. If the node is not clean, or the generation mismatches, returns + /// immediately. + /// + pub async fn poll(&self, context: &Context, last_seen_generation: Generation) { + let recv = { + let mut state = self.state.lock(); + let pollers = match *state { + EntryState::Completed { + ref result, + generation, + ref mut pollers, + .. + } if generation == last_seen_generation && result.poll_should_wait(context) => { + // The Node is clean in this context, and the last seen generation matches. + pollers + } + EntryState::NotStarted { + generation, + ref mut pollers, + .. + } if generation == last_seen_generation => { + // The Node has not yet been started, but the last seen generation matches. This + // means that an error occurred on a previous run of the node, but it has already been + // observed by the caller. + pollers + } + _ => { + // The generation didn't match or the Node wasn't Completed. It should be requested + // without waiting. + return; + } + }; + + // Add a poller on the node that will be notified when it is dirtied or dropped. If the Node + // moves to another state, the receiver will be notified that the sender was dropped. + let (send, recv) = oneshot::channel(); + pollers.push(send); + recv + }; + // Wait outside of the lock. + let _ = recv.await; + } + + /// + /// If the Future for this Node has already completed, returns a clone of its result. + /// + pub fn peek(&self, context: &Context) -> Option { + let state = self.state.lock(); + match *state { + EntryState::Completed { ref result, .. } => result.peek(context), + _ => None, } - } - }; + } - let _join = context2.graph().executor.clone().native_spawn(async move { + /// + /// Spawn the execution of the node on an Executor, which will cause it to execute outside of + /// the Graph and Entry locks and call back to the Entry to complete. + /// + pub(crate) fn spawn_node_execution( + context_factory: &Context, + entry: Entry, + entry_id: EntryId, + run_token: RunToken, + generation: Generation, + previous_dep_generations: Option>, + previous_result: Option>, + ) -> (EntryState, AsyncValueReceiver>, Generation) { + // Increment the RunToken to uniquely identify this work. + let run_token = run_token.next(); + let context = context_factory.clone_for(entry_id); + let context2 = context.clone(); + let entry2 = entry.clone(); + let (value, mut sender, receiver) = AsyncValue::, NodeInterrupt>::new(); + let is_cleaning = previous_dep_generations.is_some(); + + let run_or_clean = async move { + // If we have previous result generations, compare them to all current dependency + // generations (which, if they are dirty, will cause recursive cleaning). If they + // match, we can consider the previous result value to be clean for reuse. 
+ let clean_with_cacheability = + if let Some(previous_dep_generations) = previous_dep_generations { + match context + .graph() + .attempt_cleaning(entry_id, run_token, &previous_dep_generations, &context) + .await + { + Err(()) => { + // If dependency generations mismatched, then the node's deps have already been + // cleared, and it should attempt to re-run. + context + .stats() + .cleaning_failed + .fetch_add(1, atomic::Ordering::SeqCst); + Err(()) + } + Ok(uncacheable) => { + // Dependencies have not changed: Node is clean. + context + .stats() + .cleaning_succeeded + .fetch_add(1, atomic::Ordering::SeqCst); + Ok(DepState { + generations: previous_dep_generations, + has_uncacheable_deps: uncacheable, + }) + } + } + } else { + Err(()) + }; + + // If the Node was clean, complete it. Otherwise, re-run. + match clean_with_cacheability { + Ok(dep_state) => { + // No dependencies have changed: we can complete the Node without changing its + // previous_result or generation. + (None, dep_state) + } + Err(()) => { + // The Node needs to (re-)run! + let res = entry.node().clone().run(context.clone()).await; + context.stats().ran.fetch_add(1, atomic::Ordering::SeqCst); + (Some(res), context.complete(entry.node())) + } + } + }; + + let _join = context2.graph().executor.clone().native_spawn(async move { let mut run_or_clean = pin!(run_or_clean); let (maybe_res, dep_state) = loop { tokio::select! { @@ -441,474 +441,478 @@ impl Entry { ); }); - ( - EntryState::Running { - run_token, - pending_value: value, - generation, - previous_result, - is_cleaning, - }, - receiver, - generation, - ) - } - - /// - /// Returns a Future for the Node's value and Generation. - /// - /// The two separate state matches handle two cases: in the first case we simply want to mutate - /// or clone the state, so we take it by reference without swapping it. In the second case, we - /// need to consume the state (which avoids cloning some of the values held there), so we take it - /// by value. - /// - pub(crate) fn get_node_result( - &self, - context: &Context, - entry_id: EntryId, - ) -> BoxFuture> { - let mut state = self.state.lock(); - - // First check whether the Node is already complete, or is currently running: in both of these - // cases we return early without swapping the state of the Node. - match *state { - EntryState::Running { - ref pending_value, - generation, - .. - } => { - if let Some(receiver) = pending_value.receiver() { - return async move { - receiver - .recv() - .await - .unwrap_or_else(|| (Err(N::Error::invalidated()), generation.next(), true)) - } - .boxed(); - } - // Else: this node was just canceled: fall through to restart it. - } - EntryState::Completed { - ref result, - generation, - .. - } if result.is_clean(context) => { - return future::ready(( - Ok(result.as_ref().clone()), - generation, - result.has_uncacheable_deps(), - )) - .boxed(); - } - _ => (), - }; - - // Otherwise, we'll need to swap the state of the Node, so take it by value. - let (next_state, receiver, generation) = match mem::replace(&mut *state, EntryState::initial()) - { - EntryState::NotStarted { - run_token, - generation, - previous_result, - .. - } - | EntryState::Running { - run_token, - generation, - previous_result, - .. - } => Self::spawn_node_execution( - context, - self.clone(), - entry_id, - run_token, - generation, - None, - previous_result, - ), - EntryState::Completed { - run_token, - generation, - result, - dep_generations, - .. - } => { - test_trace_log!( - "Re-starting node {:?}. 
It was: previous_result={:?}", - self.node, - result, - ); - assert!( - !result.is_clean(context), - "A clean Node should not reach this point: {result:?}" - ); - // The Node has already completed but needs to re-run. If the Node is dirty, we are the - // first caller to request it since it was marked dirty. We attempt to clean it (which - // will cause it to re-run if the dep_generations mismatch). - // - // On the other hand, if the Node is uncacheable, we store the previous result as - // Uncacheable, which allows its value to be used only within the current Run. - Self::spawn_node_execution( - context, - self.clone(), - entry_id, - run_token, - generation, - // TODO: This check shouldn't matter... it's whether we recompute the generations that - // matters. - if self.cacheable_with_output(Some(result.as_ref())) { - Some(dep_generations) - } else { - None - }, - Some(result), + ( + EntryState::Running { + run_token, + pending_value: value, + generation, + previous_result, + is_cleaning, + }, + receiver, + generation, ) - } - }; + } - // Swap in the new state, and return the receiver. - *state = next_state; + /// + /// Returns a Future for the Node's value and Generation. + /// + /// The two separate state matches handle two cases: in the first case we simply want to mutate + /// or clone the state, so we take it by reference without swapping it. In the second case, we + /// need to consume the state (which avoids cloning some of the values held there), so we take it + /// by value. + /// + pub(crate) fn get_node_result( + &self, + context: &Context, + entry_id: EntryId, + ) -> BoxFuture> { + let mut state = self.state.lock(); + + // First check whether the Node is already complete, or is currently running: in both of these + // cases we return early without swapping the state of the Node. + match *state { + EntryState::Running { + ref pending_value, + generation, + .. + } => { + if let Some(receiver) = pending_value.receiver() { + return async move { + receiver.recv().await.unwrap_or_else(|| { + (Err(N::Error::invalidated()), generation.next(), true) + }) + } + .boxed(); + } + // Else: this node was just canceled: fall through to restart it. + } + EntryState::Completed { + ref result, + generation, + .. + } if result.is_clean(context) => { + return future::ready(( + Ok(result.as_ref().clone()), + generation, + result.has_uncacheable_deps(), + )) + .boxed(); + } + _ => (), + }; + + // Otherwise, we'll need to swap the state of the Node, so take it by value. + let (next_state, receiver, generation) = + match mem::replace(&mut *state, EntryState::initial()) { + EntryState::NotStarted { + run_token, + generation, + previous_result, + .. + } + | EntryState::Running { + run_token, + generation, + previous_result, + .. + } => Self::spawn_node_execution( + context, + self.clone(), + entry_id, + run_token, + generation, + None, + previous_result, + ), + EntryState::Completed { + run_token, + generation, + result, + dep_generations, + .. + } => { + test_trace_log!( + "Re-starting node {:?}. It was: previous_result={:?}", + self.node, + result, + ); + assert!( + !result.is_clean(context), + "A clean Node should not reach this point: {result:?}" + ); + // The Node has already completed but needs to re-run. If the Node is dirty, we are the + // first caller to request it since it was marked dirty. We attempt to clean it (which + // will cause it to re-run if the dep_generations mismatch). 
+ // + // On the other hand, if the Node is uncacheable, we store the previous result as + // Uncacheable, which allows its value to be used only within the current Run. + Self::spawn_node_execution( + context, + self.clone(), + entry_id, + run_token, + generation, + // TODO: This check shouldn't matter... it's whether we recompute the generations that + // matters. + if self.cacheable_with_output(Some(result.as_ref())) { + Some(dep_generations) + } else { + None + }, + Some(result), + ) + } + }; - async move { - receiver - .recv() - .await - .unwrap_or_else(|| (Err(N::Error::invalidated()), generation.next(), true)) - } - .boxed() - } - - /// - /// Called from the Executor when a Node is cancelled. - /// - /// See also: `Self::complete`. - /// - pub(crate) fn cancel(&self, result_run_token: RunToken) { - let mut state = self.state.lock(); - - // We care about exactly one case: a Running state with the same run_token. All other states - // represent various (legal) race conditions. See `RunToken`'s docs for more information. - match *state { - EntryState::Running { run_token, .. } if result_run_token == run_token => {} - _ => { - return; - } + // Swap in the new state, and return the receiver. + *state = next_state; + + async move { + receiver + .recv() + .await + .unwrap_or_else(|| (Err(N::Error::invalidated()), generation.next(), true)) + } + .boxed() } - *state = match mem::replace(&mut *state, EntryState::initial()) { - EntryState::Running { - run_token, - generation, - previous_result, - .. - } => { - test_trace_log!("Canceling {:?} of {}.", run_token, self.node); - EntryState::NotStarted { - run_token: run_token.next(), - generation, - pollers: Vec::new(), - previous_result, + /// + /// Called from the Executor when a Node is cancelled. + /// + /// See also: `Self::complete`. + /// + pub(crate) fn cancel(&self, result_run_token: RunToken) { + let mut state = self.state.lock(); + + // We care about exactly one case: a Running state with the same run_token. All other states + // represent various (legal) race conditions. See `RunToken`'s docs for more information. + match *state { + EntryState::Running { run_token, .. } if result_run_token == run_token => {} + _ => { + return; + } } - } - s => s, - }; - } - - /// - /// Called from the Executor when a Node completes. - /// - /// A `result` value of `None` indicates that the Node was found to be clean, and its previous - /// result should be used. This special case exists to avoid 1) cloning the result to call this - /// method, and 2) comparing the current/previous results unnecessarily. - /// - /// See also: `Self::cancel`. - /// - fn complete( - &self, - context: &Context, - result_run_token: RunToken, - sender: AsyncValueSender, NodeInterrupt>, - dep_generations: Vec<(EntryId, Generation)>, - has_uncacheable_deps: bool, - result: Option>, - ) { - let mut state = self.state.lock(); - - // We care about exactly one case: a Running state with the same run_token. All other states - // represent various (legal) race conditions. See `RunToken`'s docs for more information. - match *state { - EntryState::Running { run_token, .. } if result_run_token == run_token => {} - _ => { + + *state = match mem::replace(&mut *state, EntryState::initial()) { + EntryState::Running { + run_token, + generation, + previous_result, + .. 
+ } => { + test_trace_log!("Canceling {:?} of {}.", run_token, self.node); + EntryState::NotStarted { + run_token: run_token.next(), + generation, + pollers: Vec::new(), + previous_result, + } + } + s => s, + }; + } + + /// + /// Called from the Executor when a Node completes. + /// + /// A `result` value of `None` indicates that the Node was found to be clean, and its previous + /// result should be used. This special case exists to avoid 1) cloning the result to call this + /// method, and 2) comparing the current/previous results unnecessarily. + /// + /// See also: `Self::cancel`. + /// + fn complete( + &self, + context: &Context, + result_run_token: RunToken, + sender: AsyncValueSender, NodeInterrupt>, + dep_generations: Vec<(EntryId, Generation)>, + has_uncacheable_deps: bool, + result: Option>, + ) { + let mut state = self.state.lock(); + // We care about exactly one case: a Running state with the same run_token. All other states - // represent various (legal) race conditions. - test_trace_log!( - "Not completing node {:?} because it was invalidated.", - self.node - ); - return; - } + // represent various (legal) race conditions. See `RunToken`'s docs for more information. + match *state { + EntryState::Running { run_token, .. } if result_run_token == run_token => {} + _ => { + // We care about exactly one case: a Running state with the same run_token. All other states + // represent various (legal) race conditions. + test_trace_log!( + "Not completing node {:?} because it was invalidated.", + self.node + ); + return; + } + } + + *state = match mem::replace(&mut *state, EntryState::initial()) { + EntryState::Running { + run_token, + mut generation, + mut previous_result, + .. + } => { + match result { + Some(Err(e)) => { + if let Some(previous_result) = previous_result.as_mut() { + previous_result.dirty(); + } + generation = generation.next(); + sender.send((Err(e), generation, true)); + EntryState::NotStarted { + run_token: run_token.next(), + generation, + pollers: Vec::new(), + previous_result, + } + } + Some(Ok(result)) => { + let cacheable = self.cacheable_with_output(Some(&result)); + let next_result: EntryResult = + EntryResult::new(result, context, cacheable, has_uncacheable_deps); + if Some(next_result.as_ref()) + != previous_result.as_ref().map(EntryResult::as_ref) + { + // Node was re-executed (ie not cleaned) and had a different result value. + generation = generation.next() + }; + sender.send(( + Ok(next_result.as_ref().clone()), + generation, + next_result.has_uncacheable_deps(), + )); + EntryState::Completed { + result: next_result, + pollers: Vec::new(), + dep_generations, + run_token, + generation, + } + } + None => { + // Node was clean. + // NB: The `expect` here avoids a clone and a comparison: see the method docs. + let mut result = previous_result + .expect("A Node cannot be marked clean without a previous result."); + result.clean( + context, + self.cacheable_with_output(Some(result.as_ref())), + has_uncacheable_deps, + ); + sender.send(( + Ok(result.as_ref().clone()), + generation, + result.has_uncacheable_deps(), + )); + EntryState::Completed { + result, + pollers: Vec::new(), + dep_generations, + run_token, + generation, + } + } + } + } + s => s, + }; } - *state = match mem::replace(&mut *state, EntryState::initial()) { - EntryState::Running { - run_token, - mut generation, - mut previous_result, - .. - } => { - match result { - Some(Err(e)) => { + /// + /// Clears the state of this Node, forcing it to be recomputed. 
+ /// + /// # Arguments + /// + /// * `graph_still_contains_edges` - If the caller has guaranteed that all edges from this Node + /// have been removed from the graph, they should pass false here, else true. We may want to + /// remove this parameter, and force this method to remove the edges, but that would require + /// acquiring the graph lock here, which we currently don't do. + /// + pub(crate) fn clear(&mut self, graph_still_contains_edges: bool) { + let mut state = self.state.lock(); + + let (run_token, generation, mut previous_result) = + match mem::replace(&mut *state, EntryState::initial()) { + EntryState::NotStarted { + run_token, + generation, + previous_result, + .. + } => (run_token, generation, previous_result), + EntryState::Running { + run_token, + pending_value, + generation, + previous_result, + .. + } => { + std::mem::drop(pending_value); + (run_token, generation, previous_result) + } + EntryState::Completed { + run_token, + generation, + result, + .. + } => (run_token, generation, Some(result)), + }; + + test_trace_log!("Clearing node {:?}", self.node); + + if graph_still_contains_edges { if let Some(previous_result) = previous_result.as_mut() { - previous_result.dirty(); + previous_result.dirty(); + } + } + + // Swap in a state with a new RunToken value, which invalidates any outstanding work. + *state = EntryState::NotStarted { + run_token: run_token.next(), + generation, + pollers: Vec::new(), + previous_result, + }; + } + + /// + /// Dirties this Node, which will cause it to examine its dependencies the next time it is + /// requested, and re-run if any of them have changed generations. + /// + pub(crate) fn dirty(&mut self) { + let state = &mut *self.state.lock(); + test_trace_log!("Dirtying node {:?}", self.node); + match state { + &mut EntryState::Completed { + ref mut result, + ref mut pollers, + .. + } => { + // Drop the pollers, which will notify them of a change. + pollers.clear(); + result.dirty(); + return; } - generation = generation.next(); - sender.send((Err(e), generation, true)); - EntryState::NotStarted { - run_token: run_token.next(), - generation, - pollers: Vec::new(), - previous_result, + &mut EntryState::NotStarted { + ref mut pollers, .. + } => { + // Drop the pollers, which will notify them of a change. + pollers.clear(); + return; } - } - Some(Ok(result)) => { - let cacheable = self.cacheable_with_output(Some(&result)); - let next_result: EntryResult = - EntryResult::new(result, context, cacheable, has_uncacheable_deps); - if Some(next_result.as_ref()) != previous_result.as_ref().map(EntryResult::as_ref) { - // Node was re-executed (ie not cleaned) and had a different result value. - generation = generation.next() - }; - sender.send(( - Ok(next_result.as_ref().clone()), - generation, - next_result.has_uncacheable_deps(), - )); - EntryState::Completed { - result: next_result, - pollers: Vec::new(), - dep_generations, - run_token, - generation, + &mut EntryState::Running { + ref mut pending_value, + .. + } => { + // Attempt to interrupt the Running node with a notification that it has been dirtied. If + // we fail to interrupt, fall through to move back to NotStarted. + if pending_value.try_interrupt(NodeInterrupt::Dirtied).is_ok() { + return; + } } - } - None => { - // Node was clean. - // NB: The `expect` here avoids a clone and a comparison: see the method docs. 
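The comparison in `complete` above ("Node was re-executed (ie not cleaned) and had a different result value") is what keeps a node's Generation stable across re-runs that reproduce the same value, which in turn lets dependents be cleaned rather than re-run. Below is a minimal standalone sketch of that rule; the generation_after_run helper and the string values are illustration only.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Generation(u32);

impl Generation {
    fn next(self) -> Generation {
        Generation(self.0 + 1)
    }
}

// Returns the generation to record after a run: it only advances when the newly
// computed value differs from the previous one.
fn generation_after_run(previous: Option<&str>, new: &str, current: Generation) -> Generation {
    if previous == Some(new) {
        current
    } else {
        current.next()
    }
}

fn main() {
    let g0 = Generation(0);
    let g1 = generation_after_run(None, "digest-a", g0); // first run: advance
    let g2 = generation_after_run(Some("digest-a"), "digest-a", g1); // unchanged: keep
    let g3 = generation_after_run(Some("digest-a"), "digest-b", g2); // changed: advance
    assert_eq!((g1, g2, g3), (Generation(1), Generation(1), Generation(2)));
}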
- let mut result = - previous_result.expect("A Node cannot be marked clean without a previous result."); - result.clean( - context, - self.cacheable_with_output(Some(result.as_ref())), - has_uncacheable_deps, - ); - sender.send(( - Ok(result.as_ref().clone()), - generation, - result.has_uncacheable_deps(), - )); - EntryState::Completed { - result, - pollers: Vec::new(), - dep_generations, - run_token, - generation, + }; + + *state = match mem::replace(&mut *state, EntryState::initial()) { + EntryState::Running { + run_token, + pending_value, + generation, + previous_result, + .. + } => { + // We failed to interrupt the Running node, so cancel it. + test_trace_log!( + "Failed to interrupt {:?} while running: canceling instead.", + self.node + ); + std::mem::drop(pending_value); + EntryState::NotStarted { + run_token, + generation, + pollers: Vec::new(), + previous_result, + } } - } + _ => unreachable!(), } - } - s => s, - }; - } - - /// - /// Clears the state of this Node, forcing it to be recomputed. - /// - /// # Arguments - /// - /// * `graph_still_contains_edges` - If the caller has guaranteed that all edges from this Node - /// have been removed from the graph, they should pass false here, else true. We may want to - /// remove this parameter, and force this method to remove the edges, but that would require - /// acquiring the graph lock here, which we currently don't do. - /// - pub(crate) fn clear(&mut self, graph_still_contains_edges: bool) { - let mut state = self.state.lock(); - - let (run_token, generation, mut previous_result) = - match mem::replace(&mut *state, EntryState::initial()) { - EntryState::NotStarted { - run_token, - generation, - previous_result, - .. - } => (run_token, generation, previous_result), - EntryState::Running { - run_token, - pending_value, - generation, - previous_result, - .. - } => { - std::mem::drop(pending_value); - (run_token, generation, previous_result) - } - EntryState::Completed { - run_token, - generation, - result, - .. - } => (run_token, generation, Some(result)), - }; - - test_trace_log!("Clearing node {:?}", self.node); + } - if graph_still_contains_edges { - if let Some(previous_result) = previous_result.as_mut() { - previous_result.dirty(); - } + /// + /// Terminates this Node with the given error iff it is Running. + /// + /// This method is asynchronous: the task running the Node will take some time to notice that it + /// has been terminated, and to update the state of the Node. + /// + pub(crate) fn terminate(&mut self, err: N::Error) { + let state = &mut *self.state.lock(); + test_trace_log!("Terminating node {:?} with {:?}", self.node, err); + if let EntryState::Running { + pending_value, + generation, + .. + } = state + { + let _ = pending_value.try_interrupt(NodeInterrupt::Aborted(( + Err(err), + generation.next(), + true, + ))); + }; } - // Swap in a state with a new RunToken value, which invalidates any outstanding work. - *state = EntryState::NotStarted { - run_token: run_token.next(), - generation, - pollers: Vec::new(), - previous_result, - }; - } - - /// - /// Dirties this Node, which will cause it to examine its dependencies the next time it is - /// requested, and re-run if any of them have changed generations. - /// - pub(crate) fn dirty(&mut self) { - let state = &mut *self.state.lock(); - test_trace_log!("Dirtying node {:?}", self.node); - match state { - &mut EntryState::Completed { - ref mut result, - ref mut pollers, - .. - } => { - // Drop the pollers, which will notify them of a change. 
- pollers.clear(); - result.dirty(); - return; - } - &mut EntryState::NotStarted { - ref mut pollers, .. - } => { - // Drop the pollers, which will notify them of a change. - pollers.clear(); - return; - } - &mut EntryState::Running { - ref mut pending_value, - .. - } => { - // Attempt to interrupt the Running node with a notification that it has been dirtied. If - // we fail to interrupt, fall through to move back to NotStarted. - if pending_value.try_interrupt(NodeInterrupt::Dirtied).is_ok() { - return; + /// + /// Indicates that cleaning this Node has failed, returning an error if the RunToken has changed. + /// + pub(crate) fn cleaning_failed(&mut self, expected_run_token: RunToken) -> Result<(), ()> { + let state = &mut *self.state.lock(); + match state { + EntryState::Running { + is_cleaning, + run_token, + .. + } if *run_token == expected_run_token => { + *is_cleaning = false; + Ok(()) + } + _ => Err(()), } - } - }; + } - *state = match mem::replace(&mut *state, EntryState::initial()) { - EntryState::Running { - run_token, - pending_value, - generation, - previous_result, - .. - } => { - // We failed to interrupt the Running node, so cancel it. - test_trace_log!( - "Failed to interrupt {:?} while running: canceling instead.", - self.node - ); - std::mem::drop(pending_value); - EntryState::NotStarted { - run_token, - generation, - pollers: Vec::new(), - previous_result, + pub fn is_started(&self) -> bool { + match *self.state.lock() { + EntryState::NotStarted { .. } => false, + EntryState::Completed { .. } | EntryState::Running { .. } => true, } - } - _ => unreachable!(), } - } - - /// - /// Terminates this Node with the given error iff it is Running. - /// - /// This method is asynchronous: the task running the Node will take some time to notice that it - /// has been terminated, and to update the state of the Node. - /// - pub(crate) fn terminate(&mut self, err: N::Error) { - let state = &mut *self.state.lock(); - test_trace_log!("Terminating node {:?} with {:?}", self.node, err); - if let EntryState::Running { - pending_value, - generation, - .. - } = state - { - let _ = - pending_value.try_interrupt(NodeInterrupt::Aborted((Err(err), generation.next(), true))); - }; - } - - /// - /// Indicates that cleaning this Node has failed, returning an error if the RunToken has changed. - /// - pub(crate) fn cleaning_failed(&mut self, expected_run_token: RunToken) -> Result<(), ()> { - let state = &mut *self.state.lock(); - match state { - EntryState::Running { - is_cleaning, - run_token, - .. - } if *run_token == expected_run_token => { - *is_cleaning = false; - Ok(()) - } - _ => Err(()), - } - } - pub fn is_started(&self) -> bool { - match *self.state.lock() { - EntryState::NotStarted { .. } => false, - EntryState::Completed { .. } | EntryState::Running { .. } => true, + pub fn is_running(&self) -> bool { + match *self.state.lock() { + EntryState::Running { .. } => true, + EntryState::Completed { .. } | EntryState::NotStarted { .. } => false, + } } - } - pub fn is_running(&self) -> bool { - match *self.state.lock() { - EntryState::Running { .. } => true, - EntryState::Completed { .. } | EntryState::NotStarted { .. } => false, + pub fn is_cleaning(&self) -> bool { + match *self.state.lock() { + EntryState::Running { is_cleaning, .. } => is_cleaning, + EntryState::Completed { .. } | EntryState::NotStarted { .. } => false, + } } - } - pub fn is_cleaning(&self) -> bool { - match *self.state.lock() { - EntryState::Running { is_cleaning, .. } => is_cleaning, - EntryState::Completed { .. 
} | EntryState::NotStarted { .. } => false, + pub(crate) fn format(&self, context: &Context) -> String { + let state = match self.peek(context) { + Some(ref nr) => { + let item = format!("{nr:?}"); + if item.len() <= 1024 { + item + } else { + item.chars().take(1024).collect() + } + } + None => "".to_string(), + }; + format!("{} == {}", self.node, state) } - } - - pub(crate) fn format(&self, context: &Context) -> String { - let state = match self.peek(context) { - Some(ref nr) => { - let item = format!("{nr:?}"); - if item.len() <= 1024 { - item - } else { - item.chars().take(1024).collect() - } - } - None => "".to_string(), - }; - format!("{} == {}", self.node, state) - } } diff --git a/src/rust/engine/graph/src/lib.rs b/src/rust/engine/graph/src/lib.rs index c3677bb8041..bb43139498a 100644 --- a/src/rust/engine/graph/src/lib.rs +++ b/src/rust/engine/graph/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -58,330 +58,328 @@ type PGraph = DiGraph, (), u32>; #[derive(Debug, Eq, PartialEq)] pub struct InvalidationResult { - pub cleared: usize, - pub dirtied: usize, + pub cleared: usize, + pub dirtied: usize, } type Nodes = HashMap; struct InnerGraph { - nodes: Nodes, - pg: PGraph, - run_id_generator: u32, + nodes: Nodes, + pg: PGraph, + run_id_generator: u32, } impl InnerGraph { - fn entry_id(&self, node: &N) -> Option<&EntryId> { - self.nodes.get(node) - } - - // TODO: Now that we never delete Entries, we should consider making this infallible. - fn entry_for_id(&self, id: EntryId) -> Option<&Entry> { - self.pg.node_weight(id) - } - - // TODO: Now that we never delete Entries, we should consider making this infallible. - fn entry_for_id_mut(&mut self, id: EntryId) -> Option<&mut Entry> { - self.pg.node_weight_mut(id) - } - - fn unsafe_entry_for_id(&self, id: EntryId) -> &Entry { - self - .pg - .node_weight(id) - .expect("The unsafe_entry_for_id method should only be used in read-only methods!") - } - - fn ensure_entry(&mut self, node: N) -> EntryId { - InnerGraph::ensure_entry_internal(&mut self.pg, &mut self.nodes, node) - } - - fn ensure_entry_internal(pg: &mut PGraph, nodes: &mut Nodes, node: N) -> EntryId { - if let Some(&id) = nodes.get(&node) { - return id; + fn entry_id(&self, node: &N) -> Option<&EntryId> { + self.nodes.get(node) } - // New entry. 
- let id = pg.add_node(Entry::new(node.clone())); - nodes.insert(node, id); - id - } - - /// - /// Locates all* cycles in running nodes in the graph, and terminates one Node in each of them. - /// - /// * Finding "all simple cycles" in a graph is apparently best accomplished with [Johnson's - /// algorithm](https://www.cs.tufts.edu/comp/150GA/homeworks/hw1/Johnson%2075.PDF), which uses - /// the strongly connected components, but goes a bit further. Because this method will run - /// multiple times, we don't worry about that, and just kill one member of each SCC. - /// - fn terminate_cycles(&mut self) { - // Build a graph of Running node indexes. - let running_graph = self.pg.filter_map( - |node_idx, node_weight| { - if node_weight.is_running() { - Some(node_idx) - } else { - None - } - }, - |_edge_idx, _edge_weight| Some(()), - ); - // TODO: We'd usually use `tarjan_scc` because it makes one fewer pass, but it panics (without - // a useful error message) for some graphs. So `kosaraju_scc` it is. - let running_sccs = petgraph::algo::kosaraju_scc(&running_graph); - - for running_scc in running_sccs { - if running_scc.len() <= 1 { - continue; - } + // TODO: Now that we never delete Entries, we should consider making this infallible. + fn entry_for_id(&self, id: EntryId) -> Option<&Entry> { + self.pg.node_weight(id) + } - // There is a cycle. We bias toward terminating nodes which are being cleaned, because it's - // possible for them to form false cycles with nodes which are running from scratch. If no - // nodes are being cleaned, then choose the running node with the highest node id. - let (running_candidate, should_terminate) = if let Some(dirty_candidate) = running_scc - .iter() - .filter(|&id| self.pg[running_graph[*id]].is_cleaning()) - .max_by_key(|&id| running_graph[*id]) - { - // Nodes are being cleaned: clear the highest id entry. - (dirty_candidate, false) - } else { - // There are no nodes being cleaned: terminate the Running node with the highest id. - ( - running_scc - .iter() - .max_by_key(|&id| running_graph[*id]) - .unwrap(), - true, - ) - }; - - test_trace_log!( - "Cycle {:?}", - running_scc - .iter() - .map(|id| { - let entry = &self.pg[running_graph[*id]]; - format!("{:?}: is_cleaning: {}", entry.node(), entry.is_cleaning()) - }) - .collect::>(), - ); - - // Calculate one path between the chosen node and itself by finding a path to its first - // predecessor (which as a fellow member of the SCC, must also be reachable). - let running_predecessor = running_graph - .neighbors_directed(*running_candidate, Direction::Incoming) - .find(|id| running_scc.contains(id)) - .unwrap(); - let running_path: Vec<_> = petgraph::algo::all_simple_paths( - &running_graph, - *running_candidate, - running_predecessor, - 0, - None, - ) - .next() - .unwrap(); - - // Either terminate or clear the candidate. - let candidate = running_graph[*running_candidate]; - if should_terminate { - // Render the error, and terminate the Node with it. - let path = running_path - .into_iter() - .map(|rni| self.pg[running_graph[rni]].node()) - .collect::>(); - let error = N::cyclic_error(&path); - self.pg[candidate].terminate(error); - } else { - // Else, clear. - let node = self.pg[candidate].node().clone(); - self.invalidate_from_roots(true, |n| &node == n); - } + // TODO: Now that we never delete Entries, we should consider making this infallible. + fn entry_for_id_mut(&mut self, id: EntryId) -> Option<&mut Entry> { + self.pg.node_weight_mut(id) } - } - - /// - /// Begins a Walk from the given roots. 
- /// - /// The Walk will iterate over all nodes that descend from the roots in the direction of - /// traversal but won't necessarily be in topological order. - /// - fn walk bool>( - &self, - roots: VecDeque, - direction: Direction, - stop_walking_predicate: F, - ) -> Walk<'_, N, F> { - Walk { - graph: self, - direction: direction, - deque: roots, - walked: self.pg.visit_map(), - stop_walking_predicate, + + fn unsafe_entry_for_id(&self, id: EntryId) -> &Entry { + self.pg + .node_weight(id) + .expect("The unsafe_entry_for_id method should only be used in read-only methods!") } - } - fn clear(&mut self) { - for eid in self.nodes.values() { - if let Some(entry) = self.pg.node_weight_mut(*eid) { - entry.clear(true); - } + fn ensure_entry(&mut self, node: N) -> EntryId { + InnerGraph::ensure_entry_internal(&mut self.pg, &mut self.nodes, node) } - } - - /// - /// Clears the values of all "invalidation root" Nodes and dirties their transitive dependents. - /// - /// An "invalidation root" is a Node in the graph which can be invalidated for a reason other - /// than having had its dependencies changed. - /// - fn invalidate_from_roots bool>( - &mut self, - log_dirtied: bool, - predicate: P, - ) -> InvalidationResult { - // Collect all entries that will be cleared. - let root_ids: HashSet<_> = self - .nodes - .iter() - .filter_map(|(node, &entry_id)| { - // A NotStarted entry does not need clearing, and we can assume that its dependencies are - // either already dirtied, or have never observed a value for it. Filtering these redundant - // events helps to "debounce" invalidation (ie, avoid redundant re-dirtying of dependencies). - if predicate(node) && self.unsafe_entry_for_id(entry_id).is_started() { - Some(entry_id) - } else { - None + + fn ensure_entry_internal(pg: &mut PGraph, nodes: &mut Nodes, node: N) -> EntryId { + if let Some(&id) = nodes.get(&node) { + return id; } - }) - .collect(); - - // And their transitive dependencies, which will be dirtied. - // - // NB: We only dirty "through" a Node and into its dependents if it is Node::restartable. - let transitive_ids: Vec<_> = self - .walk( - root_ids.iter().cloned().collect(), - Direction::Incoming, - |&entry_id| { - let entry = self.unsafe_entry_for_id(entry_id); - !entry.node().restartable() && entry.is_running() - }, - ) - .filter(|eid| !root_ids.contains(eid)) - .collect(); - - let invalidation_result = InvalidationResult { - cleared: root_ids.len(), - dirtied: transitive_ids.len(), - }; - // If there were no roots, then nothing will be invalidated. Return early to avoid scanning all - // edges in `retain_edges`. - if root_ids.is_empty() { - return invalidation_result; + // New entry. + let id = pg.add_node(Entry::new(node.clone())); + nodes.insert(node, id); + id } - // Clear roots and remove their outbound edges. - for id in &root_ids { - if let Some(entry) = self.pg.node_weight_mut(*id) { - entry.clear(false); - } + /// + /// Locates all* cycles in running nodes in the graph, and terminates one Node in each of them. + /// + /// * Finding "all simple cycles" in a graph is apparently best accomplished with [Johnson's + /// algorithm](https://www.cs.tufts.edu/comp/150GA/homeworks/hw1/Johnson%2075.PDF), which uses + /// the strongly connected components, but goes a bit further. Because this method will run + /// multiple times, we don't worry about that, and just kill one member of each SCC. + /// + fn terminate_cycles(&mut self) { + // Build a graph of Running node indexes. 
+ let running_graph = self.pg.filter_map( + |node_idx, node_weight| { + if node_weight.is_running() { + Some(node_idx) + } else { + None + } + }, + |_edge_idx, _edge_weight| Some(()), + ); + // TODO: We'd usually use `tarjan_scc` because it makes one fewer pass, but it panics (without + // a useful error message) for some graphs. So `kosaraju_scc` it is. + let running_sccs = petgraph::algo::kosaraju_scc(&running_graph); + + for running_scc in running_sccs { + if running_scc.len() <= 1 { + continue; + } + + // There is a cycle. We bias toward terminating nodes which are being cleaned, because it's + // possible for them to form false cycles with nodes which are running from scratch. If no + // nodes are being cleaned, then choose the running node with the highest node id. + let (running_candidate, should_terminate) = if let Some(dirty_candidate) = running_scc + .iter() + .filter(|&id| self.pg[running_graph[*id]].is_cleaning()) + .max_by_key(|&id| running_graph[*id]) + { + // Nodes are being cleaned: clear the highest id entry. + (dirty_candidate, false) + } else { + // There are no nodes being cleaned: terminate the Running node with the highest id. + ( + running_scc + .iter() + .max_by_key(|&id| running_graph[*id]) + .unwrap(), + true, + ) + }; + + test_trace_log!( + "Cycle {:?}", + running_scc + .iter() + .map(|id| { + let entry = &self.pg[running_graph[*id]]; + format!("{:?}: is_cleaning: {}", entry.node(), entry.is_cleaning()) + }) + .collect::>(), + ); + + // Calculate one path between the chosen node and itself by finding a path to its first + // predecessor (which as a fellow member of the SCC, must also be reachable). + let running_predecessor = running_graph + .neighbors_directed(*running_candidate, Direction::Incoming) + .find(|id| running_scc.contains(id)) + .unwrap(); + let running_path: Vec<_> = petgraph::algo::all_simple_paths( + &running_graph, + *running_candidate, + running_predecessor, + 0, + None, + ) + .next() + .unwrap(); + + // Either terminate or clear the candidate. + let candidate = running_graph[*running_candidate]; + if should_terminate { + // Render the error, and terminate the Node with it. + let path = running_path + .into_iter() + .map(|rni| self.pg[running_graph[rni]].node()) + .collect::>(); + let error = N::cyclic_error(&path); + self.pg[candidate].terminate(error); + } else { + // Else, clear. + let node = self.pg[candidate].node().clone(); + self.invalidate_from_roots(true, |n| &node == n); + } + } } - self.pg.retain_edges(|pg, edge| { - if let Some((src, _)) = pg.edge_endpoints(edge) { - !root_ids.contains(&src) - } else { - true - } - }); - - // Dirty transitive entries, but do not yet clear their output edges. We wait to clear - // outbound edges until we decide whether we can clean an entry: if we can, all edges are - // preserved; if we can't, they are cleared in `Graph::clear_deps`. - for id in transitive_ids { - if let Some(entry) = self.entry_for_id_mut(id) { - if log_dirtied { - log::info!("Dirtying {}", entry.node()); + + /// + /// Begins a Walk from the given roots. + /// + /// The Walk will iterate over all nodes that descend from the roots in the direction of + /// traversal but won't necessarily be in topological order. 
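terminate_cycles above detects cycles among Running nodes by taking the strongly connected components of the filtered graph and treating any component with more than one member as a cycle. The self-contained example below applies the same petgraph call, kosaraju_scc, to a toy graph; the node names and edges are arbitrary and only illustrate the detection step, not the termination policy.

// Standalone sketch of SCC-based cycle detection with petgraph.
use petgraph::graph::DiGraph;

fn main() {
    let mut g: DiGraph<&str, ()> = DiGraph::new();
    let a = g.add_node("a");
    let b = g.add_node("b");
    let c = g.add_node("c");
    let d = g.add_node("d");
    // a -> b -> c -> a forms a cycle; d hangs off the cycle.
    g.add_edge(a, b, ());
    g.add_edge(b, c, ());
    g.add_edge(c, a, ());
    g.add_edge(c, d, ());

    for scc in petgraph::algo::kosaraju_scc(&g) {
        if scc.len() > 1 {
            // In the engine, one member of each such component would be
            // terminated (or cleared) to break the cycle.
            let members: Vec<_> = scc.iter().map(|idx| g[*idx]).collect();
            println!("cycle detected among: {members:?}");
        }
    }
}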
+ /// + fn walk bool>( + &self, + roots: VecDeque, + direction: Direction, + stop_walking_predicate: F, + ) -> Walk<'_, N, F> { + Walk { + graph: self, + direction: direction, + deque: roots, + walked: self.pg.visit_map(), + stop_walking_predicate, } - entry.dirty(); - } } - invalidation_result - } - - fn visualize(&self, roots: &[N], path: &Path, context: &Context) -> io::Result<()> { - let file = File::create(path)?; - let mut f = BufWriter::new(file); - - let root_ids = roots - .iter() - .filter_map(|node| self.entry_id(node)) - .cloned() - .collect(); - let included = self - .walk(root_ids, Direction::Outgoing, |_| false) - .collect::>(); - - let graph = self.pg.filter_map( - |node_id, node| { - if included.contains(&node_id) { - Some(node.format(context)) - } else { - None + fn clear(&mut self) { + for eid in self.nodes.values() { + if let Some(entry) = self.pg.node_weight_mut(*eid) { + entry.clear(true); + } + } + } + + /// + /// Clears the values of all "invalidation root" Nodes and dirties their transitive dependents. + /// + /// An "invalidation root" is a Node in the graph which can be invalidated for a reason other + /// than having had its dependencies changed. + /// + fn invalidate_from_roots bool>( + &mut self, + log_dirtied: bool, + predicate: P, + ) -> InvalidationResult { + // Collect all entries that will be cleared. + let root_ids: HashSet<_> = self + .nodes + .iter() + .filter_map(|(node, &entry_id)| { + // A NotStarted entry does not need clearing, and we can assume that its dependencies are + // either already dirtied, or have never observed a value for it. Filtering these redundant + // events helps to "debounce" invalidation (ie, avoid redundant re-dirtying of dependencies). + if predicate(node) && self.unsafe_entry_for_id(entry_id).is_started() { + Some(entry_id) + } else { + None + } + }) + .collect(); + + // And their transitive dependencies, which will be dirtied. + // + // NB: We only dirty "through" a Node and into its dependents if it is Node::restartable. + let transitive_ids: Vec<_> = self + .walk( + root_ids.iter().cloned().collect(), + Direction::Incoming, + |&entry_id| { + let entry = self.unsafe_entry_for_id(entry_id); + !entry.node().restartable() && entry.is_running() + }, + ) + .filter(|eid| !root_ids.contains(eid)) + .collect(); + + let invalidation_result = InvalidationResult { + cleared: root_ids.len(), + dirtied: transitive_ids.len(), + }; + + // If there were no roots, then nothing will be invalidated. Return early to avoid scanning all + // edges in `retain_edges`. + if root_ids.is_empty() { + return invalidation_result; + } + + // Clear roots and remove their outbound edges. + for id in &root_ids { + if let Some(entry) = self.pg.node_weight_mut(*id) { + entry.clear(false); + } + } + self.pg.retain_edges(|pg, edge| { + if let Some((src, _)) = pg.edge_endpoints(edge) { + !root_ids.contains(&src) + } else { + true + } + }); + + // Dirty transitive entries, but do not yet clear their output edges. We wait to clear + // outbound edges until we decide whether we can clean an entry: if we can, all edges are + // preserved; if we can't, they are cleared in `Graph::clear_deps`. 
+ for id in transitive_ids { + if let Some(entry) = self.entry_for_id_mut(id) { + if log_dirtied { + log::info!("Dirtying {}", entry.node()); + } + entry.dirty(); + } } - }, - |_, _| Some("".to_owned()), - ); - - f.write_all( - format!( - "{}", - dot::Dot::with_config(&graph, &[dot::Config::EdgeNoLabel],) - ) - .as_bytes(), - )?; - - Ok(()) - } - - fn live_reachable( - &self, - roots: &[N], - context: &Context, - ) -> impl Iterator { - // TODO: This is a surprisingly expensive method, because it will clone all reachable values by - // calling `peek` on them. - let root_ids = roots - .iter() - .filter_map(|node| self.entry_id(node)) - .cloned() - .collect(); - self.live_internal( - self - .walk(root_ids, Direction::Outgoing, |_| false) - .collect(), - context.clone(), - ) - } - - fn live(&self, context: &Context) -> impl Iterator { - self.live_internal(self.pg.node_indices().collect(), context.clone()) - } - - fn live_internal( - &self, - entryids: Vec, - context: Context, - ) -> impl Iterator + '_ { - entryids - .into_iter() - .filter_map(move |eid| self.entry_for_id(eid)) - .filter_map(move |entry| entry.peek(&context).map(|i| (entry.node(), i))) - } + + invalidation_result + } + + fn visualize(&self, roots: &[N], path: &Path, context: &Context) -> io::Result<()> { + let file = File::create(path)?; + let mut f = BufWriter::new(file); + + let root_ids = roots + .iter() + .filter_map(|node| self.entry_id(node)) + .cloned() + .collect(); + let included = self + .walk(root_ids, Direction::Outgoing, |_| false) + .collect::>(); + + let graph = self.pg.filter_map( + |node_id, node| { + if included.contains(&node_id) { + Some(node.format(context)) + } else { + None + } + }, + |_, _| Some("".to_owned()), + ); + + f.write_all( + format!( + "{}", + dot::Dot::with_config(&graph, &[dot::Config::EdgeNoLabel],) + ) + .as_bytes(), + )?; + + Ok(()) + } + + fn live_reachable( + &self, + roots: &[N], + context: &Context, + ) -> impl Iterator { + // TODO: This is a surprisingly expensive method, because it will clone all reachable values by + // calling `peek` on them. 
+ let root_ids = roots + .iter() + .filter_map(|node| self.entry_id(node)) + .cloned() + .collect(); + self.live_internal( + self.walk(root_ids, Direction::Outgoing, |_| false) + .collect(), + context.clone(), + ) + } + + fn live(&self, context: &Context) -> impl Iterator { + self.live_internal(self.pg.node_indices().collect(), context.clone()) + } + + fn live_internal( + &self, + entryids: Vec, + context: Context, + ) -> impl Iterator + '_ { + entryids + .into_iter() + .filter_map(move |eid| self.entry_for_id(eid)) + .filter_map(move |entry| entry.peek(&context).map(|i| (entry.node(), i))) + } } /// @@ -389,299 +387,303 @@ impl InnerGraph { /// #[derive(Clone)] pub struct Graph { - inner: Arc>>, - invalidation_delay: Duration, - executor: Executor, + inner: Arc>>, + invalidation_delay: Duration, + executor: Executor, } impl Graph { - pub fn new(executor: Executor) -> Graph { - Self::new_with_invalidation_delay(executor, Duration::from_millis(500)) - } - - pub fn new_with_invalidation_delay(executor: Executor, invalidation_delay: Duration) -> Graph { - let inner = Arc::new(Mutex::new(InnerGraph { - nodes: HashMap::default(), - pg: DiGraph::new(), - run_id_generator: 0, - })); - let _join = executor.native_spawn(Self::cycle_check_task(Arc::downgrade(&inner))); - - Graph { - inner, - invalidation_delay, - executor, + pub fn new(executor: Executor) -> Graph { + Self::new_with_invalidation_delay(executor, Duration::from_millis(500)) } - } - - /// Create a Context wrapping an opaque Node::Context type, which will use a newly generated RunId. - pub fn context(&self, context: N::Context) -> Context { - self.context_with_run_id(context, self.generate_run_id()) - } - - /// Create a Context wrapping an opaque Node::Context type. - pub fn context_with_run_id(&self, context: N::Context, run_id: RunId) -> Context { - Context::new(self.clone(), context, run_id) - } - - /// Generate a unique RunId for this Graph which can be reused in `context_with_run_id`. - pub fn generate_run_id(&self) -> RunId { - let mut inner = self.inner.lock(); - let run_id = inner.run_id_generator; - inner.run_id_generator += 1; - RunId(run_id) - } - - /// - /// A task which periodically checks for cycles in Running nodes. Doing this in the background - /// allows for batching and laziness: nodes which don't form cycles may complete without ever - /// being checked. - /// - /// Uses a `Weak` reference to the Graph to detect when the sender has shut down. - /// - async fn cycle_check_task(inner: Weak>>) { - loop { - sleep(Duration::from_millis(500)).await; - - if let Some(inner) = Weak::upgrade(&inner) { - inner.lock().terminate_cycles(); - } else { - // We've been shut down. - break; - }; - } - } - - pub fn len(&self) -> usize { - let inner = self.inner.lock(); - inner.nodes.len() - } - - async fn get_inner( - &self, - src_id: Option, - context: &Context, - dst_node: N, - ) -> (Result, Generation) { - // Compute information about the dst under the Graph lock, and then release it. - let (entry, entry_id) = { - // Get or create the destination, and then insert the dep and return its state. - let mut inner = self.inner.lock(); - - let dst_id = inner.ensure_entry(dst_node); - if let Some(src_id) = src_id { - test_trace_log!( - "Adding dependency from {:?} to {:?}", - inner.entry_for_id(src_id).unwrap().node(), - inner.entry_for_id(dst_id).unwrap().node() - ); - inner.pg.add_edge(src_id, dst_id, ()); - } else { - // Otherwise, this is an external request: always retry. 
- test_trace_log!( - "Requesting node {:?}", - inner.entry_for_id(dst_id).unwrap().node() - ); - } - let dst_entry = inner.entry_for_id(dst_id).cloned().unwrap(); - (dst_entry, dst_id) - }; - - // Return the state of the destination, retrying the dst to handle Node invalidation. - let context = context.clone(); - let (result, generation, uncacheable) = loop { - match entry.get_node_result(&context, entry_id).await { - (Err(err), _, _) if err == N::Error::invalidated() => { - let node = { - let inner = self.inner.lock(); - inner.unsafe_entry_for_id(entry_id).node().clone() - }; - info!( - "Filesystem changed during run: retrying `{}` in {:?}...", - node, self.invalidation_delay - ); - sleep(self.invalidation_delay).await; - continue; + pub fn new_with_invalidation_delay( + executor: Executor, + invalidation_delay: Duration, + ) -> Graph { + let inner = Arc::new(Mutex::new(InnerGraph { + nodes: HashMap::default(), + pg: DiGraph::new(), + run_id_generator: 0, + })); + let _join = executor.native_spawn(Self::cycle_check_task(Arc::downgrade(&inner))); + + Graph { + inner, + invalidation_delay, + executor, } - res => break res, - } - }; + } - if src_id.is_some() { - if let Err(e) = context.dep_record(entry_id, generation, uncacheable) { - return (Err(e), generation); - } + /// Create a Context wrapping an opaque Node::Context type, which will use a newly generated RunId. + pub fn context(&self, context: N::Context) -> Context { + self.context_with_run_id(context, self.generate_run_id()) + } + + /// Create a Context wrapping an opaque Node::Context type. + pub fn context_with_run_id(&self, context: N::Context, run_id: RunId) -> Context { + Context::new(self.clone(), context, run_id) } - (result, generation) - } - - /// - /// Return the value of the given Node. - /// - pub async fn create(&self, node: N, context: &Context) -> Result { - let (res, _generation) = self.get_inner(None, context, node).await; - res - } - - /// - /// Gets the value of the given Node (optionally waiting for it to have changed since the given - /// LastObserved token), and then returns its new value and a new LastObserved token. - /// - pub async fn poll( - &self, - node: N, - token: Option, - delay: Option, - context: &Context, - ) -> (Result, LastObserved) { - // If the node is currently clean at the given token, Entry::poll will delay until it has - // changed in some way. - if let Some(LastObserved(generation)) = token { - let entry = { + /// Generate a unique RunId for this Graph which can be reused in `context_with_run_id`. + pub fn generate_run_id(&self) -> RunId { let mut inner = self.inner.lock(); - let entry_id = inner.ensure_entry(node.clone()); - inner.unsafe_entry_for_id(entry_id).clone() - }; - entry.poll(context, generation).await; - if let Some(delay) = delay { - sleep(delay).await; - } - }; + let run_id = inner.run_id_generator; + inner.run_id_generator += 1; + RunId(run_id) + } - // Re-request the Node. - let (res, generation) = self.get_inner(None, context, node).await; - (res, LastObserved(generation)) - } - - /// - /// Compares the generations of the dependencies of the given EntryId to their previous - /// generation values (re-computing or cleaning them first if necessary). - /// - /// Returns `Ok(uncacheable_deps)` if the node was successfully cleaned, and clears the node's - /// edges if it was not successfully cleaned. 
- /// - async fn attempt_cleaning( - &self, - entry_id: EntryId, - run_token: RunToken, - previous_dep_generations: &[(EntryId, Generation)], - context: &Context, - ) -> Result { - let generation_matches = { - let inner = self.inner.lock(); - let entry = if log::log_enabled!(log::Level::Debug) { - Some(inner.pg[entry_id].clone()) - } else { - None - }; - - previous_dep_generations - .iter() - .map(|&(dep_id, previous_dep_generation)| { - let entry = entry.clone(); - let dep_entry = inner - .entry_for_id(dep_id) - .unwrap_or_else(|| panic!("Dependency not present in Graph.")) - .clone(); - - async move { - let (_, generation, uncacheable) = dep_entry.get_node_result(context, dep_id).await; - if generation == previous_dep_generation { - // Matched. - Ok(uncacheable) + /// + /// A task which periodically checks for cycles in Running nodes. Doing this in the background + /// allows for batching and laziness: nodes which don't form cycles may complete without ever + /// being checked. + /// + /// Uses a `Weak` reference to the Graph to detect when the sender has shut down. + /// + async fn cycle_check_task(inner: Weak>>) { + loop { + sleep(Duration::from_millis(500)).await; + + if let Some(inner) = Weak::upgrade(&inner) { + inner.lock().terminate_cycles(); + } else { + // We've been shut down. + break; + }; + } + } + + pub fn len(&self) -> usize { + let inner = self.inner.lock(); + inner.nodes.len() + } + + async fn get_inner( + &self, + src_id: Option, + context: &Context, + dst_node: N, + ) -> (Result, Generation) { + // Compute information about the dst under the Graph lock, and then release it. + let (entry, entry_id) = { + // Get or create the destination, and then insert the dep and return its state. + let mut inner = self.inner.lock(); + + let dst_id = inner.ensure_entry(dst_node); + if let Some(src_id) = src_id { + test_trace_log!( + "Adding dependency from {:?} to {:?}", + inner.entry_for_id(src_id).unwrap().node(), + inner.entry_for_id(dst_id).unwrap().node() + ); + inner.pg.add_edge(src_id, dst_id, ()); } else { - // Did not match. We error here to trigger fail-fast in `try_join_all`. - log::debug!( - "Dependency {} of {:?} changed.", - dep_entry.node(), - entry.map(|e| e.node().to_string()) - ); - Err(()) + // Otherwise, this is an external request: always retry. + test_trace_log!( + "Requesting node {:?}", + inner.entry_for_id(dst_id).unwrap().node() + ); } - } - }) - .collect::>() - }; - // We use try_join_all in order to speculatively execute all branches, and to fail fast if any - // generation mismatches. The first mismatch encountered will cause any extraneous cleaning - // work to be canceled. See #11290 for more information about the tradeoffs inherent in - // speculation. - match future::try_join_all(generation_matches).await { - Ok(uncacheable_deps) => { - // Cleaning succeeded. - // - // Return true if any dep was uncacheable. - Ok(uncacheable_deps.into_iter().any(|u| u)) - } - Err(()) => { - // Cleaning failed. - // - // If the RunToken still matches, clear all edges of the Node before returning. - let mut inner = self.inner.lock(); - if let Some(entry) = inner.entry_for_id_mut(entry_id) { - if entry.cleaning_failed(run_token).is_ok() { - // Clear the deps. We remove edges in reverse index order, because `remove_edge` is - // implemented in terms of `swap_remove`, and so affects edge ids greater than the removed edge - // id. 
See https://docs.rs/petgraph/0.5.1/petgraph/graph/struct.Graph.html#method.remove_edge - let mut edge_ids = inner - .pg - .edges_directed(entry_id, Direction::Outgoing) - .map(|e| e.id()) - .collect::>(); - edge_ids.sort_by_key(|id| std::cmp::Reverse(id.index())); - for edge_id in edge_ids { - inner.pg.remove_edge(edge_id); + let dst_entry = inner.entry_for_id(dst_id).cloned().unwrap(); + (dst_entry, dst_id) + }; + + // Return the state of the destination, retrying the dst to handle Node invalidation. + let context = context.clone(); + let (result, generation, uncacheable) = loop { + match entry.get_node_result(&context, entry_id).await { + (Err(err), _, _) if err == N::Error::invalidated() => { + let node = { + let inner = self.inner.lock(); + inner.unsafe_entry_for_id(entry_id).node().clone() + }; + info!( + "Filesystem changed during run: retrying `{}` in {:?}...", + node, self.invalidation_delay + ); + sleep(self.invalidation_delay).await; + continue; + } + res => break res, + } + }; + + if src_id.is_some() { + if let Err(e) = context.dep_record(entry_id, generation, uncacheable) { + return (Err(e), generation); } - } } - Err(()) - } + + (result, generation) } - } - - /// - /// Clears the state of all Nodes in the Graph by dropping their state fields. - /// - pub fn clear(&self) { - let mut inner = self.inner.lock(); - inner.clear() - } - - pub fn invalidate_from_roots bool>( - &self, - log_dirtied: bool, - predicate: P, - ) -> InvalidationResult { - let mut inner = self.inner.lock(); - inner.invalidate_from_roots(log_dirtied, predicate) - } - - pub fn visualize(&self, roots: &[N], path: &Path, context: &Context) -> io::Result<()> { - let inner = self.inner.lock(); - inner.visualize(roots, path, context) - } - - pub fn visit_live_reachable( - &self, - roots: &[N], - context: &Context, - mut f: impl FnMut(&N, N::Item), - ) { - let inner = self.inner.lock(); - for (n, v) in inner.live_reachable(roots, context) { - f(n, v); + + /// + /// Return the value of the given Node. + /// + pub async fn create(&self, node: N, context: &Context) -> Result { + let (res, _generation) = self.get_inner(None, context, node).await; + res } - } - pub fn visit_live(&self, context: &Context, mut f: impl FnMut(&N, N::Item)) { - let inner = self.inner.lock(); - for (n, v) in inner.live(context) { - f(n, v); + /// + /// Gets the value of the given Node (optionally waiting for it to have changed since the given + /// LastObserved token), and then returns its new value and a new LastObserved token. + /// + pub async fn poll( + &self, + node: N, + token: Option, + delay: Option, + context: &Context, + ) -> (Result, LastObserved) { + // If the node is currently clean at the given token, Entry::poll will delay until it has + // changed in some way. + if let Some(LastObserved(generation)) = token { + let entry = { + let mut inner = self.inner.lock(); + let entry_id = inner.ensure_entry(node.clone()); + inner.unsafe_entry_for_id(entry_id).clone() + }; + entry.poll(context, generation).await; + if let Some(delay) = delay { + sleep(delay).await; + } + }; + + // Re-request the Node. + let (res, generation) = self.get_inner(None, context, node).await; + (res, LastObserved(generation)) + } + + /// + /// Compares the generations of the dependencies of the given EntryId to their previous + /// generation values (re-computing or cleaning them first if necessary). + /// + /// Returns `Ok(uncacheable_deps)` if the node was successfully cleaned, and clears the node's + /// edges if it was not successfully cleaned. 
+ /// + async fn attempt_cleaning( + &self, + entry_id: EntryId, + run_token: RunToken, + previous_dep_generations: &[(EntryId, Generation)], + context: &Context, + ) -> Result { + let generation_matches = { + let inner = self.inner.lock(); + let entry = if log::log_enabled!(log::Level::Debug) { + Some(inner.pg[entry_id].clone()) + } else { + None + }; + + previous_dep_generations + .iter() + .map(|&(dep_id, previous_dep_generation)| { + let entry = entry.clone(); + let dep_entry = inner + .entry_for_id(dep_id) + .unwrap_or_else(|| panic!("Dependency not present in Graph.")) + .clone(); + + async move { + let (_, generation, uncacheable) = + dep_entry.get_node_result(context, dep_id).await; + if generation == previous_dep_generation { + // Matched. + Ok(uncacheable) + } else { + // Did not match. We error here to trigger fail-fast in `try_join_all`. + log::debug!( + "Dependency {} of {:?} changed.", + dep_entry.node(), + entry.map(|e| e.node().to_string()) + ); + Err(()) + } + } + }) + .collect::>() + }; + + // We use try_join_all in order to speculatively execute all branches, and to fail fast if any + // generation mismatches. The first mismatch encountered will cause any extraneous cleaning + // work to be canceled. See #11290 for more information about the tradeoffs inherent in + // speculation. + match future::try_join_all(generation_matches).await { + Ok(uncacheable_deps) => { + // Cleaning succeeded. + // + // Return true if any dep was uncacheable. + Ok(uncacheable_deps.into_iter().any(|u| u)) + } + Err(()) => { + // Cleaning failed. + // + // If the RunToken still matches, clear all edges of the Node before returning. + let mut inner = self.inner.lock(); + if let Some(entry) = inner.entry_for_id_mut(entry_id) { + if entry.cleaning_failed(run_token).is_ok() { + // Clear the deps. We remove edges in reverse index order, because `remove_edge` is + // implemented in terms of `swap_remove`, and so affects edge ids greater than the removed edge + // id. See https://docs.rs/petgraph/0.5.1/petgraph/graph/struct.Graph.html#method.remove_edge + let mut edge_ids = inner + .pg + .edges_directed(entry_id, Direction::Outgoing) + .map(|e| e.id()) + .collect::>(); + edge_ids.sort_by_key(|id| std::cmp::Reverse(id.index())); + for edge_id in edge_ids { + inner.pg.remove_edge(edge_id); + } + } + } + Err(()) + } + } + } + + /// + /// Clears the state of all Nodes in the Graph by dropping their state fields. 
+ /// + pub fn clear(&self) { + let mut inner = self.inner.lock(); + inner.clear() + } + + pub fn invalidate_from_roots bool>( + &self, + log_dirtied: bool, + predicate: P, + ) -> InvalidationResult { + let mut inner = self.inner.lock(); + inner.invalidate_from_roots(log_dirtied, predicate) + } + + pub fn visualize(&self, roots: &[N], path: &Path, context: &Context) -> io::Result<()> { + let inner = self.inner.lock(); + inner.visualize(roots, path, context) + } + + pub fn visit_live_reachable( + &self, + roots: &[N], + context: &Context, + mut f: impl FnMut(&N, N::Item), + ) { + let inner = self.inner.lock(); + for (n, v) in inner.live_reachable(roots, context) { + f(n, v); + } + } + + pub fn visit_live(&self, context: &Context, mut f: impl FnMut(&N, N::Item)) { + let inner = self.inner.lock(); + for (n, v) in inner.live(context) { + f(n, v); + } } - } } /// @@ -696,36 +698,35 @@ pub struct LastObserved(Generation); /// struct Walk<'a, N: Node, F> where - F: Fn(&EntryId) -> bool, + F: Fn(&EntryId) -> bool, { - graph: &'a InnerGraph, - direction: Direction, - deque: VecDeque, - walked: FixedBitSet, - stop_walking_predicate: F, + graph: &'a InnerGraph, + direction: Direction, + deque: VecDeque, + walked: FixedBitSet, + stop_walking_predicate: F, } impl<'a, N: Node + 'a, F: Fn(&EntryId) -> bool> Iterator for Walk<'a, N, F> { - type Item = EntryId; - - fn next(&mut self) -> Option { - while let Some(id) = self.deque.pop_front() { - // Visit this node and it neighbors if this node has not yet be visited and we aren't - // stopping our walk at this node, based on if it satisfies the stop_walking_predicate. - // This mechanism gives us a way to selectively dirty parts of the graph respecting node boundaries - // like !restartable nodes, which shouldn't be dirtied. - if !self.walked.visit(id) || (self.stop_walking_predicate)(&id) { - continue; - } + type Item = EntryId; + + fn next(&mut self) -> Option { + while let Some(id) = self.deque.pop_front() { + // Visit this node and it neighbors if this node has not yet be visited and we aren't + // stopping our walk at this node, based on if it satisfies the stop_walking_predicate. + // This mechanism gives us a way to selectively dirty parts of the graph respecting node boundaries + // like !restartable nodes, which shouldn't be dirtied. + if !self.walked.visit(id) || (self.stop_walking_predicate)(&id) { + continue; + } - self - .deque - .extend(self.graph.pg.neighbors_directed(id, self.direction)); - return Some(id); - } + self.deque + .extend(self.graph.pg.neighbors_directed(id, self.direction)); + return Some(id); + } - None - } + None + } } /// diff --git a/src/rust/engine/graph/src/node.rs b/src/rust/engine/graph/src/node.rs index b628752007d..a70c1339c6d 100644 --- a/src/rust/engine/graph/src/node.rs +++ b/src/rust/engine/graph/src/node.rs @@ -20,61 +20,61 @@ pub type EntryId = graph::NodeIndex; /// #[async_trait] pub trait Node: Clone + Debug + Display + Eq + Hash + Send + Sync + 'static { - /// An implementation-specific context required to run this Node. - type Context: Send + Sync; - - type Item: Clone + Debug + Eq + Send + Sync + 'static; - type Error: NodeError; - - async fn run(self, context: Context) -> Result; - - /// - /// True if this Node may be restarted while running. This property is consumed at the point when - /// a Node might be dirtied, so it's valid for a Node to change its restartable state while running. 
-  ///
-  /// Note that this property does not control whether a Node is cancellable: if all consumers of
-  /// a Node go away, it will always be cancelled.
-  ///
-  fn restartable(&self) -> bool;
-
-  ///
-  /// If a node's output is cacheable based solely on properties of the node, and not the output,
-  /// return true.
-  ///
-  /// Nodes which are not cacheable will be recomputed once (at least, in case of dirtying) per
-  /// RunId.
-  ///
-  /// This property must remain stable for the entire lifetime of a particular Node, but a Node
-  /// may change its cacheability for a particular output value using `cacheable_item`.
-  ///
-  fn cacheable(&self) -> bool;
-
-  ///
-  /// A Node may want to compute cacheability differently based on properties of the Node's item.
-  /// The output of this method will be and'd with `cacheable` to compute overall cacheability.
-  ///
-  fn cacheable_item(&self, _item: &Self::Item) -> bool {
-    self.cacheable()
-  }
-
-  ///
-  /// Creates an error instance that represents that a Node dependency was cyclic along the given
-  /// path.
-  ///
-  fn cyclic_error(path: &[&Self]) -> Self::Error;
+    /// An implementation-specific context required to run this Node.
+    type Context: Send + Sync;
+
+    type Item: Clone + Debug + Eq + Send + Sync + 'static;
+    type Error: NodeError;
+
+    async fn run(self, context: Context<Self>) -> Result<Self::Item, Self::Error>;
+
+    ///
+    /// True if this Node may be restarted while running. This property is consumed at the point when
+    /// a Node might be dirtied, so it's valid for a Node to change its restartable state while running.
+    ///
+    /// Note that this property does not control whether a Node is cancellable: if all consumers of
+    /// a Node go away, it will always be cancelled.
+    ///
+    fn restartable(&self) -> bool;
+
+    ///
+    /// If a node's output is cacheable based solely on properties of the node, and not the output,
+    /// return true.
+    ///
+    /// Nodes which are not cacheable will be recomputed once (at least, in case of dirtying) per
+    /// RunId.
+    ///
+    /// This property must remain stable for the entire lifetime of a particular Node, but a Node
+    /// may change its cacheability for a particular output value using `cacheable_item`.
+    ///
+    fn cacheable(&self) -> bool;
+
+    ///
+    /// A Node may want to compute cacheability differently based on properties of the Node's item.
+    /// The output of this method will be and'd with `cacheable` to compute overall cacheability.
+    ///
+    fn cacheable_item(&self, _item: &Self::Item) -> bool {
+        self.cacheable()
+    }
+
+    ///
+    /// Creates an error instance that represents that a Node dependency was cyclic along the given
+    /// path.
+    ///
+    fn cyclic_error(path: &[&Self]) -> Self::Error;
 }
 
 pub trait NodeError: Clone + Debug + Eq + Send + Sync {
-  ///
-  /// Creates a generic Error of type NodeError.
-  ///
-  fn generic(message: String) -> Self;
-
-  ///
-  /// Creates an instance that represents that a Node was invalidated out of the
-  /// Graph (generally while running).
-  ///
-  fn invalidated() -> Self;
+    ///
+    /// Creates a generic Error of type NodeError.
+    ///
+    fn generic(message: String) -> Self;
+
+    ///
+    /// Creates an instance that represents that a Node was invalidated out of the
+    /// Graph (generally while running).
+    ///
+    fn invalidated() -> Self;
 }
 
 ///
@@ -83,11 +83,11 @@ pub trait NodeError: Clone + Debug + Eq + Send + Sync {
 ///
 pub trait CompoundNode<N>: Into<N> + Send
 where
-  N: Node,
+    N: Node,
 {
-  type Item: TryFrom<N::Item>;
+    type Item: TryFrom<N::Item>;
 }
 
 impl<N: Node> CompoundNode<N> for N {
-  type Item = N::Item;
+    type Item = N::Item;
 }
diff --git a/src/rust/engine/graph/src/tests.rs b/src/rust/engine/graph/src/tests.rs
index 71b79e31ae3..cb7598238f8 100644
--- a/src/rust/engine/graph/src/tests.rs
+++ b/src/rust/engine/graph/src/tests.rs
@@ -19,238 +19,238 @@ use crate::context::Context;
 use crate::{Graph, InvalidationResult, Node, NodeError};
 
 fn empty_graph() -> Arc<Graph<TNode>> {
-  Arc::new(Graph::new(Executor::new()))
+    Arc::new(Graph::new(Executor::new()))
 }
 
 macro_rules! assert_atomic_usize_eq {
-  ($actual: expr, $expected: expr) => {{
-    assert_eq!($actual.load(atomic::Ordering::SeqCst), $expected);
-  }};
+    ($actual: expr, $expected: expr) => {{
+        assert_eq!($actual.load(atomic::Ordering::SeqCst), $expected);
+    }};
 }
 
 #[tokio::test]
 async fn create() {
-  let graph = empty_graph();
-  let context = graph.context(TContext::new());
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
-  );
+    let graph = empty_graph();
+    let context = graph.context(TContext::new());
+    assert_eq!(
+        graph.create(TNode::new(2), &context).await,
+        Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
+    );
 }
 
 #[tokio::test]
 async fn invalidate_and_clean() {
-  let graph = empty_graph();
-  let context = graph.context(TContext::new());
+    let graph = empty_graph();
+    let context = graph.context(TContext::new());
 
-  // Create three nodes.
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
-  );
-  assert_eq!(
-    context.runs(),
-    vec![TNode::new(2), TNode::new(1), TNode::new(0)]
-  );
+    // Create three nodes.
+    assert_eq!(
+        graph.create(TNode::new(2), &context).await,
+        Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
+    );
+    assert_eq!(
+        context.runs(),
+        vec![TNode::new(2), TNode::new(1), TNode::new(0)]
+    );
 
-  // Clear the middle Node, which dirties the upper node.
-  assert_eq!(
-    graph.invalidate_from_roots(true, |n| n.id == 1),
-    InvalidationResult {
-      cleared: 1,
-      dirtied: 1
-    }
-  );
+    // Clear the middle Node, which dirties the upper node.
+    assert_eq!(
+        graph.invalidate_from_roots(true, |n| n.id == 1),
+        InvalidationResult {
+            cleared: 1,
+            dirtied: 1
+        }
+    );
 
-  // Confirm that the cleared Node re-runs, and the upper node is cleaned without re-running.
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
-  );
-  assert_eq!(
-    context.runs(),
-    vec![TNode::new(2), TNode::new(1), TNode::new(0), TNode::new(1)]
-  );
+    // Confirm that the cleared Node re-runs, and the upper node is cleaned without re-running.
+    assert_eq!(
+        graph.create(TNode::new(2), &context).await,
+        Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
+    );
+    assert_eq!(
+        context.runs(),
+        vec![TNode::new(2), TNode::new(1), TNode::new(0), TNode::new(1)]
+    );
 }
 
 #[tokio::test]
 async fn invalidate_and_rerun() {
-  let graph = empty_graph();
-  let context = graph.context(TContext::new());
+    let graph = empty_graph();
+    let context = graph.context(TContext::new());
 
-  // Create three nodes.
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
-  );
-  assert_eq!(
-    context.runs(),
-    vec![TNode::new(2), TNode::new(1), TNode::new(0)]
-  );
+    // Create three nodes.
+    assert_eq!(
+        graph.create(TNode::new(2), &context).await,
+        Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
+    );
+    assert_eq!(
+        context.runs(),
+        vec![TNode::new(2), TNode::new(1), TNode::new(0)]
+    );
 
-  // Clear the middle Node, which dirties the upper node.
-  assert_eq!(
-    graph.invalidate_from_roots(true, |n| n.id == 1),
-    InvalidationResult {
-      cleared: 1,
-      dirtied: 1
-    }
-  );
+    // Clear the middle Node, which dirties the upper node.
+    assert_eq!(
+        graph.invalidate_from_roots(true, |n| n.id == 1),
+        InvalidationResult {
+            cleared: 1,
+            dirtied: 1
+        }
+    );
 
-  // Request with a different salt, which will cause both the middle and upper nodes to rerun since
-  // their input values have changed.
-  let context = graph.context(TContext::new().with_salt(1));
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 0), T(1, 1), T(2, 1)])
-  );
-  assert_eq!(context.runs(), vec![TNode::new(1), TNode::new(2)]);
+    // Request with a different salt, which will cause both the middle and upper nodes to rerun since
+    // their input values have changed.
+    let context = graph.context(TContext::new().with_salt(1));
+    assert_eq!(
+        graph.create(TNode::new(2), &context).await,
+        Ok(vec![T(0, 0), T(1, 1), T(2, 1)])
+    );
+    assert_eq!(context.runs(), vec![TNode::new(1), TNode::new(2)]);
 }
 
 #[tokio::test]
 async fn invalidate_uncacheable() {
-  let graph = empty_graph();
+    let graph = empty_graph();
 
-  // Create three nodes, with the middle node as uncacheable.
-  let context = {
-    let mut uncacheable = HashSet::new();
-    uncacheable.insert(TNode::new(1));
-    graph.context(TContext::new().with_uncacheable(uncacheable))
-  };
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
-  );
-  assert_eq!(
-    context.runs(),
-    vec![TNode::new(2), TNode::new(1), TNode::new(0)]
-  );
+    // Create three nodes, with the middle node as uncacheable.
+    let context = {
+        let mut uncacheable = HashSet::new();
+        uncacheable.insert(TNode::new(1));
+        graph.context(TContext::new().with_uncacheable(uncacheable))
+    };
+    assert_eq!(
+        graph.create(TNode::new(2), &context).await,
+        Ok(vec![T(0, 0), T(1, 0), T(2, 0)])
+    );
+    assert_eq!(
+        context.runs(),
+        vec![TNode::new(2), TNode::new(1), TNode::new(0)]
+    );
 
-  // Clear the bottom Node, which dirties the middle and upper node.
-  assert_eq!(
-    graph.invalidate_from_roots(true, |n| n.id == 0),
-    InvalidationResult {
-      cleared: 1,
-      dirtied: 2
-    }
-  );
+    // Clear the bottom Node, which dirties the middle and upper node.
+    assert_eq!(
+        graph.invalidate_from_roots(true, |n| n.id == 0),
+        InvalidationResult {
+            cleared: 1,
+            dirtied: 2
+        }
+    );
 
-  // Re-request in the same session with new salt, and validate that all three re-run.
-  let context = graph.context((*context).clone().with_salt(1));
-  assert_eq!(
-    graph.create(TNode::new(2), &context).await,
-    Ok(vec![T(0, 1), T(1, 1), T(2, 1)])
-  );
-  assert_eq!(
-    context.runs(),
-    vec![
-      TNode::new(2),
-      TNode::new(1),
-      TNode::new(0),
-      TNode::new(1),
-      TNode::new(0),
-      TNode::new(2)
-    ]
-  );
+    // Re-request in the same session with new salt, and validate that all three re-run.
+ let context = graph.context((*context).clone().with_salt(1)); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 1), T(1, 1), T(2, 1)]) + ); + assert_eq!( + context.runs(), + vec![ + TNode::new(2), + TNode::new(1), + TNode::new(0), + TNode::new(1), + TNode::new(0), + TNode::new(2) + ] + ); } #[tokio::test] async fn invalidate_with_changed_dependencies() { - let graph = empty_graph(); - let context = graph.context(TContext::new()); + let graph = empty_graph(); + let context = graph.context(TContext::new()); - // Create three nodes. - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); + // Create three nodes. + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); - // Clear the middle Node, which dirties the upper node. - assert_eq!( - graph.invalidate_from_roots(true, |n| n.id == 1), - InvalidationResult { - cleared: 1, - dirtied: 1 - } - ); + // Clear the middle Node, which dirties the upper node. + assert_eq!( + graph.invalidate_from_roots(true, |n| n.id == 1), + InvalidationResult { + cleared: 1, + dirtied: 1 + } + ); - // Request with a new context that truncates execution at the middle Node. - let context = graph.context( - TContext::new().with_dependencies(vec![(TNode::new(1), vec![])].into_iter().collect()), - ); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(1, 0), T(2, 0)]) - ); + // Request with a new context that truncates execution at the middle Node. + let context = graph.context( + TContext::new().with_dependencies(vec![(TNode::new(1), vec![])].into_iter().collect()), + ); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(1, 0), T(2, 0)]) + ); - // Confirm that dirtying the bottom Node does not affect the middle/upper Nodes, which no - // longer depend on it. - assert_eq!( - graph.invalidate_from_roots(true, |n| n.id == 0), - InvalidationResult { - cleared: 1, - dirtied: 0, - } - ); + // Confirm that dirtying the bottom Node does not affect the middle/upper Nodes, which no + // longer depend on it. + assert_eq!( + graph.invalidate_from_roots(true, |n| n.id == 0), + InvalidationResult { + cleared: 1, + dirtied: 0, + } + ); } // Historically flaky: https://github.com/pantsbuild/pants/issues/10839 #[tokio::test] async fn invalidate_randomly() { - let graph = empty_graph(); - - let invalidations = 10; - let sleep_per_invalidation = Duration::from_millis(100); - let range = 100; - - // Spawn a background thread to randomly invalidate in the relevant range. Hold its handle so - // it doesn't detach. - let graph2 = graph.clone(); - let (send, recv) = mpsc::channel(); - let _join = thread::spawn(move || { - let mut rng = rand::thread_rng(); - let mut invalidations = invalidations; - while invalidations > 0 { - invalidations -= 1; - - // Invalidate a random node in the graph. - let candidate = rng.gen_range(0..range); - graph2.invalidate_from_roots(true, |n: &TNode| n.id == candidate); - - thread::sleep(sleep_per_invalidation); - } - send.send(()).unwrap(); - }); - - // Continuously re-request the root with increasing context values, and assert that Node and - // context values are ascending. - let mut iterations = 0; - let mut max_distinct_context_values = 0; - loop { - let context = graph.context(TContext::new().with_salt(iterations)); - - // Compute the root, and validate its output. 
- let node_output = match graph.create(TNode::new(range), &context).await { - Ok(output) => output, - Err(TError::Invalidated) => { - // Some amount of concurrent invalidation is expected: retry. - continue; - } - Err(e) => panic!("Did not expect any errors other than Invalidation. Got: {e:?}"), - }; - max_distinct_context_values = cmp::max( - max_distinct_context_values, - TNode::validate(&node_output).unwrap(), - ); - - // Poll the channel to see whether the background thread has exited. - if recv.try_recv().is_ok() { - break; + let graph = empty_graph(); + + let invalidations = 10; + let sleep_per_invalidation = Duration::from_millis(100); + let range = 100; + + // Spawn a background thread to randomly invalidate in the relevant range. Hold its handle so + // it doesn't detach. + let graph2 = graph.clone(); + let (send, recv) = mpsc::channel(); + let _join = thread::spawn(move || { + let mut rng = rand::thread_rng(); + let mut invalidations = invalidations; + while invalidations > 0 { + invalidations -= 1; + + // Invalidate a random node in the graph. + let candidate = rng.gen_range(0..range); + graph2.invalidate_from_roots(true, |n: &TNode| n.id == candidate); + + thread::sleep(sleep_per_invalidation); + } + send.send(()).unwrap(); + }); + + // Continuously re-request the root with increasing context values, and assert that Node and + // context values are ascending. + let mut iterations = 0; + let mut max_distinct_context_values = 0; + loop { + let context = graph.context(TContext::new().with_salt(iterations)); + + // Compute the root, and validate its output. + let node_output = match graph.create(TNode::new(range), &context).await { + Ok(output) => output, + Err(TError::Invalidated) => { + // Some amount of concurrent invalidation is expected: retry. + continue; + } + Err(e) => panic!("Did not expect any errors other than Invalidation. Got: {e:?}"), + }; + max_distinct_context_values = cmp::max( + max_distinct_context_values, + TNode::validate(&node_output).unwrap(), + ); + + // Poll the channel to see whether the background thread has exited. + if recv.try_recv().is_ok() { + break; + } + iterations += 1; } - iterations += 1; - } - assert!( + assert!( max_distinct_context_values > 1, "In {iterations} iterations, observed a maximum of {max_distinct_context_values} distinct context values." ); @@ -258,508 +258,508 @@ async fn invalidate_randomly() { #[tokio::test] async fn poll_cacheable() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - let context = graph.context(TContext::new()); - - // Poll with an empty graph should succeed. - let (result, token1) = graph.poll(TNode::new(2), None, None, &context).await; - assert_eq!(result.unwrap(), vec![T(0, 0), T(1, 0), T(2, 0)]); - - // Re-polling on a non-empty graph but with no LastObserved token should return immediately with - // the same value, and the same token. - let (result, token2) = graph.poll(TNode::new(2), None, None, &context).await; - assert_eq!(result.unwrap(), vec![T(0, 0), T(1, 0), T(2, 0)]); - assert_eq!(token1, token2); - - // But polling with the previous token should wait, since nothing has changed. - let request = graph.poll(TNode::new(2), Some(token2), None, &context); - match timeout(Duration::from_millis(1000), request).await { - Err(Elapsed { .. }) => (), - e => panic!("Should have timed out, instead got: {e:?}"), - } - - // Invalidating something and re-polling should re-compute. 
- graph.invalidate_from_roots(true, |n| n.id == 0); - let result = graph - .poll(TNode::new(2), Some(token2), None, &context) - .await - .0 - .unwrap(); - assert_eq!(result, vec![T(0, 0), T(1, 0), T(2, 0)]); + let _logger = env_logger::try_init(); + let graph = empty_graph(); + let context = graph.context(TContext::new()); + + // Poll with an empty graph should succeed. + let (result, token1) = graph.poll(TNode::new(2), None, None, &context).await; + assert_eq!(result.unwrap(), vec![T(0, 0), T(1, 0), T(2, 0)]); + + // Re-polling on a non-empty graph but with no LastObserved token should return immediately with + // the same value, and the same token. + let (result, token2) = graph.poll(TNode::new(2), None, None, &context).await; + assert_eq!(result.unwrap(), vec![T(0, 0), T(1, 0), T(2, 0)]); + assert_eq!(token1, token2); + + // But polling with the previous token should wait, since nothing has changed. + let request = graph.poll(TNode::new(2), Some(token2), None, &context); + match timeout(Duration::from_millis(1000), request).await { + Err(Elapsed { .. }) => (), + e => panic!("Should have timed out, instead got: {e:?}"), + } + + // Invalidating something and re-polling should re-compute. + graph.invalidate_from_roots(true, |n| n.id == 0); + let result = graph + .poll(TNode::new(2), Some(token2), None, &context) + .await + .0 + .unwrap(); + assert_eq!(result, vec![T(0, 0), T(1, 0), T(2, 0)]); } #[tokio::test] async fn poll_uncacheable() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - // Create a context where the middle node is uncacheable. - let context = { - let mut uncacheable = HashSet::new(); - uncacheable.insert(TNode::new(1)); - graph.context(TContext::new().with_uncacheable(uncacheable)) - }; - - // Poll with an empty graph should succeed. - let (result, token1) = graph.poll(TNode::new(2), None, None, &context).await; - assert_eq!(result.unwrap(), vec![T(0, 0), T(1, 0), T(2, 0)]); - - // Polling with the previous token (in the same session) should wait, since nothing has changed. - let request = graph.poll(TNode::new(2), Some(token1), None, &context); - match timeout(Duration::from_millis(1000), request).await { - Err(Elapsed { .. }) => (), - e => panic!("Should have timed out, instead got: {e:?}"), - } - - // Invalidating something and re-polling should re-compute. - graph.invalidate_from_roots(true, |n| n.id == 0); - let result = graph - .poll(TNode::new(2), Some(token1), None, &context) - .await - .0 - .unwrap(); - assert_eq!(result, vec![T(0, 0), T(1, 0), T(2, 0)]); + let _logger = env_logger::try_init(); + let graph = empty_graph(); + // Create a context where the middle node is uncacheable. + let context = { + let mut uncacheable = HashSet::new(); + uncacheable.insert(TNode::new(1)); + graph.context(TContext::new().with_uncacheable(uncacheable)) + }; + + // Poll with an empty graph should succeed. + let (result, token1) = graph.poll(TNode::new(2), None, None, &context).await; + assert_eq!(result.unwrap(), vec![T(0, 0), T(1, 0), T(2, 0)]); + + // Polling with the previous token (in the same session) should wait, since nothing has changed. + let request = graph.poll(TNode::new(2), Some(token1), None, &context); + match timeout(Duration::from_millis(1000), request).await { + Err(Elapsed { .. }) => (), + e => panic!("Should have timed out, instead got: {e:?}"), + } + + // Invalidating something and re-polling should re-compute. 
+ graph.invalidate_from_roots(true, |n| n.id == 0); + let result = graph + .poll(TNode::new(2), Some(token1), None, &context) + .await + .0 + .unwrap(); + assert_eq!(result, vec![T(0, 0), T(1, 0), T(2, 0)]); } #[tokio::test] async fn poll_errored() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - let context = - graph.context(TContext::new().with_errors(vec![TNode::new(0)].into_iter().collect())); - - // Poll should error. - let (result, token1) = graph.poll(TNode::new(2), None, None, &context).await; - assert_eq!(result.err().unwrap(), TError::Error); - - // Polling with the previous token should wait, since nothing has changed. - let request = graph.poll(TNode::new(2), Some(token1), None, &context); - match timeout(Duration::from_millis(1000), request).await { - Err(Elapsed { .. }) => (), - e => panic!("Should have timed out, instead got: {e:?}"), - } + let _logger = env_logger::try_init(); + let graph = empty_graph(); + let context = + graph.context(TContext::new().with_errors(vec![TNode::new(0)].into_iter().collect())); + + // Poll should error. + let (result, token1) = graph.poll(TNode::new(2), None, None, &context).await; + assert_eq!(result.err().unwrap(), TError::Error); + + // Polling with the previous token should wait, since nothing has changed. + let request = graph.poll(TNode::new(2), Some(token1), None, &context); + match timeout(Duration::from_millis(1000), request).await { + Err(Elapsed { .. }) => (), + e => panic!("Should have timed out, instead got: {e:?}"), + } } #[tokio::test] async fn uncacheable_dependents_of_uncacheable_node() { - let graph = empty_graph(); + let graph = empty_graph(); - // Create a context for which the bottommost Node is not cacheable. - let context = { - let mut uncacheable = HashSet::new(); - uncacheable.insert(TNode::new(0)); - graph.context(TContext::new().with_uncacheable(uncacheable)) - }; + // Create a context for which the bottommost Node is not cacheable. + let context = { + let mut uncacheable = HashSet::new(); + uncacheable.insert(TNode::new(0)); + graph.context(TContext::new().with_uncacheable(uncacheable)) + }; - // Create three nodes. - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_eq!( - context.runs(), - vec![TNode::new(2), TNode::new(1), TNode::new(0)] - ); + // Create three nodes. + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + assert_eq!( + context.runs(), + vec![TNode::new(2), TNode::new(1), TNode::new(0)] + ); - // Re-request the root in a new session and confirm that only the bottom node re-runs. - let context = graph.context(TContext::new()); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_eq!(context.runs(), vec![TNode::new(0)]); - - // Re-request with a new session and different salt, and confirm that everything re-runs bottom - // up (the order of node cleaning). - let context = graph.context(TContext::new().with_salt(1)); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 1), T(1, 1), T(2, 1)]) - ); - assert_eq!( - context.runs(), - vec![TNode::new(0), TNode::new(1), TNode::new(2)] - ); + // Re-request the root in a new session and confirm that only the bottom node re-runs. 
+ let context = graph.context(TContext::new()); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + assert_eq!(context.runs(), vec![TNode::new(0)]); + + // Re-request with a new session and different salt, and confirm that everything re-runs bottom + // up (the order of node cleaning). + let context = graph.context(TContext::new().with_salt(1)); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 1), T(1, 1), T(2, 1)]) + ); + assert_eq!( + context.runs(), + vec![TNode::new(0), TNode::new(1), TNode::new(2)] + ); } #[tokio::test] async fn non_restartable_node_only_runs_once() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - - let context = { - let mut non_restartable = HashSet::new(); - non_restartable.insert(TNode::new(1)); - let sleep_root = Duration::from_millis(1000); - let mut delays = HashMap::new(); - delays.insert(TNode::new(0), sleep_root); - graph.context( - TContext::new() - .with_non_restartable(non_restartable) - .with_delays_pre(delays), - ) - }; - - let graph2 = graph.clone(); - let (send, recv) = mpsc::channel::<()>(); - let _join = thread::spawn(move || { - recv.recv_timeout(Duration::from_secs(10)).unwrap(); - thread::sleep(Duration::from_millis(50)); - graph2.invalidate_from_roots(true, |n| n.id == 0); - }); - - send.send(()).unwrap(); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - // TNode(0) is cleared before completing, and so will run twice. But the non_restartable node and its - // dependent each run once. - assert_eq!( - context.runs(), - vec![TNode::new(2), TNode::new(1), TNode::new(0), TNode::new(0),] - ); + let _logger = env_logger::try_init(); + let graph = empty_graph(); + + let context = { + let mut non_restartable = HashSet::new(); + non_restartable.insert(TNode::new(1)); + let sleep_root = Duration::from_millis(1000); + let mut delays = HashMap::new(); + delays.insert(TNode::new(0), sleep_root); + graph.context( + TContext::new() + .with_non_restartable(non_restartable) + .with_delays_pre(delays), + ) + }; + + let graph2 = graph.clone(); + let (send, recv) = mpsc::channel::<()>(); + let _join = thread::spawn(move || { + recv.recv_timeout(Duration::from_secs(10)).unwrap(); + thread::sleep(Duration::from_millis(50)); + graph2.invalidate_from_roots(true, |n| n.id == 0); + }); + + send.send(()).unwrap(); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + // TNode(0) is cleared before completing, and so will run twice. But the non_restartable node and its + // dependent each run once. + assert_eq!( + context.runs(), + vec![TNode::new(2), TNode::new(1), TNode::new(0), TNode::new(0),] + ); } #[tokio::test] async fn uncacheable_deps_is_cleaned_for_the_session() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); + let _logger = env_logger::try_init(); + let graph = empty_graph(); - let context = { - let mut uncacheable = HashSet::new(); - uncacheable.insert(TNode::new(1)); - graph.context(TContext::new().with_uncacheable(uncacheable)) - }; + let context = { + let mut uncacheable = HashSet::new(); + uncacheable.insert(TNode::new(1)); + graph.context(TContext::new().with_uncacheable(uncacheable)) + }; + + // Request twice in a row in the same session, and confirm that nothing re-runs or is cleaned + // on the second attempt. 
+ let assert_no_change_within_session = |context: &Context| { + assert_eq!( + context.runs(), + vec![TNode::new(2), TNode::new(1), TNode::new(0)] + ); + assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 0); + assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); + }; - // Request twice in a row in the same session, and confirm that nothing re-runs or is cleaned - // on the second attempt. - let assert_no_change_within_session = |context: &Context| { assert_eq!( - context.runs(), - vec![TNode::new(2), TNode::new(1), TNode::new(0)] + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) ); - assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 0); - assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); - }; + assert_no_change_within_session(&context); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_no_change_within_session(&context); - - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_no_change_within_session(&context); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + assert_no_change_within_session(&context); } #[tokio::test] async fn dirtied_uncacheable_deps_node_re_runs() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); + let _logger = env_logger::try_init(); + let graph = empty_graph(); - let mut uncacheable = HashSet::new(); - uncacheable.insert(TNode::new(0)); + let mut uncacheable = HashSet::new(); + uncacheable.insert(TNode::new(0)); - let context = graph.context(TContext::new().with_uncacheable(uncacheable.clone())); + let context = graph.context(TContext::new().with_uncacheable(uncacheable.clone())); - // Request two nodes above an uncacheable node. - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_eq!( - context.runs(), - vec![TNode::new(2), TNode::new(1), TNode::new(0)] - ); - assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 0); - assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); - assert_atomic_usize_eq!(context.stats().ran, 3); - - let assert_stable_after_cleaning = |context: &Context| { + // Request two nodes above an uncacheable node. assert_eq!( - context.runs(), - vec![TNode::new(2), TNode::new(1), TNode::new(0), TNode::new(1)] + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) ); + assert_eq!( + context.runs(), + vec![TNode::new(2), TNode::new(1), TNode::new(0)] + ); + assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 0); assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); - assert_atomic_usize_eq!(context.stats().ran, 4); - assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 1); - }; - - // Clear the middle node, which will dirty the top node, and then clean both of them. 
- graph.invalidate_from_roots(true, |n| n.id == 1); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_stable_after_cleaning(&context); + assert_atomic_usize_eq!(context.stats().ran, 3); + + let assert_stable_after_cleaning = |context: &Context| { + assert_eq!( + context.runs(), + vec![TNode::new(2), TNode::new(1), TNode::new(0), TNode::new(1)] + ); + assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); + assert_atomic_usize_eq!(context.stats().ran, 4); + assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 1); + }; - // We expect that the two upper nodes went to the UncacheableDependencies state for the session: - // re-requesting should be a noop. - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_stable_after_cleaning(&context); - - // Finally, confirm that in a new session/run the UncacheableDependencies nodes trigger detection - // of the Uncacheable node (which runs), and are then cleaned themselves. - let context = graph.context(TContext::new().with_uncacheable(uncacheable)); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - assert_eq!(context.runs(), vec![TNode::new(0)]); - assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 2); - assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); + // Clear the middle node, which will dirty the top node, and then clean both of them. + graph.invalidate_from_roots(true, |n| n.id == 1); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + assert_stable_after_cleaning(&context); + + // We expect that the two upper nodes went to the UncacheableDependencies state for the session: + // re-requesting should be a noop. + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + assert_stable_after_cleaning(&context); + + // Finally, confirm that in a new session/run the UncacheableDependencies nodes trigger detection + // of the Uncacheable node (which runs), and are then cleaned themselves. + let context = graph.context(TContext::new().with_uncacheable(uncacheable)); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + assert_eq!(context.runs(), vec![TNode::new(0)]); + assert_atomic_usize_eq!(context.stats().cleaning_succeeded, 2); + assert_atomic_usize_eq!(context.stats().cleaning_failed, 0); } #[tokio::test] async fn retries() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - - let context = { - let sleep_root = Duration::from_millis(100); - let mut delays = HashMap::new(); - delays.insert(TNode::new(0), sleep_root); - graph.context(TContext::new().with_delays_pre(delays)) - }; - - // Spawn a thread that will invalidate in a loop for one second (much less than our timeout). 
- let sleep_per_invalidation = Duration::from_millis(10); - let invalidation_deadline = Instant::now() + Duration::from_secs(1); - let graph2 = graph.clone(); - let join_handle = thread::spawn(move || loop { - thread::sleep(sleep_per_invalidation); - graph2.invalidate_from_roots(true, |n| n.id == 0); - if Instant::now() > invalidation_deadline { - break; - } - }); + let _logger = env_logger::try_init(); + let graph = empty_graph(); + + let context = { + let sleep_root = Duration::from_millis(100); + let mut delays = HashMap::new(); + delays.insert(TNode::new(0), sleep_root); + graph.context(TContext::new().with_delays_pre(delays)) + }; - // Should succeed anyway. - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); - join_handle.join().unwrap(); + // Spawn a thread that will invalidate in a loop for one second (much less than our timeout). + let sleep_per_invalidation = Duration::from_millis(10); + let invalidation_deadline = Instant::now() + Duration::from_secs(1); + let graph2 = graph.clone(); + let join_handle = thread::spawn(move || loop { + thread::sleep(sleep_per_invalidation); + graph2.invalidate_from_roots(true, |n| n.id == 0); + if Instant::now() > invalidation_deadline { + break; + } + }); + + // Should succeed anyway. + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); + join_handle.join().unwrap(); } #[tokio::test] async fn eager_cleaning_success() { - // Test that invalidation does not cause a Running node to restart if the dependencies that it - // has already requested can successfully be cleaned. - let _logger = env_logger::try_init(); - let invalidation_delay = Duration::from_millis(100); - let graph = Arc::new(Graph::::new_with_invalidation_delay( - Executor::new(), - invalidation_delay, - )); - - let sleep_middle = Duration::from_millis(2000); - let context = { - let mut delays = HashMap::new(); - delays.insert(TNode::new(1), sleep_middle); - graph.context(TContext::new().with_delays_pre(delays)) - }; - - // Invalidate the bottom Node (after the middle node has already requested it). - assert!(sleep_middle > invalidation_delay * 3); - let graph2 = graph.clone(); - let _join = thread::spawn(move || { - thread::sleep(invalidation_delay); - graph2.invalidate_from_roots(true, |n| n.id == 0); - }); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); + // Test that invalidation does not cause a Running node to restart if the dependencies that it + // has already requested can successfully be cleaned. + let _logger = env_logger::try_init(); + let invalidation_delay = Duration::from_millis(100); + let graph = Arc::new(Graph::::new_with_invalidation_delay( + Executor::new(), + invalidation_delay, + )); + + let sleep_middle = Duration::from_millis(2000); + let context = { + let mut delays = HashMap::new(); + delays.insert(TNode::new(1), sleep_middle); + graph.context(TContext::new().with_delays_pre(delays)) + }; + + // Invalidate the bottom Node (after the middle node has already requested it). 
+ assert!(sleep_middle > invalidation_delay * 3); + let graph2 = graph.clone(); + let _join = thread::spawn(move || { + thread::sleep(invalidation_delay); + graph2.invalidate_from_roots(true, |n| n.id == 0); + }); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); - // No nodes should have seen aborts, since the dirtied nodes had already completed, and the - // running node should not have been interrupted. - assert!(context.aborts().is_empty(), "{:?}", context.aborts()); + // No nodes should have seen aborts, since the dirtied nodes had already completed, and the + // running node should not have been interrupted. + assert!(context.aborts().is_empty(), "{:?}", context.aborts()); } #[tokio::test] async fn eager_cleaning_failure() { - // Test that invalidation causes a Running node to restart if the dependencies that it - // has already requested end up with new values before it completes. - let _logger = env_logger::try_init(); - let invalidation_delay = Duration::from_millis(100); - let sleep_middle = Duration::from_millis(2000); - let graph = Arc::new(Graph::new_with_invalidation_delay( - Executor::new(), - invalidation_delay, - )); - - let context = { - let mut delays = HashMap::new(); - delays.insert(TNode::new(1), sleep_middle); - graph.context(TContext::new().with_delays_post(delays)) - }; - - // Invalidate the bottom Node with a new salt (after the middle node has already requested it). - assert!(sleep_middle > invalidation_delay * 3); - let graph2 = graph.clone(); - let context2 = Context::::clone(&context); - let _join = thread::spawn(move || { - thread::sleep(invalidation_delay); - context2.set_salt(1); - graph2.invalidate_from_roots(true, |n| n.id == 0); - }); - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 1), T(1, 1), T(2, 0)]) - ); + // Test that invalidation causes a Running node to restart if the dependencies that it + // has already requested end up with new values before it completes. + let _logger = env_logger::try_init(); + let invalidation_delay = Duration::from_millis(100); + let sleep_middle = Duration::from_millis(2000); + let graph = Arc::new(Graph::new_with_invalidation_delay( + Executor::new(), + invalidation_delay, + )); + + let context = { + let mut delays = HashMap::new(); + delays.insert(TNode::new(1), sleep_middle); + graph.context(TContext::new().with_delays_post(delays)) + }; + + // Invalidate the bottom Node with a new salt (after the middle node has already requested it). + assert!(sleep_middle > invalidation_delay * 3); + let graph2 = graph.clone(); + let context2 = Context::::clone(&context); + let _join = thread::spawn(move || { + thread::sleep(invalidation_delay); + context2.set_salt(1); + graph2.invalidate_from_roots(true, |n| n.id == 0); + }); + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 1), T(1, 1), T(2, 0)]) + ); - // The middle node should have seen an abort, since it already observed its dependency, and that - // dependency could not be cleaned. But the top Node will not abort, because it will not yet have - // received a value for its dependency, and so will be successfully cleaned. - assert_eq!(vec![TNode::new(1)], context.aborts()); + // The middle node should have seen an abort, since it already observed its dependency, and that + // dependency could not be cleaned. But the top Node will not abort, because it will not yet have + // received a value for its dependency, and so will be successfully cleaned. 
+ assert_eq!(vec![TNode::new(1)], context.aborts()); } #[tokio::test] async fn canceled_on_loss_of_interest() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - - let sleep_middle = Duration::from_millis(2000); - let start_time = Instant::now(); - let context = { - let mut delays = HashMap::new(); - delays.insert(TNode::new(1), sleep_middle); - graph.context(TContext::new().with_delays_pre(delays)) - }; - - // Start a run, but cancel it well before the delayed middle node can complete. - tokio::select! { - _ = sleep(Duration::from_millis(100)) => {}, - _ = graph.create(TNode::new(2), &context) => { panic!("Should have timed out.") } - } - - // Then start again, and allow to run to completion. - assert_eq!( - graph.create(TNode::new(2), &context).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); + let _logger = env_logger::try_init(); + let graph = empty_graph(); + + let sleep_middle = Duration::from_millis(2000); + let start_time = Instant::now(); + let context = { + let mut delays = HashMap::new(); + delays.insert(TNode::new(1), sleep_middle); + graph.context(TContext::new().with_delays_pre(delays)) + }; + + // Start a run, but cancel it well before the delayed middle node can complete. + tokio::select! { + _ = sleep(Duration::from_millis(100)) => {}, + _ = graph.create(TNode::new(2), &context) => { panic!("Should have timed out.") } + } + + // Then start again, and allow to run to completion. + assert_eq!( + graph.create(TNode::new(2), &context).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); - // We should have waited more than the delay, but less than the time it would have taken to - // run twice. - assert!(Instant::now() >= start_time + sleep_middle); - assert!(Instant::now() < start_time + (sleep_middle * 2)); + // We should have waited more than the delay, but less than the time it would have taken to + // run twice. + assert!(Instant::now() >= start_time + sleep_middle); + assert!(Instant::now() < start_time + (sleep_middle * 2)); - // And the top nodes should have seen one abort each. - assert_eq!(vec![TNode::new(2), TNode::new(1),], context.aborts(),); + // And the top nodes should have seen one abort each. + assert_eq!(vec![TNode::new(2), TNode::new(1),], context.aborts(),); } #[tokio::test] async fn clean_speculatively() { - let _logger = env_logger::try_init(); - let graph = empty_graph(); - - // Create a graph with a node with two dependencies, one of which takes much longer - // to run. - let mut dependencies = vec![ - (TNode::new(3), vec![TNode::new(2), TNode::new(1)]), - (TNode::new(2), vec![TNode::new(0)]), - (TNode::new(1), vec![TNode::new(0)]), - ] - .into_iter() - .collect::>(); - let delay = Duration::from_millis(2000); - let context = { - let mut delays = HashMap::new(); - delays.insert(TNode::new(2), delay); - graph.context( - TContext::new() - .with_delays_pre(delays) - .with_dependencies(dependencies.clone()), - ) - }; - - // Run it to completion, and then clear a node at the bottom of the graph to force cleaning of - // both dependencies. - assert_eq!( - graph.create(TNode::new(3), &context).await, - Ok(vec![T(0, 0), T(2, 0), T(3, 0)]) - ); - graph.invalidate_from_roots(true, |n| n == &TNode::new(0)); - - // Then request again with the slow node removed from the dependencies, and confirm that it is - // cleaned much sooner than it would been if it had waited for the slow node. 
- dependencies.insert(TNode::new(3), vec![TNode::new(1)]); - let context = graph.context( - (*context) - .clone() - .with_salt(1) - .with_dependencies(dependencies), - ); - let start_time = Instant::now(); - assert_eq!( - graph.create(TNode::new(3), &context).await, - Ok(vec![T(0, 1), T(1, 1), T(3, 1)]) - ); - assert!(Instant::now() < start_time + delay); - assert_atomic_usize_eq!(context.stats().cleaning_failed, 3); + let _logger = env_logger::try_init(); + let graph = empty_graph(); + + // Create a graph with a node with two dependencies, one of which takes much longer + // to run. + let mut dependencies = vec![ + (TNode::new(3), vec![TNode::new(2), TNode::new(1)]), + (TNode::new(2), vec![TNode::new(0)]), + (TNode::new(1), vec![TNode::new(0)]), + ] + .into_iter() + .collect::>(); + let delay = Duration::from_millis(2000); + let context = { + let mut delays = HashMap::new(); + delays.insert(TNode::new(2), delay); + graph.context( + TContext::new() + .with_delays_pre(delays) + .with_dependencies(dependencies.clone()), + ) + }; + + // Run it to completion, and then clear a node at the bottom of the graph to force cleaning of + // both dependencies. + assert_eq!( + graph.create(TNode::new(3), &context).await, + Ok(vec![T(0, 0), T(2, 0), T(3, 0)]) + ); + graph.invalidate_from_roots(true, |n| n == &TNode::new(0)); + + // Then request again with the slow node removed from the dependencies, and confirm that it is + // cleaned much sooner than it would been if it had waited for the slow node. + dependencies.insert(TNode::new(3), vec![TNode::new(1)]); + let context = graph.context( + (*context) + .clone() + .with_salt(1) + .with_dependencies(dependencies), + ); + let start_time = Instant::now(); + assert_eq!( + graph.create(TNode::new(3), &context).await, + Ok(vec![T(0, 1), T(1, 1), T(3, 1)]) + ); + assert!(Instant::now() < start_time + delay); + assert_atomic_usize_eq!(context.stats().cleaning_failed, 3); } #[tokio::test] async fn cyclic_failure() { - // Confirms that an attempt to create a cycle fails. - let graph = empty_graph(); - let top = TNode::new(2); - let context = graph.context(TContext::new().with_dependencies( - // Request creation of a cycle by sending the bottom most node to the top. - vec![(TNode::new(0), vec![top])].into_iter().collect(), - )); - - assert_eq!( - graph.create(TNode::new(2), &context).await, - Err(TError::Cyclic(vec![0, 2, 1])) - ); + // Confirms that an attempt to create a cycle fails. + let graph = empty_graph(); + let top = TNode::new(2); + let context = graph.context(TContext::new().with_dependencies( + // Request creation of a cycle by sending the bottom most node to the top. + vec![(TNode::new(0), vec![top])].into_iter().collect(), + )); + + assert_eq!( + graph.create(TNode::new(2), &context).await, + Err(TError::Cyclic(vec![0, 2, 1])) + ); } #[tokio::test] async fn cyclic_dirtying() { - let _logger = env_logger::try_init(); - // Confirms that a dirtied path between two nodes is able to reverse direction while being - // cleaned. - let graph = empty_graph(); - let initial_top = TNode::new(2); - let initial_bot = TNode::new(0); - - // Request with a context that creates a path downward. - let context_down = graph.context(TContext::new()); - assert_eq!( - graph.create(initial_top.clone(), &context_down).await, - Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) - ); + let _logger = env_logger::try_init(); + // Confirms that a dirtied path between two nodes is able to reverse direction while being + // cleaned. 
+ let graph = empty_graph(); + let initial_top = TNode::new(2); + let initial_bot = TNode::new(0); + + // Request with a context that creates a path downward. + let context_down = graph.context(TContext::new()); + assert_eq!( + graph.create(initial_top.clone(), &context_down).await, + Ok(vec![T(0, 0), T(1, 0), T(2, 0)]) + ); - // Clear the bottom node, and then clean it with a context that causes the path to reverse. - graph.invalidate_from_roots(true, |n| n == &initial_bot); - let context_up = graph.context( - (*context_down).clone().with_salt(1).with_dependencies( - // Reverse the path from bottom to top. - vec![ - (TNode::new(1), vec![]), - (TNode::new(0), vec![TNode::new(1)]), - ] - .into_iter() - .collect(), - ), - ); + // Clear the bottom node, and then clean it with a context that causes the path to reverse. + graph.invalidate_from_roots(true, |n| n == &initial_bot); + let context_up = graph.context( + (*context_down).clone().with_salt(1).with_dependencies( + // Reverse the path from bottom to top. + vec![ + (TNode::new(1), vec![]), + (TNode::new(0), vec![TNode::new(1)]), + ] + .into_iter() + .collect(), + ), + ); - let res = graph.create(initial_bot, &context_up).await; + let res = graph.create(initial_bot, &context_up).await; - assert_eq!(res, Ok(vec![T(1, 1), T(0, 1)])); + assert_eq!(res, Ok(vec![T(1, 1), T(0, 1)])); - let res = graph.create(initial_top, &context_up).await; + let res = graph.create(initial_top, &context_up).await; - assert_eq!(res, Ok(vec![T(1, 1), T(2, 1)])); + assert_eq!(res, Ok(vec![T(1, 1), T(2, 1)])); } /// @@ -775,129 +775,128 @@ struct T(usize, usize); /// #[derive(Clone, Debug)] struct TNode { - pub id: usize, - restartable: bool, - cacheable: bool, + pub id: usize, + restartable: bool, + cacheable: bool, } impl TNode { - fn new(id: usize) -> Self { - TNode { - id, - restartable: true, - cacheable: true, + fn new(id: usize) -> Self { + TNode { + id, + restartable: true, + cacheable: true, + } } - } } impl PartialEq for TNode { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } } impl Eq for TNode {} impl Hash for TNode { - fn hash(&self, state: &mut H) { - self.id.hash(state); - } + fn hash(&self, state: &mut H) { + self.id.hash(state); + } } #[async_trait] impl Node for TNode { - type Context = TContext; + type Context = TContext; + + type Item = Vec; + type Error = TError; + + async fn run(self, context: Context) -> Result, TError> { + let mut abort_guard = context.abort_guard(self.clone()); + context.ran(self.clone()); + if context.errors.contains(&self) { + return Err(TError::Error); + } + let token = T(self.id, context.salt()); + context.maybe_delay_pre(&self).await; + let res = match context.dependencies_of(&self) { + deps if !deps.is_empty() => { + // Request all dependencies, but include only the first in our output value. 
+ let mut values = future::try_join_all( + deps.into_iter() + .map(|dep| context.get(dep)) + .collect::>(), + ) + .await?; + let mut v = values.swap_remove(0); + v.push(token); + Ok(v) + } + _ => Ok(vec![token]), + }; + context.maybe_delay_post(&self).await; + abort_guard.did_not_abort(); + res + } + + fn restartable(&self) -> bool { + self.restartable + } - type Item = Vec; - type Error = TError; + fn cacheable(&self) -> bool { + self.cacheable + } - async fn run(self, context: Context) -> Result, TError> { - let mut abort_guard = context.abort_guard(self.clone()); - context.ran(self.clone()); - if context.errors.contains(&self) { - return Err(TError::Error); + fn cyclic_error(path: &[&Self]) -> Self::Error { + TError::Cyclic(path.iter().map(|n| n.id).collect()) } - let token = T(self.id, context.salt()); - context.maybe_delay_pre(&self).await; - let res = match context.dependencies_of(&self) { - deps if !deps.is_empty() => { - // Request all dependencies, but include only the first in our output value. - let mut values = future::try_join_all( - deps - .into_iter() - .map(|dep| context.get(dep)) - .collect::>(), - ) - .await?; - let mut v = values.swap_remove(0); - v.push(token); - Ok(v) - } - _ => Ok(vec![token]), - }; - context.maybe_delay_post(&self).await; - abort_guard.did_not_abort(); - res - } - - fn restartable(&self) -> bool { - self.restartable - } - - fn cacheable(&self) -> bool { - self.cacheable - } - - fn cyclic_error(path: &[&Self]) -> Self::Error { - TError::Cyclic(path.iter().map(|n| n.id).collect()) - } } impl std::fmt::Display for TNode { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{self:?}") - } + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{self:?}") + } } impl TNode { - /// - /// Validates the given TNode output. Both node ids and context ids should increase left to - /// right: node ids monotonically, and context ids non-monotonically. - /// - /// Valid: - /// (0,0), (1,1), (2,2), (3,3) - /// (0,0), (1,0), (2,1), (3,1) - /// - /// Invalid: - /// (0,0), (1,1), (2,1), (3,0) - /// (0,0), (1,0), (2,0), (1,0) - /// - /// If successful, returns the count of distinct context ids in the path. - /// - fn validate(output: &Vec) -> Result { - let (node_ids, context_ids): (Vec<_>, Vec<_>) = output - .iter() - .map(|&T(node_id, context_id)| { - // We cast to isize to allow comparison to -1. - (node_id as isize, context_id) - }) - .unzip(); - // Confirm monotonically ordered. - let mut previous: isize = -1; - for node_id in node_ids { - if previous + 1 != node_id { - return Err(format!( - "Node ids in {output:?} were not monotonically ordered." - )); - } - previous = node_id; - } - // Confirm ordered (non-monotonically). - let mut previous: usize = 0; - for &context_id in &context_ids { - if previous > context_id { - return Err(format!("Context ids in {output:?} were not ordered.")); - } - previous = context_id; + /// + /// Validates the given TNode output. Both node ids and context ids should increase left to + /// right: node ids monotonically, and context ids non-monotonically. + /// + /// Valid: + /// (0,0), (1,1), (2,2), (3,3) + /// (0,0), (1,0), (2,1), (3,1) + /// + /// Invalid: + /// (0,0), (1,1), (2,1), (3,0) + /// (0,0), (1,0), (2,0), (1,0) + /// + /// If successful, returns the count of distinct context ids in the path. 
+ /// + fn validate(output: &Vec) -> Result { + let (node_ids, context_ids): (Vec<_>, Vec<_>) = output + .iter() + .map(|&T(node_id, context_id)| { + // We cast to isize to allow comparison to -1. + (node_id as isize, context_id) + }) + .unzip(); + // Confirm monotonically ordered. + let mut previous: isize = -1; + for node_id in node_ids { + if previous + 1 != node_id { + return Err(format!( + "Node ids in {output:?} were not monotonically ordered." + )); + } + previous = node_id; + } + // Confirm ordered (non-monotonically). + let mut previous: usize = 0; + for &context_id in &context_ids { + if previous > context_id { + return Err(format!("Context ids in {output:?} were not ordered.")); + } + previous = context_id; + } + + Ok(context_ids.into_iter().collect::>().len()) } - - Ok(context_ids.into_iter().collect::>().len()) - } } /// @@ -905,138 +904,138 @@ impl TNode { /// #[derive(Clone)] struct TContext { - // A value that is included in every value computed by this context. Stands in for "the state of the - // outside world". A test that wants to "change the outside world" and observe its effect on the - // graph should change the salt to do so. - salt: Arc, - // A mapping from source to destinations that drives what values each TNode depends on. - // If there is no entry in this map for a node, then TNode::run will default to requesting - // the next smallest node. - edges: Arc>>, - delays_pre: Arc>, - delays_post: Arc>, - // Nodes which should error when they run. - errors: Arc>, - non_restartable: Arc>, - uncacheable: Arc>, - aborts: Arc>>, - runs: Arc>>, + // A value that is included in every value computed by this context. Stands in for "the state of the + // outside world". A test that wants to "change the outside world" and observe its effect on the + // graph should change the salt to do so. + salt: Arc, + // A mapping from source to destinations that drives what values each TNode depends on. + // If there is no entry in this map for a node, then TNode::run will default to requesting + // the next smallest node. + edges: Arc>>, + delays_pre: Arc>, + delays_post: Arc>, + // Nodes which should error when they run. + errors: Arc>, + non_restartable: Arc>, + uncacheable: Arc>, + aborts: Arc>>, + runs: Arc>>, } impl TContext { - fn new() -> TContext { - TContext { - salt: Arc::new(AtomicUsize::new(0)), - edges: Arc::default(), - delays_pre: Arc::default(), - delays_post: Arc::default(), - errors: Arc::default(), - non_restartable: Arc::default(), - uncacheable: Arc::default(), - aborts: Arc::default(), - runs: Arc::default(), + fn new() -> TContext { + TContext { + salt: Arc::new(AtomicUsize::new(0)), + edges: Arc::default(), + delays_pre: Arc::default(), + delays_post: Arc::default(), + errors: Arc::default(), + non_restartable: Arc::default(), + uncacheable: Arc::default(), + aborts: Arc::default(), + runs: Arc::default(), + } + } + + fn with_dependencies(mut self, edges: HashMap>) -> TContext { + self.edges = Arc::new(edges); + self + } + + /// Delays incurred before a node has requested its dependencies. + fn with_delays_pre(mut self, delays: HashMap) -> TContext { + self.delays_pre = Arc::new(delays); + self + } + + /// Delays incurred after a node has requested its dependencies. 
+ fn with_delays_post(mut self, delays: HashMap) -> TContext { + self.delays_post = Arc::new(delays); + self + } + + fn with_errors(mut self, errors: HashSet) -> TContext { + self.errors = Arc::new(errors); + self + } + + fn with_non_restartable(mut self, non_restartable: HashSet) -> TContext { + self.non_restartable = Arc::new(non_restartable); + self + } + + fn with_uncacheable(mut self, uncacheable: HashSet) -> TContext { + self.uncacheable = Arc::new(uncacheable); + self + } + + fn with_salt(mut self, salt: usize) -> TContext { + self.salt = Arc::new(AtomicUsize::new(salt)); + self + } + + fn salt(&self) -> usize { + self.salt.load(atomic::Ordering::SeqCst) } - } - - fn with_dependencies(mut self, edges: HashMap>) -> TContext { - self.edges = Arc::new(edges); - self - } - - /// Delays incurred before a node has requested its dependencies. - fn with_delays_pre(mut self, delays: HashMap) -> TContext { - self.delays_pre = Arc::new(delays); - self - } - - /// Delays incurred after a node has requested its dependencies. - fn with_delays_post(mut self, delays: HashMap) -> TContext { - self.delays_post = Arc::new(delays); - self - } - - fn with_errors(mut self, errors: HashSet) -> TContext { - self.errors = Arc::new(errors); - self - } - - fn with_non_restartable(mut self, non_restartable: HashSet) -> TContext { - self.non_restartable = Arc::new(non_restartable); - self - } - - fn with_uncacheable(mut self, uncacheable: HashSet) -> TContext { - self.uncacheable = Arc::new(uncacheable); - self - } - - fn with_salt(mut self, salt: usize) -> TContext { - self.salt = Arc::new(AtomicUsize::new(salt)); - self - } - - fn salt(&self) -> usize { - self.salt.load(atomic::Ordering::SeqCst) - } - - fn set_salt(&self, salt: usize) { - self.salt.store(salt, atomic::Ordering::SeqCst) - } - - fn abort_guard(&self, node: TNode) -> AbortGuard { - AbortGuard { - context: self.clone(), - node: Some(node), + + fn set_salt(&self, salt: usize) { + self.salt.store(salt, atomic::Ordering::SeqCst) } - } - fn aborted(&self, node: TNode) { - let mut aborts = self.aborts.lock(); - aborts.push(node); - } + fn abort_guard(&self, node: TNode) -> AbortGuard { + AbortGuard { + context: self.clone(), + node: Some(node), + } + } + + fn aborted(&self, node: TNode) { + let mut aborts = self.aborts.lock(); + aborts.push(node); + } - fn ran(&self, node: TNode) { - let mut runs = self.runs.lock(); - runs.push(node); - } + fn ran(&self, node: TNode) { + let mut runs = self.runs.lock(); + runs.push(node); + } - async fn maybe_delay_pre(&self, node: &TNode) { - if let Some(delay) = self.delays_pre.get(node) { - sleep(*delay).await; + async fn maybe_delay_pre(&self, node: &TNode) { + if let Some(delay) = self.delays_pre.get(node) { + sleep(*delay).await; + } } - } - async fn maybe_delay_post(&self, node: &TNode) { - if let Some(delay) = self.delays_post.get(node) { - sleep(*delay).await; + async fn maybe_delay_post(&self, node: &TNode) { + if let Some(delay) = self.delays_post.get(node) { + sleep(*delay).await; + } } - } - - /// - /// If the given TNode should declare a dependency on another TNode, returns that dependency. 
- /// - fn dependencies_of(&self, node: &TNode) -> Vec { - match self.edges.get(node) { - Some(deps) => deps.clone(), - None if node.id > 0 => { - let new_node_id = node.id - 1; - vec![TNode { - id: new_node_id, - restartable: !self.non_restartable.contains(&TNode::new(new_node_id)), - cacheable: !self.uncacheable.contains(&TNode::new(new_node_id)), - }] - } - None => vec![], + + /// + /// If the given TNode should declare a dependency on another TNode, returns that dependency. + /// + fn dependencies_of(&self, node: &TNode) -> Vec { + match self.edges.get(node) { + Some(deps) => deps.clone(), + None if node.id > 0 => { + let new_node_id = node.id - 1; + vec![TNode { + id: new_node_id, + restartable: !self.non_restartable.contains(&TNode::new(new_node_id)), + cacheable: !self.uncacheable.contains(&TNode::new(new_node_id)), + }] + } + None => vec![], + } } - } - fn aborts(&self) -> Vec { - self.aborts.lock().clone() - } + fn aborts(&self) -> Vec { + self.aborts.lock().clone() + } - fn runs(&self) -> Vec { - self.runs.lock().clone() - } + fn runs(&self) -> Vec { + self.runs.lock().clone() + } } /// @@ -1044,36 +1043,36 @@ impl TContext { /// is dropped without re-running. /// struct AbortGuard { - context: TContext, - node: Option, + context: TContext, + node: Option, } impl AbortGuard { - fn did_not_abort(&mut self) { - self.node = None; - } + fn did_not_abort(&mut self) { + self.node = None; + } } impl Drop for AbortGuard { - fn drop(&mut self) { - if let Some(node) = self.node.take() { - self.context.aborted(node); + fn drop(&mut self) { + if let Some(node) = self.node.take() { + self.context.aborted(node); + } } - } } #[derive(Clone, Debug, Eq, PartialEq)] enum TError { - Error, - Cyclic(Vec), - Invalidated, + Error, + Cyclic(Vec), + Invalidated, } impl NodeError for TError { - fn invalidated() -> Self { - TError::Invalidated - } + fn invalidated() -> Self { + TError::Invalidated + } - fn generic(_message: String) -> Self { - TError::Error - } + fn generic(_message: String) -> Self { + TError::Error + } } diff --git a/src/rust/engine/grpc_util/build.rs b/src/rust/engine/grpc_util/build.rs index d57cfa1d2ea..720e934905b 100644 --- a/src/rust/engine/grpc_util/build.rs +++ b/src/rust/engine/grpc_util/build.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -26,14 +26,14 @@ #![allow(clippy::mutex_atomic)] fn main() -> Result<(), Box> { - use prost_build::Config; + use prost_build::Config; - let config = Config::new(); + let config = Config::new(); - tonic_build::configure() - .build_client(true) - .build_server(true) - .compile_with_config(config, &["protos/test.proto"], &["protos"])?; + tonic_build::configure() + .build_client(true) + .build_server(true) + .compile_with_config(config, &["protos/test.proto"], &["protos"])?; - Ok(()) + Ok(()) } diff --git a/src/rust/engine/grpc_util/src/channel.rs b/src/rust/engine/grpc_util/src/channel.rs index 7ab0db7d7d7..6ec86c87068 100644 --- a/src/rust/engine/grpc_util/src/channel.rs +++ b/src/rust/engine/grpc_util/src/channel.rs @@ -16,8 +16,8 @@ use tower_service::Service; /// `Channel`. #[derive(Clone, Debug)] pub enum Client { - Plain(hyper::Client), - Tls(hyper::Client, BoxBody>), + Plain(hyper::Client), + Tls(hyper::Client, BoxBody>), } /// A communication channel which may either communicate using HTTP or HTTP over TLS. This @@ -29,284 +29,283 @@ pub enum Client { /// continue to use `rustls` directly. #[derive(Clone, Debug)] pub struct Channel { - client: Client, - uri: Uri, + client: Client, + uri: Uri, } impl Channel { - pub async fn new( - tls_config: Option<&ClientConfig>, - uri: Uri, - ) -> Result> { - let client = match tls_config { - None => { - let mut http = HttpConnector::new(); - http.enforce_http(false); - - Client::Plain(hyper::Client::builder().http2_only(true).build(http)) - } - Some(tls_config) => { - let tls_config = tls_config.to_owned(); - - let https = hyper_rustls::HttpsConnectorBuilder::new() - .with_tls_config(tls_config) - .https_or_http() - .enable_http2() - .build(); - - Client::Tls(hyper::Client::builder().http2_only(true).build(https)) - } - }; - - Ok(Self { client, uri }) - } + pub async fn new( + tls_config: Option<&ClientConfig>, + uri: Uri, + ) -> Result> { + let client = match tls_config { + None => { + let mut http = HttpConnector::new(); + http.enforce_http(false); + + Client::Plain(hyper::Client::builder().http2_only(true).build(http)) + } + Some(tls_config) => { + let tls_config = tls_config.to_owned(); + + let https = hyper_rustls::HttpsConnectorBuilder::new() + .with_tls_config(tls_config) + .https_or_http() + .enable_http2() + .build(); + + Client::Tls(hyper::Client::builder().http2_only(true).build(https)) + } + }; + + Ok(Self { client, uri }) + } } impl Service> for Channel { - type Response = Response; - type Error = hyper::Error; - type Future = ResponseFuture; - - fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { - Ok(()).into() - } - - fn call(&mut self, mut req: Request) -> Self::Future { - // Apparently the schema and authority do not get set by Hyper. Thus, the examples generally - // opy the URI and replace the scheme and authority with the ones from the initial URI used - // to configure the client. 
- // - // See https://github.com/LucioFranco/tonic-openssl/blob/bdaaecda437949244a1b4d61cb39110c4bcad019/example/src/client2.rs#L92 - // from the inspiration example - let uri = Uri::builder() - .scheme(self.uri.scheme().unwrap().clone()) - .authority(self.uri.authority().unwrap().clone()) - .path_and_query(req.uri().path_and_query().unwrap().clone()) - .build() - .unwrap(); - *req.uri_mut() = uri; - - match &self.client { - Client::Plain(client) => client.request(req), - Client::Tls(client) => client.request(req), + type Response = Response; + type Error = hyper::Error; + type Future = ResponseFuture; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Ok(()).into() + } + + fn call(&mut self, mut req: Request) -> Self::Future { + // Apparently the schema and authority do not get set by Hyper. Thus, the examples generally + // opy the URI and replace the scheme and authority with the ones from the initial URI used + // to configure the client. + // + // See https://github.com/LucioFranco/tonic-openssl/blob/bdaaecda437949244a1b4d61cb39110c4bcad019/example/src/client2.rs#L92 + // from the inspiration example + let uri = Uri::builder() + .scheme(self.uri.scheme().unwrap().clone()) + .authority(self.uri.authority().unwrap().clone()) + .path_and_query(req.uri().path_and_query().unwrap().clone()) + .build() + .unwrap(); + *req.uri_mut() = uri; + + match &self.client { + Client::Plain(client) => client.request(req), + Client::Tls(client) => client.request(req), + } } - } } #[cfg(test)] mod tests { - use std::net::SocketAddr; - use std::path::PathBuf; - use std::sync::Arc; - - use axum::{routing::get, Router}; - use axum_server::tls_rustls::RustlsConfig; - use http::{Request, Uri}; - use rustls::ClientConfig; - use tower::ServiceExt; - use tower_service::Service; - - use super::Channel; - use crate::tls::NoVerifier; - - const TEST_RESPONSE: &[u8] = b"xyzzy"; - - fn router() -> Router { - Router::new().route("/", get(|| async { TEST_RESPONSE })) - } - - #[tokio::test] - async fn plain_client_request_test() { - let bind_addr = "127.0.0.1:0".parse::().unwrap(); - let listener = std::net::TcpListener::bind(bind_addr).unwrap(); - let addr = listener.local_addr().unwrap(); - - tokio::spawn(async move { - axum::Server::from_tcp(listener) - .unwrap() - .serve(router().into_make_service()) - .await - .unwrap(); - }); - - let uri = Uri::try_from(format!("http://{}", addr.to_string())).unwrap(); - - let mut channel = Channel::new(None, uri).await.unwrap(); - - let request = Request::builder() - .uri(format!("http://{}", addr)) - .body(tonic::body::empty_body()) - .unwrap(); - - channel.ready().await.unwrap(); - let response = channel.call(request).await.unwrap(); - - let body = hyper::body::to_bytes(response.into_body()).await.unwrap(); - assert_eq!(&body[..], TEST_RESPONSE); - } + use std::net::SocketAddr; + use std::path::PathBuf; + use std::sync::Arc; - #[tokio::test] - async fn tls_client_request_test() { - let config = RustlsConfig::from_pem_file( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("cert.pem"), - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("key.pem"), - ) - .await - .unwrap(); + use axum::{routing::get, Router}; + use axum_server::tls_rustls::RustlsConfig; + use http::{Request, Uri}; + use rustls::ClientConfig; + use tower::ServiceExt; + use tower_service::Service; - let bind_addr = "127.0.0.1:0".parse::().unwrap(); - let listener = std::net::TcpListener::bind(bind_addr).unwrap(); - let addr = listener.local_addr().unwrap(); 
+ use super::Channel; + use crate::tls::NoVerifier; - let server = axum_server::from_tcp_rustls(listener, config); + const TEST_RESPONSE: &[u8] = b"xyzzy"; - tokio::spawn(async move { - server.serve(router().into_make_service()).await.unwrap(); - }); - - let uri = Uri::try_from(format!("https://{}", addr.to_string())).unwrap(); + fn router() -> Router { + Router::new().route("/", get(|| async { TEST_RESPONSE })) + } - let tls_config = ClientConfig::builder() - .with_safe_defaults() - .with_custom_certificate_verifier(Arc::new(NoVerifier)) - .with_no_client_auth(); + #[tokio::test] + async fn plain_client_request_test() { + let bind_addr = "127.0.0.1:0".parse::().unwrap(); + let listener = std::net::TcpListener::bind(bind_addr).unwrap(); + let addr = listener.local_addr().unwrap(); - let mut channel = Channel::new(Some(&tls_config), uri).await.unwrap(); + tokio::spawn(async move { + axum::Server::from_tcp(listener) + .unwrap() + .serve(router().into_make_service()) + .await + .unwrap(); + }); - let request = Request::builder() - .uri(format!("https://{}", addr)) - .body(tonic::body::empty_body()) - .unwrap(); + let uri = Uri::try_from(format!("http://{}", addr.to_string())).unwrap(); - channel.ready().await.unwrap(); - let response = channel.call(request).await.unwrap(); + let mut channel = Channel::new(None, uri).await.unwrap(); - let body = hyper::body::to_bytes(response.into_body()).await.unwrap(); - assert_eq!(&body[..], TEST_RESPONSE); - } + let request = Request::builder() + .uri(format!("http://{}", addr)) + .body(tonic::body::empty_body()) + .unwrap(); - #[tokio::test] - async fn tls_mtls_client_request_test() { - pub struct CertVerifierMock { - saw_a_cert: std::sync::atomic::AtomicUsize, - } + channel.ready().await.unwrap(); + let response = channel.call(request).await.unwrap(); - impl rustls::server::ClientCertVerifier for CertVerifierMock { - fn offer_client_auth(&self) -> bool { - true - } - - fn client_auth_root_subjects(&self) -> &[rustls::DistinguishedName] { - &[] - } - - fn verify_client_cert( - &self, - _end_entity: &rustls::Certificate, - _intermediates: &[rustls::Certificate], - _now: std::time::SystemTime, - ) -> Result { - self - .saw_a_cert - .fetch_add(1, std::sync::atomic::Ordering::SeqCst); - - Ok(rustls::server::ClientCertVerified::assertion()) - } + let body = hyper::body::to_bytes(response.into_body()).await.unwrap(); + assert_eq!(&body[..], TEST_RESPONSE); } - let cert_pem = std::fs::read( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("cert.pem"), - ) - .unwrap(); - - let key_pem = std::fs::read( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("key.pem"), - ) - .unwrap(); - - let certificates = rustls_pemfile::certs(&mut std::io::Cursor::new(&cert_pem)) - .unwrap() - .into_iter() - .map(rustls::Certificate) - .collect::>(); - - let privkey = rustls::PrivateKey( - rustls_pemfile::pkcs8_private_keys(&mut std::io::Cursor::new(&key_pem)) - .unwrap() - .remove(0), - ); - - let mut root_store = rustls::RootCertStore::empty(); - root_store.add(&certificates[0]).unwrap(); + #[tokio::test] + async fn tls_client_request_test() { + let config = RustlsConfig::from_pem_file( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("cert.pem"), + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("key.pem"), + ) + .await + .unwrap(); - let verifier = Arc::new(CertVerifierMock { - saw_a_cert: std::sync::atomic::AtomicUsize::new(0), - }); - let mut config = 
rustls::ServerConfig::builder() - .with_safe_defaults() - .with_client_cert_verifier(verifier.clone()) - .with_single_cert(certificates.clone(), privkey.clone()) - .unwrap(); + let bind_addr = "127.0.0.1:0".parse::().unwrap(); + let listener = std::net::TcpListener::bind(bind_addr).unwrap(); + let addr = listener.local_addr().unwrap(); - config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()]; + let server = axum_server::from_tcp_rustls(listener, config); - let config = RustlsConfig::from_config(Arc::new(config)); + tokio::spawn(async move { + server.serve(router().into_make_service()).await.unwrap(); + }); - let bind_addr = "127.0.0.1:0".parse::().unwrap(); - let listener = std::net::TcpListener::bind(bind_addr).unwrap(); - let addr = listener.local_addr().unwrap(); + let uri = Uri::try_from(format!("https://{}", addr.to_string())).unwrap(); - let server = axum_server::from_tcp_rustls(listener, config); + let tls_config = ClientConfig::builder() + .with_safe_defaults() + .with_custom_certificate_verifier(Arc::new(NoVerifier)) + .with_no_client_auth(); - tokio::spawn(async move { - server.serve(router().into_make_service()).await.unwrap(); - }); + let mut channel = Channel::new(Some(&tls_config), uri).await.unwrap(); - let uri = Uri::try_from(format!("https://{}", addr.to_string())).unwrap(); + let request = Request::builder() + .uri(format!("https://{}", addr)) + .body(tonic::body::empty_body()) + .unwrap(); - let mut tls_config = - crate::tls::Config::new(Some(&cert_pem), Some((&cert_pem, &key_pem))).unwrap(); + channel.ready().await.unwrap(); + let response = channel.call(request).await.unwrap(); - tls_config.certificate_check = crate::tls::CertificateCheck::DangerouslyDisabled; + let body = hyper::body::to_bytes(response.into_body()).await.unwrap(); + assert_eq!(&body[..], TEST_RESPONSE); + } - let tls_config: rustls::ClientConfig = tls_config.try_into().unwrap(); + #[tokio::test] + async fn tls_mtls_client_request_test() { + pub struct CertVerifierMock { + saw_a_cert: std::sync::atomic::AtomicUsize, + } + + impl rustls::server::ClientCertVerifier for CertVerifierMock { + fn offer_client_auth(&self) -> bool { + true + } + + fn client_auth_root_subjects(&self) -> &[rustls::DistinguishedName] { + &[] + } + + fn verify_client_cert( + &self, + _end_entity: &rustls::Certificate, + _intermediates: &[rustls::Certificate], + _now: std::time::SystemTime, + ) -> Result { + self.saw_a_cert + .fetch_add(1, std::sync::atomic::Ordering::SeqCst); + + Ok(rustls::server::ClientCertVerified::assertion()) + } + } + + let cert_pem = std::fs::read( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("cert.pem"), + ) + .unwrap(); - let mut channel = Channel::new(Some(&tls_config), uri).await.unwrap(); + let key_pem = std::fs::read( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("key.pem"), + ) + .unwrap(); - match &channel.client { - super::Client::Plain(_) => panic!("Expected a TLS client"), - super::Client::Tls(_) => {} + let certificates = rustls_pemfile::certs(&mut std::io::Cursor::new(&cert_pem)) + .unwrap() + .into_iter() + .map(rustls::Certificate) + .collect::>(); + + let privkey = rustls::PrivateKey( + rustls_pemfile::pkcs8_private_keys(&mut std::io::Cursor::new(&key_pem)) + .unwrap() + .remove(0), + ); + + let mut root_store = rustls::RootCertStore::empty(); + root_store.add(&certificates[0]).unwrap(); + + let verifier = Arc::new(CertVerifierMock { + saw_a_cert: std::sync::atomic::AtomicUsize::new(0), + }); + let mut config = 
rustls::ServerConfig::builder() + .with_safe_defaults() + .with_client_cert_verifier(verifier.clone()) + .with_single_cert(certificates.clone(), privkey.clone()) + .unwrap(); + + config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()]; + + let config = RustlsConfig::from_config(Arc::new(config)); + + let bind_addr = "127.0.0.1:0".parse::().unwrap(); + let listener = std::net::TcpListener::bind(bind_addr).unwrap(); + let addr = listener.local_addr().unwrap(); + + let server = axum_server::from_tcp_rustls(listener, config); + + tokio::spawn(async move { + server.serve(router().into_make_service()).await.unwrap(); + }); + + let uri = Uri::try_from(format!("https://{}", addr.to_string())).unwrap(); + + let mut tls_config = + crate::tls::Config::new(Some(&cert_pem), Some((&cert_pem, &key_pem))).unwrap(); + + tls_config.certificate_check = crate::tls::CertificateCheck::DangerouslyDisabled; + + let tls_config: rustls::ClientConfig = tls_config.try_into().unwrap(); + + let mut channel = Channel::new(Some(&tls_config), uri).await.unwrap(); + + match &channel.client { + super::Client::Plain(_) => panic!("Expected a TLS client"), + super::Client::Tls(_) => {} + } + assert_eq!( + verifier + .saw_a_cert + .load(std::sync::atomic::Ordering::SeqCst), + 0 + ); + let request = Request::builder() + .uri(format!("https://{}", addr)) + .body(tonic::body::empty_body()) + .unwrap(); + + channel.ready().await.unwrap(); + let response = channel.call(request).await.unwrap(); + + let body = hyper::body::to_bytes(response.into_body()).await.unwrap(); + assert_eq!(&body[..], TEST_RESPONSE); + + assert_eq!( + verifier + .saw_a_cert + .load(std::sync::atomic::Ordering::SeqCst), + 1 + ); } - assert_eq!( - verifier - .saw_a_cert - .load(std::sync::atomic::Ordering::SeqCst), - 0 - ); - let request = Request::builder() - .uri(format!("https://{}", addr)) - .body(tonic::body::empty_body()) - .unwrap(); - - channel.ready().await.unwrap(); - let response = channel.call(request).await.unwrap(); - - let body = hyper::body::to_bytes(response.into_body()).await.unwrap(); - assert_eq!(&body[..], TEST_RESPONSE); - - assert_eq!( - verifier - .saw_a_cert - .load(std::sync::atomic::Ordering::SeqCst), - 1 - ); - } } diff --git a/src/rust/engine/grpc_util/src/headers.rs b/src/rust/engine/grpc_util/src/headers.rs index 7e8af38d003..ec009b35be1 100644 --- a/src/rust/engine/grpc_util/src/headers.rs +++ b/src/rust/engine/grpc_util/src/headers.rs @@ -11,71 +11,71 @@ use tower_service::Service; #[derive(Debug)] pub struct SetRequestHeadersLayer { - headers: HeaderMap, + headers: HeaderMap, } impl SetRequestHeadersLayer { - pub fn new(headers: HeaderMap) -> Self { - SetRequestHeadersLayer { headers } - } + pub fn new(headers: HeaderMap) -> Self { + SetRequestHeadersLayer { headers } + } } impl Layer for SetRequestHeadersLayer { - type Service = SetRequestHeaders; + type Service = SetRequestHeaders; - fn layer(&self, inner: S) -> Self::Service { - SetRequestHeaders { - inner, - headers: self.headers.clone(), + fn layer(&self, inner: S) -> Self::Service { + SetRequestHeaders { + inner, + headers: self.headers.clone(), + } } - } } #[derive(Clone)] pub struct SetRequestHeaders { - inner: S, - headers: HeaderMap, + inner: S, + headers: HeaderMap, } impl SetRequestHeaders { - pub fn new(inner: S, headers: HeaderMap) -> Self { - SetRequestHeaders { inner, headers } - } + pub fn new(inner: S, headers: HeaderMap) -> Self { + SetRequestHeaders { inner, headers } + } } impl fmt::Debug for SetRequestHeaders where - S: fmt::Debug, + S: 
fmt::Debug, { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SetRequestHeaders") - .field("inner", &self.inner) - .field("headers", &self.headers) - .finish() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SetRequestHeaders") + .field("inner", &self.inner) + .field("headers", &self.headers) + .finish() + } } impl Service> for SetRequestHeaders where - S: Service>, + S: Service>, { - type Response = S::Response; - type Error = S::Error; - type Future = S::Future; + type Response = S::Response; + type Error = S::Error; + type Future = S::Future; - #[inline] - fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { - self.inner.poll_ready(cx) - } - - fn call(&mut self, mut req: Request) -> Self::Future { - if !self.headers.is_empty() { - let headers = req.headers_mut(); - for (header_name, header_value) in &self.headers { - headers.insert(header_name, header_value.clone()); - } + #[inline] + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) } - self.inner.call(req) - } + fn call(&mut self, mut req: Request) -> Self::Future { + if !self.headers.is_empty() { + let headers = req.headers_mut(); + for (header_name, header_value) in &self.headers { + headers.insert(header_name, header_value.clone()); + } + } + + self.inner.call(req) + } } diff --git a/src/rust/engine/grpc_util/src/hyper_util.rs b/src/rust/engine/grpc_util/src/hyper_util.rs index 9c9776daf55..e14b883d4c5 100644 --- a/src/rust/engine/grpc_util/src/hyper_util.rs +++ b/src/rust/engine/grpc_util/src/hyper_util.rs @@ -11,17 +11,17 @@ use hyper::server::conn::{AddrIncoming, AddrStream}; pub struct AddrIncomingWithStream(pub AddrIncoming); impl Deref for AddrIncomingWithStream { - type Target = AddrIncoming; + type Target = AddrIncoming; - fn deref(&self) -> &Self::Target { - &self.0 - } + fn deref(&self) -> &Self::Target { + &self.0 + } } impl Stream for AddrIncomingWithStream { - type Item = Result; + type Item = Result; - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.0).poll_accept(cx) - } + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.0).poll_accept(cx) + } } diff --git a/src/rust/engine/grpc_util/src/lib.rs b/src/rust/engine/grpc_util/src/lib.rs index e79971b7dd1..086afb2d4e7 100644 --- a/src/rust/engine/grpc_util/src/lib.rs +++ b/src/rust/engine/grpc_util/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -63,195 +63,199 @@ pub mod tls; // Service layers that we use universally. If this type becomes unwieldy, or our various Services // diverge in which layers they use, we should instead use a Box>. pub type LayeredService = - SetRequestHeaders>>>>; + SetRequestHeaders>>>>; pub fn layered_service( - channel: Channel, - concurrency_limit: usize, - http_headers: HeaderMap, - timeout: Option<(Duration, Metric)>, + channel: Channel, + concurrency_limit: usize, + http_headers: HeaderMap, + timeout: Option<(Duration, Metric)>, ) -> LayeredService { - let (timeout, metric) = timeout - .map(|(t, m)| (t, Some(m))) - .unwrap_or_else(|| (Duration::from_secs(60 * 60), None)); - - ServiceBuilder::new() - .layer(SetRequestHeadersLayer::new(http_headers)) - .concurrency_limit(concurrency_limit) - .layer(NetworkMetricsLayer::new(&METRIC_FOR_REAPI_PATH)) - .layer_fn(|service| CountErrorsService { service, metric }) - .layer(TimeoutLayer::new(timeout)) - .service(channel) + let (timeout, metric) = timeout + .map(|(t, m)| (t, Some(m))) + .unwrap_or_else(|| (Duration::from_secs(60 * 60), None)); + + ServiceBuilder::new() + .layer(SetRequestHeadersLayer::new(http_headers)) + .concurrency_limit(concurrency_limit) + .layer(NetworkMetricsLayer::new(&METRIC_FOR_REAPI_PATH)) + .layer_fn(|service| CountErrorsService { service, metric }) + .layer(TimeoutLayer::new(timeout)) + .service(channel) } lazy_static! { - static ref METRIC_FOR_REAPI_PATH: Arc> = { - let mut m = HashMap::new(); - m.insert( - "/build.bazel.remote.execution.v2.ActionCache/GetActionResult".to_string(), - ObservationMetric::RemoteCacheGetActionResultNetworkTimeMicros, - ); - Arc::new(m) - }; + static ref METRIC_FOR_REAPI_PATH: Arc> = { + let mut m = HashMap::new(); + m.insert( + "/build.bazel.remote.execution.v2.ActionCache/GetActionResult".to_string(), + ObservationMetric::RemoteCacheGetActionResultNetworkTimeMicros, + ); + Arc::new(m) + }; } pub async fn create_channel( - addr: &str, - tls_config: Option<&ClientConfig>, + addr: &str, + tls_config: Option<&ClientConfig>, ) -> Result { - let uri = Uri::try_from(addr).map_err(|err| format!("invalid address: {err}"))?; - Channel::new(tls_config, uri) - .await - .map_err(|err| format!("gRPC connection error: {err}")) + let uri = Uri::try_from(addr).map_err(|err| format!("invalid address: {err}"))?; + Channel::new(tls_config, uri) + .await + .map_err(|err| format!("gRPC connection error: {err}")) } pub fn headers_to_http_header_map(headers: &BTreeMap) -> Result { - let (http_headers, errors): (Vec<(HeaderName, HeaderValue)>, Vec) = headers - .iter() - .map(|(key, value)| { - let header_name = - HeaderName::from_str(key).map_err(|err| format!("Invalid header name {key}: {err}"))?; - - let header_value = HeaderValue::from_str(value) - .map_err(|err| format!("Invalid header value {value}: {err}"))?; - - Ok((header_name, header_value)) - }) - .partition_map(|result| match result { - Ok(v) => Either::Left(v), - Err(err) => Either::Right(err), - }); - - if !errors.is_empty() { - return Err(format!("header conversion errors: {}", errors.join("; "))); - } - - Ok(HeaderMap::from_iter(http_headers)) + let (http_headers, errors): (Vec<(HeaderName, HeaderValue)>, Vec) = headers + .iter() + .map(|(key, value)| { + let header_name = HeaderName::from_str(key) + .map_err(|err| format!("Invalid header name {key}: {err}"))?; + + let header_value = HeaderValue::from_str(value) + .map_err(|err| format!("Invalid header value {value}: {err}"))?; + + 
Ok((header_name, header_value)) + }) + .partition_map(|result| match result { + Ok(v) => Either::Left(v), + Err(err) => Either::Right(err), + }); + + if !errors.is_empty() { + return Err(format!("header conversion errors: {}", errors.join("; "))); + } + + Ok(HeaderMap::from_iter(http_headers)) } pub fn status_ref_to_str(status: &tonic::Status) -> String { - format!("{:?}: {:?}", status.code(), status.message()) + format!("{:?}: {:?}", status.code(), status.message()) } pub fn status_to_str(status: tonic::Status) -> String { - status_ref_to_str(&status) + status_ref_to_str(&status) } #[derive(Clone)] pub struct CountErrorsService { - service: S, - metric: Option, + service: S, + metric: Option, } impl Service for CountErrorsService where - S: Service + Send + 'static, - S::Response: Send + 'static, - S::Error: Send + 'static, - S::Future: Send + 'static, + S: Service + Send + 'static, + S::Response: Send + 'static, + S::Error: Send + 'static, + S::Future: Send + 'static, { - type Response = S::Response; - type Error = S::Error; - type Future = BoxFuture<'static, Result>; - - fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { - self.service.poll_ready(cx) - } - - fn call(&mut self, req: Request) -> Self::Future { - let metric = self.metric; - let result = self.service.call(req); - result - .inspect_err(move |_| { - if let Some(metric) = metric { - if let Some(mut workunit_store_handle) = get_workunit_store_handle() { - workunit_store_handle.store.increment_counter(metric, 1) - } - } - }) - .boxed() - } + type Response = S::Response; + type Error = S::Error; + type Future = BoxFuture<'static, Result>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.service.poll_ready(cx) + } + + fn call(&mut self, req: Request) -> Self::Future { + let metric = self.metric; + let result = self.service.call(req); + result + .inspect_err(move |_| { + if let Some(metric) = metric { + if let Some(mut workunit_store_handle) = get_workunit_store_handle() { + workunit_store_handle.store.increment_counter(metric, 1) + } + } + }) + .boxed() + } } #[cfg(test)] mod tests { - mod gen { - tonic::include_proto!("test"); - } - - use std::collections::BTreeMap; - - use async_trait::async_trait; - use futures::FutureExt; - use tokio::sync::oneshot; - use tonic::transport::Server; - use tonic::{Request, Response, Status}; - - use crate::hyper_util::AddrIncomingWithStream; - use crate::{headers_to_http_header_map, layered_service}; - - #[tokio::test] - async fn user_agent_is_set_correctly() { - const EXPECTED_USER_AGENT: &str = "testclient/0.0.1"; - - #[derive(Clone)] - struct UserAgentResponder; - - #[async_trait] - impl gen::test_server::Test for UserAgentResponder { - async fn call(&self, request: Request) -> Result, Status> { - match request.metadata().get("user-agent") { - Some(user_agent_value) => { - let user_agent = user_agent_value.to_str().map_err(|err| { - Status::invalid_argument(format!( - "Unable to convert user-agent header to string: {err}" - )) - })?; - if user_agent.contains(EXPECTED_USER_AGENT) { - Ok(Response::new(gen::Output {})) - } else { - Err(Status::invalid_argument(format!( + mod gen { + tonic::include_proto!("test"); + } + + use std::collections::BTreeMap; + + use async_trait::async_trait; + use futures::FutureExt; + use tokio::sync::oneshot; + use tonic::transport::Server; + use tonic::{Request, Response, Status}; + + use crate::hyper_util::AddrIncomingWithStream; + use crate::{headers_to_http_header_map, layered_service}; + + #[tokio::test] + async fn 
user_agent_is_set_correctly() { + const EXPECTED_USER_AGENT: &str = "testclient/0.0.1"; + + #[derive(Clone)] + struct UserAgentResponder; + + #[async_trait] + impl gen::test_server::Test for UserAgentResponder { + async fn call( + &self, + request: Request, + ) -> Result, Status> { + match request.metadata().get("user-agent") { + Some(user_agent_value) => { + let user_agent = user_agent_value.to_str().map_err(|err| { + Status::invalid_argument(format!( + "Unable to convert user-agent header to string: {err}" + )) + })?; + if user_agent.contains(EXPECTED_USER_AGENT) { + Ok(Response::new(gen::Output {})) + } else { + Err(Status::invalid_argument(format!( "user-agent header did not contain expected value: actual={user_agent}" ))) + } + } + None => Err(Status::invalid_argument("user-agent header was not set")), + } } - } - None => Err(Status::invalid_argument("user-agent header was not set")), } - } - } - let addr = "127.0.0.1:0".parse().expect("failed to parse IP address"); - let incoming = hyper::server::conn::AddrIncoming::bind(&addr).expect("failed to bind port"); - let local_addr = incoming.local_addr(); - let incoming = AddrIncomingWithStream(incoming); + let addr = "127.0.0.1:0".parse().expect("failed to parse IP address"); + let incoming = hyper::server::conn::AddrIncoming::bind(&addr).expect("failed to bind port"); + let local_addr = incoming.local_addr(); + let incoming = AddrIncomingWithStream(incoming); - // Setup shutdown signal handler. - let (_shutdown_sender, shutdown_receiver) = oneshot::channel::<()>(); + // Setup shutdown signal handler. + let (_shutdown_sender, shutdown_receiver) = oneshot::channel::<()>(); - tokio::spawn(async move { - let mut server = Server::builder(); - let router = server.add_service(gen::test_server::TestServer::new(UserAgentResponder)); - router - .serve_with_incoming_shutdown(incoming, shutdown_receiver.map(drop)) - .await - .unwrap(); - }); + tokio::spawn(async move { + let mut server = Server::builder(); + let router = server.add_service(gen::test_server::TestServer::new(UserAgentResponder)); + router + .serve_with_incoming_shutdown(incoming, shutdown_receiver.map(drop)) + .await + .unwrap(); + }); - let headers = { - let mut h = BTreeMap::new(); - h.insert("user-agent".to_owned(), EXPECTED_USER_AGENT.to_owned()); - h - }; + let headers = { + let mut h = BTreeMap::new(); + h.insert("user-agent".to_owned(), EXPECTED_USER_AGENT.to_owned()); + h + }; - let headers = headers_to_http_header_map(&headers).unwrap(); + let headers = headers_to_http_header_map(&headers).unwrap(); - let channel = super::create_channel(&format!("http://127.0.0.1:{}", local_addr.port()), None) - .await - .unwrap(); + let channel = + super::create_channel(&format!("http://127.0.0.1:{}", local_addr.port()), None) + .await + .unwrap(); - let client = layered_service(channel, 1, headers, None); + let client = layered_service(channel, 1, headers, None); - let mut client = gen::test_client::TestClient::new(client); - client.call(gen::Input {}).await.expect("success"); - } + let mut client = gen::test_client::TestClient::new(client); + client.call(gen::Input {}).await.expect("success"); + } } diff --git a/src/rust/engine/grpc_util/src/metrics.rs b/src/rust/engine/grpc_util/src/metrics.rs index 7238abd3e42..d9e96665408 100644 --- a/src/rust/engine/grpc_util/src/metrics.rs +++ b/src/rust/engine/grpc_util/src/metrics.rs @@ -18,165 +18,165 @@ use workunit_store::{get_workunit_store_handle, ObservationMetric}; #[derive(Clone, Debug)] pub struct NetworkMetricsLayer { - metric_for_path: 
Arc>, + metric_for_path: Arc>, } impl Layer for NetworkMetricsLayer { - type Service = NetworkMetrics; + type Service = NetworkMetrics; - fn layer(&self, inner: S) -> Self::Service { - NetworkMetrics::new(inner, self.metric_for_path.clone()) - } + fn layer(&self, inner: S) -> Self::Service { + NetworkMetrics::new(inner, self.metric_for_path.clone()) + } } impl NetworkMetricsLayer { - pub fn new(metric_for_path: &Arc>) -> Self { - Self { - metric_for_path: Arc::clone(metric_for_path), + pub fn new(metric_for_path: &Arc>) -> Self { + Self { + metric_for_path: Arc::clone(metric_for_path), + } } - } } #[derive(Clone)] pub struct NetworkMetrics { - inner: S, - metric_for_path: Arc>, + inner: S, + metric_for_path: Arc>, } impl NetworkMetrics { - pub fn new(inner: S, metric_for_path: Arc>) -> Self { - Self { - inner, - metric_for_path, + pub fn new(inner: S, metric_for_path: Arc>) -> Self { + Self { + inner, + metric_for_path, + } } - } } impl fmt::Debug for NetworkMetrics where - S: fmt::Debug, + S: fmt::Debug, { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("NetworkMetrics") - .field("inner", &self.inner) - .finish() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("NetworkMetrics") + .field("inner", &self.inner) + .finish() + } } #[pin_project] pub struct NetworkMetricsFuture { - #[pin] - inner: F, - metric_data: Option<(ObservationMetric, Instant)>, + #[pin] + inner: F, + metric_data: Option<(ObservationMetric, Instant)>, } impl Future for NetworkMetricsFuture where - F: Future, E>> + Send + 'static, + F: Future, E>> + Send + 'static, { - type Output = Result, E>; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let metric_data = self.metric_data; - let this = self.project(); - let result = ready!(this.inner.poll(cx)); - if let Some((metric, start)) = metric_data { - let workunit_store_handle = get_workunit_store_handle(); - if let Some(workunit_store_handle) = workunit_store_handle { - workunit_store_handle - .store - .record_observation(metric, start.elapsed().as_micros() as u64) - } + type Output = Result, E>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let metric_data = self.metric_data; + let this = self.project(); + let result = ready!(this.inner.poll(cx)); + if let Some((metric, start)) = metric_data { + let workunit_store_handle = get_workunit_store_handle(); + if let Some(workunit_store_handle) = workunit_store_handle { + workunit_store_handle + .store + .record_observation(metric, start.elapsed().as_micros() as u64) + } + } + Poll::Ready(result) } - Poll::Ready(result) - } } impl Service> for NetworkMetrics where - S: Service, Response = Response> + Send + 'static, - ReqBody: Send + 'static, - ResBody: Send + 'static, - S::Response: Send + 'static, - S::Error: Send + 'static, - S::Future: Send + 'static, + S: Service, Response = Response> + Send + 'static, + ReqBody: Send + 'static, + ResBody: Send + 'static, + S::Response: Send + 'static, + S::Error: Send + 'static, + S::Future: Send + 'static, { - type Response = S::Response; - type Error = S::Error; - type Future = NetworkMetricsFuture; - - #[inline] - fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { - self.inner.poll_ready(cx) - } - - fn call(&mut self, req: Request) -> Self::Future { - let metric_data = self - .metric_for_path - .get(req.uri().path()) - .cloned() - .map(|metric| (metric, Instant::now())); - NetworkMetricsFuture { - inner: self.inner.call(req), - metric_data, + type Response = S::Response; + type 
Error = S::Error; + type Future = NetworkMetricsFuture; + + #[inline] + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, req: Request) -> Self::Future { + let metric_data = self + .metric_for_path + .get(req.uri().path()) + .cloned() + .map(|metric| (metric, Instant::now())); + NetworkMetricsFuture { + inner: self.inner.call(req), + metric_data, + } } - } } #[cfg(test)] mod tests { - use std::collections::HashMap; - use std::convert::Infallible; - use std::sync::Arc; - - use hyper::{Body, Request, Response}; - use tower::{ServiceBuilder, ServiceExt}; - use workunit_store::{Level, ObservationMetric, WorkunitStore}; - - use super::NetworkMetricsLayer; - - async fn handler(_: Request) -> Result, Infallible> { - Ok(Response::new(Body::empty())) - } - - #[tokio::test] - async fn collects_network_metrics() { - let ws = WorkunitStore::new(true, Level::Debug); - ws.init_thread_state(None); - - let metric_for_path: Arc> = { - let mut m = HashMap::new(); - m.insert( - "/this-is-a-metric-path".to_string(), - ObservationMetric::TestObservation, - ); - Arc::new(m) - }; - - let svc = ServiceBuilder::new() - .layer(NetworkMetricsLayer::new(&metric_for_path)) - .service_fn(handler); - - let req = Request::builder() - .uri("/not-a-metric-path") - .body(Body::empty()) - .unwrap(); - - let _ = svc.clone().oneshot(req).await.unwrap(); - let observations = ws.encode_observations().unwrap(); - assert_eq!(observations.len(), 0); // there should be no observations for `/not-a-metric-path` - - let req = Request::builder() - .uri("/this-is-a-metric-path") - .body(Body::empty()) - .unwrap(); - - let _ = svc.clone().oneshot(req).await.unwrap(); - let observations = ws.encode_observations().unwrap(); - assert_eq!(observations.len(), 1); // there should be an observation for `/this-is-a-metric-path` - assert_eq!( - observations.into_keys().collect::>(), - vec!["test_observation"] - ); - } + use std::collections::HashMap; + use std::convert::Infallible; + use std::sync::Arc; + + use hyper::{Body, Request, Response}; + use tower::{ServiceBuilder, ServiceExt}; + use workunit_store::{Level, ObservationMetric, WorkunitStore}; + + use super::NetworkMetricsLayer; + + async fn handler(_: Request) -> Result, Infallible> { + Ok(Response::new(Body::empty())) + } + + #[tokio::test] + async fn collects_network_metrics() { + let ws = WorkunitStore::new(true, Level::Debug); + ws.init_thread_state(None); + + let metric_for_path: Arc> = { + let mut m = HashMap::new(); + m.insert( + "/this-is-a-metric-path".to_string(), + ObservationMetric::TestObservation, + ); + Arc::new(m) + }; + + let svc = ServiceBuilder::new() + .layer(NetworkMetricsLayer::new(&metric_for_path)) + .service_fn(handler); + + let req = Request::builder() + .uri("/not-a-metric-path") + .body(Body::empty()) + .unwrap(); + + let _ = svc.clone().oneshot(req).await.unwrap(); + let observations = ws.encode_observations().unwrap(); + assert_eq!(observations.len(), 0); // there should be no observations for `/not-a-metric-path` + + let req = Request::builder() + .uri("/this-is-a-metric-path") + .body(Body::empty()) + .unwrap(); + + let _ = svc.clone().oneshot(req).await.unwrap(); + let observations = ws.encode_observations().unwrap(); + assert_eq!(observations.len(), 1); // there should be an observation for `/this-is-a-metric-path` + assert_eq!( + observations.into_keys().collect::>(), + vec!["test_observation"] + ); + } } diff --git a/src/rust/engine/grpc_util/src/prost.rs 
b/src/rust/engine/grpc_util/src/prost.rs index 03ab5740f5a..c5e9169240b 100644 --- a/src/rust/engine/grpc_util/src/prost.rs +++ b/src/rust/engine/grpc_util/src/prost.rs @@ -7,16 +7,15 @@ use bytes::{Bytes, BytesMut}; /// Extension methods on `prost::Message`. pub trait MessageExt: Message where - Self: Sized, + Self: Sized, { - /// Serialize this protobuf message to `bytes::Bytes`. - fn to_bytes(&self) -> Bytes { - let mut buf = BytesMut::with_capacity(self.encoded_len()); - self - .encode(&mut buf) - .expect("illegal state: encoded_len returned wrong length"); - buf.freeze() - } + /// Serialize this protobuf message to `bytes::Bytes`. + fn to_bytes(&self) -> Bytes { + let mut buf = BytesMut::with_capacity(self.encoded_len()); + self.encode(&mut buf) + .expect("illegal state: encoded_len returned wrong length"); + buf.freeze() + } } // Blanket implementation of MessageExt for all prost::Message types. @@ -24,22 +23,22 @@ impl MessageExt for M {} #[cfg(test)] mod tests { - use prost::Message; - use prost_types::Timestamp; + use prost::Message; + use prost_types::Timestamp; - use super::MessageExt; + use super::MessageExt; - #[test] - fn to_bytes_roundtrip_test() { - let t1 = Timestamp { - seconds: 500, - nanos: 10000, - }; + #[test] + fn to_bytes_roundtrip_test() { + let t1 = Timestamp { + seconds: 500, + nanos: 10000, + }; - let bytes = t1.to_bytes(); + let bytes = t1.to_bytes(); - let t2 = Timestamp::decode(bytes).unwrap(); + let t2 = Timestamp::decode(bytes).unwrap(); - assert_eq!(t1, t2); - } + assert_eq!(t1, t2); + } } diff --git a/src/rust/engine/grpc_util/src/retry.rs b/src/rust/engine/grpc_util/src/retry.rs index 9c2aec454fa..9a13f311017 100644 --- a/src/rust/engine/grpc_util/src/retry.rs +++ b/src/rust/engine/grpc_util/src/retry.rs @@ -8,149 +8,149 @@ use rand::{thread_rng, Rng}; use tonic::{Code, Status}; pub fn status_is_retryable(status: &Status) -> bool { - matches!( - status.code(), - Code::Aborted - | Code::Cancelled - | Code::Internal - | Code::ResourceExhausted - | Code::Unavailable - | Code::Unknown - ) + matches!( + status.code(), + Code::Aborted + | Code::Cancelled + | Code::Internal + | Code::ResourceExhausted + | Code::Unavailable + | Code::Unknown + ) } /// Retry a gRPC client operation using exponential back-off to delay between attempts. #[inline] pub async fn retry_call(client: C, mut f: F, is_retryable: G) -> Result where - C: Clone, - F: FnMut(C, u32) -> Fut, - G: Fn(&E) -> bool, - Fut: Future>, + C: Clone, + F: FnMut(C, u32) -> Fut, + G: Fn(&E) -> bool, + Fut: Future>, { - const INTERVAL_DURATION: Duration = Duration::from_millis(20); - const MAX_RETRIES: u32 = 3; - const MAX_BACKOFF_DURATION: Duration = Duration::from_secs(5); - - let mut num_retries = 0; - let last_error = loop { - // Delay before the next send attempt if this is a retry. - if num_retries > 0 { - let multiplier = thread_rng().gen_range(0..2_u32.pow(num_retries) + 1); - let sleep_time = INTERVAL_DURATION * multiplier; - let sleep_time = sleep_time.min(MAX_BACKOFF_DURATION); - tokio::time::sleep(sleep_time).await; - } + const INTERVAL_DURATION: Duration = Duration::from_millis(20); + const MAX_RETRIES: u32 = 3; + const MAX_BACKOFF_DURATION: Duration = Duration::from_secs(5); + + let mut num_retries = 0; + let last_error = loop { + // Delay before the next send attempt if this is a retry. 
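For reference, a minimal standalone sketch of the jittered exponential back-off that the block below applies between attempts, using the same constants as the reformatted `retry_call` (`INTERVAL_DURATION` = 20ms, `MAX_BACKOFF_DURATION` = 5s); `worst_case_delay` is a hypothetical helper for illustration only, not part of the diff:

use std::time::Duration;

// Retry `n` sleeps for INTERVAL_DURATION * m, where m is drawn uniformly from
// 0..=2^n and the product is capped at MAX_BACKOFF_DURATION. The worst case is
// therefore 40ms before retry 1, 80ms before retry 2, and so on up to the 5s cap.
fn worst_case_delay(num_retries: u32) -> Duration {
    const INTERVAL_DURATION: Duration = Duration::from_millis(20);
    const MAX_BACKOFF_DURATION: Duration = Duration::from_secs(5);
    // gen_range(0..2_u32.pow(num_retries) + 1) can return at most 2^num_retries.
    (INTERVAL_DURATION * 2_u32.pow(num_retries)).min(MAX_BACKOFF_DURATION)
}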
+ if num_retries > 0 { + let multiplier = thread_rng().gen_range(0..2_u32.pow(num_retries) + 1); + let sleep_time = INTERVAL_DURATION * multiplier; + let sleep_time = sleep_time.min(MAX_BACKOFF_DURATION); + tokio::time::sleep(sleep_time).await; + } - let client2 = client.clone(); - let result_fut = f(client2, num_retries); - let last_error = match result_fut.await { - Ok(r) => return Ok(r), - Err(err) => { - if is_retryable(&err) { - err - } else { - return Err(err); + let client2 = client.clone(); + let result_fut = f(client2, num_retries); + let last_error = match result_fut.await { + Ok(r) => return Ok(r), + Err(err) => { + if is_retryable(&err) { + err + } else { + return Err(err); + } + } + }; + + num_retries += 1; + + if num_retries >= MAX_RETRIES { + break last_error; } - } }; - num_retries += 1; - - if num_retries >= MAX_RETRIES { - break last_error; - } - }; - - Err(last_error) + Err(last_error) } #[cfg(test)] mod tests { - use std::collections::VecDeque; - use std::sync::Arc; + use std::collections::VecDeque; + use std::sync::Arc; - use parking_lot::Mutex; + use parking_lot::Mutex; - use super::retry_call; + use super::retry_call; - #[derive(Clone, Debug)] - struct MockClient { - values: Arc>>, - } + #[derive(Clone, Debug)] + struct MockClient { + values: Arc>>, + } - impl MockClient { - pub fn new(values: Vec) -> Self { - MockClient { - values: Arc::new(Mutex::new(values.into())), - } + impl MockClient { + pub fn new(values: Vec) -> Self { + MockClient { + values: Arc::new(Mutex::new(values.into())), + } + } + + async fn next(&self) -> T { + let mut values = self.values.lock(); + values.pop_front().unwrap() + } } - async fn next(&self) -> T { - let mut values = self.values.lock(); - values.pop_front().unwrap() + #[derive(Clone, Debug, Eq, PartialEq)] + struct MockError(bool, &'static str); + + #[tokio::test] + async fn retry_call_works_as_expected() { + // several retryable errors + let client = MockClient::new(vec![ + Err(MockError(true, "first")), + Err(MockError(true, "second")), + Ok(3_isize), + Ok(4_isize), + ]); + let mut expected_attempt = 0; + let result = retry_call( + client.clone(), + |client, attempt| { + // check `attempt` is being passed through as expected: starting with 0 for the first + // call, and incriminating for each one after + assert_eq!(attempt, expected_attempt); + expected_attempt += 1; + + async move { client.next().await } + }, + |err| err.0, + ) + .await; + assert_eq!(result, Ok(3_isize)); + assert_eq!(client.values.lock().len(), 1); + + // a non retryable error + let client = MockClient::new(vec![ + Err(MockError(true, "first")), + Err(MockError(false, "second")), + Ok(3_isize), + Ok(4_isize), + ]); + let result = retry_call( + client.clone(), + |client, _| async move { client.next().await }, + |err| err.0, + ) + .await; + assert_eq!(result, Err(MockError(false, "second"))); + assert_eq!(client.values.lock().len(), 2); + + // retryable errors, but too many + let client = MockClient::new(vec![ + Err(MockError(true, "first")), + Err(MockError(true, "second")), + Err(MockError(true, "third")), + Ok(1_isize), + ]); + let result = retry_call( + client.clone(), + |client, _| async move { client.next().await }, + |err| err.0, + ) + .await; + assert_eq!(result, Err(MockError(true, "third"))); + assert_eq!(client.values.lock().len(), 1); } - } - - #[derive(Clone, Debug, Eq, PartialEq)] - struct MockError(bool, &'static str); - - #[tokio::test] - async fn retry_call_works_as_expected() { - // several retryable errors - let client = MockClient::new(vec![ - 
Err(MockError(true, "first")), - Err(MockError(true, "second")), - Ok(3_isize), - Ok(4_isize), - ]); - let mut expected_attempt = 0; - let result = retry_call( - client.clone(), - |client, attempt| { - // check `attempt` is being passed through as expected: starting with 0 for the first - // call, and incriminating for each one after - assert_eq!(attempt, expected_attempt); - expected_attempt += 1; - - async move { client.next().await } - }, - |err| err.0, - ) - .await; - assert_eq!(result, Ok(3_isize)); - assert_eq!(client.values.lock().len(), 1); - - // a non retryable error - let client = MockClient::new(vec![ - Err(MockError(true, "first")), - Err(MockError(false, "second")), - Ok(3_isize), - Ok(4_isize), - ]); - let result = retry_call( - client.clone(), - |client, _| async move { client.next().await }, - |err| err.0, - ) - .await; - assert_eq!(result, Err(MockError(false, "second"))); - assert_eq!(client.values.lock().len(), 2); - - // retryable errors, but too many - let client = MockClient::new(vec![ - Err(MockError(true, "first")), - Err(MockError(true, "second")), - Err(MockError(true, "third")), - Ok(1_isize), - ]); - let result = retry_call( - client.clone(), - |client, _| async move { client.next().await }, - |err| err.0, - ) - .await; - assert_eq!(result, Err(MockError(true, "third"))); - assert_eq!(client.values.lock().len(), 1); - } } diff --git a/src/rust/engine/grpc_util/src/tls.rs b/src/rust/engine/grpc_util/src/tls.rs index 02dfa4b12cd..01beea0f10e 100644 --- a/src/rust/engine/grpc_util/src/tls.rs +++ b/src/rust/engine/grpc_util/src/tls.rs @@ -8,119 +8,124 @@ use tokio_rustls::rustls::{Certificate, ClientConfig, Error, RootCertStore, Serv #[derive(Default, Clone)] pub struct Config { - pub root_ca_certs: Option>, - pub mtls: Option, - pub certificate_check: CertificateCheck, + pub root_ca_certs: Option>, + pub mtls: Option, + pub certificate_check: CertificateCheck, } impl Config { - /// Creates a new config with the given root CA certs and mTLS config. - pub fn new>( - root_ca_certs: Option, - mtls: Option<(Buf, Buf)>, - ) -> Result { - let root_ca_certs = root_ca_certs - .map(|certs| { - let raw_certs = rustls_pemfile::certs(&mut std::io::Cursor::new(certs.as_ref())) - .map_err(|e| format!("Failed to parse TLS certs data: {e:?}"))?; - Result::<_, String>::Ok(raw_certs.into_iter().map(rustls::Certificate).collect()) - }) - .transpose()?; - - let mtls = mtls - .map(|buffers| MtlsConfig::from_pem_buffers(buffers.0.as_ref(), buffers.1.as_ref())) - .transpose()?; - - Ok(Self { - root_ca_certs, - mtls, - certificate_check: CertificateCheck::Enabled, - }) - } + /// Creates a new config with the given root CA certs and mTLS config. + pub fn new>( + root_ca_certs: Option, + mtls: Option<(Buf, Buf)>, + ) -> Result { + let root_ca_certs = root_ca_certs + .map(|certs| { + let raw_certs = rustls_pemfile::certs(&mut std::io::Cursor::new(certs.as_ref())) + .map_err(|e| format!("Failed to parse TLS certs data: {e:?}"))?; + Result::<_, String>::Ok(raw_certs.into_iter().map(rustls::Certificate).collect()) + }) + .transpose()?; + + let mtls = mtls + .map(|buffers| MtlsConfig::from_pem_buffers(buffers.0.as_ref(), buffers.1.as_ref())) + .transpose()?; + + Ok(Self { + root_ca_certs, + mtls, + certificate_check: CertificateCheck::Enabled, + }) + } } impl TryFrom for ClientConfig { - type Error = String; - - /// Create a rust-tls `ClientConfig` from root CA certs, falling back to the rust-tls-native-certs - /// crate if specific root CA certs were not given. 
- fn try_from(config: Config) -> Result { - // let tls_config = ClientConfig::builder().with_safe_defaults(); - let tls_config = ClientConfig::builder().with_safe_defaults(); - - // Add the root certificate store. - let tls_config = match config.certificate_check { - CertificateCheck::DangerouslyDisabled => { - let tls_config = tls_config.with_custom_certificate_verifier(Arc::new(NoVerifier)); - if let Some(MtlsConfig { cert_chain, key }) = config.mtls { - tls_config - .with_client_auth_cert(cert_chain, key) - .map_err(|err| format!("Error setting client authentication configuration: {err:?}"))? - } else { - tls_config.with_no_client_auth() - } - } - CertificateCheck::Enabled => { - let tls_config = { - let mut root_cert_store = RootCertStore::empty(); - - match config.root_ca_certs { - Some(certs) => { - for cert in &certs { - root_cert_store - .add(cert) - .map_err(|e| format!("failed adding CA cert to store: {e:?}"))?; - } + type Error = String; + + /// Create a rust-tls `ClientConfig` from root CA certs, falling back to the rust-tls-native-certs + /// crate if specific root CA certs were not given. + fn try_from(config: Config) -> Result { + // let tls_config = ClientConfig::builder().with_safe_defaults(); + let tls_config = ClientConfig::builder().with_safe_defaults(); + + // Add the root certificate store. + let tls_config = match config.certificate_check { + CertificateCheck::DangerouslyDisabled => { + let tls_config = tls_config.with_custom_certificate_verifier(Arc::new(NoVerifier)); + if let Some(MtlsConfig { cert_chain, key }) = config.mtls { + tls_config + .with_client_auth_cert(cert_chain, key) + .map_err(|err| { + format!("Error setting client authentication configuration: {err:?}") + })? + } else { + tls_config.with_no_client_auth() + } } - None => { - let native_root_certs = rustls_native_certs::load_native_certs().map_err(|err| { - format!( + CertificateCheck::Enabled => { + let tls_config = { + let mut root_cert_store = RootCertStore::empty(); + + match config.root_ca_certs { + Some(certs) => { + for cert in &certs { + root_cert_store.add(cert).map_err(|e| { + format!("failed adding CA cert to store: {e:?}") + })?; + } + } + None => { + let native_root_certs = rustls_native_certs::load_native_certs() + .map_err(|err| { + format!( "Could not discover root CA cert files to use TLS with remote caching and remote \ execution. Consider setting `--remote-ca-certs-path` instead to explicitly point to \ the correct PEM file.\n\n{err}", ) - })?; - - for cert in native_root_certs { - root_cert_store.add_parsable_certificates(&[cert.0]); - } + })?; + + for cert in native_root_certs { + root_cert_store.add_parsable_certificates(&[cert.0]); + } + } + } + + tls_config.with_root_certificates(root_cert_store) + }; + + if let Some(MtlsConfig { cert_chain, key }) = config.mtls { + tls_config + .with_client_auth_cert(cert_chain, key) + .map_err(|err| { + format!("Error setting client authentication configuration: {err:?}") + })? + } else { + tls_config.with_no_client_auth() + } } - } - - tls_config.with_root_certificates(root_cert_store) }; - if let Some(MtlsConfig { cert_chain, key }) = config.mtls { - tls_config - .with_client_auth_cert(cert_chain, key) - .map_err(|err| format!("Error setting client authentication configuration: {err:?}"))? - } else { - tls_config.with_no_client_auth() - } - } - }; - - Ok(tls_config) - } + Ok(tls_config) + } } #[derive(Clone)] pub struct MtlsConfig { - /// DER bytes of the certificate used for mTLS. 
- pub cert_chain: Vec, - /// DER bytes of the private key used for mTLS. - pub key: rustls::PrivateKey, + /// DER bytes of the certificate used for mTLS. + pub cert_chain: Vec, + /// DER bytes of the private key used for mTLS. + pub key: rustls::PrivateKey, } impl MtlsConfig { - pub fn from_pem_buffers(certs: &[u8], key: &[u8]) -> Result { - let cert_chain = rustls_pemfile::certs(&mut std::io::Cursor::new(certs)) - .map_err(|e| format!("Failed to parse client authentication (mTLS) certs data: {e:?}"))? - .into_iter() - .map(rustls::Certificate) - .collect(); - - let keys = rustls_pemfile::read_all(&mut std::io::Cursor::new(key)) + pub fn from_pem_buffers(certs: &[u8], key: &[u8]) -> Result { + let cert_chain = rustls_pemfile::certs(&mut std::io::Cursor::new(certs)) + .map_err(|e| format!("Failed to parse client authentication (mTLS) certs data: {e:?}"))? + .into_iter() + .map(rustls::Certificate) + .collect(); + + let keys = rustls_pemfile::read_all(&mut std::io::Cursor::new(key)) .map_err(|e| format!("Failed to parse client authentication (mTLS) key data: {e:?}"))? .into_iter() .filter(|item| match item { @@ -131,101 +136,102 @@ impl MtlsConfig { _ => true, }); - let mut key = None; - for item in keys { - use rustls_pemfile::Item; + let mut key = None; + for item in keys { + use rustls_pemfile::Item; - match item { - Item::RSAKey(buf) | Item::PKCS8Key(buf) | Item::ECKey(buf) => { - key = Some(rustls::PrivateKey(buf)) + match item { + Item::RSAKey(buf) | Item::PKCS8Key(buf) | Item::ECKey(buf) => { + key = Some(rustls::PrivateKey(buf)) + } + Item::X509Certificate(_) => unreachable!("filtered above"), + _ => unreachable!("rustls_pemfile::read_all returned an unexpected item"), + } } - Item::X509Certificate(_) => unreachable!("filtered above"), - _ => unreachable!("rustls_pemfile::read_all returned an unexpected item"), - } - } - let key = key - .ok_or_else(|| "No private key found in client authentication (mTLS) key data".to_owned())?; + let key = key.ok_or_else(|| { + "No private key found in client authentication (mTLS) key data".to_owned() + })?; - Ok(Self { cert_chain, key }) - } + Ok(Self { cert_chain, key }) + } } #[derive(Clone)] pub enum CertificateCheck { - Enabled, - DangerouslyDisabled, + Enabled, + DangerouslyDisabled, } impl Default for CertificateCheck { - fn default() -> Self { - Self::Enabled - } + fn default() -> Self { + Self::Enabled + } } pub(crate) struct NoVerifier; impl ServerCertVerifier for NoVerifier { - fn verify_server_cert( - &self, - _end_entity: &Certificate, - _intermediates: &[Certificate], - _server_name: &ServerName, - _scts: &mut dyn Iterator, - _ocsp_response: &[u8], - _now: SystemTime, - ) -> Result { - Ok(ServerCertVerified::assertion()) - } + fn verify_server_cert( + &self, + _end_entity: &Certificate, + _intermediates: &[Certificate], + _server_name: &ServerName, + _scts: &mut dyn Iterator, + _ocsp_response: &[u8], + _now: SystemTime, + ) -> Result { + Ok(ServerCertVerified::assertion()) + } } #[cfg(test)] mod test { - use super::Config; - use std::path::PathBuf; + use super::Config; + use std::path::PathBuf; - #[test] - fn test_client_auth_cert_resolver_is_unconfigured_no_mtls() { - let cert_pem = std::fs::read( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("cert.pem"), - ) - .unwrap(); + #[test] + fn test_client_auth_cert_resolver_is_unconfigured_no_mtls() { + let cert_pem = std::fs::read( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("cert.pem"), + ) + .unwrap(); - let config = 
Config::new(Some(&cert_pem), None).unwrap(); + let config = Config::new(Some(&cert_pem), None).unwrap(); - assert!(config.root_ca_certs.is_some()); - assert!(config.mtls.is_none()); + assert!(config.root_ca_certs.is_some()); + assert!(config.mtls.is_none()); - let rustls_config: rustls::ClientConfig = config.try_into().unwrap(); + let rustls_config: rustls::ClientConfig = config.try_into().unwrap(); - assert!(!rustls_config.client_auth_cert_resolver.has_certs()); - } + assert!(!rustls_config.client_auth_cert_resolver.has_certs()); + } - #[test] - fn test_client_auth_cert_resolver_is_configured() { - let cert_pem = std::fs::read( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("cert.pem"), - ) - .unwrap(); + #[test] + fn test_client_auth_cert_resolver_is_configured() { + let cert_pem = std::fs::read( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("cert.pem"), + ) + .unwrap(); - let key_pem = std::fs::read( - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("test-certs") - .join("key.pem"), - ) - .unwrap(); + let key_pem = std::fs::read( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-certs") + .join("key.pem"), + ) + .unwrap(); - let config = Config::new(Some(&cert_pem), Some((&cert_pem, &key_pem))).unwrap(); + let config = Config::new(Some(&cert_pem), Some((&cert_pem, &key_pem))).unwrap(); - assert!(config.root_ca_certs.is_some()); - assert!(config.mtls.is_some()); + assert!(config.root_ca_certs.is_some()); + assert!(config.mtls.is_some()); - let rustls_config: rustls::ClientConfig = config.try_into().unwrap(); + let rustls_config: rustls::ClientConfig = config.try_into().unwrap(); - assert!(rustls_config.client_auth_cert_resolver.has_certs()); - } + assert!(rustls_config.client_auth_cert_resolver.has_certs()); + } } diff --git a/src/rust/engine/hashing/src/digest_tests.rs b/src/rust/engine/hashing/src/digest_tests.rs index 4735d0f6550..9e528d2ddc1 100644 --- a/src/rust/engine/hashing/src/digest_tests.rs +++ b/src/rust/engine/hashing/src/digest_tests.rs @@ -7,25 +7,25 @@ use serde_test; #[test] fn serialize_and_deserialize() { - let digest = Digest::new( - Fingerprint::from_hex_string( - "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", - ) - .unwrap(), - 1, - ); - assert_tokens( - &digest, - &[ - Token::Struct { - name: "digest", - len: 2, - }, - Token::Str("fingerprint"), - Token::Str("0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff"), - Token::Str("size_bytes"), - Token::U64(1), - Token::StructEnd, - ], - ); + let digest = Digest::new( + Fingerprint::from_hex_string( + "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", + ) + .unwrap(), + 1, + ); + assert_tokens( + &digest, + &[ + Token::Struct { + name: "digest", + len: 2, + }, + Token::Str("fingerprint"), + Token::Str("0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff"), + Token::Str("size_bytes"), + Token::U64(1), + Token::StructEnd, + ], + ); } diff --git a/src/rust/engine/hashing/src/fingerprint_tests.rs b/src/rust/engine/hashing/src/fingerprint_tests.rs index 67674ce54d1..b3a74262074 100644 --- a/src/rust/engine/hashing/src/fingerprint_tests.rs +++ b/src/rust/engine/hashing/src/fingerprint_tests.rs @@ -5,80 +5,85 @@ use serde_test::{assert_ser_tokens, Token}; #[test] fn from_bytes_unsafe() { - assert_eq!( - Fingerprint::from_bytes_unsafe(&[ - 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, - 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 
0xab, 0xab, 0xab, - 0xab, 0xab, - ],), - Fingerprint([0xab; 32]) - ); + assert_eq!( + Fingerprint::from_bytes_unsafe(&[ + 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, + 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, + 0xab, 0xab, 0xab, 0xab, + ],), + Fingerprint([0xab; 32]) + ); } #[test] fn from_hex_string() { - assert_eq!( - Fingerprint::from_hex_string( - "0123456789abcdefFEDCBA98765432100000000000000000ffFFfFfFFfFfFFff", + assert_eq!( + Fingerprint::from_hex_string( + "0123456789abcdefFEDCBA98765432100000000000000000ffFFfFfFFfFfFFff", + ) + .unwrap(), + Fingerprint([ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, + 0x32, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + ],) ) - .unwrap(), - Fingerprint([ - 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, - ],) - ) } #[test] fn from_hex_string_not_long_enough() { - Fingerprint::from_hex_string("abcd").expect_err("Want err"); + Fingerprint::from_hex_string("abcd").expect_err("Want err"); } #[test] fn from_hex_string_too_long() { - Fingerprint::from_hex_string("0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0") + Fingerprint::from_hex_string( + "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0", + ) .expect_err("Want err"); } #[test] fn from_hex_string_invalid_chars() { - Fingerprint::from_hex_string("Q123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF") + Fingerprint::from_hex_string( + "Q123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF", + ) .expect_err("Want err"); } #[test] fn to_hex() { - assert_eq!( - Fingerprint([ - 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, - ],) - .to_hex(), - "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff".to_lowercase() - ) + assert_eq!( + Fingerprint([ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, + 0x32, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + ],) + .to_hex(), + "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff".to_lowercase() + ) } #[test] fn display() { - let hex = "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; - assert_eq!( - Fingerprint::from_hex_string(hex).unwrap().to_hex(), - hex.to_lowercase() - ) + let hex = "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; + assert_eq!( + Fingerprint::from_hex_string(hex).unwrap().to_hex(), + hex.to_lowercase() + ) } #[test] fn serialize_to_str() { - let fingerprint = Fingerprint([ - 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - ]); - assert_ser_tokens( - &fingerprint, - &[Token::Str( - "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", - )], - ); + let fingerprint = Fingerprint([ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, + ]); + assert_ser_tokens( + 
&fingerprint, + &[Token::Str( + "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", + )], + ); } diff --git a/src/rust/engine/hashing/src/hasher_tests.rs b/src/rust/engine/hashing/src/hasher_tests.rs index 9a0a8316736..d6c9515bec4 100644 --- a/src/rust/engine/hashing/src/hasher_tests.rs +++ b/src/rust/engine/hashing/src/hasher_tests.rs @@ -6,52 +6,52 @@ use tokio::io::{AsyncReadExt, AsyncWriteExt}; #[test] fn hashes() { - let mut src = "meep".as_bytes(); + let mut src = "meep".as_bytes(); - let dst = Vec::with_capacity(10); - let mut hasher = super::WriterHasher::new(dst); - assert_eq!(std::io::copy(&mut src, &mut hasher).unwrap(), 4); - let want = ( - super::Digest::new( - super::Fingerprint::from_hex_string( - "23e92dfba8fb0c93cfba31ad2962b4e35a47054296d1d375d7f7e13e0185de7a", - ) - .unwrap(), - 4, - ), - "meep".as_bytes().to_vec(), - ); - assert_eq!(hasher.finish(), want); + let dst = Vec::with_capacity(10); + let mut hasher = super::WriterHasher::new(dst); + assert_eq!(std::io::copy(&mut src, &mut hasher).unwrap(), 4); + let want = ( + super::Digest::new( + super::Fingerprint::from_hex_string( + "23e92dfba8fb0c93cfba31ad2962b4e35a47054296d1d375d7f7e13e0185de7a", + ) + .unwrap(), + 4, + ), + "meep".as_bytes().to_vec(), + ); + assert_eq!(hasher.finish(), want); } #[tokio::test] async fn async_hashes() { - let tmpdir = TempDir::new().unwrap(); - let tmppath = tmpdir.path().to_owned(); - let mut src_file = tokio::fs::File::create(tmppath.join("src")).await.unwrap(); - src_file.write_all(b"meep").await.unwrap(); - let mut src_file = tokio::fs::File::open(tmppath.join("src")).await.unwrap(); - let mut dest_file = tokio::fs::File::create(tmppath.join("dest")).await.unwrap(); + let tmpdir = TempDir::new().unwrap(); + let tmppath = tmpdir.path().to_owned(); + let mut src_file = tokio::fs::File::create(tmppath.join("src")).await.unwrap(); + src_file.write_all(b"meep").await.unwrap(); + let mut src_file = tokio::fs::File::open(tmppath.join("src")).await.unwrap(); + let mut dest_file = tokio::fs::File::create(tmppath.join("dest")).await.unwrap(); - let mut hasher = super::WriterHasher::new(&mut dest_file); - assert_eq!( - tokio::io::copy(&mut src_file, &mut hasher).await.unwrap(), - 4 - ); - let want = super::Digest::new( - super::Fingerprint::from_hex_string( - "23e92dfba8fb0c93cfba31ad2962b4e35a47054296d1d375d7f7e13e0185de7a", - ) - .unwrap(), - 4, - ); - assert_eq!(hasher.finish().0, want); - let mut contents = vec![]; - tokio::fs::File::open(tmppath.join("dest")) - .await - .unwrap() - .read_to_end(&mut contents) - .await - .unwrap(); - assert_eq!("meep".as_bytes().to_vec(), contents); + let mut hasher = super::WriterHasher::new(&mut dest_file); + assert_eq!( + tokio::io::copy(&mut src_file, &mut hasher).await.unwrap(), + 4 + ); + let want = super::Digest::new( + super::Fingerprint::from_hex_string( + "23e92dfba8fb0c93cfba31ad2962b4e35a47054296d1d375d7f7e13e0185de7a", + ) + .unwrap(), + 4, + ); + assert_eq!(hasher.finish().0, want); + let mut contents = vec![]; + tokio::fs::File::open(tmppath.join("dest")) + .await + .unwrap() + .read_to_end(&mut contents) + .await + .unwrap(); + assert_eq!("meep".as_bytes().to_vec(), contents); } diff --git a/src/rust/engine/hashing/src/lib.rs b/src/rust/engine/hashing/src/lib.rs index 6e563cd774f..c92e5af166a 100644 --- a/src/rust/engine/hashing/src/lib.rs +++ b/src/rust/engine/hashing/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. 
It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -42,12 +42,12 @@ use sha2::{Digest as Sha256Digest, Sha256}; use tokio::io::{AsyncRead, AsyncWrite}; pub const EMPTY_FINGERPRINT: Fingerprint = Fingerprint([ - 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, - 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55, + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, + 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55, ]); pub const EMPTY_DIGEST: Digest = Digest { - hash: EMPTY_FINGERPRINT, - size_bytes: 0, + hash: EMPTY_FINGERPRINT, + size_bytes: 0, }; pub const FINGERPRINT_SIZE: usize = 32; @@ -56,132 +56,133 @@ pub const FINGERPRINT_SIZE: usize = 32; pub struct Fingerprint(pub [u8; FINGERPRINT_SIZE]); impl Fingerprint { - pub fn from_bytes_unsafe(bytes: &[u8]) -> Fingerprint { - if bytes.len() != FINGERPRINT_SIZE { - panic!( - "Input value was not a fingerprint; had length: {}", - bytes.len() - ); + pub fn from_bytes_unsafe(bytes: &[u8]) -> Fingerprint { + if bytes.len() != FINGERPRINT_SIZE { + panic!( + "Input value was not a fingerprint; had length: {}", + bytes.len() + ); + } + + let mut fingerprint = [0; FINGERPRINT_SIZE]; + fingerprint.clone_from_slice(&bytes[0..FINGERPRINT_SIZE]); + Fingerprint(fingerprint) + } + + pub fn from_bytes(bytes: GenericArray) -> Fingerprint { + Fingerprint(bytes.into()) } - let mut fingerprint = [0; FINGERPRINT_SIZE]; - fingerprint.clone_from_slice(&bytes[0..FINGERPRINT_SIZE]); - Fingerprint(fingerprint) - } - - pub fn from_bytes(bytes: GenericArray) -> Fingerprint { - Fingerprint(bytes.into()) - } - - pub fn from_hex_string(hex_string: &str) -> Result { - <[u8; FINGERPRINT_SIZE] as hex::FromHex>::from_hex(hex_string) - .map(Fingerprint) - .map_err(|e| format!("{e:?}")) - } - - pub fn as_bytes(&self) -> &[u8; FINGERPRINT_SIZE] { - &self.0 - } - - #[allow(clippy::wrong_self_convention)] - pub fn to_hex(&self) -> String { - // TODO: Switch to `hex::encode` from the `hex` crate. 
- let mut s = String::new(); - for &byte in &self.0 { - fmt::Write::write_fmt(&mut s, format_args!("{byte:02x}")).unwrap(); + pub fn from_hex_string(hex_string: &str) -> Result { + <[u8; FINGERPRINT_SIZE] as hex::FromHex>::from_hex(hex_string) + .map(Fingerprint) + .map_err(|e| format!("{e:?}")) + } + + pub fn as_bytes(&self) -> &[u8; FINGERPRINT_SIZE] { + &self.0 + } + + #[allow(clippy::wrong_self_convention)] + pub fn to_hex(&self) -> String { + // TODO: Switch to `hex::encode` from the `hex` crate. + let mut s = String::new(); + for &byte in &self.0 { + fmt::Write::write_fmt(&mut s, format_args!("{byte:02x}")).unwrap(); + } + s + } + + /// + /// Using the fact that a Fingerprint is computed using a strong hash function, computes a strong + /// but short hash value from a prefix. + /// + pub fn prefix_hash(&self) -> u64 { + byteorder::BigEndian::read_u64(&self.0) } - s - } - - /// - /// Using the fact that a Fingerprint is computed using a strong hash function, computes a strong - /// but short hash value from a prefix. - /// - pub fn prefix_hash(&self) -> u64 { - byteorder::BigEndian::read_u64(&self.0) - } } impl fmt::Display for Fingerprint { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.to_hex()) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.to_hex()) + } } impl fmt::Debug for Fingerprint { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Fingerprint<{}>", self.to_hex()) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Fingerprint<{}>", self.to_hex()) + } } impl AsRef<[u8]> for Fingerprint { - fn as_ref(&self) -> &[u8] { - &self.0[..] - } + fn as_ref(&self) -> &[u8] { + &self.0[..] + } } impl Serialize for Fingerprint { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(self.to_hex().as_str()) - } + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(self.to_hex().as_str()) + } } impl<'de> Deserialize<'de> for Fingerprint { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct FingerprintVisitor; - - impl<'de> Visitor<'de> for FingerprintVisitor { - type Value = Fingerprint; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { - formatter.write_str("struct Fingerprint") - } - - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { - Fingerprint::from_hex_string(v).map_err(|err| { - serde::de::Error::invalid_value( - serde::de::Unexpected::Str(&format!("{v:?}: {err}")), - &format!("A hex representation of a {FINGERPRINT_SIZE} byte value").as_str(), - ) - }) - } - } + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FingerprintVisitor; + + impl<'de> Visitor<'de> for FingerprintVisitor { + type Value = Fingerprint; - deserializer.deserialize_string(FingerprintVisitor) - } + fn expecting(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { + formatter.write_str("struct Fingerprint") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Fingerprint::from_hex_string(v).map_err(|err| { + serde::de::Error::invalid_value( + serde::de::Unexpected::Str(&format!("{v:?}: {err}")), + &format!("A hex representation of a {FINGERPRINT_SIZE} byte value") + .as_str(), + ) + }) + } + } + + deserializer.deserialize_string(FingerprintVisitor) + } } impl FromStr for Fingerprint { - type Err = String; + type Err = 
String; - fn from_str(s: &str) -> Result { - Fingerprint::from_hex_string(s) - } + fn from_str(s: &str) -> Result { + Fingerprint::from_hex_string(s) + } } impl TryFrom<&str> for Fingerprint { - type Error = String; + type Error = String; - fn try_from(s: &str) -> Result { - Fingerprint::from_hex_string(s) - } + fn try_from(s: &str) -> Result { + Fingerprint::from_hex_string(s) + } } #[derive(Eq, PartialEq, Ord, PartialOrd)] pub struct AgedFingerprint { - // expired_seconds_ago must be the first field for the Ord implementation. - pub expired_seconds_ago: u64, - pub fingerprint: Fingerprint, - pub size_bytes: usize, + // expired_seconds_ago must be the first field for the Ord implementation. + pub expired_seconds_ago: u64, + pub fingerprint: Fingerprint, + pub size_bytes: usize, } /// @@ -193,179 +194,181 @@ pub struct AgedFingerprint { /// #[derive(Clone, Copy, Debug, DeepSizeOf, Eq, Hash, PartialEq)] pub struct Digest { - pub hash: Fingerprint, - pub size_bytes: usize, + pub hash: Fingerprint, + pub size_bytes: usize, } impl Serialize for Digest { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut obj = serializer.serialize_struct("digest", 2)?; - obj.serialize_field("fingerprint", &self.hash)?; - obj.serialize_field("size_bytes", &self.size_bytes)?; - obj.end() - } + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut obj = serializer.serialize_struct("digest", 2)?; + obj.serialize_field("fingerprint", &self.hash)?; + obj.serialize_field("size_bytes", &self.size_bytes)?; + obj.end() + } } #[derive(Deserialize)] #[serde(field_identifier, rename_all = "snake_case")] enum Field { - Fingerprint, - SizeBytes, + Fingerprint, + SizeBytes, } impl<'de> Deserialize<'de> for Digest { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct DigestVisitor; - - impl<'de> Visitor<'de> for DigestVisitor { - type Value = Digest; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("struct digest") - } - - fn visit_map(self, mut map: V) -> Result - where - V: MapAccess<'de>, - { - use serde::de; - - let mut fingerprint = None; - let mut size_bytes = None; - while let Some(key) = map.next_key()? { - match key { - Field::Fingerprint => { - if fingerprint.is_some() { - return Err(de::Error::duplicate_field("fingerprint")); - } - fingerprint = Some(map.next_value()?); + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct DigestVisitor; + + impl<'de> Visitor<'de> for DigestVisitor { + type Value = Digest; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("struct digest") } - Field::SizeBytes => { - if size_bytes.is_some() { - return Err(de::Error::duplicate_field("size_bytes")); - } - size_bytes = Some(map.next_value()?); + + fn visit_map(self, mut map: V) -> Result + where + V: MapAccess<'de>, + { + use serde::de; + + let mut fingerprint = None; + let mut size_bytes = None; + while let Some(key) = map.next_key()? 
{ + match key { + Field::Fingerprint => { + if fingerprint.is_some() { + return Err(de::Error::duplicate_field("fingerprint")); + } + fingerprint = Some(map.next_value()?); + } + Field::SizeBytes => { + if size_bytes.is_some() { + return Err(de::Error::duplicate_field("size_bytes")); + } + size_bytes = Some(map.next_value()?); + } + } + } + let fingerprint = + fingerprint.ok_or_else(|| de::Error::missing_field("fingerprint"))?; + let size_bytes = + size_bytes.ok_or_else(|| de::Error::missing_field("size_bytes"))?; + Ok(Digest::new(fingerprint, size_bytes)) } - } } - let fingerprint = fingerprint.ok_or_else(|| de::Error::missing_field("fingerprint"))?; - let size_bytes = size_bytes.ok_or_else(|| de::Error::missing_field("size_bytes"))?; - Ok(Digest::new(fingerprint, size_bytes)) - } - } - const FIELDS: &[&str] = &["fingerprint", "size_bytes"]; - deserializer.deserialize_struct("digest", FIELDS, DigestVisitor) - } + const FIELDS: &[&str] = &["fingerprint", "size_bytes"]; + deserializer.deserialize_struct("digest", FIELDS, DigestVisitor) + } } impl Digest { - pub fn new(hash: Fingerprint, size_bytes: usize) -> Digest { - Digest { hash, size_bytes } - } + pub fn new(hash: Fingerprint, size_bytes: usize) -> Digest { + Digest { hash, size_bytes } + } - pub fn of_bytes(bytes: &[u8]) -> Self { - let mut hasher = Sha256::default(); - hasher.update(bytes); + pub fn of_bytes(bytes: &[u8]) -> Self { + let mut hasher = Sha256::default(); + hasher.update(bytes); - Digest::new(Fingerprint::from_bytes(hasher.finalize()), bytes.len()) - } + Digest::new(Fingerprint::from_bytes(hasher.finalize()), bytes.len()) + } } /// A thin wrapper around a Sha256 hasher to preserve the length as well. pub struct Hasher { - hasher: Sha256, - byte_count: usize, + hasher: Sha256, + byte_count: usize, } impl Hasher { - pub fn new() -> Self { - Self { - hasher: Sha256::default(), - byte_count: 0, + pub fn new() -> Self { + Self { + hasher: Sha256::default(), + byte_count: 0, + } + } + + pub fn update(&mut self, buf: &[u8]) { + self.hasher.update(buf); + self.byte_count += buf.len(); + } + + pub fn finish(self) -> Digest { + Digest::new( + Fingerprint::from_bytes(self.hasher.finalize()), + self.byte_count, + ) } - } - - pub fn update(&mut self, buf: &[u8]) { - self.hasher.update(buf); - self.byte_count += buf.len(); - } - - pub fn finish(self) -> Digest { - Digest::new( - Fingerprint::from_bytes(self.hasher.finalize()), - self.byte_count, - ) - } } /// /// A Write instance that fingerprints all data that passes through it. /// pub struct WriterHasher { - hasher: Hasher, - inner: T, + hasher: Hasher, + inner: T, } impl WriterHasher { - pub fn new(inner: T) -> WriterHasher { - WriterHasher { - hasher: Hasher::new(), - inner: inner, + pub fn new(inner: T) -> WriterHasher { + WriterHasher { + hasher: Hasher::new(), + inner: inner, + } + } + + /// + /// Returns the result of fingerprinting this stream, and Drops the stream. + /// + pub fn finish(self) -> (Digest, T) { + (self.hasher.finish(), self.inner) } - } - - /// - /// Returns the result of fingerprinting this stream, and Drops the stream. - /// - pub fn finish(self) -> (Digest, T) { - (self.hasher.finish(), self.inner) - } } impl Write for WriterHasher { - fn write(&mut self, buf: &[u8]) -> io::Result { - let written = self.inner.write(buf)?; - // Hash the bytes that were successfully written. 
- self.hasher.update(&buf[0..written]); - Ok(written) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } + fn write(&mut self, buf: &[u8]) -> io::Result { + let written = self.inner.write(buf)?; + // Hash the bytes that were successfully written. + self.hasher.update(&buf[0..written]); + Ok(written) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } } impl AsyncWrite for WriterHasher<&mut AW> { - fn poll_write( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - let inner = Pin::new(&mut *self.inner); - let result = inner.poll_write(cx, buf); - if let Poll::Ready(Ok(written)) = result { - // Hash the bytes that were successfully written. - self.hasher.update(&buf[0..written]); + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let inner = Pin::new(&mut *self.inner); + let result = inner.poll_write(cx, buf); + if let Poll::Ready(Ok(written)) = result { + // Hash the bytes that were successfully written. + self.hasher.update(&buf[0..written]); + } + result + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let inner = Pin::new(&mut *self.inner); + inner.poll_flush(cx) + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let inner = Pin::new(&mut *self.inner); + inner.poll_shutdown(cx) } - result - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let inner = Pin::new(&mut *self.inner); - inner.poll_flush(cx) - } - - fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let inner = Pin::new(&mut *self.inner); - inner.poll_shutdown(cx) - } } /// @@ -373,38 +376,38 @@ impl AsyncWrite for WriterHasher<&mut AW> { /// Use hash() to just hash without copying the data anywhere. /// pub fn sync_copy_and_hash( - reader: &mut R, - writer: &mut W, + reader: &mut R, + writer: &mut W, ) -> io::Result where - R: io::Read, - W: io::Write, + R: io::Read, + W: io::Write, { - let mut hasher = WriterHasher::new(writer); - let _ = io::copy(reader, &mut hasher)?; - Ok(hasher.finish().0) + let mut hasher = WriterHasher::new(writer); + let _ = io::copy(reader, &mut hasher)?; + Ok(hasher.finish().0) } /// /// Copy from reader to writer and return whether the copied data matches expected_digest. /// pub fn sync_verified_copy( - expected_digest: Digest, - data_is_immutable: bool, - reader: &mut R, - writer: &mut W, + expected_digest: Digest, + data_is_immutable: bool, + reader: &mut R, + writer: &mut W, ) -> io::Result where - R: io::Read, - W: io::Write, + R: io::Read, + W: io::Write, { - if data_is_immutable { - // Trust that the data hasn't changed, and only validate its length. - let copied = io::copy(reader, writer)?; - Ok(copied as usize == expected_digest.size_bytes) - } else { - Ok(expected_digest == sync_copy_and_hash(reader, writer)?) - } + if data_is_immutable { + // Trust that the data hasn't changed, and only validate its length. + let copied = io::copy(reader, writer)?; + Ok(copied as usize == expected_digest.size_bytes) + } else { + Ok(expected_digest == sync_copy_and_hash(reader, writer)?) 
+ } } /// @@ -413,34 +416,34 @@ where /// pub async fn async_copy_and_hash(reader: &mut R, writer: &mut W) -> tokio::io::Result where - R: AsyncRead + Unpin + ?Sized, - W: AsyncWrite + Unpin + ?Sized, + R: AsyncRead + Unpin + ?Sized, + W: AsyncWrite + Unpin + ?Sized, { - let mut hasher = WriterHasher::new(writer); - let _ = tokio::io::copy(reader, &mut hasher).await?; - Ok(hasher.finish().0) + let mut hasher = WriterHasher::new(writer); + let _ = tokio::io::copy(reader, &mut hasher).await?; + Ok(hasher.finish().0) } /// /// Copy from reader to writer and return whether the copied data matches expected_digest. /// pub async fn async_verified_copy( - expected_digest: Digest, - data_is_immutable: bool, - reader: &mut R, - writer: &mut W, + expected_digest: Digest, + data_is_immutable: bool, + reader: &mut R, + writer: &mut W, ) -> tokio::io::Result where - R: AsyncRead + Unpin + ?Sized, - W: AsyncWrite + Unpin + ?Sized, + R: AsyncRead + Unpin + ?Sized, + W: AsyncWrite + Unpin + ?Sized, { - if data_is_immutable { - // Trust that the data hasn't changed, and only validate its length. - let copied = tokio::io::copy(reader, writer).await?; - Ok(copied as usize == expected_digest.size_bytes) - } else { - Ok(expected_digest == async_copy_and_hash(reader, writer).await?) - } + if data_is_immutable { + // Trust that the data hasn't changed, and only validate its length. + let copied = tokio::io::copy(reader, writer).await?; + Ok(copied as usize == expected_digest.size_bytes) + } else { + Ok(expected_digest == async_copy_and_hash(reader, writer).await?) + } } #[cfg(test)] diff --git a/src/rust/engine/logging/build.rs b/src/rust/engine/logging/build.rs index db508e8c930..ac258238759 100644 --- a/src/rust/engine/logging/build.rs +++ b/src/rust/engine/logging/build.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -30,30 +30,30 @@ use std::io::Write; use std::path::PathBuf; fn main() { - let manifest_path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()) - .parent() - .unwrap() - .join("Cargo.toml"); - println!("cargo:rerun-if-changed={}", manifest_path.display()); + let manifest_path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()) + .parent() + .unwrap() + .join("Cargo.toml"); + println!("cargo:rerun-if-changed={}", manifest_path.display()); - let metadata = MetadataCommand::new() - .manifest_path(manifest_path) - .no_deps() - .exec() - .expect("Error accessing cargo metadata"); + let metadata = MetadataCommand::new() + .manifest_path(manifest_path) + .no_deps() + .exec() + .expect("Error accessing cargo metadata"); - let mut packages: Vec<_> = metadata - .workspace_members - .iter() - .map(|package_id| metadata[package_id].name.clone()) - .collect(); - packages.sort(); + let mut packages: Vec<_> = metadata + .workspace_members + .iter() + .map(|package_id| metadata[package_id].name.clone()) + .collect(); + packages.sort(); - let mut out_file = - File::create(PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("packages.rs")).unwrap(); - writeln!(out_file, "pub const PANTS_PACKAGE_NAMES: &[&str] = &[").unwrap(); - for package in packages { - writeln!(out_file, " \"{package}\",").unwrap(); - } - writeln!(out_file, "];").unwrap(); + let mut out_file = + File::create(PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("packages.rs")).unwrap(); + writeln!(out_file, "pub const PANTS_PACKAGE_NAMES: &[&str] = &[").unwrap(); + for package in packages { + writeln!(out_file, " \"{package}\",").unwrap(); + } + writeln!(out_file, "];").unwrap(); } diff --git a/src/rust/engine/logging/src/lib.rs b/src/rust/engine/logging/src/lib.rs index 2c1d924e966..90247228203 100644 --- a/src/rust/engine/logging/src/lib.rs +++ b/src/rust/engine/logging/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -51,60 +51,60 @@ use num_enum::TryFromPrimitive; #[derive(Debug, Eq, PartialEq, TryFromPrimitive, Clone, Copy)] #[repr(u64)] pub enum PythonLogLevel { - NotSet = 0, - // Trace doesn't exist in a Python world, so set it to "a bit lower than Debug". - Trace = 5, - Debug = 10, - Info = 20, - Warn = 30, - Error_ = 40, - Critical = 50, + NotSet = 0, + // Trace doesn't exist in a Python world, so set it to "a bit lower than Debug". 
+ Trace = 5, + Debug = 10, + Info = 20, + Warn = 30, + Error_ = 40, + Critical = 50, } impl From for PythonLogLevel { - fn from(level: log::Level) -> Self { - match level { - log::Level::Error => PythonLogLevel::Error_, - log::Level::Warn => PythonLogLevel::Warn, - log::Level::Info => PythonLogLevel::Info, - log::Level::Debug => PythonLogLevel::Debug, - log::Level::Trace => PythonLogLevel::Trace, + fn from(level: log::Level) -> Self { + match level { + log::Level::Error => PythonLogLevel::Error_, + log::Level::Warn => PythonLogLevel::Warn, + log::Level::Info => PythonLogLevel::Info, + log::Level::Debug => PythonLogLevel::Debug, + log::Level::Trace => PythonLogLevel::Trace, + } } - } } impl From for log::LevelFilter { - fn from(level: PythonLogLevel) -> Self { - match level { - PythonLogLevel::NotSet => log::LevelFilter::Off, - PythonLogLevel::Trace => log::LevelFilter::Trace, - PythonLogLevel::Debug => log::LevelFilter::Debug, - PythonLogLevel::Info => log::LevelFilter::Info, - PythonLogLevel::Warn => log::LevelFilter::Warn, - PythonLogLevel::Error_ => log::LevelFilter::Error, - // Rust doesn't have a Critical, so treat them like Errors. - PythonLogLevel::Critical => log::LevelFilter::Error, + fn from(level: PythonLogLevel) -> Self { + match level { + PythonLogLevel::NotSet => log::LevelFilter::Off, + PythonLogLevel::Trace => log::LevelFilter::Trace, + PythonLogLevel::Debug => log::LevelFilter::Debug, + PythonLogLevel::Info => log::LevelFilter::Info, + PythonLogLevel::Warn => log::LevelFilter::Warn, + PythonLogLevel::Error_ => log::LevelFilter::Error, + // Rust doesn't have a Critical, so treat them like Errors. + PythonLogLevel::Critical => log::LevelFilter::Error, + } } - } } impl From for log::Level { - fn from(level: PythonLogLevel) -> Self { - match level { - PythonLogLevel::NotSet => { - panic!("PythonLogLevel::NotSet doesn't have a translation to Level") - } - PythonLogLevel::Trace => log::Level::Trace, - PythonLogLevel::Debug => log::Level::Debug, - PythonLogLevel::Info => log::Level::Info, - PythonLogLevel::Warn => log::Level::Warn, - PythonLogLevel::Error_ => log::Level::Error, - // Rust doesn't have a Critical, so treat them like Errors. - PythonLogLevel::Critical => log::Level::Error, + fn from(level: PythonLogLevel) -> Self { + match level { + PythonLogLevel::NotSet => { + panic!("PythonLogLevel::NotSet doesn't have a translation to Level") + } + PythonLogLevel::Trace => log::Level::Trace, + PythonLogLevel::Debug => log::Level::Debug, + PythonLogLevel::Info => log::Level::Info, + PythonLogLevel::Warn => log::Level::Warn, + PythonLogLevel::Error_ => log::Level::Error, + // Rust doesn't have a Critical, so treat them like Errors. + PythonLogLevel::Critical => log::Level::Error, + } } - } } mod pants_packages { - include!(concat!(env!("OUT_DIR"), "/packages.rs")); + include!(concat!(env!("OUT_DIR"), "/packages.rs")); } diff --git a/src/rust/engine/logging/src/logger.rs b/src/rust/engine/logging/src/logger.rs index cafb60be59f..096ea2209c2 100644 --- a/src/rust/engine/logging/src/logger.rs +++ b/src/rust/engine/logging/src/logger.rs @@ -21,225 +21,225 @@ use regex::Regex; const TIME_FORMAT_STR: &str = "%H:%M:%S"; lazy_static! 
{ - pub static ref PANTS_LOGGER: PantsLogger = PantsLogger::new(); + pub static ref PANTS_LOGGER: PantsLogger = PantsLogger::new(); } struct Inner { - per_run_logs: Mutex>, - log_file: Mutex>, - global_level: LevelFilter, - show_rust_3rdparty_logs: bool, - show_target: bool, - log_level_filters: HashMap, - literal_filters: Vec, - regex_filters: Vec, -} - -pub struct PantsLogger(ArcSwap); - -impl PantsLogger { - pub fn new() -> PantsLogger { - PantsLogger(ArcSwap::from(Arc::new(Inner { - per_run_logs: Mutex::new(None), - log_file: Mutex::new(None), - global_level: LevelFilter::Off, - show_rust_3rdparty_logs: true, - show_target: false, - log_level_filters: HashMap::new(), - literal_filters: Vec::new(), - regex_filters: Vec::new(), - }))) - } - - pub fn init( - max_level: u64, + per_run_logs: Mutex>, + log_file: Mutex>, + global_level: LevelFilter, show_rust_3rdparty_logs: bool, show_target: bool, - log_levels_by_target: HashMap, + log_level_filters: HashMap, literal_filters: Vec, regex_filters: Vec, - log_file_path: PathBuf, - ) -> Result<(), String> { - let log_level_filters = log_levels_by_target - .iter() - .map(|(k, v)| { - let python_level: PythonLogLevel = (*v).try_into().unwrap_or_else(|e| { - panic!("Unrecognized log level from python: {v}: {e}"); - }); - let level: log::LevelFilter = python_level.into(); - (k.clone(), level) - }) - .collect::>(); - - let max_python_level: PythonLogLevel = max_level - .try_into() - .map_err(|e| format!("Unrecognised log level from Python: {max_level}: {e}"))?; - let global_level: LevelFilter = max_python_level.into(); - - let log_file = OpenOptions::new() - .create(true) - .append(true) - .open(log_file_path) - .map_err(|err| format!("Error opening pantsd logfile: {err}"))?; - - PANTS_LOGGER.0.store(Arc::new(Inner { - per_run_logs: Mutex::default(), - log_file: Mutex::new(Some(log_file)), - global_level, - show_rust_3rdparty_logs, - show_target, - log_level_filters, - literal_filters, - regex_filters, - })); - - if set_logger(&*PANTS_LOGGER).is_err() { - debug!("Logging already initialized."); - } - // TODO this should be whatever the most verbose log level specified in log_levels_by_target - - // but I'm not sure if it's actually much of a gain over just setting this to Trace. - set_max_level(LevelFilter::Trace); - // We make per-destination decisions about whether to render color, and should never use - // environment variables to decide. - colored::control::set_override(true); - Ok(()) - } - - pub fn set_per_run_logs(&self, per_run_log_path: Option) { - match per_run_log_path { - None => { - *self.0.load().per_run_logs.lock() = None; - } - Some(path) => { - let file = OpenOptions::new() - .create(true) - .append(true) - .open(path) - .map_err(|err| format!("Error opening per-run logfile: {err}")) - .unwrap(); - *self.0.load().per_run_logs.lock() = Some(file); - } - }; - } - - /// log_from_python is only used in the Python FFI, which in turn is only called within the - /// Python `NativeHandler` class. Every logging call from Python should get proxied through this - /// function, which translates the log message into the Rust log paradigm provided by - /// the `log` crate. 
- pub fn log_from_python(message: &str, python_level: u64, target: &str) -> Result<(), String> { - let level: PythonLogLevel = python_level.try_into().map_err(|err| format!("{err}"))?; - log!(target: target, level.into(), "{}", message); - Ok(()) - } } -impl Log for PantsLogger { - fn enabled(&self, metadata: &Metadata) -> bool { - let inner = self.0.load(); - let enabled_globally = metadata.level() <= inner.global_level; - let enabled_for_target = inner - .log_level_filters - .get(metadata.target()) - .map(|lf| metadata.level() <= *lf) - .unwrap_or(false); - - enabled_globally || enabled_for_target - } - - fn log(&self, record: &Record) { - if !self.enabled(record.metadata()) { - return; +pub struct PantsLogger(ArcSwap); + +impl PantsLogger { + pub fn new() -> PantsLogger { + PantsLogger(ArcSwap::from(Arc::new(Inner { + per_run_logs: Mutex::new(None), + log_file: Mutex::new(None), + global_level: LevelFilter::Off, + show_rust_3rdparty_logs: true, + show_target: false, + log_level_filters: HashMap::new(), + literal_filters: Vec::new(), + regex_filters: Vec::new(), + }))) } - let inner = self.0.load(); - - let mut should_log = inner.show_rust_3rdparty_logs; - if !should_log { - if let Some(module_path) = record.module_path() { - for pants_package in super::pants_packages::PANTS_PACKAGE_NAMES { - if &module_path.split("::").next().unwrap() == pants_package { - should_log = true; - break; - } + + pub fn init( + max_level: u64, + show_rust_3rdparty_logs: bool, + show_target: bool, + log_levels_by_target: HashMap, + literal_filters: Vec, + regex_filters: Vec, + log_file_path: PathBuf, + ) -> Result<(), String> { + let log_level_filters = log_levels_by_target + .iter() + .map(|(k, v)| { + let python_level: PythonLogLevel = (*v).try_into().unwrap_or_else(|e| { + panic!("Unrecognized log level from python: {v}: {e}"); + }); + let level: log::LevelFilter = python_level.into(); + (k.clone(), level) + }) + .collect::>(); + + let max_python_level: PythonLogLevel = max_level + .try_into() + .map_err(|e| format!("Unrecognised log level from Python: {max_level}: {e}"))?; + let global_level: LevelFilter = max_python_level.into(); + + let log_file = OpenOptions::new() + .create(true) + .append(true) + .open(log_file_path) + .map_err(|err| format!("Error opening pantsd logfile: {err}"))?; + + PANTS_LOGGER.0.store(Arc::new(Inner { + per_run_logs: Mutex::default(), + log_file: Mutex::new(Some(log_file)), + global_level, + show_rust_3rdparty_logs, + show_target, + log_level_filters, + literal_filters, + regex_filters, + })); + + if set_logger(&*PANTS_LOGGER).is_err() { + debug!("Logging already initialized."); } - } else { - should_log = true; - } - } - if !should_log { - return; + // TODO this should be whatever the most verbose log level specified in log_levels_by_target - + // but I'm not sure if it's actually much of a gain over just setting this to Trace. + set_max_level(LevelFilter::Trace); + // We make per-destination decisions about whether to render color, and should never use + // environment variables to decide. 
+ colored::control::set_override(true); + Ok(()) } - let log_msg = format!("{}", record.args()); - if inner - .literal_filters - .iter() - .any(|filt| log_msg.starts_with(filt)) - { - return; + pub fn set_per_run_logs(&self, per_run_log_path: Option) { + match per_run_log_path { + None => { + *self.0.load().per_run_logs.lock() = None; + } + Some(path) => { + let file = OpenOptions::new() + .create(true) + .append(true) + .open(path) + .map_err(|err| format!("Error opening per-run logfile: {err}")) + .unwrap(); + *self.0.load().per_run_logs.lock() = Some(file); + } + }; } - if inner.regex_filters.iter().any(|re| re.is_match(&log_msg)) { - return; + /// log_from_python is only used in the Python FFI, which in turn is only called within the + /// Python `NativeHandler` class. Every logging call from Python should get proxied through this + /// function, which translates the log message into the Rust log paradigm provided by + /// the `log` crate. + pub fn log_from_python(message: &str, python_level: u64, target: &str) -> Result<(), String> { + let level: PythonLogLevel = python_level.try_into().map_err(|err| format!("{err}"))?; + log!(target: target, level.into(), "{}", message); + Ok(()) } +} - let destination = stdio::get_destination(); - - // Build the message string. - let log_string = { - let mut log_string = { - let cur_date = chrono::Local::now(); - format!( - "{}.{:02}", - cur_date.format(TIME_FORMAT_STR), - cur_date.time().nanosecond() / 10_000_000 // Two decimal places of precision. - ) - }; - - let use_color = destination.stderr_use_color(); - - let level = record.level(); - let level_marker = match level { - _ if !use_color => format!("[{level}]").normal().clear(), - Level::Info => format!("[{level}]").normal(), - Level::Error => format!("[{level}]").red(), - Level::Warn => format!("[{level}]").yellow(), - Level::Debug => format!("[{level}]").green(), - Level::Trace => format!("[{level}]").magenta(), - }; - write!(log_string, " {level_marker}").unwrap(); - - if inner.show_target { - write!(log_string, " ({})", record.target()).unwrap(); - }; - writeln!(log_string, " {log_msg}").unwrap(); - log_string - }; - let log_bytes = log_string.as_bytes(); - - { - let mut maybe_per_run_file = inner.per_run_logs.lock(); - if let Some(ref mut file) = *maybe_per_run_file { - // deliberately ignore errors writing to per-run log file - let _ = file.write_all(log_bytes); - } +impl Log for PantsLogger { + fn enabled(&self, metadata: &Metadata) -> bool { + let inner = self.0.load(); + let enabled_globally = metadata.level() <= inner.global_level; + let enabled_for_target = inner + .log_level_filters + .get(metadata.target()) + .map(|lf| metadata.level() <= *lf) + .unwrap_or(false); + + enabled_globally || enabled_for_target } - // Attempt to write to stdio, and write to the pantsd log if we fail (either because we don't - // have a valid stdio instance, or because of an error). - if destination.write_stderr_raw(log_bytes).is_err() { - let mut maybe_file = inner.log_file.lock(); - if let Some(ref mut file) = *maybe_file { - match file.write_all(log_bytes) { - Ok(()) => (), - Err(e) => { - // If we've failed to write to stdio, but also to our log file, our only recourse is to - // try to write to a different file. 
- fatal_log!("Failed to write to log file {:?}: {}", file, e); - } + fn log(&self, record: &Record) { + if !self.enabled(record.metadata()) { + return; + } + let inner = self.0.load(); + + let mut should_log = inner.show_rust_3rdparty_logs; + if !should_log { + if let Some(module_path) = record.module_path() { + for pants_package in super::pants_packages::PANTS_PACKAGE_NAMES { + if &module_path.split("::").next().unwrap() == pants_package { + should_log = true; + break; + } + } + } else { + should_log = true; + } + } + if !should_log { + return; + } + + let log_msg = format!("{}", record.args()); + if inner + .literal_filters + .iter() + .any(|filt| log_msg.starts_with(filt)) + { + return; + } + + if inner.regex_filters.iter().any(|re| re.is_match(&log_msg)) { + return; + } + + let destination = stdio::get_destination(); + + // Build the message string. + let log_string = { + let mut log_string = { + let cur_date = chrono::Local::now(); + format!( + "{}.{:02}", + cur_date.format(TIME_FORMAT_STR), + cur_date.time().nanosecond() / 10_000_000 // Two decimal places of precision. + ) + }; + + let use_color = destination.stderr_use_color(); + + let level = record.level(); + let level_marker = match level { + _ if !use_color => format!("[{level}]").normal().clear(), + Level::Info => format!("[{level}]").normal(), + Level::Error => format!("[{level}]").red(), + Level::Warn => format!("[{level}]").yellow(), + Level::Debug => format!("[{level}]").green(), + Level::Trace => format!("[{level}]").magenta(), + }; + write!(log_string, " {level_marker}").unwrap(); + + if inner.show_target { + write!(log_string, " ({})", record.target()).unwrap(); + }; + writeln!(log_string, " {log_msg}").unwrap(); + log_string + }; + let log_bytes = log_string.as_bytes(); + + { + let mut maybe_per_run_file = inner.per_run_logs.lock(); + if let Some(ref mut file) = *maybe_per_run_file { + // deliberately ignore errors writing to per-run log file + let _ = file.write_all(log_bytes); + } + } + + // Attempt to write to stdio, and write to the pantsd log if we fail (either because we don't + // have a valid stdio instance, or because of an error). + if destination.write_stderr_raw(log_bytes).is_err() { + let mut maybe_file = inner.log_file.lock(); + if let Some(ref mut file) = *maybe_file { + match file.write_all(log_bytes) { + Ok(()) => (), + Err(e) => { + // If we've failed to write to stdio, but also to our log file, our only recourse is to + // try to write to a different file. + fatal_log!("Failed to write to log file {:?}: {}", file, e); + } + } + } } - } } - } - fn flush(&self) {} + fn flush(&self) {} } diff --git a/src/rust/engine/nailgun/src/client.rs b/src/rust/engine/nailgun/src/client.rs index 9fc94d7a663..a8b7c551a4e 100644 --- a/src/rust/engine/nailgun/src/client.rs +++ b/src/rust/engine/nailgun/src/client.rs @@ -13,72 +13,72 @@ use tokio::net::TcpStream; use tokio::signal::unix::{signal, Signal, SignalKind}; pub enum NailgunClientError { - PreConnect(String), - PostConnect(String), - BrokenPipe, - KeyboardInterrupt, + PreConnect(String), + PostConnect(String), + BrokenPipe, + KeyboardInterrupt, } fn handle_postconnect_stdio(err: io::Error, msg: &str) -> NailgunClientError { - if err.kind() == io::ErrorKind::BrokenPipe { - // A BrokenPipe error is a semi-expected error caused when stdout/stderr closes, and which - // the Python runtime has a special error type and handling for. 
- NailgunClientError::BrokenPipe - } else { - NailgunClientError::PostConnect(format!("{msg}: {err}")) - } + if err.kind() == io::ErrorKind::BrokenPipe { + // A BrokenPipe error is a semi-expected error caused when stdout/stderr closes, and which + // the Python runtime has a special error type and handling for. + NailgunClientError::BrokenPipe + } else { + NailgunClientError::PostConnect(format!("{msg}: {err}")) + } } async fn handle_client_output( - mut stdio_read: impl Stream + Unpin, - mut signal_stream: Signal, - child: &mut nails::client::Child, + mut stdio_read: impl Stream + Unpin, + mut signal_stream: Signal, + child: &mut nails::client::Child, ) -> Result<(), NailgunClientError> { - let mut stdout = tokio::io::stdout(); - let mut stderr = tokio::io::stderr(); - let mut is_exiting = false; - loop { - tokio::select! { - output = stdio_read.next() => { - match output { - Some(ChildOutput::Stdout(bytes)) => { - stdout.write_all(&bytes).await.map_err(|err| handle_postconnect_stdio(err, "Failed to write to stdout"))? - }, - Some(ChildOutput::Stderr(bytes)) => { - stderr.write_all(&bytes).await.map_err(|err| handle_postconnect_stdio(err, "Failed to write to stderr"))? - }, - None => break, - } - } - _ = signal_stream.recv() => { - if is_exiting { - // This is the second signal: exit uncleanly to drop the child rather than waiting - // further. - return Err(NailgunClientError::KeyboardInterrupt); - } else { - // This is the first signal: trigger shutdown of the Child, which will request that - // the server interrupt the run. - child.shutdown().await; - is_exiting = true; + let mut stdout = tokio::io::stdout(); + let mut stderr = tokio::io::stderr(); + let mut is_exiting = false; + loop { + tokio::select! { + output = stdio_read.next() => { + match output { + Some(ChildOutput::Stdout(bytes)) => { + stdout.write_all(&bytes).await.map_err(|err| handle_postconnect_stdio(err, "Failed to write to stdout"))? + }, + Some(ChildOutput::Stderr(bytes)) => { + stderr.write_all(&bytes).await.map_err(|err| handle_postconnect_stdio(err, "Failed to write to stderr"))? + }, + None => break, + } } - } + _ = signal_stream.recv() => { + if is_exiting { + // This is the second signal: exit uncleanly to drop the child rather than waiting + // further. + return Err(NailgunClientError::KeyboardInterrupt); + } else { + // This is the first signal: trigger shutdown of the Child, which will request that + // the server interrupt the run. + child.shutdown().await; + is_exiting = true; + } + } + } } - } - try_join!(stdout.flush(), stderr.flush()) - .map_err(|err| handle_postconnect_stdio(err, "Failed to flush stdio"))?; - Ok(()) + try_join!(stdout.flush(), stderr.flush()) + .map_err(|err| handle_postconnect_stdio(err, "Failed to flush stdio"))?; + Ok(()) } async fn handle_client_input(mut stdin_write: mpsc::Sender) -> Result<(), io::Error> { - use nails::execution::send_to_io; - let mut stdin = stream_for(tokio::io::stdin()); - while let Some(input_bytes) = stdin.next().await { - stdin_write - .send(ChildInput::Stdin(input_bytes?)) - .await - .map_err(send_to_io)?; - } - Ok(()) + use nails::execution::send_to_io; + let mut stdin = stream_for(tokio::io::stdin()); + while let Some(input_bytes) = stdin.next().await { + stdin_write + .send(ChildInput::Stdin(input_bytes?)) + .await + .map_err(send_to_io)?; + } + Ok(()) } /// @@ -93,49 +93,49 @@ async fn handle_client_input(mut stdin_write: mpsc::Sender) -> Resul /// dedicated to the task of connecting to a nailgun server. 
/// pub async fn client_execute( - port: u16, - command: String, - args: Vec, - env: Vec<(String, String)>, + port: u16, + command: String, + args: Vec, + env: Vec<(String, String)>, ) -> Result { - use nails::execution::{child_channel, Command}; + use nails::execution::{child_channel, Command}; - let working_dir = - std::env::current_dir().map_err(|e| NailgunClientError::PreConnect(e.to_string()))?; + let working_dir = + std::env::current_dir().map_err(|e| NailgunClientError::PreConnect(e.to_string()))?; - let config = Config::default(); - let command = Command { - command, - args, - env, - working_dir, - }; + let config = Config::default(); + let command = Command { + command, + args, + env, + working_dir, + }; - let signal_stream = signal(SignalKind::interrupt()).map_err(|err| { - NailgunClientError::PreConnect(format!("Failed to install interrupt handler: {err}")) - })?; - let socket = TcpStream::connect((Ipv4Addr::new(127, 0, 0, 1), port)) - .await - .map_err(|err| { - NailgunClientError::PreConnect(format!("Failed to connect to localhost: {err}")) + let signal_stream = signal(SignalKind::interrupt()).map_err(|err| { + NailgunClientError::PreConnect(format!("Failed to install interrupt handler: {err}")) })?; + let socket = TcpStream::connect((Ipv4Addr::new(127, 0, 0, 1), port)) + .await + .map_err(|err| { + NailgunClientError::PreConnect(format!("Failed to connect to localhost: {err}")) + })?; - let mut child = nails::client::handle_connection(config, socket, command, async { - let (stdin_write, stdin_read) = child_channel::(); - let _input_handler = tokio::spawn(handle_client_input(stdin_write)); - stdin_read - }) - .await - .map_err(|err| NailgunClientError::PreConnect(format!("Failed to start: {err}")))?; + let mut child = nails::client::handle_connection(config, socket, command, async { + let (stdin_write, stdin_read) = child_channel::(); + let _input_handler = tokio::spawn(handle_client_input(stdin_write)); + stdin_read + }) + .await + .map_err(|err| NailgunClientError::PreConnect(format!("Failed to start: {err}")))?; - handle_client_output( - child.output_stream.take().unwrap(), - signal_stream, - &mut child, - ) - .await?; + handle_client_output( + child.output_stream.take().unwrap(), + signal_stream, + &mut child, + ) + .await?; - let exit_code: ExitCode = child.wait().await.map_err(|err| { + let exit_code: ExitCode = child.wait().await.map_err(|err| { let err_str = match err.to_string().as_str() { "Client exited before the server's result could be returned." => { "The pantsd process was killed during the run.\n\nIf this was not intentionally done by you, \ @@ -155,5 +155,5 @@ pub async fn client_execute( NailgunClientError::PostConnect(err_str) })?; - Ok(exit_code.0) + Ok(exit_code.0) } diff --git a/src/rust/engine/nailgun/src/lib.rs b/src/rust/engine/nailgun/src/lib.rs index 1470956b91e..0da081aeebd 100644 --- a/src/rust/engine/nailgun/src/lib.rs +++ b/src/rust/engine/nailgun/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. 
#![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] diff --git a/src/rust/engine/nailgun/src/server.rs b/src/rust/engine/nailgun/src/server.rs index 6460c7d02aa..c417155d63a 100644 --- a/src/rust/engine/nailgun/src/server.rs +++ b/src/rust/engine/nailgun/src/server.rs @@ -24,160 +24,159 @@ use tokio::sync::{mpsc, Notify, RwLock}; use tokio_stream::wrappers::UnboundedReceiverStream; pub struct Server { - exit_sender: oneshot::Sender<()>, - exited_receiver: oneshot::Receiver>, - port: u16, + exit_sender: oneshot::Sender<()>, + exited_receiver: oneshot::Receiver>, + port: u16, } impl Server { - /// - /// Spawn the server on a background Task. - /// - /// The port provided here may be `0` in order to request a random port. A caller can use - /// `Server.port()` to determine what port was actually selected. - /// - pub async fn new( - executor: Executor, - port_requested: u16, - runner: impl RawFdRunner + 'static, - ) -> Result { - let listener = TcpListener::bind((Ipv4Addr::new(127, 0, 0, 1), port_requested)) - .await - .map_err(|e| format!("Could not bind to port {port_requested}: {e:?}"))?; - let port_actual = listener - .local_addr() - .map_err(|e| format!("No local address for listener: {e:?}"))? - .port(); - - // NB: The C client requires noisy_stdin (see the `nails` crate for more info), but neither - // `nails` nor the pants python client do. - let config = nails::Config::default().noisy_stdin(false); - let nail = RawFdNail { - executor: executor.clone(), - runner: Arc::new(runner), - }; - - let (exited_sender, exited_receiver) = oneshot::channel(); - let (exit_sender, exit_receiver) = oneshot::channel(); - - let _join = executor.native_spawn(Self::serve( - executor.clone(), - config, - nail, - exit_receiver, - exited_sender, - listener, - )); - - Ok(Server { - exit_sender, - exited_receiver, - port: port_actual, - }) - } - - /// - /// The main loop of the server. Public for testing. - /// - pub(crate) async fn serve( - executor: Executor, - config: nails::Config, - nail: impl Nail, - should_exit: oneshot::Receiver<()>, - exited: oneshot::Sender>, - listener: TcpListener, - ) { - let exit_result = Self::accept_loop(executor, config, nail, should_exit, listener).await; - info!("Server exiting with {:?}", exit_result); - let _ = exited.send(exit_result); - } - - async fn accept_loop( - executor: Executor, - config: nails::Config, - nail: impl Nail, - mut should_exit: oneshot::Receiver<()>, - listener: TcpListener, - ) -> Result<(), String> { - // While connections are ongoing, they acquire `read`; before shutting down, the server - // acquires `write`. 
- let ongoing_connections = Arc::new(RwLock::new(())); - - let result = loop { - let tcp_stream = match future::select(listener.accept().boxed(), should_exit).await { - future::Either::Left((Ok((tcp_stream, _addr)), s_e)) => { - // Got a connection. - should_exit = s_e; - tcp_stream - } - future::Either::Left((Err(e), _)) => { - break Err(format!("Server failed to accept connections: {e}")); - } - future::Either::Right((_, _)) => { - break Ok(()); - } - }; - - debug!("Accepted connection: {:?}", tcp_stream); - - // There is a slightly delicate dance here: we wait for a connection to have acquired the - // ongoing connections lock before proceeding to the next iteration of the loop. This - // prevents us from observing an empty lock and exiting before the connection has actually - // acquired it. Unfortunately we cannot acquire the lock in this thread and then send the - // guard to the other thread due to its lifetime bounds. - let connection_started = Arc::new(Notify::new()); - let _join = executor.native_spawn({ - let config = config.clone(); - let nail = nail.clone(); - let connection_started = connection_started.clone(); - let ongoing_connections = ongoing_connections.clone(); - async move { - let ongoing_connection_guard = ongoing_connections.read().await; - connection_started.notify_one(); - let result = nails::server::handle_connection(config, nail, tcp_stream).await; - std::mem::drop(ongoing_connection_guard); - result - } - }); - connection_started.notified().await; - }; - - // Before exiting, acquire write access on the ongoing_connections lock to prove that all - // connections have completed. - debug!("Server waiting for connections to complete..."); - let _ = ongoing_connections.write().await; - debug!("All connections completed."); - result - } - - /// - /// The port that the server is listening on. - /// - pub fn port(&self) -> u16 { - self.port - } - - /// - /// Returns a Future that will shut down the server by: - /// 1. stopping accepting new connections - /// 2. waiting for all ongoing connections to have completed - /// - pub async fn shutdown(self) -> Result<(), String> { - // If we fail to send the exit signal, it's because the task is already shut down. - let _ = self.exit_sender.send(()); - self - .exited_receiver - .await - .map_err(|_| "Server exited uncleanly.".to_owned())? - } + /// + /// Spawn the server on a background Task. + /// + /// The port provided here may be `0` in order to request a random port. A caller can use + /// `Server.port()` to determine what port was actually selected. + /// + pub async fn new( + executor: Executor, + port_requested: u16, + runner: impl RawFdRunner + 'static, + ) -> Result { + let listener = TcpListener::bind((Ipv4Addr::new(127, 0, 0, 1), port_requested)) + .await + .map_err(|e| format!("Could not bind to port {port_requested}: {e:?}"))?; + let port_actual = listener + .local_addr() + .map_err(|e| format!("No local address for listener: {e:?}"))? + .port(); + + // NB: The C client requires noisy_stdin (see the `nails` crate for more info), but neither + // `nails` nor the pants python client do. 
+ let config = nails::Config::default().noisy_stdin(false); + let nail = RawFdNail { + executor: executor.clone(), + runner: Arc::new(runner), + }; + + let (exited_sender, exited_receiver) = oneshot::channel(); + let (exit_sender, exit_receiver) = oneshot::channel(); + + let _join = executor.native_spawn(Self::serve( + executor.clone(), + config, + nail, + exit_receiver, + exited_sender, + listener, + )); + + Ok(Server { + exit_sender, + exited_receiver, + port: port_actual, + }) + } + + /// + /// The main loop of the server. Public for testing. + /// + pub(crate) async fn serve( + executor: Executor, + config: nails::Config, + nail: impl Nail, + should_exit: oneshot::Receiver<()>, + exited: oneshot::Sender>, + listener: TcpListener, + ) { + let exit_result = Self::accept_loop(executor, config, nail, should_exit, listener).await; + info!("Server exiting with {:?}", exit_result); + let _ = exited.send(exit_result); + } + + async fn accept_loop( + executor: Executor, + config: nails::Config, + nail: impl Nail, + mut should_exit: oneshot::Receiver<()>, + listener: TcpListener, + ) -> Result<(), String> { + // While connections are ongoing, they acquire `read`; before shutting down, the server + // acquires `write`. + let ongoing_connections = Arc::new(RwLock::new(())); + + let result = loop { + let tcp_stream = match future::select(listener.accept().boxed(), should_exit).await { + future::Either::Left((Ok((tcp_stream, _addr)), s_e)) => { + // Got a connection. + should_exit = s_e; + tcp_stream + } + future::Either::Left((Err(e), _)) => { + break Err(format!("Server failed to accept connections: {e}")); + } + future::Either::Right((_, _)) => { + break Ok(()); + } + }; + + debug!("Accepted connection: {:?}", tcp_stream); + + // There is a slightly delicate dance here: we wait for a connection to have acquired the + // ongoing connections lock before proceeding to the next iteration of the loop. This + // prevents us from observing an empty lock and exiting before the connection has actually + // acquired it. Unfortunately we cannot acquire the lock in this thread and then send the + // guard to the other thread due to its lifetime bounds. + let connection_started = Arc::new(Notify::new()); + let _join = executor.native_spawn({ + let config = config.clone(); + let nail = nail.clone(); + let connection_started = connection_started.clone(); + let ongoing_connections = ongoing_connections.clone(); + async move { + let ongoing_connection_guard = ongoing_connections.read().await; + connection_started.notify_one(); + let result = nails::server::handle_connection(config, nail, tcp_stream).await; + std::mem::drop(ongoing_connection_guard); + result + } + }); + connection_started.notified().await; + }; + + // Before exiting, acquire write access on the ongoing_connections lock to prove that all + // connections have completed. + debug!("Server waiting for connections to complete..."); + let _ = ongoing_connections.write().await; + debug!("All connections completed."); + result + } + + /// + /// The port that the server is listening on. + /// + pub fn port(&self) -> u16 { + self.port + } + + /// + /// Returns a Future that will shut down the server by: + /// 1. stopping accepting new connections + /// 2. waiting for all ongoing connections to have completed + /// + pub async fn shutdown(self) -> Result<(), String> { + // If we fail to send the exit signal, it's because the task is already shut down. 
+ let _ = self.exit_sender.send(()); + self.exited_receiver + .await + .map_err(|_| "Server exited uncleanly.".to_owned())? + } } pub struct RawFdExecution { - pub cmd: execution::Command, - pub cancelled: AsyncLatch, - pub stdin_fd: RawFd, - pub stdout_fd: RawFd, - pub stderr_fd: RawFd, + pub cmd: execution::Command, + pub cancelled: AsyncLatch, + pub stdin_fd: RawFd, + pub stdout_fd: RawFd, + pub stderr_fd: RawFd, } /// @@ -199,188 +198,190 @@ impl ExitCode + Send + Sync> RawFdRunner for T {} /// #[derive(Clone)] struct RawFdNail { - executor: Executor, - runner: Arc, + executor: Executor, + runner: Arc, } impl Nail for RawFdNail { - fn spawn(&self, cmd: execution::Command) -> Result { - let env = cmd.env.iter().cloned().collect::>(); - - // Handle stdin. - let (stdin_handle, stdin_sink) = Self::input(Self::ttypath_from_env(&env, 0))?; - let maybe_stdin_write = if let Some(mut stdin_sink) = stdin_sink { - let (stdin_write, stdin_read) = child_channel::(); - // Spawn a task that will propagate the input stream. - let _join = self.executor.native_spawn(async move { - let mut input_stream = stdin_read.map(|child_input| match child_input { - ChildInput::Stdin(bytes) => Ok(bytes), - }); - let _ = stdin_sink.send_all(&mut input_stream).await; - }); - Some(stdin_write) - } else { - // Stdin will be handled directly by the TTY. - None - }; - - // And stdout/stderr. - let (stdout_stream, stdout_handle) = Self::output(Self::ttypath_from_env(&env, 1), false)?; - // N.B.: POSIX demands stderr is opened read-write and some programs, like pagers, rely on this. - // See: https://pubs.opengroup.org/onlinepubs/9699919799/functions/stdin.html - let (stderr_stream, stderr_handle) = Self::output(Self::ttypath_from_env(&env, 2), true)?; - - // Set up a cancellation token that is triggered on client shutdown. - let cancelled = AsyncLatch::new(); - let shutdown = { - let cancelled = cancelled.clone(); - async move { - cancelled.trigger(); - } - }; - - // Spawn the underlying function as a blocking task, and capture its exit code to append to the - // output stream. - let nail = self.clone(); - let exit_code = self - .executor - .spawn_blocking( - move || { - // NB: This closure captures the stdio handles, and will drop/close them when it completes. - (nail.runner)(RawFdExecution { - cmd, - cancelled, - stdin_fd: stdin_handle.as_raw_fd(), - stdout_fd: stdout_handle.as_raw_fd(), - stderr_fd: stderr_handle.as_raw_fd(), - }) - }, - |e| { - log::warn!("Server exited uncleanly: {e}"); - ExitCode(1) - }, - ) - .boxed(); - - // Select a single stdout/stderr stream. - let stdout_stream = stdout_stream.map_ok(ChildOutput::Stdout); - let stderr_stream = stderr_stream.map_ok(ChildOutput::Stderr); - let output_stream = stream::select(stdout_stream, stderr_stream).boxed(); - - Ok(nails::server::Child::new( - output_stream, - maybe_stdin_write, - exit_code, - Some(shutdown.boxed()), - )) - } + fn spawn(&self, cmd: execution::Command) -> Result { + let env = cmd.env.iter().cloned().collect::>(); + + // Handle stdin. + let (stdin_handle, stdin_sink) = Self::input(Self::ttypath_from_env(&env, 0))?; + let maybe_stdin_write = if let Some(mut stdin_sink) = stdin_sink { + let (stdin_write, stdin_read) = child_channel::(); + // Spawn a task that will propagate the input stream. 
+ let _join = self.executor.native_spawn(async move { + let mut input_stream = stdin_read.map(|child_input| match child_input { + ChildInput::Stdin(bytes) => Ok(bytes), + }); + let _ = stdin_sink.send_all(&mut input_stream).await; + }); + Some(stdin_write) + } else { + // Stdin will be handled directly by the TTY. + None + }; + + // And stdout/stderr. + let (stdout_stream, stdout_handle) = Self::output(Self::ttypath_from_env(&env, 1), false)?; + // N.B.: POSIX demands stderr is opened read-write and some programs, like pagers, rely on this. + // See: https://pubs.opengroup.org/onlinepubs/9699919799/functions/stdin.html + let (stderr_stream, stderr_handle) = Self::output(Self::ttypath_from_env(&env, 2), true)?; + + // Set up a cancellation token that is triggered on client shutdown. + let cancelled = AsyncLatch::new(); + let shutdown = { + let cancelled = cancelled.clone(); + async move { + cancelled.trigger(); + } + }; + + // Spawn the underlying function as a blocking task, and capture its exit code to append to the + // output stream. + let nail = self.clone(); + let exit_code = self + .executor + .spawn_blocking( + move || { + // NB: This closure captures the stdio handles, and will drop/close them when it completes. + (nail.runner)(RawFdExecution { + cmd, + cancelled, + stdin_fd: stdin_handle.as_raw_fd(), + stdout_fd: stdout_handle.as_raw_fd(), + stderr_fd: stderr_handle.as_raw_fd(), + }) + }, + |e| { + log::warn!("Server exited uncleanly: {e}"); + ExitCode(1) + }, + ) + .boxed(); + + // Select a single stdout/stderr stream. + let stdout_stream = stdout_stream.map_ok(ChildOutput::Stdout); + let stderr_stream = stderr_stream.map_ok(ChildOutput::Stderr); + let output_stream = stream::select(stdout_stream, stderr_stream).boxed(); + + Ok(nails::server::Child::new( + output_stream, + maybe_stdin_write, + exit_code, + Some(shutdown.boxed()), + )) + } } impl RawFdNail { - /// - /// Returns a tuple of a readable file handle and an optional sink for nails to send stdin to. - /// - /// In the case of a TTY, the file handle will point directly to the TTY, and no stdin data will - /// flow over the protocol. Otherwise, it will be backed by a new anonymous pipe, and data should - /// be copied to the returned Sink. - /// - fn input( - tty_path: Option, - ) -> Result<(Box, Option>), io::Error> { - if let Some(tty) = Self::try_open_tty(tty_path, OpenOptions::new().read(true)) { - Ok((Box::new(tty), None)) - } else { - let (pipe_reader, pipe_writer) = os_pipe::pipe()?; - let write_handle = File::from_std(std::fs::File::from(OwnedFd::from(pipe_writer))); - Ok((Box::new(pipe_reader), Some(sink_for(write_handle)))) + /// + /// Returns a tuple of a readable file handle and an optional sink for nails to send stdin to. + /// + /// In the case of a TTY, the file handle will point directly to the TTY, and no stdin data will + /// flow over the protocol. Otherwise, it will be backed by a new anonymous pipe, and data should + /// be copied to the returned Sink. + /// + fn input( + tty_path: Option, + ) -> Result<(Box, Option>), io::Error> { + if let Some(tty) = Self::try_open_tty(tty_path, OpenOptions::new().read(true)) { + Ok((Box::new(tty), None)) + } else { + let (pipe_reader, pipe_writer) = os_pipe::pipe()?; + let write_handle = File::from_std(std::fs::File::from(OwnedFd::from(pipe_writer))); + Ok((Box::new(pipe_reader), Some(sink_for(write_handle)))) + } } - } - - /// - /// Returns a tuple of a possibly empty Stream for nails to read data from, and a writable file handle. 
- /// - /// See `Self::input` and the struct's rustdoc for more info on the TTY case. - /// - #[allow(clippy::type_complexity)] - fn output( - tty_path: Option, - read_write: bool, - ) -> Result< - ( - stream::BoxStream<'static, Result>, - Box, - ), - io::Error, - > { - if let Some(tty) = Self::try_open_tty( - tty_path, - OpenOptions::new() - .read(read_write) - .write(true) - .create(false), - ) { - Ok((stream::empty().boxed(), Box::new(tty))) - } else { - let (pipe_reader, pipe_writer) = os_pipe::pipe()?; - let read_handle = std::fs::File::from(OwnedFd::from(pipe_reader)); - Ok(( - blocking_stream_for(read_handle)?.boxed(), - Box::new(pipe_writer), - )) + + /// + /// Returns a tuple of a possibly empty Stream for nails to read data from, and a writable file handle. + /// + /// See `Self::input` and the struct's rustdoc for more info on the TTY case. + /// + #[allow(clippy::type_complexity)] + fn output( + tty_path: Option, + read_write: bool, + ) -> Result< + ( + stream::BoxStream<'static, Result>, + Box, + ), + io::Error, + > { + if let Some(tty) = Self::try_open_tty( + tty_path, + OpenOptions::new() + .read(read_write) + .write(true) + .create(false), + ) { + Ok((stream::empty().boxed(), Box::new(tty))) + } else { + let (pipe_reader, pipe_writer) = os_pipe::pipe()?; + let read_handle = std::fs::File::from(OwnedFd::from(pipe_reader)); + Ok(( + blocking_stream_for(read_handle)?.boxed(), + Box::new(pipe_writer), + )) + } + } + + /// + /// Attempt to open the given TTY-path, logging any errors. + /// + fn try_open_tty( + tty_path: Option, + open_options: &OpenOptions, + ) -> Option { + let tty_path = tty_path?; + open_options + .open(&tty_path) + .map_err(|e| { + log::debug!( + "Failed to open TTY at {}: {:?}, falling back to socket access.", + tty_path.display(), + e + ); + }) + .ok() + } + + /// + /// Corresponds to `ttynames_to_env` in `nailgun_protocol.py`. See this struct's rustdocs. + /// + fn ttypath_from_env(env: &HashMap, fd_number: usize) -> Option { + env.get(&format!("NAILGUN_TTY_PATH_{fd_number}")) + .map(PathBuf::from) } - } - - /// - /// Attempt to open the given TTY-path, logging any errors. - /// - fn try_open_tty(tty_path: Option, open_options: &OpenOptions) -> Option { - let tty_path = tty_path?; - open_options - .open(&tty_path) - .map_err(|e| { - log::debug!( - "Failed to open TTY at {}: {:?}, falling back to socket access.", - tty_path.display(), - e - ); - }) - .ok() - } - - /// - /// Corresponds to `ttynames_to_env` in `nailgun_protocol.py`. See this struct's rustdocs. - /// - fn ttypath_from_env(env: &HashMap, fd_number: usize) -> Option { - env - .get(&format!("NAILGUN_TTY_PATH_{fd_number}")) - .map(PathBuf::from) - } } // TODO: See https://github.com/pantsbuild/pants/issues/16969. 
pub fn blocking_stream_for( - mut r: R, + mut r: R, ) -> io::Result>> { - let (sender, receiver) = mpsc::unbounded_channel(); - std::thread::Builder::new() - .name("stdio-reader".to_owned()) - .spawn(move || { - let mut buf = [0; 4096]; - loop { - match r.read(&mut buf) { - Ok(0) => break, - Ok(n) => { - if sender.send(Ok(Bytes::copy_from_slice(&buf[..n]))).is_err() { - break; + let (sender, receiver) = mpsc::unbounded_channel(); + std::thread::Builder::new() + .name("stdio-reader".to_owned()) + .spawn(move || { + let mut buf = [0; 4096]; + loop { + match r.read(&mut buf) { + Ok(0) => break, + Ok(n) => { + if sender.send(Ok(Bytes::copy_from_slice(&buf[..n]))).is_err() { + break; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => { + let _ = sender.send(Err(e)); + break; + } + } } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => { - let _ = sender.send(Err(e)); - break; - } - } - } - })?; - Ok(UnboundedReceiverStream::new(receiver)) + })?; + Ok(UnboundedReceiverStream::new(receiver)) } diff --git a/src/rust/engine/nailgun/src/tests.rs b/src/rust/engine/nailgun/src/tests.rs index c4e6bfce9bb..91182377805 100644 --- a/src/rust/engine/nailgun/src/tests.rs +++ b/src/rust/engine/nailgun/src/tests.rs @@ -16,82 +16,82 @@ use tokio::time::sleep; #[tokio::test] async fn spawn_and_bind() { - let server = Server::new(Executor::new(), 0, |_| ExitCode(0)) - .await - .unwrap(); - // Should have bound a random port. - assert!(0 != server.port()); - server.shutdown().await.unwrap(); + let server = Server::new(Executor::new(), 0, |_| ExitCode(0)) + .await + .unwrap(); + // Should have bound a random port. + assert!(0 != server.port()); + server.shutdown().await.unwrap(); } #[tokio::test] async fn accept() { - let exit_code = ExitCode(42); - let server = Server::new(Executor::new(), 0, move |_| exit_code) - .await - .unwrap(); + let exit_code = ExitCode(42); + let server = Server::new(Executor::new(), 0, move |_| exit_code) + .await + .unwrap(); - // And connect with a client. This Nail will ignore the content of the command, so we're - // only validating the exit code. - let actual_exit_code = run_client(server.port()).await.unwrap(); - assert_eq!(exit_code, actual_exit_code); - server.shutdown().await.unwrap(); + // And connect with a client. This Nail will ignore the content of the command, so we're + // only validating the exit code. + let actual_exit_code = run_client(server.port()).await.unwrap(); + assert_eq!(exit_code, actual_exit_code); + server.shutdown().await.unwrap(); } #[tokio::test] async fn shutdown_awaits_ongoing() { - // A server that waits for a signal to complete a connection. - let connection_accepted = Arc::new(Notify::new()); - let should_complete_connection = Arc::new(Notify::new()); - let exit_code = ExitCode(42); - let server = Server::new(Executor::new(), 0, { - let connection_accepted = connection_accepted.clone(); - let should_complete_connection = should_complete_connection.clone(); - move |_| { - connection_accepted.notify_one(); - tokio::runtime::Handle::current().block_on(should_complete_connection.notified()); - exit_code - } - }) - .await - .unwrap(); + // A server that waits for a signal to complete a connection. 
+ let connection_accepted = Arc::new(Notify::new()); + let should_complete_connection = Arc::new(Notify::new()); + let exit_code = ExitCode(42); + let server = Server::new(Executor::new(), 0, { + let connection_accepted = connection_accepted.clone(); + let should_complete_connection = should_complete_connection.clone(); + move |_| { + connection_accepted.notify_one(); + tokio::runtime::Handle::current().block_on(should_complete_connection.notified()); + exit_code + } + }) + .await + .unwrap(); - // Spawn a connection in the background, and once it has been established, kick off shutdown of - // the server. - let mut client_completed = tokio::spawn(run_client(server.port())); - connection_accepted.notified().await; - let mut server_shutdown = tokio::spawn(server.shutdown()); + // Spawn a connection in the background, and once it has been established, kick off shutdown of + // the server. + let mut client_completed = tokio::spawn(run_client(server.port())); + connection_accepted.notified().await; + let mut server_shutdown = tokio::spawn(server.shutdown()); - // Confirm that the client doesn't return, and that the server doesn't shutdown. - match future::select(client_completed, sleep(Duration::from_millis(500)).boxed()).await { - future::Either::Right((_, c_c)) => client_completed = c_c, - _ => panic!("Client should not have completed"), - } - match future::select(server_shutdown, sleep(Duration::from_millis(500)).boxed()).await { - future::Either::Right((_, s_s)) => server_shutdown = s_s, - _ => panic!("Server should not have shut down"), - } + // Confirm that the client doesn't return, and that the server doesn't shutdown. + match future::select(client_completed, sleep(Duration::from_millis(500)).boxed()).await { + future::Either::Right((_, c_c)) => client_completed = c_c, + _ => panic!("Client should not have completed"), + } + match future::select(server_shutdown, sleep(Duration::from_millis(500)).boxed()).await { + future::Either::Right((_, s_s)) => server_shutdown = s_s, + _ => panic!("Server should not have shut down"), + } - // Then signal completion of the connection, and confirm that both the client and server exit - // cleanly. - should_complete_connection.notify_one(); - assert_eq!(exit_code, client_completed.await.unwrap().unwrap()); - server_shutdown.await.unwrap().unwrap(); + // Then signal completion of the connection, and confirm that both the client and server exit + // cleanly. 
+ should_complete_connection.notify_one(); + assert_eq!(exit_code, client_completed.await.unwrap().unwrap()); + server_shutdown.await.unwrap().unwrap(); } async fn run_client(port: u16) -> Result { - let cmd = Command { - command: "nothing".to_owned(), - args: vec![], - env: vec![], - working_dir: PathBuf::from("/dev/null"), - }; - let stream = TcpStream::connect(("127.0.0.1", port)).await.unwrap(); - let child = nails::client::handle_connection(Config::default(), stream, cmd, async { - let (_stdin_write, stdin_read) = child_channel::(); - stdin_read - }) - .await - .map_err(|e| e.to_string())?; - child.wait().await.map_err(|e| e.to_string()) + let cmd = Command { + command: "nothing".to_owned(), + args: vec![], + env: vec![], + working_dir: PathBuf::from("/dev/null"), + }; + let stream = TcpStream::connect(("127.0.0.1", port)).await.unwrap(); + let child = nails::client::handle_connection(Config::default(), stream, cmd, async { + let (_stdin_write, stdin_read) = child_channel::(); + stdin_read + }) + .await + .map_err(|e| e.to_string())?; + child.wait().await.map_err(|e| e.to_string()) } diff --git a/src/rust/engine/options/src/args.rs b/src/rust/engine/options/src/args.rs index 83c71b64a4e..93e2d45d3b3 100644 --- a/src/rust/engine/options/src/args.rs +++ b/src/rust/engine/options/src/args.rs @@ -11,114 +11,113 @@ use crate::ListEdit; use std::collections::HashMap; pub struct Args { - pub(crate) args: Vec, + pub(crate) args: Vec, } #[derive(PartialEq)] enum Negate { - True, - False, + True, + False, } impl Args { - pub fn new(args: Vec) -> Self { - Self { args } - } - - pub fn argv() -> Self { - Self::new(env::args().collect::>()) - } + pub fn new(args: Vec) -> Self { + Self { args } + } - fn arg_name(id: &OptionId, negate: Negate) -> String { - format!( - "--{}{}{}", - match negate { - Negate::False => "", - Negate::True => "no-", - }, - match &id.0 { - Scope::Global => "".to_string(), - Scope::Scope(scope) => format!("{}-", scope.to_ascii_lowercase()), - }, - id.name("-", NameTransform::ToLower) - ) - } + pub fn argv() -> Self { + Self::new(env::args().collect::>()) + } - fn arg_names(id: &OptionId, negate: Negate) -> HashMap { - let mut arg_names = HashMap::new(); - if let Some(switch) = id.2 { - arg_names.insert(format!("-{switch}"), false); - if negate == Negate::True { - arg_names.insert(format!("--no-{switch}"), true); - } + fn arg_name(id: &OptionId, negate: Negate) -> String { + format!( + "--{}{}{}", + match negate { + Negate::False => "", + Negate::True => "no-", + }, + match &id.0 { + Scope::Global => "".to_string(), + Scope::Scope(scope) => format!("{}-", scope.to_ascii_lowercase()), + }, + id.name("-", NameTransform::ToLower) + ) } - arg_names.insert(Self::arg_name(id, Negate::False), false); - if negate == Negate::True { - arg_names.insert(Self::arg_name(id, Negate::True), true); + + fn arg_names(id: &OptionId, negate: Negate) -> HashMap { + let mut arg_names = HashMap::new(); + if let Some(switch) = id.2 { + arg_names.insert(format!("-{switch}"), false); + if negate == Negate::True { + arg_names.insert(format!("--no-{switch}"), true); + } + } + arg_names.insert(Self::arg_name(id, Negate::False), false); + if negate == Negate::True { + arg_names.insert(Self::arg_name(id, Negate::True), true); + } + arg_names } - arg_names - } - fn find_flag( - &self, - flag_names: HashMap, - ) -> Result, String> { - for arg in self.args.iter().rev() { - let mut components = arg.as_str().splitn(2, '='); - if let Some(name) = components.next() { - if let Some(negated) = flag_names.get(name) { 
- return Ok(Some(( - name.to_owned(), - components.next().unwrap_or("").to_owned(), - *negated, - ))); + fn find_flag( + &self, + flag_names: HashMap, + ) -> Result, String> { + for arg in self.args.iter().rev() { + let mut components = arg.as_str().splitn(2, '='); + if let Some(name) = components.next() { + if let Some(negated) = flag_names.get(name) { + return Ok(Some(( + name.to_owned(), + components.next().unwrap_or("").to_owned(), + *negated, + ))); + } + } } - } + Ok(None) } - Ok(None) - } } impl OptionsSource for Args { - fn display(&self, id: &OptionId) -> String { - Self::arg_name(id, Negate::False) - } - - fn get_string(&self, id: &OptionId) -> Result, String> { - self - .find_flag(Self::arg_names(id, Negate::False)) - .map(|value| value.map(|(_, v, _)| v)) - } + fn display(&self, id: &OptionId) -> String { + Self::arg_name(id, Negate::False) + } - fn get_bool(&self, id: &OptionId) -> Result, String> { - let arg_names = Self::arg_names(id, Negate::True); - match self.find_flag(arg_names)? { - Some((_, s, negated)) if s.as_str() == "" => Ok(Some(!negated)), - Some((name, ref value, negated)) => parse_bool(value) - .map(|b| Some(b ^ negated)) - .map_err(|e| e.render(name)), - None => Ok(None), + fn get_string(&self, id: &OptionId) -> Result, String> { + self.find_flag(Self::arg_names(id, Negate::False)) + .map(|value| value.map(|(_, v, _)| v)) } - } - fn get_string_list(&self, id: &OptionId) -> Result>>, String> { - let arg_names = Self::arg_names(id, Negate::False); - let mut edits = vec![]; - for arg in &self.args { - let mut components = arg.as_str().splitn(2, '='); - if let Some(name) = components.next() { - if arg_names.contains_key(name) { - let value = components - .next() - .ok_or_else(|| format!("Expected string list option {name} to have a value."))?; - edits.extend(parse_string_list(value).map_err(|e| e.render(name))?) + fn get_bool(&self, id: &OptionId) -> Result, String> { + let arg_names = Self::arg_names(id, Negate::True); + match self.find_flag(arg_names)? { + Some((_, s, negated)) if s.as_str() == "" => Ok(Some(!negated)), + Some((name, ref value, negated)) => parse_bool(value) + .map(|b| Some(b ^ negated)) + .map_err(|e| e.render(name)), + None => Ok(None), } - } } - if edits.is_empty() { - Ok(None) - } else { - Ok(Some(edits)) + + fn get_string_list(&self, id: &OptionId) -> Result>>, String> { + let arg_names = Self::arg_names(id, Negate::False); + let mut edits = vec![]; + for arg in &self.args { + let mut components = arg.as_str().splitn(2, '='); + if let Some(name) = components.next() { + if arg_names.contains_key(name) { + let value = components.next().ok_or_else(|| { + format!("Expected string list option {name} to have a value.") + })?; + edits.extend(parse_string_list(value).map_err(|e| e.render(name))?) 
+ } + } + } + if edits.is_empty() { + Ok(None) + } else { + Ok(Some(edits)) + } } - } } diff --git a/src/rust/engine/options/src/args_tests.rs b/src/rust/engine/options/src/args_tests.rs index 96c4a6223b6..fea9b4c78fa 100644 --- a/src/rust/engine/options/src/args_tests.rs +++ b/src/rust/engine/options/src/args_tests.rs @@ -6,145 +6,143 @@ use crate::option_id; use crate::{ListEdit, ListEditAction, OptionId, OptionsSource}; fn args>(args: I) -> Args { - Args { - args: args.into_iter().map(str::to_owned).collect(), - } + Args { + args: args.into_iter().map(str::to_owned).collect(), + } } #[test] fn test_display() { - let args = args([]); - assert_eq!("--global".to_owned(), args.display(&option_id!("global"))); - assert_eq!( - "--scope-name".to_owned(), - args.display(&option_id!("scope", "name")) - ); - assert_eq!( - "--scope-full-name".to_owned(), - args.display(&option_id!(-'f', "scope", "full", "name")) - ); + let args = args([]); + assert_eq!("--global".to_owned(), args.display(&option_id!("global"))); + assert_eq!( + "--scope-name".to_owned(), + args.display(&option_id!("scope", "name")) + ); + assert_eq!( + "--scope-full-name".to_owned(), + args.display(&option_id!(-'f', "scope", "full", "name")) + ); } #[test] fn test_string() { - let args = args([ - "-u=swallow", - "--foo=bar", - "--baz-spam=eggs", - "--baz-spam=cheese", - ]); - - let assert_string = |expected: &str, id: OptionId| { - assert_eq!(expected.to_owned(), args.get_string(&id).unwrap().unwrap()) - }; - - assert_string("bar", option_id!("foo")); - assert_string("cheese", option_id!("baz", "spam")); - assert_string("swallow", option_id!(-'u', "unladen", "capacity")); - - assert!(args.get_string(&option_id!("dne")).unwrap().is_none()); + let args = args([ + "-u=swallow", + "--foo=bar", + "--baz-spam=eggs", + "--baz-spam=cheese", + ]); + + let assert_string = |expected: &str, id: OptionId| { + assert_eq!(expected.to_owned(), args.get_string(&id).unwrap().unwrap()) + }; + + assert_string("bar", option_id!("foo")); + assert_string("cheese", option_id!("baz", "spam")); + assert_string("swallow", option_id!(-'u', "unladen", "capacity")); + + assert!(args.get_string(&option_id!("dne")).unwrap().is_none()); } #[test] fn test_bool() { - let args = args([ - "-c=swallow", - "--foo=false", - "-f", - "--no-bar", - "--baz=true", - "--baz=FALSE", - "--no-spam-eggs=False", - "--no-b=True", - ]); - - let assert_bool = - |expected: bool, id: OptionId| assert_eq!(expected, args.get_bool(&id).unwrap().unwrap()); - - assert_bool(true, option_id!(-'f', "foo")); - assert_bool(false, option_id!("bar")); - assert_bool(false, option_id!(-'b', "baz")); - assert_bool(true, option_id!("spam", "eggs")); - - assert!(args.get_bool(&option_id!("dne")).unwrap().is_none()); - assert_eq!( - "Got 'swallow' for -c. Expected 'true' or 'false'.".to_owned(), - args - .get_bool(&option_id!(-'c', "unladen", "capacity")) - .unwrap_err() - ); + let args = args([ + "-c=swallow", + "--foo=false", + "-f", + "--no-bar", + "--baz=true", + "--baz=FALSE", + "--no-spam-eggs=False", + "--no-b=True", + ]); + + let assert_bool = + |expected: bool, id: OptionId| assert_eq!(expected, args.get_bool(&id).unwrap().unwrap()); + + assert_bool(true, option_id!(-'f', "foo")); + assert_bool(false, option_id!("bar")); + assert_bool(false, option_id!(-'b', "baz")); + assert_bool(true, option_id!("spam", "eggs")); + + assert!(args.get_bool(&option_id!("dne")).unwrap().is_none()); + assert_eq!( + "Got 'swallow' for -c. 
Expected 'true' or 'false'.".to_owned(), + args.get_bool(&option_id!(-'c', "unladen", "capacity")) + .unwrap_err() + ); } #[test] fn test_float() { - let args = args([ - "-j=4", - "--foo=42", - "--foo=3.14", - "--baz-spam=1.137", - "--bad=swallow", - ]); - - let assert_float = - |expected: f64, id: OptionId| assert_eq!(expected, args.get_float(&id).unwrap().unwrap()); - - assert_float(4_f64, option_id!(-'j', "jobs")); - assert_float(3.14, option_id!("foo")); - assert_float(1.137, option_id!("baz", "spam")); - - assert!(args.get_float(&option_id!("dne")).unwrap().is_none()); - - assert_eq!( - "Problem parsing --bad value swallow as a float value: invalid float literal".to_owned(), - args.get_float(&option_id!("bad")).unwrap_err() - ); + let args = args([ + "-j=4", + "--foo=42", + "--foo=3.14", + "--baz-spam=1.137", + "--bad=swallow", + ]); + + let assert_float = + |expected: f64, id: OptionId| assert_eq!(expected, args.get_float(&id).unwrap().unwrap()); + + assert_float(4_f64, option_id!(-'j', "jobs")); + assert_float(3.14, option_id!("foo")); + assert_float(1.137, option_id!("baz", "spam")); + + assert!(args.get_float(&option_id!("dne")).unwrap().is_none()); + + assert_eq!( + "Problem parsing --bad value swallow as a float value: invalid float literal".to_owned(), + args.get_float(&option_id!("bad")).unwrap_err() + ); } #[test] fn test_string_list() { - let args = args([ - "--bad=['mis', 'matched')", - "--phases=initial", - "-p=['one']", - "--phases=+['two','three'],-['one']", - ]); - - assert_eq!( - vec![ - ListEdit { - action: ListEditAction::Add, - items: vec!["initial".to_owned()] - }, - ListEdit { - action: ListEditAction::Replace, - items: vec!["one".to_owned()] - }, - ListEdit { - action: ListEditAction::Add, - items: vec!["two".to_owned(), "three".to_owned()] - }, - ListEdit { - action: ListEditAction::Remove, - items: vec!["one".to_owned()] - }, - ], - args - .get_string_list(&option_id!(-'p', "phases")) - .unwrap() - .unwrap() - ); - - assert!(args.get_string_list(&option_id!("dne")).unwrap().is_none()); - - let expected_error_msg = "\ + let args = args([ + "--bad=['mis', 'matched')", + "--phases=initial", + "-p=['one']", + "--phases=+['two','three'],-['one']", + ]); + + assert_eq!( + vec![ + ListEdit { + action: ListEditAction::Add, + items: vec!["initial".to_owned()] + }, + ListEdit { + action: ListEditAction::Replace, + items: vec!["one".to_owned()] + }, + ListEdit { + action: ListEditAction::Add, + items: vec!["two".to_owned(), "three".to_owned()] + }, + ListEdit { + action: ListEditAction::Remove, + items: vec!["one".to_owned()] + }, + ], + args.get_string_list(&option_id!(-'p', "phases")) + .unwrap() + .unwrap() + ); + + assert!(args.get_string_list(&option_id!("dne")).unwrap().is_none()); + + let expected_error_msg = "\ Problem parsing --bad string list value: 1:['mis', 'matched') -----------------^ Expected \",\" or the end of a list indicated by ']' at line 1 column 18" - .to_owned(); + .to_owned(); - assert_eq!( - expected_error_msg, - args.get_string_list(&option_id!("bad")).unwrap_err() - ); + assert_eq!( + expected_error_msg, + args.get_string_list(&option_id!("bad")).unwrap_err() + ); } diff --git a/src/rust/engine/options/src/build_root.rs b/src/rust/engine/options/src/build_root.rs index 513228bff6e..9157b8b8a6c 100644 --- a/src/rust/engine/options/src/build_root.rs +++ b/src/rust/engine/options/src/build_root.rs @@ -11,54 +11,54 @@ use log::debug; pub struct BuildRoot(PathBuf); impl BuildRoot { - const SENTINEL_FILES: &'static [&'static str] = &["pants.toml", 
"BUILDROOT", "BUILD_ROOT"]; + const SENTINEL_FILES: &'static [&'static str] = &["pants.toml", "BUILDROOT", "BUILD_ROOT"]; - pub fn find() -> Result { - let cwd = env::current_dir().map_err(|e| format!("Failed to determine $CWD: {e}"))?; - Self::find_from(&cwd) - } + pub fn find() -> Result { + let cwd = env::current_dir().map_err(|e| format!("Failed to determine $CWD: {e}"))?; + Self::find_from(&cwd) + } - pub(crate) fn find_from(start: &Path) -> Result { - let mut build_root = start; - loop { - for sentinel in Self::SENTINEL_FILES { - let sentinel_path = build_root.join(sentinel); - if !sentinel_path.exists() { - continue; - } - let sentinel_path_metadata = sentinel_path.metadata().map_err(|e| { - format!( - "\ + pub(crate) fn find_from(start: &Path) -> Result { + let mut build_root = start; + loop { + for sentinel in Self::SENTINEL_FILES { + let sentinel_path = build_root.join(sentinel); + if !sentinel_path.exists() { + continue; + } + let sentinel_path_metadata = sentinel_path.metadata().map_err(|e| { + format!( + "\ Failed to read metadata for {path} to determine if is a build root sentinel file: {err}\ ", - path = sentinel_path.display(), - err = e - ) - })?; - if sentinel_path_metadata.is_file() { - let root = BuildRoot(build_root.to_path_buf()); - debug!("Found {:?} starting search from {}.", root, start.display()); - return Ok(root); - } - } + path = sentinel_path.display(), + err = e + ) + })?; + if sentinel_path_metadata.is_file() { + let root = BuildRoot(build_root.to_path_buf()); + debug!("Found {:?} starting search from {}.", root, start.display()); + return Ok(root); + } + } - build_root = build_root.parent().ok_or(format!( - "\ + build_root = build_root.parent().ok_or(format!( + "\ No build root detected for the current directory of {cwd}. Pants detects the build root \ by looking for at least one file from {sentinel_files} in the cwd and its ancestors. 
If \ you have none of these files, you can create an empty file in your build root.\ ", - cwd = start.display(), - sentinel_files = Self::SENTINEL_FILES.join(", ") - ))?; + cwd = start.display(), + sentinel_files = Self::SENTINEL_FILES.join(", ") + ))?; + } } - } } impl Deref for BuildRoot { - type Target = PathBuf; + type Target = PathBuf; - fn deref(&self) -> &PathBuf { - &self.0 - } + fn deref(&self) -> &PathBuf { + &self.0 + } } diff --git a/src/rust/engine/options/src/build_root_tests.rs b/src/rust/engine/options/src/build_root_tests.rs index 75d9a05ca7f..c8c61d774b2 100644 --- a/src/rust/engine/options/src/build_root_tests.rs +++ b/src/rust/engine/options/src/build_root_tests.rs @@ -11,47 +11,47 @@ use std::ops::Deref; #[test] fn test_find_cwd() { - let buildroot = TempDir::new().unwrap(); - let buildroot_path = buildroot.path().to_path_buf(); - let mut sentinel: Option = None; - - let mut assert_sentinel = |name| { - if let Some(prior_sentinel) = sentinel.take() { - fs::remove_file(prior_sentinel).unwrap(); - } - assert!(BuildRoot::find_from(&buildroot_path).is_err()); - - let file = buildroot.path().join(name); - fs::write(&file, []).unwrap(); - sentinel = Some(file); - assert_eq!( - &buildroot_path, - BuildRoot::find_from(&buildroot_path).unwrap().deref() - ); - }; - - assert_sentinel("BUILDROOT"); - assert_sentinel("BUILD_ROOT"); - assert_sentinel("pants.toml"); + let buildroot = TempDir::new().unwrap(); + let buildroot_path = buildroot.path().to_path_buf(); + let mut sentinel: Option = None; + + let mut assert_sentinel = |name| { + if let Some(prior_sentinel) = sentinel.take() { + fs::remove_file(prior_sentinel).unwrap(); + } + assert!(BuildRoot::find_from(&buildroot_path).is_err()); + + let file = buildroot.path().join(name); + fs::write(&file, []).unwrap(); + sentinel = Some(file); + assert_eq!( + &buildroot_path, + BuildRoot::find_from(&buildroot_path).unwrap().deref() + ); + }; + + assert_sentinel("BUILDROOT"); + assert_sentinel("BUILD_ROOT"); + assert_sentinel("pants.toml"); } #[test] fn test_find_subdir() { - let buildroot = TempDir::new().unwrap(); - let buildroot_path = buildroot.path().to_path_buf(); - let subdir = buildroot_path.join("foo").join("bar"); - - assert!(BuildRoot::find_from(&buildroot_path).is_err()); - assert!(BuildRoot::find_from(&subdir).is_err()); - - let sentinel = &buildroot.path().join("pants.toml"); - fs::write(sentinel, []).unwrap(); - assert_eq!( - &buildroot_path, - BuildRoot::find_from(&buildroot_path).unwrap().deref() - ); - assert_eq!( - &buildroot_path, - BuildRoot::find_from(&subdir).unwrap().deref() - ); + let buildroot = TempDir::new().unwrap(); + let buildroot_path = buildroot.path().to_path_buf(); + let subdir = buildroot_path.join("foo").join("bar"); + + assert!(BuildRoot::find_from(&buildroot_path).is_err()); + assert!(BuildRoot::find_from(&subdir).is_err()); + + let sentinel = &buildroot.path().join("pants.toml"); + fs::write(sentinel, []).unwrap(); + assert_eq!( + &buildroot_path, + BuildRoot::find_from(&buildroot_path).unwrap().deref() + ); + assert_eq!( + &buildroot_path, + BuildRoot::find_from(&subdir).unwrap().deref() + ); } diff --git a/src/rust/engine/options/src/config.rs b/src/rust/engine/options/src/config.rs index 9be903bf699..43c674bd26a 100644 --- a/src/rust/engine/options/src/config.rs +++ b/src/rust/engine/options/src/config.rs @@ -15,214 +15,219 @@ use super::{ListEdit, ListEditAction, OptionsSource}; #[derive(Clone)] pub(crate) struct Config { - config: Value, + config: Value, } impl Config { - pub(crate) fn default() 
-> Config { - Config { - config: Value::Table(Table::new()), + pub(crate) fn default() -> Config { + Config { + config: Value::Table(Table::new()), + } } - } - pub(crate) fn parse>(file: P) -> Result { - let config_contents = fs::read_to_string(&file).map_err(|e| { - format!( - "Failed to read config file {}: {}", - file.as_ref().display(), - e - ) - })?; - let config = config_contents.parse::().map_err(|e| { - format!( - "Failed to parse config file {}: {}", - file.as_ref().display(), - e - ) - })?; - if !config.is_table() { - return Err(format!( - "Expected the config file {} to contain a table but contained a {}: {}", - file.as_ref().display(), - config.type_str(), - config - )); - } - if let Some((key, section)) = config - .as_table() - .unwrap() - .iter() - .find(|(_, section)| !section.is_table()) - { - return Err(format!( + pub(crate) fn parse>(file: P) -> Result { + let config_contents = fs::read_to_string(&file).map_err(|e| { + format!( + "Failed to read config file {}: {}", + file.as_ref().display(), + e + ) + })?; + let config = config_contents.parse::().map_err(|e| { + format!( + "Failed to parse config file {}: {}", + file.as_ref().display(), + e + ) + })?; + if !config.is_table() { + return Err(format!( + "Expected the config file {} to contain a table but contained a {}: {}", + file.as_ref().display(), + config.type_str(), + config + )); + } + if let Some((key, section)) = config + .as_table() + .unwrap() + .iter() + .find(|(_, section)| !section.is_table()) + { + return Err(format!( "Expected the config file {} to contain tables per section, but section {} contained a {}: {}", file.as_ref().display(), key, section.type_str(), section )); + } + + Ok(Config { config }) } - Ok(Config { config }) - } - - pub(crate) fn merged>(files: &[P]) -> Result { - files - .iter() - .map(Config::parse) - .try_fold(Config::default(), |config, parse_result| { - parse_result.map(|parsed| config.merge(parsed)) - }) - } - - fn option_name(id: &OptionId) -> String { - id.name("_", NameTransform::None) - } - - fn extract_string_list(option_name: &str, value: &Value) -> Result, String> { - if let Some(array) = value.as_array() { - let mut items = vec![]; - for item in array { - if let Some(value) = item.as_str() { - items.push(value.to_owned()) - } else { - return Err(format!( + pub(crate) fn merged>(files: &[P]) -> Result { + files + .iter() + .map(Config::parse) + .try_fold(Config::default(), |config, parse_result| { + parse_result.map(|parsed| config.merge(parsed)) + }) + } + + fn option_name(id: &OptionId) -> String { + id.name("_", NameTransform::None) + } + + fn extract_string_list(option_name: &str, value: &Value) -> Result, String> { + if let Some(array) = value.as_array() { + let mut items = vec![]; + for item in array { + if let Some(value) = item.as_str() { + items.push(value.to_owned()) + } else { + return Err(format!( "Expected {option_name} to be an array of strings but given {value} containing non string item {item}" )); + } + } + Ok(items) + } else { + Err(format!( + "Expected {option_name} to be a toml array or Python sequence, but given {value}." + )) } - } - Ok(items) - } else { - Err(format!( - "Expected {option_name} to be a toml array or Python sequence, but given {value}." 
- )) } - } - - fn get_value(&self, id: &OptionId) -> Option<&Value> { - self - .config - .get(id.scope()) - .and_then(|table| table.get(Self::option_name(id))) - } - - pub(crate) fn merge(mut self, mut other: Config) -> Config { - let mut map = mem::take(self.config.as_table_mut().unwrap()); - let mut other = mem::take(other.config.as_table_mut().unwrap()); - // Merge overlapping sections. - for (scope, table) in &mut map { - if let Some(mut other_table) = other.remove(scope) { - table - .as_table_mut() - .unwrap() - .extend(mem::take(other_table.as_table_mut().unwrap())); - } + + fn get_value(&self, id: &OptionId) -> Option<&Value> { + self.config + .get(id.scope()) + .and_then(|table| table.get(Self::option_name(id))) } - // And then extend non-overlapping sections. - map.extend(other); - Config { - config: Value::Table(map), + + pub(crate) fn merge(mut self, mut other: Config) -> Config { + let mut map = mem::take(self.config.as_table_mut().unwrap()); + let mut other = mem::take(other.config.as_table_mut().unwrap()); + // Merge overlapping sections. + for (scope, table) in &mut map { + if let Some(mut other_table) = other.remove(scope) { + table + .as_table_mut() + .unwrap() + .extend(mem::take(other_table.as_table_mut().unwrap())); + } + } + // And then extend non-overlapping sections. + map.extend(other); + Config { + config: Value::Table(map), + } } - } } impl OptionsSource for Config { - fn display(&self, id: &OptionId) -> String { - format!("{id}") - } - - fn get_string(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_value(id) { - if let Some(string) = value.as_str() { - Ok(Some(string.to_owned())) - } else { - Err(format!("Expected {id} to be a string but given {value}.")) - } - } else { - Ok(None) + fn display(&self, id: &OptionId) -> String { + format!("{id}") + } + + fn get_string(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_value(id) { + if let Some(string) = value.as_str() { + Ok(Some(string.to_owned())) + } else { + Err(format!("Expected {id} to be a string but given {value}.")) + } + } else { + Ok(None) + } } - } - - fn get_bool(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_value(id) { - if let Some(bool) = value.as_bool() { - Ok(Some(bool)) - } else { - Err(format!("Expected {id} to be a bool but given {value}.")) - } - } else { - Ok(None) + + fn get_bool(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_value(id) { + if let Some(bool) = value.as_bool() { + Ok(Some(bool)) + } else { + Err(format!("Expected {id} to be a bool but given {value}.")) + } + } else { + Ok(None) + } } - } - - fn get_int(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_value(id) { - if let Some(int) = value.as_integer() { - Ok(Some(int)) - } else { - Err(format!("Expected {id} to be an int but given {value}.")) - } - } else { - Ok(None) + + fn get_int(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_value(id) { + if let Some(int) = value.as_integer() { + Ok(Some(int)) + } else { + Err(format!("Expected {id} to be an int but given {value}.")) + } + } else { + Ok(None) + } } - } - - fn get_float(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_value(id) { - if let Some(float) = value.as_float() { - Ok(Some(float)) - } else { - Err(format!("Expected {id} to be a float but given {value}.")) - } - } else { - Ok(None) + + fn get_float(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_value(id) 
{ + if let Some(float) = value.as_float() { + Ok(Some(float)) + } else { + Err(format!("Expected {id} to be a float but given {value}.")) + } + } else { + Ok(None) + } } - } - - fn get_string_list(&self, id: &OptionId) -> Result>>, String> { - if let Some(table) = self.config.get(id.scope()) { - let option_name = Self::option_name(id); - let mut list_edits = vec![]; - if let Some(value) = table.get(&option_name) { - match value { - Value::Table(sub_table) => { - if sub_table.is_empty() - || !sub_table.keys().collect::>().is_subset( - &["add".to_owned(), "remove".to_owned()] - .iter() - .collect::>(), - ) - { - return Err(format!( + + fn get_string_list(&self, id: &OptionId) -> Result>>, String> { + if let Some(table) = self.config.get(id.scope()) { + let option_name = Self::option_name(id); + let mut list_edits = vec![]; + if let Some(value) = table.get(&option_name) { + match value { + Value::Table(sub_table) => { + if sub_table.is_empty() + || !sub_table.keys().collect::>().is_subset( + &["add".to_owned(), "remove".to_owned()] + .iter() + .collect::>(), + ) + { + return Err(format!( "Expected {option_name} to contain an 'add' element, a 'remove' element or both but found: {sub_table:?}" )); + } + if let Some(add) = sub_table.get("add") { + list_edits.push(ListEdit { + action: ListEditAction::Add, + items: Self::extract_string_list( + &format!("{option_name}.add"), + add, + )?, + }) + } + if let Some(remove) = sub_table.get("remove") { + list_edits.push(ListEdit { + action: ListEditAction::Remove, + items: Self::extract_string_list( + &format!("{option_name}.remove"), + remove, + )?, + }) + } + } + Value::String(v) => { + list_edits.extend(parse_string_list(v).map_err(|e| e.render(option_name))?); + } + value => list_edits.push(ListEdit { + action: ListEditAction::Replace, + items: Self::extract_string_list(&option_name, value)?, + }), + } } - if let Some(add) = sub_table.get("add") { - list_edits.push(ListEdit { - action: ListEditAction::Add, - items: Self::extract_string_list(&format!("{option_name}.add"), add)?, - }) - } - if let Some(remove) = sub_table.get("remove") { - list_edits.push(ListEdit { - action: ListEditAction::Remove, - items: Self::extract_string_list(&format!("{option_name}.remove"), remove)?, - }) + if !list_edits.is_empty() { + return Ok(Some(list_edits)); } - } - Value::String(v) => { - list_edits.extend(parse_string_list(v).map_err(|e| e.render(option_name))?); - } - value => list_edits.push(ListEdit { - action: ListEditAction::Replace, - items: Self::extract_string_list(&option_name, value)?, - }), } - } - if !list_edits.is_empty() { - return Ok(Some(list_edits)); - } + Ok(None) } - Ok(None) - } } diff --git a/src/rust/engine/options/src/config_tests.rs b/src/rust/engine/options/src/config_tests.rs index d006662d294..a5652bcdf1b 100644 --- a/src/rust/engine/options/src/config_tests.rs +++ b/src/rust/engine/options/src/config_tests.rs @@ -10,56 +10,56 @@ use crate::{option_id, OptionId, OptionsSource}; use tempfile::TempDir; fn config>(file_contents: I) -> Config { - let dir = TempDir::new().unwrap(); - let files = file_contents - .into_iter() - .enumerate() - .map(|(idx, file_content)| { - let path = dir.path().join(format!("{idx}.toml")); - File::create(&path) - .unwrap() - .write_all(file_content.as_bytes()) - .unwrap(); - path - }) - .collect::>(); - Config::merged(&files).unwrap() + let dir = TempDir::new().unwrap(); + let files = file_contents + .into_iter() + .enumerate() + .map(|(idx, file_content)| { + let path = dir.path().join(format!("{idx}.toml")); + 
File::create(&path) + .unwrap() + .write_all(file_content.as_bytes()) + .unwrap(); + path + }) + .collect::>(); + Config::merged(&files).unwrap() } #[test] fn test_display() { - let config = config([]); - assert_eq!( - "[GLOBAL] name".to_owned(), - config.display(&option_id!("name")) - ); - assert_eq!( - "[scope] name".to_owned(), - config.display(&option_id!(["scope"], "name")) - ); - assert_eq!( - "[scope] full_name".to_owned(), - config.display(&option_id!(-'f', ["scope"], "full", "name")) - ); + let config = config([]); + assert_eq!( + "[GLOBAL] name".to_owned(), + config.display(&option_id!("name")) + ); + assert_eq!( + "[scope] name".to_owned(), + config.display(&option_id!(["scope"], "name")) + ); + assert_eq!( + "[scope] full_name".to_owned(), + config.display(&option_id!(-'f', ["scope"], "full", "name")) + ); } #[test] fn test_section_overlap() { - // Two files with the same section should result in merged content for that section. - let config = config([ - "[section]\n\ + // Two files with the same section should result in merged content for that section. + let config = config([ + "[section]\n\ field1 = 'something'\n", - "[section]\n\ + "[section]\n\ field2 = 'something else'\n", - ]); + ]); - let assert_string = |expected: &str, id: OptionId| { - assert_eq!( - expected.to_owned(), - config.get_string(&id).unwrap().unwrap() - ) - }; + let assert_string = |expected: &str, id: OptionId| { + assert_eq!( + expected.to_owned(), + config.get_string(&id).unwrap().unwrap() + ) + }; - assert_string("something", option_id!(["section"], "field1")); - assert_string("something else", option_id!(["section"], "field2")); + assert_string("something", option_id!(["section"], "field1")); + assert_string("something else", option_id!(["section"], "field2")); } diff --git a/src/rust/engine/options/src/env.rs b/src/rust/engine/options/src/env.rs index 5cbce069603..1b34ae5947f 100644 --- a/src/rust/engine/options/src/env.rs +++ b/src/rust/engine/options/src/env.rs @@ -12,104 +12,103 @@ use crate::ListEdit; #[derive(Debug)] pub struct Env { - pub(crate) env: HashMap, + pub(crate) env: HashMap, } #[derive(Debug)] pub struct DroppedEnvVars { - pub non_utf8_keys: Vec, - pub keys_with_non_utf8_values: Vec, + pub non_utf8_keys: Vec, + pub keys_with_non_utf8_values: Vec, } impl Env { - pub fn new(env: HashMap) -> Self { - Self { env } - } + pub fn new(env: HashMap) -> Self { + Self { env } + } - pub fn capture_lossy() -> (Self, DroppedEnvVars) { - Self::do_capture_lossy(env::vars_os()) - } + pub fn capture_lossy() -> (Self, DroppedEnvVars) { + Self::do_capture_lossy(env::vars_os()) + } - pub(crate) fn do_capture_lossy(env_os: I) -> (Self, DroppedEnvVars) - where - I: Iterator, - { - let mut env: HashMap = HashMap::with_capacity(env_os.size_hint().0); - let mut dropped = DroppedEnvVars { - non_utf8_keys: Vec::new(), - keys_with_non_utf8_values: Vec::new(), - }; - for (os_key, os_val) in env_os { - match (os_key.into_string(), os_val.into_string()) { - (Ok(key), Ok(val)) => { - env.insert(key, val); + pub(crate) fn do_capture_lossy(env_os: I) -> (Self, DroppedEnvVars) + where + I: Iterator, + { + let mut env: HashMap = HashMap::with_capacity(env_os.size_hint().0); + let mut dropped = DroppedEnvVars { + non_utf8_keys: Vec::new(), + keys_with_non_utf8_values: Vec::new(), + }; + for (os_key, os_val) in env_os { + match (os_key.into_string(), os_val.into_string()) { + (Ok(key), Ok(val)) => { + env.insert(key, val); + } + (Ok(key), Err(_)) => dropped.keys_with_non_utf8_values.push(key), + (Err(os_key), _) => 
dropped.non_utf8_keys.push(os_key), + } } - (Ok(key), Err(_)) => dropped.keys_with_non_utf8_values.push(key), - (Err(os_key), _) => dropped.non_utf8_keys.push(os_key), - } + (Self::new(env), dropped) } - (Self::new(env), dropped) - } - fn env_var_names(id: &OptionId) -> Vec { - let name = id.name("_", NameTransform::ToUpper); - let mut names = vec![format!( - "PANTS_{}_{}", - id.0.name().replace('-', "_").to_ascii_uppercase(), - name - )]; - if id.0 == Scope::Global { - names.push(format!("PANTS_{name}")); - } - if name.starts_with("PANTS_") { - names.push(name); + fn env_var_names(id: &OptionId) -> Vec { + let name = id.name("_", NameTransform::ToUpper); + let mut names = vec![format!( + "PANTS_{}_{}", + id.0.name().replace('-', "_").to_ascii_uppercase(), + name + )]; + if id.0 == Scope::Global { + names.push(format!("PANTS_{name}")); + } + if name.starts_with("PANTS_") { + names.push(name); + } + names } - names - } } impl From<&Env> for Vec<(String, String)> { - fn from(env: &Env) -> Self { - env - .env - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect::>() - } + fn from(env: &Env) -> Self { + env.env + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect::>() + } } impl OptionsSource for Env { - fn display(&self, id: &OptionId) -> String { - Self::env_var_names(id).pop().unwrap() - } + fn display(&self, id: &OptionId) -> String { + Self::env_var_names(id).pop().unwrap() + } - fn get_string(&self, id: &OptionId) -> Result, String> { - let env_var_names = Self::env_var_names(id); - for env_var_name in &env_var_names { - if let Some(value) = self.env.get(env_var_name) { - return Ok(Some(value.to_owned())); - } + fn get_string(&self, id: &OptionId) -> Result, String> { + let env_var_names = Self::env_var_names(id); + for env_var_name in &env_var_names { + if let Some(value) = self.env.get(env_var_name) { + return Ok(Some(value.to_owned())); + } + } + Ok(None) } - Ok(None) - } - fn get_bool(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_string(id)? { - parse_bool(&value) - .map(Some) - .map_err(|e| e.render(self.display(id))) - } else { - Ok(None) + fn get_bool(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_string(id)? { + parse_bool(&value) + .map(Some) + .map_err(|e| e.render(self.display(id))) + } else { + Ok(None) + } } - } - fn get_string_list(&self, id: &OptionId) -> Result>>, String> { - if let Some(value) = self.get_string(id)? { - parse_string_list(&value) - .map(Some) - .map_err(|e| e.render(self.display(id))) - } else { - Ok(None) + fn get_string_list(&self, id: &OptionId) -> Result>>, String> { + if let Some(value) = self.get_string(id)? { + parse_string_list(&value) + .map(Some) + .map_err(|e| e.render(self.display(id))) + } else { + Ok(None) + } } - } } diff --git a/src/rust/engine/options/src/env_tests.rs b/src/rust/engine/options/src/env_tests.rs index 5f52a869961..4356c75f4b0 100644 --- a/src/rust/engine/options/src/env_tests.rs +++ b/src/rust/engine/options/src/env_tests.rs @@ -8,190 +8,191 @@ use std::collections::HashMap; use std::ffi::OsString; fn env>(vars: I) -> Env { - Env { - env: vars - .into_iter() - .map(|(k, v)| (k.to_owned(), v.to_owned())) - .collect::>(), - } + Env { + env: vars + .into_iter() + .map(|(k, v)| (k.to_owned(), v.to_owned())) + .collect::>(), + } } #[test] #[cfg(not(target_os = "windows"))] fn test_capture_lossy() { - // OsString::from_vec(Vec[u8]) requires unix. 
- use std::os::unix::ffi::OsStringExt; - - let fake_vars: Vec<(OsString, OsString)> = vec![ - ("GOOD_KEY1".into(), "GOOD_VALUE".into()), - ( - OsString::from_vec(b"BAD_\xa5KEY".to_vec()), - "GOOD_VALUE".into(), - ), - ( - "GOOD_KEY2".into(), - OsString::from_vec(b"BAD_\xa5VALUE".to_vec()), - ), - ]; - let (env, dropped) = Env::do_capture_lossy(fake_vars.into_iter()); - let captured_vars: Vec<(String, String)> = (&env).into(); - assert_eq!( - captured_vars, - vec![(String::from("GOOD_KEY1"), String::from("GOOD_VALUE"))] - ); - assert_eq!( - dropped.non_utf8_keys, - vec![OsString::from_vec(b"BAD_\xa5KEY".to_vec())] - ); - assert_eq!( - dropped.keys_with_non_utf8_values, - vec![String::from("GOOD_KEY2")] - ); + // OsString::from_vec(Vec[u8]) requires unix. + use std::os::unix::ffi::OsStringExt; + + let fake_vars: Vec<(OsString, OsString)> = vec![ + ("GOOD_KEY1".into(), "GOOD_VALUE".into()), + ( + OsString::from_vec(b"BAD_\xa5KEY".to_vec()), + "GOOD_VALUE".into(), + ), + ( + "GOOD_KEY2".into(), + OsString::from_vec(b"BAD_\xa5VALUE".to_vec()), + ), + ]; + let (env, dropped) = Env::do_capture_lossy(fake_vars.into_iter()); + let captured_vars: Vec<(String, String)> = (&env).into(); + assert_eq!( + captured_vars, + vec![(String::from("GOOD_KEY1"), String::from("GOOD_VALUE"))] + ); + assert_eq!( + dropped.non_utf8_keys, + vec![OsString::from_vec(b"BAD_\xa5KEY".to_vec())] + ); + assert_eq!( + dropped.keys_with_non_utf8_values, + vec![String::from("GOOD_KEY2")] + ); } #[test] fn test_display() { - let env = env([]); - assert_eq!("PANTS_NAME".to_owned(), env.display(&option_id!("name"))); - assert_eq!( - "PANTS_SCOPE_NAME".to_owned(), - env.display(&option_id!(["scope"], "name")) - ); - assert_eq!( - "PANTS_SCOPE_FULL_NAME".to_owned(), - env.display(&option_id!(-'f', ["scope"], "full", "name")) - ); + let env = env([]); + assert_eq!("PANTS_NAME".to_owned(), env.display(&option_id!("name"))); + assert_eq!( + "PANTS_SCOPE_NAME".to_owned(), + env.display(&option_id!(["scope"], "name")) + ); + assert_eq!( + "PANTS_SCOPE_FULL_NAME".to_owned(), + env.display(&option_id!(-'f', ["scope"], "full", "name")) + ); } #[test] fn test_scope() { - let env = env([("PANTS_PYTHON_EXAMPLE", "true")]); - assert!(env - .get_bool(&option_id!(["python"], "example")) - .unwrap() - .unwrap()); + let env = env([("PANTS_PYTHON_EXAMPLE", "true")]); + assert!(env + .get_bool(&option_id!(["python"], "example")) + .unwrap() + .unwrap()); } #[test] fn test_string() { - let env = env([ - ("PANTS_FOO", "bar"), - ("PANTS_BAZ_SPAM", "cheese"), - ("PANTS_EGGS", "swallow"), - ("PANTS_GLOBAL_BOB", "African"), - ("PANTS_PANTS_JANE", "elderberry"), - ]); - - let assert_string = |expected: &str, id: OptionId| { - assert_eq!(expected.to_owned(), env.get_string(&id).unwrap().unwrap()) - }; - - assert_string("bar", option_id!("foo")); - assert_string("cheese", option_id!("baz", "spam")); - assert_string("swallow", option_id!("pants", "eggs")); - assert_string("African", option_id!("bob")); - assert_string("elderberry", option_id!("pants", "jane")); - - assert!(env.get_string(&option_id!("dne")).unwrap().is_none()); + let env = env([ + ("PANTS_FOO", "bar"), + ("PANTS_BAZ_SPAM", "cheese"), + ("PANTS_EGGS", "swallow"), + ("PANTS_GLOBAL_BOB", "African"), + ("PANTS_PANTS_JANE", "elderberry"), + ]); + + let assert_string = |expected: &str, id: OptionId| { + assert_eq!(expected.to_owned(), env.get_string(&id).unwrap().unwrap()) + }; + + assert_string("bar", option_id!("foo")); + assert_string("cheese", option_id!("baz", "spam")); + 
assert_string("swallow", option_id!("pants", "eggs")); + assert_string("African", option_id!("bob")); + assert_string("elderberry", option_id!("pants", "jane")); + + assert!(env.get_string(&option_id!("dne")).unwrap().is_none()); } #[test] fn test_bool() { - let env = env([ - ("PANTS_FOO", "true"), - ("PANTS_BAR_BAZ", "False"), - ("PANTS_EGGS", "swallow"), - ]); - - let assert_bool = - |expected: bool, id: OptionId| assert_eq!(expected, env.get_bool(&id).unwrap().unwrap()); - - assert_bool(true, option_id!("foo")); - assert_bool(false, option_id!("bar", "baz")); - - assert!(env.get_bool(&option_id!("dne")).unwrap().is_none()); - assert_eq!( - "Got 'swallow' for PANTS_EGGS. Expected 'true' or 'false'.".to_owned(), - env.get_bool(&option_id!("pants", "eggs")).unwrap_err() - ); + let env = env([ + ("PANTS_FOO", "true"), + ("PANTS_BAR_BAZ", "False"), + ("PANTS_EGGS", "swallow"), + ]); + + let assert_bool = + |expected: bool, id: OptionId| assert_eq!(expected, env.get_bool(&id).unwrap().unwrap()); + + assert_bool(true, option_id!("foo")); + assert_bool(false, option_id!("bar", "baz")); + + assert!(env.get_bool(&option_id!("dne")).unwrap().is_none()); + assert_eq!( + "Got 'swallow' for PANTS_EGGS. Expected 'true' or 'false'.".to_owned(), + env.get_bool(&option_id!("pants", "eggs")).unwrap_err() + ); } #[test] fn test_float() { - let env = env([ - ("PANTS_FOO", "4"), - ("PANTS_BAR_BAZ", "3.14"), - ("PANTS_EGGS", "1.137"), - ("PANTS_BAD", "swallow"), - ]); - - let assert_float = - |expected: f64, id: OptionId| assert_eq!(expected, env.get_float(&id).unwrap().unwrap()); - - assert_float(4_f64, option_id!("foo")); - assert_float(3.14, option_id!("bar", "baz")); - assert_float(1.137, option_id!("pants", "eggs")); - - assert!(env.get_float(&option_id!("dne")).unwrap().is_none()); - - assert_eq!( - "Problem parsing PANTS_BAD value swallow as a float value: invalid float literal".to_owned(), - env.get_float(&option_id!("pants", "bad")).unwrap_err() - ); + let env = env([ + ("PANTS_FOO", "4"), + ("PANTS_BAR_BAZ", "3.14"), + ("PANTS_EGGS", "1.137"), + ("PANTS_BAD", "swallow"), + ]); + + let assert_float = + |expected: f64, id: OptionId| assert_eq!(expected, env.get_float(&id).unwrap().unwrap()); + + assert_float(4_f64, option_id!("foo")); + assert_float(3.14, option_id!("bar", "baz")); + assert_float(1.137, option_id!("pants", "eggs")); + + assert!(env.get_float(&option_id!("dne")).unwrap().is_none()); + + assert_eq!( + "Problem parsing PANTS_BAD value swallow as a float value: invalid float literal" + .to_owned(), + env.get_float(&option_id!("pants", "bad")).unwrap_err() + ); } #[test] fn test_string_list() { - let env = env([ - ("PANTS_BAD", "('mis', 'matched']"), - ("PANTS_IMPLICIT_ADD", "initial"), - ("PANTS_RESET", "['one']"), - ("PANTS_EDITS", "+['two','three'],-['one']"), - ]); - - let get_string_list = |id| env.get_string_list(&id).unwrap().unwrap(); - - assert_eq!( - vec![ListEdit { - action: ListEditAction::Add, - items: vec!["initial".to_owned()] - },], - get_string_list(option_id!("implicit", "add")) - ); - - assert_eq!( - vec![ListEdit { - action: ListEditAction::Replace, - items: vec!["one".to_owned()] - },], - get_string_list(option_id!("reset")) - ); - - assert_eq!( - vec![ - ListEdit { - action: ListEditAction::Add, - items: vec!["two".to_owned(), "three".to_owned()] - }, - ListEdit { - action: ListEditAction::Remove, - items: vec!["one".to_owned()] - }, - ], - get_string_list(option_id!("edits")) - ); - - assert!(env.get_string_list(&option_id!("dne")).unwrap().is_none()); - - let 
expected_error_msg = "\ + let env = env([ + ("PANTS_BAD", "('mis', 'matched']"), + ("PANTS_IMPLICIT_ADD", "initial"), + ("PANTS_RESET", "['one']"), + ("PANTS_EDITS", "+['two','three'],-['one']"), + ]); + + let get_string_list = |id| env.get_string_list(&id).unwrap().unwrap(); + + assert_eq!( + vec![ListEdit { + action: ListEditAction::Add, + items: vec!["initial".to_owned()] + },], + get_string_list(option_id!("implicit", "add")) + ); + + assert_eq!( + vec![ListEdit { + action: ListEditAction::Replace, + items: vec!["one".to_owned()] + },], + get_string_list(option_id!("reset")) + ); + + assert_eq!( + vec![ + ListEdit { + action: ListEditAction::Add, + items: vec!["two".to_owned(), "three".to_owned()] + }, + ListEdit { + action: ListEditAction::Remove, + items: vec!["one".to_owned()] + }, + ], + get_string_list(option_id!("edits")) + ); + + assert!(env.get_string_list(&option_id!("dne")).unwrap().is_none()); + + let expected_error_msg = "\ Problem parsing PANTS_BAD string list value: 1:('mis', 'matched'] -----------------^ Expected \",\" or the end of a tuple indicated by ')' at line 1 column 18" - .to_owned(); + .to_owned(); - assert_eq!( - expected_error_msg, - env.get_string_list(&option_id!("bad")).unwrap_err() - ); + assert_eq!( + expected_error_msg, + env.get_string_list(&option_id!("bad")).unwrap_err() + ); } diff --git a/src/rust/engine/options/src/id.rs b/src/rust/engine/options/src/id.rs index 41ea374bf90..d39d592dcf2 100644 --- a/src/rust/engine/options/src/id.rs +++ b/src/rust/engine/options/src/id.rs @@ -6,64 +6,64 @@ use std::fmt::{Display, Formatter}; #[derive(Clone, Debug, Eq, PartialEq)] pub enum Scope { - Global, - Scope(String), + Global, + Scope(String), } impl Scope { - pub fn named(name: &str) -> Scope { - match name { - "GLOBAL" => Scope::Global, - scope => Scope::Scope(scope.to_owned()), + pub fn named(name: &str) -> Scope { + match name { + "GLOBAL" => Scope::Global, + scope => Scope::Scope(scope.to_owned()), + } } - } - pub fn name(&self) -> &str { - match self { - Scope::Global => "GLOBAL", - Scope::Scope(scope) => scope.as_str(), + pub fn name(&self) -> &str { + match self { + Scope::Global => "GLOBAL", + Scope::Scope(scope) => scope.as_str(), + } } - } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct OptionId( - pub(crate) Scope, - pub(crate) Vec, - pub(crate) Option, + pub(crate) Scope, + pub(crate) Vec, + pub(crate) Option, ); impl OptionId { - pub fn new( - scope: Scope, - name: Name, - switch: Option, - ) -> Result - where - Component: AsRef, - Name: Iterator, - { - let name_components = name - .map(|component| component.as_ref().to_string()) - .collect::>(); - if name_components.is_empty() { - return Err(format!( - "Cannot create an OptionId with an empty name. Given a scope of {scope:?}." - )); + pub fn new( + scope: Scope, + name: Name, + switch: Option, + ) -> Result + where + Component: AsRef, + Name: Iterator, + { + let name_components = name + .map(|component| component.as_ref().to_string()) + .collect::>(); + if name_components.is_empty() { + return Err(format!( + "Cannot create an OptionId with an empty name. Given a scope of {scope:?}." 
+ )); + } + Ok(OptionId(scope, name_components, switch)) } - Ok(OptionId(scope, name_components, switch)) - } } impl Display for OptionId { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "[{}] {}", - self.scope(), - self.name("_", NameTransform::None) - ) - } + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "[{}] {}", + self.scope(), + self.name("_", NameTransform::None) + ) + } } #[macro_export] @@ -99,30 +99,29 @@ macro_rules! option_id { } pub(crate) enum NameTransform { - None, - ToLower, - ToUpper, + None, + ToLower, + ToUpper, } impl OptionId { - pub(crate) fn scope(&self) -> &str { - self.0.name() - } + pub(crate) fn scope(&self) -> &str { + self.0.name() + } - pub(crate) fn name(&self, sep: &str, transform: NameTransform) -> String { - self - .1 - .iter() - .map(|component| match transform { - NameTransform::None => component.to_owned(), - NameTransform::ToLower => component.to_ascii_lowercase(), - NameTransform::ToUpper => component.to_ascii_uppercase(), - }) - .collect::>() - .join(sep) - } + pub(crate) fn name(&self, sep: &str, transform: NameTransform) -> String { + self.1 + .iter() + .map(|component| match transform { + NameTransform::None => component.to_owned(), + NameTransform::ToLower => component.to_ascii_lowercase(), + NameTransform::ToUpper => component.to_ascii_uppercase(), + }) + .collect::>() + .join(sep) + } - pub fn name_underscored(&self) -> String { - self.name("_", NameTransform::None) - } + pub fn name_underscored(&self) -> String { + self.name("_", NameTransform::None) + } } diff --git a/src/rust/engine/options/src/id_tests.rs b/src/rust/engine/options/src/id_tests.rs index 60e0941aac1..9e73351cc16 100644 --- a/src/rust/engine/options/src/id_tests.rs +++ b/src/rust/engine/options/src/id_tests.rs @@ -6,50 +6,50 @@ use crate::option_id; #[test] fn test_option_id_global_switch() { - let option_id = option_id!(-'x', "bar", "baz"); - assert_eq!( - OptionId::new(Scope::Global, ["bar", "baz"].iter(), Some('x')).unwrap(), - option_id - ); - assert_eq!("GLOBAL", option_id.scope()); + let option_id = option_id!(-'x', "bar", "baz"); + assert_eq!( + OptionId::new(Scope::Global, ["bar", "baz"].iter(), Some('x')).unwrap(), + option_id + ); + assert_eq!("GLOBAL", option_id.scope()); } #[test] fn test_option_id_global() { - let option_id = option_id!("bar", "baz"); - assert_eq!( - OptionId::new(Scope::Global, ["bar", "baz"].iter(), None).unwrap(), - option_id - ); - assert_eq!("GLOBAL", option_id.scope()); + let option_id = option_id!("bar", "baz"); + assert_eq!( + OptionId::new(Scope::Global, ["bar", "baz"].iter(), None).unwrap(), + option_id + ); + assert_eq!("GLOBAL", option_id.scope()); } #[test] fn test_option_id_scope_switch() { - let option_id = option_id!(-'f', ["foo-bar"], "baz", "spam"); - assert_eq!( - OptionId::new( - Scope::Scope("foo-bar".to_owned()), - ["baz", "spam"].iter(), - Some('f') - ) - .unwrap(), - option_id - ); - assert_eq!("foo-bar", option_id.scope()); + let option_id = option_id!(-'f', ["foo-bar"], "baz", "spam"); + assert_eq!( + OptionId::new( + Scope::Scope("foo-bar".to_owned()), + ["baz", "spam"].iter(), + Some('f') + ) + .unwrap(), + option_id + ); + assert_eq!("foo-bar", option_id.scope()); } #[test] fn test_option_id_scope() { - let option_id = option_id!(["foo-bar"], "baz", "spam"); - assert_eq!( - OptionId::new( - Scope::Scope("foo-bar".to_owned()), - ["baz", "spam"].iter(), - None - ) - .unwrap(), - option_id - ); - assert_eq!("foo-bar", option_id.scope()); + let option_id = 
option_id!(["foo-bar"], "baz", "spam"); + assert_eq!( + OptionId::new( + Scope::Scope("foo-bar".to_owned()), + ["baz", "spam"].iter(), + None + ) + .unwrap(), + option_id + ); + assert_eq!("foo-bar", option_id.scope()); } diff --git a/src/rust/engine/options/src/lib.rs b/src/rust/engine/options/src/lib.rs index 09ea7d14105..48a2e4de082 100644 --- a/src/rust/engine/options/src/lib.rs +++ b/src/rust/engine/options/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -66,15 +66,15 @@ pub use types::OptionType; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum ListEditAction { - Replace, - Add, - Remove, + Replace, + Add, + Remove, } #[derive(Debug, Eq, PartialEq)] pub(crate) struct ListEdit { - pub action: ListEditAction, - pub items: Vec, + pub action: ListEditAction, + pub items: Vec, } /// @@ -84,237 +84,245 @@ pub(crate) struct ListEdit { /// Implementations should mimic the behavior of the equivalent python source. /// pub(crate) trait OptionsSource { - /// - /// Get a display version of the option `id` that most closely matches the syntax used to supply - /// the id at runtime. For example, an global option of "bob" would display as "--bob" for use in - /// flag based options and "BOB" in environment variable based options. - /// - fn display(&self, id: &OptionId) -> String; - - /// - /// Get the string option identified by `id` from this source. - /// Errors when this source has an option value for `id` but that value is not a string. - /// - fn get_string(&self, id: &OptionId) -> Result, String>; - - /// - /// Get the boolean option identified by `id` from this source. - /// Errors when this source has an option value for `id` but that value is not a boolean. - /// - fn get_bool(&self, id: &OptionId) -> Result, String>; - - /// - /// Get the int option identified by `id` from this source. - /// Errors when this source has an option value for `id` but that value is not an int. - /// - /// The default implementation looks for a string value for `id` and then attempts to parse it as - /// a int value. - /// - fn get_int(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_string(id)? { - value.parse().map(Some).map_err(|e| { - format!( - "Problem parsing {} value {} as an int value: {}", - self.display(id), - value, - e - ) - }) - } else { - Ok(None) + /// + /// Get a display version of the option `id` that most closely matches the syntax used to supply + /// the id at runtime. 
For example, an global option of "bob" would display as "--bob" for use in + /// flag based options and "BOB" in environment variable based options. + /// + fn display(&self, id: &OptionId) -> String; + + /// + /// Get the string option identified by `id` from this source. + /// Errors when this source has an option value for `id` but that value is not a string. + /// + fn get_string(&self, id: &OptionId) -> Result, String>; + + /// + /// Get the boolean option identified by `id` from this source. + /// Errors when this source has an option value for `id` but that value is not a boolean. + /// + fn get_bool(&self, id: &OptionId) -> Result, String>; + + /// + /// Get the int option identified by `id` from this source. + /// Errors when this source has an option value for `id` but that value is not an int. + /// + /// The default implementation looks for a string value for `id` and then attempts to parse it as + /// a int value. + /// + fn get_int(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_string(id)? { + value.parse().map(Some).map_err(|e| { + format!( + "Problem parsing {} value {} as an int value: {}", + self.display(id), + value, + e + ) + }) + } else { + Ok(None) + } } - } - - /// - /// Get the float option identified by `id` from this source. - /// Errors when this source has an option value for `id` but that value is not a float. - /// - /// The default implementation looks for a string value for `id` and then attempts to parse it as - /// a float value. - /// - fn get_float(&self, id: &OptionId) -> Result, String> { - if let Some(value) = self.get_string(id)? { - value.parse().map(Some).map_err(|e| { - format!( - "Problem parsing {} value {} as a float value: {}", - self.display(id), - value, - e - ) - }) - } else { - Ok(None) + + /// + /// Get the float option identified by `id` from this source. + /// Errors when this source has an option value for `id` but that value is not a float. + /// + /// The default implementation looks for a string value for `id` and then attempts to parse it as + /// a float value. + /// + fn get_float(&self, id: &OptionId) -> Result, String> { + if let Some(value) = self.get_string(id)? { + value.parse().map(Some).map_err(|e| { + format!( + "Problem parsing {} value {} as a float value: {}", + self.display(id), + value, + e + ) + }) + } else { + Ok(None) + } } - } - /// - /// Get the string list option identified by `id` from this source. - /// Errors when this source has an option value for `id` but that value is not a string list. - /// - fn get_string_list(&self, id: &OptionId) -> Result>>, String>; + /// + /// Get the string list option identified by `id` from this source. + /// Errors when this source has an option value for `id` but that value is not a string list. 
+ /// + fn get_string_list(&self, id: &OptionId) -> Result>>, String>; } #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] pub enum Source { - Flag, - Env, - Config, - Default, + Flag, + Env, + Config, + Default, } #[derive(Debug)] pub struct OptionValue { - pub source: Source, - pub value: T, + pub source: Source, + pub value: T, } impl Deref for OptionValue { - type Target = T; + type Target = T; - fn deref(&self) -> &Self::Target { - &self.value - } + fn deref(&self) -> &Self::Target { + &self.value + } } pub struct OptionParser { - sources: BTreeMap>, + sources: BTreeMap>, } impl OptionParser { - pub fn new(env: Env, args: Args) -> Result { - let mut sources: BTreeMap> = BTreeMap::new(); - sources.insert(Source::Env, Rc::new(env)); - sources.insert(Source::Flag, Rc::new(args)); - let mut parser = OptionParser { - sources: sources.clone(), - }; - - let config_path = BuildRoot::find()?.join("pants.toml"); - let repo_config_files = parser.parse_string_list( - &option_id!("pants", "config", "files"), - &[ - std::str::from_utf8(config_path.as_os_str().as_bytes()).map_err(|e| { - format!( - "Failed to decode build root path {}: {}", - config_path.display(), - e - ) - })?, - ], - )?; - let mut config = Config::merged(&repo_config_files)?; - sources.insert(Source::Config, Rc::new(config.clone())); - parser = OptionParser { - sources: sources.clone(), - }; - - if *parser.parse_bool(&option_id!("pantsrc"), true)? { - for rcfile in parser.parse_string_list( - &option_id!("pantsrc", "files"), - &["/etc/pantsrc", shellexpand::tilde("~/.pants.rc").as_ref()], - )? { - let rcfile_path = Path::new(&rcfile); - if rcfile_path.exists() { - let rc_config = Config::parse(rcfile_path)?; - config = config.merge(rc_config); + pub fn new(env: Env, args: Args) -> Result { + let mut sources: BTreeMap> = BTreeMap::new(); + sources.insert(Source::Env, Rc::new(env)); + sources.insert(Source::Flag, Rc::new(args)); + let mut parser = OptionParser { + sources: sources.clone(), + }; + + let config_path = BuildRoot::find()?.join("pants.toml"); + let repo_config_files = parser.parse_string_list( + &option_id!("pants", "config", "files"), + &[ + std::str::from_utf8(config_path.as_os_str().as_bytes()).map_err(|e| { + format!( + "Failed to decode build root path {}: {}", + config_path.display(), + e + ) + })?, + ], + )?; + let mut config = Config::merged(&repo_config_files)?; + sources.insert(Source::Config, Rc::new(config.clone())); + parser = OptionParser { + sources: sources.clone(), + }; + + if *parser.parse_bool(&option_id!("pantsrc"), true)? { + for rcfile in parser.parse_string_list( + &option_id!("pantsrc", "files"), + &["/etc/pantsrc", shellexpand::tilde("~/.pants.rc").as_ref()], + )? { + let rcfile_path = Path::new(&rcfile); + if rcfile_path.exists() { + let rc_config = Config::parse(rcfile_path)?; + config = config.merge(rc_config); + } + } } - } + sources.insert(Source::Config, Rc::new(config)); + Ok(OptionParser { sources }) } - sources.insert(Source::Config, Rc::new(config)); - Ok(OptionParser { sources }) - } - - pub fn parse_bool(&self, id: &OptionId, default: bool) -> Result, String> { - for (source_type, source) in self.sources.iter() { - if let Some(value) = source.get_bool(id)? { - return Ok(OptionValue { - source: *source_type, - value, - }); - } - } - Ok(OptionValue { - source: Source::Default, - value: default, - }) - } - - pub fn parse_int(&self, id: &OptionId, default: i64) -> Result, String> { - for (source_type, source) in self.sources.iter() { - if let Some(value) = source.get_int(id)? 
{ - return Ok(OptionValue { - source: *source_type, - value, - }); - } + + pub fn parse_bool(&self, id: &OptionId, default: bool) -> Result, String> { + for (source_type, source) in self.sources.iter() { + if let Some(value) = source.get_bool(id)? { + return Ok(OptionValue { + source: *source_type, + value, + }); + } + } + Ok(OptionValue { + source: Source::Default, + value: default, + }) } - Ok(OptionValue { - source: Source::Default, - value: default, - }) - } - - pub fn parse_float(&self, id: &OptionId, default: f64) -> Result, String> { - for (source_type, source) in self.sources.iter() { - if let Some(value) = source.get_float(id)? { - return Ok(OptionValue { - source: *source_type, - value, - }); - } + + pub fn parse_int(&self, id: &OptionId, default: i64) -> Result, String> { + for (source_type, source) in self.sources.iter() { + if let Some(value) = source.get_int(id)? { + return Ok(OptionValue { + source: *source_type, + value, + }); + } + } + Ok(OptionValue { + source: Source::Default, + value: default, + }) } - Ok(OptionValue { - source: Source::Default, - value: default, - }) - } - - pub fn parse_string(&self, id: &OptionId, default: &str) -> Result, String> { - for (source_type, source) in self.sources.iter() { - if let Some(value) = source.get_string(id)? { - return Ok(OptionValue { - source: *source_type, - value, - }); - } + + pub fn parse_float(&self, id: &OptionId, default: f64) -> Result, String> { + for (source_type, source) in self.sources.iter() { + if let Some(value) = source.get_float(id)? { + return Ok(OptionValue { + source: *source_type, + value, + }); + } + } + Ok(OptionValue { + source: Source::Default, + value: default, + }) } - Ok(OptionValue { - source: Source::Default, - value: default.to_string(), - }) - } - - pub fn parse_string_list(&self, id: &OptionId, default: &[&str]) -> Result, String> { - let mut list_edits = vec![]; - for (_, source) in self.sources.iter() { - if let Some(edits) = source.get_string_list(id)? { - list_edits.extend(edits); - } + + pub fn parse_string( + &self, + id: &OptionId, + default: &str, + ) -> Result, String> { + for (source_type, source) in self.sources.iter() { + if let Some(value) = source.get_string(id)? { + return Ok(OptionValue { + source: *source_type, + value, + }); + } + } + Ok(OptionValue { + source: Source::Default, + value: default.to_string(), + }) } - let mut string_list = default.iter().map(|s| s.to_string()).collect::>(); - for list_edit in list_edits { - match list_edit.action { - ListEditAction::Replace => string_list = list_edit.items, - ListEditAction::Add => string_list.extend(list_edit.items), - ListEditAction::Remove => { - let to_remove = list_edit.items.iter().collect::>(); - string_list = string_list - .iter() - .filter(|item| !to_remove.contains(item)) - .map(|s| s.to_owned()) - .collect::>(); + + pub fn parse_string_list( + &self, + id: &OptionId, + default: &[&str], + ) -> Result, String> { + let mut list_edits = vec![]; + for (_, source) in self.sources.iter() { + if let Some(edits) = source.get_string_list(id)? 
{ + list_edits.extend(edits); + } } - } + let mut string_list = default.iter().map(|s| s.to_string()).collect::>(); + for list_edit in list_edits { + match list_edit.action { + ListEditAction::Replace => string_list = list_edit.items, + ListEditAction::Add => string_list.extend(list_edit.items), + ListEditAction::Remove => { + let to_remove = list_edit.items.iter().collect::>(); + string_list = string_list + .iter() + .filter(|item| !to_remove.contains(item)) + .map(|s| s.to_owned()) + .collect::>(); + } + } + } + Ok(string_list) } - Ok(string_list) - } } pub fn render_choice(items: &[&str]) -> Option { - match items { - [] => None, - [this] => Some(this.to_string()), - [this, that] => Some(format!("{this} or {that}")), - [these @ .., that] => Some(format!("{} or {}", these.join(", "), that)), - } + match items { + [] => None, + [this] => Some(this.to_string()), + [this, that] => Some(format!("{this} or {that}")), + [these @ .., that] => Some(format!("{} or {}", these.join(", "), that)), + } } diff --git a/src/rust/engine/options/src/parse.rs b/src/rust/engine/options/src/parse.rs index 8fc8af7459e..8d5224b0243 100644 --- a/src/rust/engine/options/src/parse.rs +++ b/src/rust/engine/options/src/parse.rs @@ -119,88 +119,88 @@ peg::parser! { } mod err { - #[derive(Debug, Eq, PartialEq)] - pub(crate) struct ParseError { - template: String, - } - - impl ParseError { - pub(super) fn new>(template: S) -> ParseError { - let template_ref = template.as_ref(); - assert!( - template_ref.contains("{name}"), - "\ + #[derive(Debug, Eq, PartialEq)] + pub(crate) struct ParseError { + template: String, + } + + impl ParseError { + pub(super) fn new>(template: S) -> ParseError { + let template_ref = template.as_ref(); + assert!( + template_ref.contains("{name}"), + "\ Expected the template to contain at least one `{{name}}` placeholder, but found none: \ {template_ref}.\ " - ); - ParseError { - template: template_ref.to_owned(), - } - } + ); + ParseError { + template: template_ref.to_owned(), + } + } - pub(crate) fn render>(&self, name: S) -> String { - self.template.replace("{name}", name.as_ref()) + pub(crate) fn render>(&self, name: S) -> String { + self.template.replace("{name}", name.as_ref()) + } } - } } pub(crate) use err::ParseError; fn format_parse_error( - type_id: &str, - value: &str, - parse_error: peg::error::ParseError, + type_id: &str, + value: &str, + parse_error: peg::error::ParseError, ) -> ParseError { - let value_with_marker = value - .split('\n') - .enumerate() - .map(|(index, line)| (index + 1, line)) - .map(|(line_no, line)| { - if line_no == parse_error.location.line { - format!( - "{}:{}\n {}^", - line_no, - line, - "-".repeat(parse_error.location.column - 1) - ) - } else { - format!("{line_no}:{line}") - } - }) - .collect::>() - .join("\n"); - - let mut choices = parse_error.expected.tokens().collect::>(); - // N.B.: It appears to be the case that the peg parser parses alternatives concurrently and so - // the ordering of choices is observed to be unstable. As such sort them for consistent error - // messages. 
- choices.sort_unstable(); - - ParseError::new(format!( - "\ + let value_with_marker = value + .split('\n') + .enumerate() + .map(|(index, line)| (index + 1, line)) + .map(|(line_no, line)| { + if line_no == parse_error.location.line { + format!( + "{}:{}\n {}^", + line_no, + line, + "-".repeat(parse_error.location.column - 1) + ) + } else { + format!("{line_no}:{line}") + } + }) + .collect::>() + .join("\n"); + + let mut choices = parse_error.expected.tokens().collect::>(); + // N.B.: It appears to be the case that the peg parser parses alternatives concurrently and so + // the ordering of choices is observed to be unstable. As such sort them for consistent error + // messages. + choices.sort_unstable(); + + ParseError::new(format!( + "\ Problem parsing {{name}} {type_id} value:\n{value_with_marker}\nExpected {choices} at \ line {line} column {column}\ ", - type_id = type_id, - value_with_marker = value_with_marker, - choices = render_choice(choices.as_slice()).unwrap_or_else(|| "nothing".to_owned()), - line = parse_error.location.line, - column = parse_error.location.column, - )) + type_id = type_id, + value_with_marker = value_with_marker, + choices = render_choice(choices.as_slice()).unwrap_or_else(|| "nothing".to_owned()), + line = parse_error.location.line, + column = parse_error.location.column, + )) } pub(crate) fn parse_string_list(value: &str) -> Result>, ParseError> { - option_value_parser::string_list_edits(value) - .map_err(|e| format_parse_error("string list", value, e)) + option_value_parser::string_list_edits(value) + .map_err(|e| format_parse_error("string list", value, e)) } pub(crate) fn parse_bool(value: &str) -> Result { - match value.to_lowercase().as_str() { - "true" => Ok(true), - "false" => Ok(false), - _ => Err(ParseError::new(format!( - "Got '{value}' for {{name}}. Expected 'true' or 'false'." - ))), - } + match value.to_lowercase().as_str() { + "true" => Ok(true), + "false" => Ok(false), + _ => Err(ParseError::new(format!( + "Got '{value}' for {{name}}. Expected 'true' or 'false'." + ))), + } } diff --git a/src/rust/engine/options/src/parse_tests.rs b/src/rust/engine/options/src/parse_tests.rs index d2135cab0f9..30a1e3891b8 100644 --- a/src/rust/engine/options/src/parse_tests.rs +++ b/src/rust/engine/options/src/parse_tests.rs @@ -6,198 +6,198 @@ use crate::{ListEdit, ListEditAction}; #[test] fn test_parse_bool() { - assert_eq!(Ok(true), parse_bool("true")); - assert_eq!(Ok(true), parse_bool("True")); - assert_eq!(Ok(true), parse_bool("TRUE")); - - assert_eq!(Ok(false), parse_bool("false")); - assert_eq!(Ok(false), parse_bool("False")); - assert_eq!(Ok(false), parse_bool("FALSE")); - - assert_eq!( - "Got '1' for foo. Expected 'true' or 'false'.".to_owned(), - parse_bool("1").unwrap_err().render("foo") - ) + assert_eq!(Ok(true), parse_bool("true")); + assert_eq!(Ok(true), parse_bool("True")); + assert_eq!(Ok(true), parse_bool("TRUE")); + + assert_eq!(Ok(false), parse_bool("false")); + assert_eq!(Ok(false), parse_bool("False")); + assert_eq!(Ok(false), parse_bool("FALSE")); + + assert_eq!( + "Got '1' for foo. 
Expected 'true' or 'false'.".to_owned(), + parse_bool("1").unwrap_err().render("foo") + ) } #[test] fn test_parse_string_list_empty() { - assert!(parse_string_list("").unwrap().is_empty()); + assert!(parse_string_list("").unwrap().is_empty()); } fn list_edit>( - action: ListEditAction, - items: I, + action: ListEditAction, + items: I, ) -> ListEdit { - ListEdit { - action, - items: items.into_iter().map(str::to_owned).collect(), - } + ListEdit { + action, + items: items.into_iter().map(str::to_owned).collect(), + } } const EMPTY_STRING_LIST: [&str; 0] = []; #[test] fn test_parse_string_list_replace() { - assert_eq!( - vec![list_edit(ListEditAction::Replace, EMPTY_STRING_LIST)], - parse_string_list("[]").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo"])], - parse_string_list("['foo']").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], - parse_string_list("['foo','bar']").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, EMPTY_STRING_LIST)], + parse_string_list("[]").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo"])], + parse_string_list("['foo']").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], + parse_string_list("['foo','bar']").unwrap() + ); } #[test] fn test_parse_string_list_add() { - assert_eq!( - vec![list_edit(ListEditAction::Add, EMPTY_STRING_LIST)], - parse_string_list("+[]").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Add, EMPTY_STRING_LIST)], + parse_string_list("+[]").unwrap() + ); } #[test] fn test_parse_string_list_remove() { - assert_eq!( - vec![list_edit(ListEditAction::Remove, EMPTY_STRING_LIST)], - parse_string_list("-[]").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Remove, EMPTY_STRING_LIST)], + parse_string_list("-[]").unwrap() + ); } #[test] fn test_parse_string_list_edits() { - assert_eq!( - vec![ - list_edit(ListEditAction::Remove, ["foo", "bar"]), - list_edit(ListEditAction::Add, ["baz"]), - list_edit(ListEditAction::Remove, EMPTY_STRING_LIST), - ], - parse_string_list("-['foo', 'bar'],+['baz'],-[]").unwrap() - ); + assert_eq!( + vec![ + list_edit(ListEditAction::Remove, ["foo", "bar"]), + list_edit(ListEditAction::Add, ["baz"]), + list_edit(ListEditAction::Remove, EMPTY_STRING_LIST), + ], + parse_string_list("-['foo', 'bar'],+['baz'],-[]").unwrap() + ); } #[test] fn test_parse_string_list_edits_whitespace() { - assert_eq!( - vec![ - list_edit(ListEditAction::Remove, ["foo"]), - list_edit(ListEditAction::Add, ["bar"]), - ], - parse_string_list(" - [ 'foo' , ] , + [ 'bar' ] ").unwrap() - ); + assert_eq!( + vec![ + list_edit(ListEditAction::Remove, ["foo"]), + list_edit(ListEditAction::Add, ["bar"]), + ], + parse_string_list(" - [ 'foo' , ] , + [ 'bar' ] ").unwrap() + ); } #[test] fn test_parse_string_list_implicit_add() { - assert_eq!( - vec![list_edit(ListEditAction::Add, vec!["foo"])], - parse_string_list("foo").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Add, vec!["foo bar"])], - parse_string_list("foo bar").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Add, ["--bar"])], - parse_string_list("--bar").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Add, vec!["foo"])], + parse_string_list("foo").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Add, vec!["foo bar"])], + parse_string_list("foo bar").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Add, ["--bar"])], + parse_string_list("--bar").unwrap() + ); } 
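The tests above exercise the parse side of list options; the short sketch below shows how a parsed sequence of list edits could be folded into a final option value, assuming `ListEdit`/`ListEditAction` have the shapes used in these tests. `apply_edits` and the local type definitions are hypothetical stand-ins for illustration only, not APIs of the options crate.

// Hedged sketch: folding parsed list edits (like those produced by `parse_string_list`)
// into a final value, mirroring the Replace/Add/Remove merge behaviour shown earlier.
// `ListEdit`, `ListEditAction`, and `apply_edits` are local illustrations, not the
// crate's own definitions.
#[derive(Debug, Clone, PartialEq)]
enum ListEditAction {
    Replace,
    Add,
    Remove,
}

#[derive(Debug, Clone, PartialEq)]
struct ListEdit {
    action: ListEditAction,
    items: Vec<String>,
}

fn apply_edits(default: &[&str], edits: &[ListEdit]) -> Vec<String> {
    // Start from the default value, then apply each edit in order.
    let mut value: Vec<String> = default.iter().map(|s| s.to_string()).collect();
    for edit in edits {
        match edit.action {
            ListEditAction::Replace => value = edit.items.clone(),
            ListEditAction::Add => value.extend(edit.items.iter().cloned()),
            ListEditAction::Remove => value.retain(|item| !edit.items.contains(item)),
        }
    }
    value
}

fn main() {
    // Roughly what parsing "-['foo'],+['baz']" against a default of ['foo', 'bar'] would yield.
    let edits = vec![
        ListEdit {
            action: ListEditAction::Remove,
            items: vec!["foo".to_string()],
        },
        ListEdit {
            action: ListEditAction::Add,
            items: vec!["baz".to_string()],
        },
    ];
    assert_eq!(apply_edits(&["foo", "bar"], &edits), vec!["bar", "baz"]);
}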
#[test] fn test_parse_string_list_quoted_chars() { - assert_eq!( - vec![list_edit(ListEditAction::Add, vec!["[]"])], - parse_string_list(r"\[]").unwrap(), - "Expected an implicit add of the literal string `[]` via an escaped opening `[`." - ); - assert_eq!( - vec![list_edit(ListEditAction::Add, vec![" "])], - parse_string_list(r"\ ").unwrap(), - "Expected an implicit add of the literal string ` `." - ); - assert_eq!( - vec![list_edit(ListEditAction::Add, vec!["+"])], - parse_string_list(r"\+").unwrap(), - "Expected an implicit add of the literal string `+`." - ); - assert_eq!( - vec![list_edit(ListEditAction::Add, vec!["-"])], - parse_string_list(r"\-").unwrap(), - "Expected an implicit add of the literal string `-`." - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, vec!["'foo", r"\"])], - parse_string_list(r"['\'foo', '\\']").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Add, vec!["[]"])], + parse_string_list(r"\[]").unwrap(), + "Expected an implicit add of the literal string `[]` via an escaped opening `[`." + ); + assert_eq!( + vec![list_edit(ListEditAction::Add, vec![" "])], + parse_string_list(r"\ ").unwrap(), + "Expected an implicit add of the literal string ` `." + ); + assert_eq!( + vec![list_edit(ListEditAction::Add, vec!["+"])], + parse_string_list(r"\+").unwrap(), + "Expected an implicit add of the literal string `+`." + ); + assert_eq!( + vec![list_edit(ListEditAction::Add, vec!["-"])], + parse_string_list(r"\-").unwrap(), + "Expected an implicit add of the literal string `-`." + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, vec!["'foo", r"\"])], + parse_string_list(r"['\'foo', '\\']").unwrap() + ); } #[test] fn test_parse_string_list_quote_forms() { - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo"])], - parse_string_list(r#"["foo"]"#).unwrap(), - "Expected double quotes to work." - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], - parse_string_list(r#"["foo", 'bar']"#).unwrap(), - "Expected mixed quote forms to work." - ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo"])], + parse_string_list(r#"["foo"]"#).unwrap(), + "Expected double quotes to work." + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], + parse_string_list(r#"["foo", 'bar']"#).unwrap(), + "Expected mixed quote forms to work." 
+ ); } #[test] fn test_parse_string_list_trailing_comma() { - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo"])], - parse_string_list("['foo',]").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], - parse_string_list("['foo','bar',]").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo"])], + parse_string_list("['foo',]").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], + parse_string_list("['foo','bar',]").unwrap() + ); } #[test] fn test_parse_string_list_whitespace() { - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo"])], - parse_string_list(" [ 'foo' ] ").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], - parse_string_list(" [ 'foo' , 'bar' , ] ").unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo"])], + parse_string_list(" [ 'foo' ] ").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], + parse_string_list(" [ 'foo' , 'bar' , ] ").unwrap() + ); } #[test] fn test_parse_string_list_tuple() { - assert_eq!( - vec![list_edit(ListEditAction::Replace, EMPTY_STRING_LIST)], - parse_string_list("()").unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo"])], - parse_string_list(r#"("foo")"#).unwrap() - ); - assert_eq!( - vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], - parse_string_list(r#" ('foo', "bar",)"#).unwrap() - ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, EMPTY_STRING_LIST)], + parse_string_list("()").unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo"])], + parse_string_list(r#"("foo")"#).unwrap() + ); + assert_eq!( + vec![list_edit(ListEditAction::Replace, ["foo", "bar"])], + parse_string_list(r#" ('foo', "bar",)"#).unwrap() + ); } #[test] fn test_parse_string_list_error_formatting() { - let bad_input = "\ + let bad_input = "\ -['/etc/hosts'], ?(\"/dev/null\") "; - let expected_error_msg = "\ + let expected_error_msg = "\ Problem parsing foo string list value: 1:-['/etc/hosts'], 2: ?(\"/dev/null\") @@ -205,9 +205,9 @@ Problem parsing foo string list value: 3: Expected an optional list edit action of '+' indicating `add` \ or '-' indicating `remove` at line 2 column 10" - .to_owned(); - assert_eq!( - expected_error_msg, - parse_string_list(bad_input).unwrap_err().render("foo") - ) + .to_owned(); + assert_eq!( + expected_error_msg, + parse_string_list(bad_input).unwrap_err().render("foo") + ) } diff --git a/src/rust/engine/options/src/types.rs b/src/rust/engine/options/src/types.rs index aa97c717cfa..066bc910b1b 100644 --- a/src/rust/engine/options/src/types.rs +++ b/src/rust/engine/options/src/types.rs @@ -6,46 +6,46 @@ /// Currently only used for `pantsd` fingerprinting, which is defined in Rust. Options in general /// are registered in Python (see in particular `global_options.py`). pub enum OptionType { - Bool(bool), - Int(i64), - Float(f64), - String(String), - StringList(Vec), - // NB: Notably missing is `Dict`: but that type is not yet supported by the Rust parser. + Bool(bool), + Int(i64), + Float(f64), + String(String), + StringList(Vec), + // NB: Notably missing is `Dict`: but that type is not yet supported by the Rust parser. 
} impl From for OptionType { - fn from(v: bool) -> Self { - OptionType::Bool(v) - } + fn from(v: bool) -> Self { + OptionType::Bool(v) + } } impl From for OptionType { - fn from(v: i64) -> Self { - OptionType::Int(v) - } + fn from(v: i64) -> Self { + OptionType::Int(v) + } } impl From for OptionType { - fn from(v: f64) -> Self { - OptionType::Float(v) - } + fn from(v: f64) -> Self { + OptionType::Float(v) + } } impl From<&str> for OptionType { - fn from(v: &str) -> Self { - OptionType::String(v.to_owned()) - } + fn from(v: &str) -> Self { + OptionType::String(v.to_owned()) + } } impl From for OptionType { - fn from(v: String) -> Self { - OptionType::String(v) - } + fn from(v: String) -> Self { + OptionType::String(v) + } } impl From> for OptionType { - fn from(v: Vec<&str>) -> Self { - OptionType::StringList(v.into_iter().map(|s| s.to_owned()).collect()) - } + fn from(v: Vec<&str>) -> Self { + OptionType::StringList(v.into_iter().map(|s| s.to_owned()).collect()) + } } diff --git a/src/rust/engine/pantsd/src/lib.rs b/src/rust/engine/pantsd/src/lib.rs index 22035b84c86..89104c75ea4 100644 --- a/src/rust/engine/pantsd/src/lib.rs +++ b/src/rust/engine/pantsd/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -40,133 +40,131 @@ use sha2::{Digest, Sha256}; use sysinfo::{ProcessExt, ProcessStatus, System, SystemExt}; pub struct ConnectionSettings { - pub port: u16, - pub timeout_limit: f64, - pub dynamic_ui: bool, + pub port: u16, + pub timeout_limit: f64, + pub dynamic_ui: bool, } impl ConnectionSettings { - pub fn new(port: u16) -> ConnectionSettings { - ConnectionSettings { - port, - timeout_limit: 60.0, - dynamic_ui: true, + pub fn new(port: u16) -> ConnectionSettings { + ConnectionSettings { + port, + timeout_limit: 60.0, + dynamic_ui: true, + } } - } } pub(crate) struct Metadata { - metadata_dir: PathBuf, + metadata_dir: PathBuf, } impl Metadata { - pub(crate) fn mount>(directory: P) -> Result { - let info = uname::uname().map_err(|e| format!("{e}"))?; - let host_hash = Sha256::new() - .chain(&info.sysname) - .chain(&info.nodename) - .chain(&info.release) - .chain(&info.version) - .chain(&info.machine) - .finalize(); + pub(crate) fn mount>(directory: P) -> Result { + let info = uname::uname().map_err(|e| format!("{e}"))?; + let host_hash = Sha256::new() + .chain(&info.sysname) + .chain(&info.nodename) + .chain(&info.release) + .chain(&info.version) + .chain(&info.machine) + .finalize(); - const HOST_FINGERPRINT_LENGTH: usize = 6; - let hex_digest = hex::encode(&host_hash[..HOST_FINGERPRINT_LENGTH]); + const HOST_FINGERPRINT_LENGTH: usize = 6; + let hex_digest = hex::encode(&host_hash[..HOST_FINGERPRINT_LENGTH]); - let metadata_dir = directory.as_ref().join(hex_digest).join("pantsd"); - if metadata_dir.is_dir() { - Ok(Metadata { metadata_dir }) - } else { - Err(format!( - "There is no pantsd metadata at {metadata_dir}.", - metadata_dir = metadata_dir.display() - )) + let metadata_dir = directory.as_ref().join(hex_digest).join("pantsd"); + if metadata_dir.is_dir() { + Ok(Metadata { metadata_dir }) + } else { + Err(format!( + "There is no pantsd metadata at {metadata_dir}.", + metadata_dir = metadata_dir.display() + )) + } } - } - fn pid(&self) -> Result { - self - .read_metadata("pid") - .and_then(|(pid_metadata_path, value)| { - value - .parse() - .map(|pid| { - debug!( - "Parsed pid {pid} from {pid_metadata_path}.", - pid = pid, - pid_metadata_path = pid_metadata_path.display() - ); - pid - }) - .map_err(|e| { - format!( - "Failed to parse pantsd pid from {pid_metadata_path}: {err}", - pid_metadata_path = pid_metadata_path.display(), - err = e - ) - }) - }) - } + fn pid(&self) -> Result { + self.read_metadata("pid") + .and_then(|(pid_metadata_path, value)| { + value + .parse() + .map(|pid| { + debug!( + "Parsed pid {pid} from {pid_metadata_path}.", + pid = pid, + pid_metadata_path = pid_metadata_path.display() + ); + pid + }) + .map_err(|e| { + format!( + "Failed to parse pantsd pid from {pid_metadata_path}: {err}", + pid_metadata_path = pid_metadata_path.display(), + err = e + ) + }) + }) + } - fn process_name(&self) -> Result { - self.read_metadata("process_name").map(|(_, value)| value) - } + fn process_name(&self) -> Result { + self.read_metadata("process_name").map(|(_, value)| value) + } - pub(crate) fn port(&self) -> Result { - self - .read_metadata("socket") - .and_then(|(socket_metadata_path, value)| { - value - .parse() - .map(|port| { - debug!( - "Parsed port {port} from {socket_metadata_path}.", - port = port, - socket_metadata_path = socket_metadata_path.display() - ); - port - }) - .map_err(|e| { - format!( - "Failed to parse pantsd port from {socket_metadata_path}: {err}", - socket_metadata_path = 
&socket_metadata_path.display(), - err = e - ) - }) - }) - } + pub(crate) fn port(&self) -> Result { + self.read_metadata("socket") + .and_then(|(socket_metadata_path, value)| { + value + .parse() + .map(|port| { + debug!( + "Parsed port {port} from {socket_metadata_path}.", + port = port, + socket_metadata_path = socket_metadata_path.display() + ); + port + }) + .map_err(|e| { + format!( + "Failed to parse pantsd port from {socket_metadata_path}: {err}", + socket_metadata_path = &socket_metadata_path.display(), + err = e + ) + }) + }) + } - fn fingerprint(&self) -> Result { - self.read_metadata("fingerprint").map(|(_, value)| value) - } + fn fingerprint(&self) -> Result { + self.read_metadata("fingerprint").map(|(_, value)| value) + } - fn read_metadata(&self, name: &str) -> Result<(PathBuf, String), String> { - let metadata_path = self.metadata_dir.join(name); - fs::read_to_string(&metadata_path) - .map_err(|e| { - format!( - "Failed to read {name} from {metadata_path}: {err}", - name = name, - metadata_path = &metadata_path.display(), - err = e - ) - }) - .map(|value| (metadata_path, value)) - } + fn read_metadata(&self, name: &str) -> Result<(PathBuf, String), String> { + let metadata_path = self.metadata_dir.join(name); + fs::read_to_string(&metadata_path) + .map_err(|e| { + format!( + "Failed to read {name} from {metadata_path}: {err}", + name = name, + metadata_path = &metadata_path.display(), + err = e + ) + }) + .map(|value| (metadata_path, value)) + } } pub struct FingerprintedOption { - pub id: OptionId, - pub option_type: OptionType, + pub id: OptionId, + pub option_type: OptionType, } impl FingerprintedOption { - pub fn new(id: OptionId, option_type: impl Into) -> Self { - Self { - id, - option_type: option_type.into(), + pub fn new(id: OptionId, option_type: impl Into) -> Self { + Self { + id, + option_type: option_type.into(), + } } - } } type Fingerprint = String; @@ -174,117 +172,118 @@ type Fingerprint = String; /// If there is a live `pantsd` process for a valid fingerprint in the given build root, return the /// ConnectionSettings to use to connect to it. 
pub fn find_pantsd( - build_root: &BuildRoot, - options_parser: &OptionParser, + build_root: &BuildRoot, + options_parser: &OptionParser, ) -> Result { - let pants_subprocessdir = option_id!("pants", "subprocessdir"); - let option_value = options_parser.parse_string( - &pants_subprocessdir, - Path::new(".pants.d").join("pids").to_str().unwrap(), - )?; - let metadata_dir = { - let path = PathBuf::from(&option_value.value); - if path.is_absolute() { - path - } else { - match build_root.join(&path) { - p if p.is_absolute() => p, - p => p.canonicalize().map_err(|e| { - format!( + let pants_subprocessdir = option_id!("pants", "subprocessdir"); + let option_value = options_parser.parse_string( + &pants_subprocessdir, + Path::new(".pants.d").join("pids").to_str().unwrap(), + )?; + let metadata_dir = { + let path = PathBuf::from(&option_value.value); + if path.is_absolute() { + path + } else { + match build_root.join(&path) { + p if p.is_absolute() => p, + p => p.canonicalize().map_err(|e| { + format!( "Failed to resolve relative pants subprocessdir specified via {:?} as {}: {}", option_value, path.display(), e ) - })?, - } - } - }; - debug!( - "\ + })?, + } + } + }; + debug!( + "\ Looking for pantsd metadata in {metadata_dir} as specified by {option} = {value} via \ {source:?}.\ ", - metadata_dir = metadata_dir.display(), - option = pants_subprocessdir, - value = option_value.value, - source = option_value.source - ); - let port = probe(build_root, &metadata_dir, options_parser)?; - let mut pantsd_settings = ConnectionSettings::new(port); - pantsd_settings.timeout_limit = options_parser - .parse_float( - &option_id!("pantsd", "timeout", "when", "multiple", "invocations"), - pantsd_settings.timeout_limit, - )? - .value; - pantsd_settings.dynamic_ui = options_parser - .parse_bool(&option_id!("dynamic", "ui"), pantsd_settings.dynamic_ui)? - .value; - Ok(pantsd_settings) + metadata_dir = metadata_dir.display(), + option = pants_subprocessdir, + value = option_value.value, + source = option_value.source + ); + let port = probe(build_root, &metadata_dir, options_parser)?; + let mut pantsd_settings = ConnectionSettings::new(port); + pantsd_settings.timeout_limit = options_parser + .parse_float( + &option_id!("pantsd", "timeout", "when", "multiple", "invocations"), + pantsd_settings.timeout_limit, + )? + .value; + pantsd_settings.dynamic_ui = options_parser + .parse_bool(&option_id!("dynamic", "ui"), pantsd_settings.dynamic_ui)? + .value; + Ok(pantsd_settings) } pub(crate) fn probe( - build_root: &BuildRoot, - metadata_dir: &Path, - options_parser: &OptionParser, + build_root: &BuildRoot, + metadata_dir: &Path, + options_parser: &OptionParser, ) -> Result { - let pantsd_metadata = Metadata::mount(metadata_dir)?; + let pantsd_metadata = Metadata::mount(metadata_dir)?; - // Grab the purported port early. If we can't get that, then none of the following checks - // are useful. - let port = pantsd_metadata.port()?; + // Grab the purported port early. If we can't get that, then none of the following checks + // are useful. + let port = pantsd_metadata.port()?; - let expected_fingerprint = pantsd_metadata.fingerprint()?; - let actual_fingerprint = fingerprint_compute(build_root, options_parser)?; - if expected_fingerprint != actual_fingerprint { - return Err(format!( - "Fingerprint mismatched: {expected_fingerprint} vs {actual_fingerprint}." 
- )); - } + let expected_fingerprint = pantsd_metadata.fingerprint()?; + let actual_fingerprint = fingerprint_compute(build_root, options_parser)?; + if expected_fingerprint != actual_fingerprint { + return Err(format!( + "Fingerprint mismatched: {expected_fingerprint} vs {actual_fingerprint}." + )); + } - let pid = pantsd_metadata.pid()?; - let mut system = System::new(); - system.refresh_process(pid); - // Check that the recorded pid is a live process. - match system.process(pid) { - None => Err(format!( - "\ + let pid = pantsd_metadata.pid()?; + let mut system = System::new(); + system.refresh_process(pid); + // Check that the recorded pid is a live process. + match system.process(pid) { + None => Err(format!( + "\ The last pid for the pantsd controlling {build_root} was {pid} but it no longer appears \ to be running.\ ", - build_root = build_root.display(), - pid = pid, - )), - Some(process) => { - // Check that the live process is in fact the expected pantsd process (i.e.: pids have not - // wrapped). - if std::mem::discriminant(&ProcessStatus::Zombie) == std::mem::discriminant(&process.status()) - { - return Err(format!("The pantsd at pid {pid} is a zombie.")); - } - let expected_process_name_prefix = pantsd_metadata.process_name()?; - let actual_argv0 = { - let actual_command_line = process.cmd(); - if actual_command_line.is_empty() { - process.name() - } else { - &actual_command_line[0] - } - }; - // It appears that the daemon only records a prefix of the process name, so we just check that. - if actual_argv0.starts_with(&expected_process_name_prefix) { - Ok(port) - } else { - Err(format!( - "\ + build_root = build_root.display(), + pid = pid, + )), + Some(process) => { + // Check that the live process is in fact the expected pantsd process (i.e.: pids have not + // wrapped). + if std::mem::discriminant(&ProcessStatus::Zombie) + == std::mem::discriminant(&process.status()) + { + return Err(format!("The pantsd at pid {pid} is a zombie.")); + } + let expected_process_name_prefix = pantsd_metadata.process_name()?; + let actual_argv0 = { + let actual_command_line = process.cmd(); + if actual_command_line.is_empty() { + process.name() + } else { + &actual_command_line[0] + } + }; + // It appears that the daemon only records a prefix of the process name, so we just check that. + if actual_argv0.starts_with(&expected_process_name_prefix) { + Ok(port) + } else { + Err(format!( + "\ The process with pid {pid} is not pantsd. Expected a process name matching \ {expected_process_name_prefix} but is {actual_argv0}.\ " - )) - } + )) + } + } } - } } /// Computes a fingerprint of the relevant options for `pantsd` (see `fingerprinted_options`). @@ -296,42 +295,42 @@ pub(crate) fn probe( /// TODO: Eventually, the Python `class ProcessManager` should be replaced with the `Metadata` /// struct in this crate, rather than having two codepaths for the reading/writing of metadata. pub fn fingerprint_compute( - build_root: &BuildRoot, - options_parser: &OptionParser, + build_root: &BuildRoot, + options_parser: &OptionParser, ) -> Result { - let mut hasher = Sha256::new(); - for option in fingerprinted_options(build_root)? { - // TODO: As the Rust options crate expands, more of this logic should be included on - // `OptionParser` or on `OptionValue`. 
- match option.option_type { - OptionType::Bool(default) => { - let val = options_parser.parse_bool(&option.id, default)?; - let byte = if val.value { 1_u8 } else { 0_u8 }; - Digest::update(&mut hasher, [byte]); - } - OptionType::Int(default) => { - let val = options_parser.parse_int(&option.id, default)?; - Digest::update(&mut hasher, val.value.to_be_bytes()); - } - OptionType::Float(default) => { - let val = options_parser.parse_float(&option.id, default)?; - Digest::update(&mut hasher, val.value.to_be_bytes()); - } - OptionType::String(default) => { - let val = options_parser.parse_string(&option.id, &default)?; - Digest::update(&mut hasher, val.value.as_bytes()); - } - OptionType::StringList(default) => { - let default = default.iter().map(|s| s.as_str()).collect::>(); - let val = options_parser.parse_string_list(&option.id, &default)?; - for item in val { - Digest::update(&mut hasher, item.as_bytes()); + let mut hasher = Sha256::new(); + for option in fingerprinted_options(build_root)? { + // TODO: As the Rust options crate expands, more of this logic should be included on + // `OptionParser` or on `OptionValue`. + match option.option_type { + OptionType::Bool(default) => { + let val = options_parser.parse_bool(&option.id, default)?; + let byte = if val.value { 1_u8 } else { 0_u8 }; + Digest::update(&mut hasher, [byte]); + } + OptionType::Int(default) => { + let val = options_parser.parse_int(&option.id, default)?; + Digest::update(&mut hasher, val.value.to_be_bytes()); + } + OptionType::Float(default) => { + let val = options_parser.parse_float(&option.id, default)?; + Digest::update(&mut hasher, val.value.to_be_bytes()); + } + OptionType::String(default) => { + let val = options_parser.parse_string(&option.id, &default)?; + Digest::update(&mut hasher, val.value.as_bytes()); + } + OptionType::StringList(default) => { + let default = default.iter().map(|s| s.as_str()).collect::>(); + let val = options_parser.parse_string_list(&option.id, &default)?; + for item in val { + Digest::update(&mut hasher, item.as_bytes()); + } + } } - } } - } - let hash = hasher.finalize(); - Ok(hex::encode(hash)) + let hash = hasher.finalize(); + Ok(hex::encode(hash)) } /// The options which are fingerprinted to decide when to restart `pantsd`. @@ -345,40 +344,40 @@ pub fn fingerprint_compute( /// options (because we have redundancy of options definitions between `global_options.py` and what /// the Rust native client uses). pub fn fingerprinted_options(build_root: &BuildRoot) -> Result, String> { - let dot_pants_dot_d_subdir = |subdir: &str| -> Result { - build_root - .join(".pants.d") - .join(subdir) - .into_os_string() - .into_string() - .map_err(|e| format!("Build root was not UTF8: {e:?}")) - }; + let dot_pants_dot_d_subdir = |subdir: &str| -> Result { + build_root + .join(".pants.d") + .join(subdir) + .into_os_string() + .into_string() + .map_err(|e| format!("Build root was not UTF8: {e:?}")) + }; - Ok(vec![ - FingerprintedOption::new(option_id!(-'l', "level"), "info"), - FingerprintedOption::new(option_id!("show", "log", "target"), false), - // TODO: No support for parsing dictionaries, so not fingerprinted. But should be. See #19832. 
- // FingerprintedOption::new(option_id!("log", "levels", "by", "target"), ...), - FingerprintedOption::new(option_id!("log", "show", "rust", "3rdparty"), false), - FingerprintedOption::new(option_id!("ignore", "warnings"), vec![]), - FingerprintedOption::new( - option_id!("pants", "version"), - include_str!("../../VERSION"), - ), - FingerprintedOption::new( - option_id!("pants", "workdir"), - dot_pants_dot_d_subdir("workdir")?, - ), - // Optional strings are not currently supported by the Rust options parser, but we're only - // using these for fingerprinting, and so can use a placeholder default. - FingerprintedOption::new(option_id!("pants", "physical", "workdir", "base"), ""), - FingerprintedOption::new( - option_id!("pants", "subprocessdir"), - dot_pants_dot_d_subdir("pids")?, - ), - FingerprintedOption::new(option_id!("logdir"), ""), - FingerprintedOption::new(option_id!("pantsd"), true), - FingerprintedOption::new(option_id!("pantsd", "pailgun", "port"), 0), - FingerprintedOption::new(option_id!("pantsd", "invalidation", "globs"), vec![]), - ]) + Ok(vec![ + FingerprintedOption::new(option_id!(-'l', "level"), "info"), + FingerprintedOption::new(option_id!("show", "log", "target"), false), + // TODO: No support for parsing dictionaries, so not fingerprinted. But should be. See #19832. + // FingerprintedOption::new(option_id!("log", "levels", "by", "target"), ...), + FingerprintedOption::new(option_id!("log", "show", "rust", "3rdparty"), false), + FingerprintedOption::new(option_id!("ignore", "warnings"), vec![]), + FingerprintedOption::new( + option_id!("pants", "version"), + include_str!("../../VERSION"), + ), + FingerprintedOption::new( + option_id!("pants", "workdir"), + dot_pants_dot_d_subdir("workdir")?, + ), + // Optional strings are not currently supported by the Rust options parser, but we're only + // using these for fingerprinting, and so can use a placeholder default. 
+ FingerprintedOption::new(option_id!("pants", "physical", "workdir", "base"), ""), + FingerprintedOption::new( + option_id!("pants", "subprocessdir"), + dot_pants_dot_d_subdir("pids")?, + ), + FingerprintedOption::new(option_id!("logdir"), ""), + FingerprintedOption::new(option_id!("pantsd"), true), + FingerprintedOption::new(option_id!("pantsd", "pailgun", "port"), 0), + FingerprintedOption::new(option_id!("pantsd", "invalidation", "globs"), vec![]), + ]) } diff --git a/src/rust/engine/pantsd/src/pantsd_testing.rs b/src/rust/engine/pantsd/src/pantsd_testing.rs index df5d7197282..696cd33d913 100644 --- a/src/rust/engine/pantsd/src/pantsd_testing.rs +++ b/src/rust/engine/pantsd/src/pantsd_testing.rs @@ -11,46 +11,45 @@ use tempfile::TempDir; use options::{Args, BuildRoot, Env, OptionParser}; pub fn launch_pantsd() -> (BuildRoot, OptionParser, TempDir) { - let build_root = BuildRoot::find() - .expect("Expected test to be run inside the Pants repo but no build root was detected."); - let pants_subprocessdir = TempDir::new().unwrap(); + let build_root = BuildRoot::find() + .expect("Expected test to be run inside the Pants repo but no build root was detected."); + let pants_subprocessdir = TempDir::new().unwrap(); - let args = vec![ - "--pants-config-files=[]".to_owned(), - "--no-pantsrc".to_owned(), - "--pantsd".to_owned(), - format!( - "--pants-subprocessdir={}", - pants_subprocessdir.path().display() - ), - "-V".to_owned(), - ]; - let options_parser = - OptionParser::new(Env::new(HashMap::new()), Args::new(args.clone())).unwrap(); + let args = vec![ + "--pants-config-files=[]".to_owned(), + "--no-pantsrc".to_owned(), + "--pantsd".to_owned(), + format!( + "--pants-subprocessdir={}", + pants_subprocessdir.path().display() + ), + "-V".to_owned(), + ]; + let options_parser = + OptionParser::new(Env::new(HashMap::new()), Args::new(args.clone())).unwrap(); - let mut cmd = Command::new(build_root.join("pants")); - cmd - .current_dir(build_root.as_path()) - .args(args) - .env_clear() - .envs(std::env::vars().filter(|(k, _v)| !k.starts_with("PANTS_"))) - .stderr(Stdio::inherit()); + let mut cmd = Command::new(build_root.join("pants")); + cmd.current_dir(build_root.as_path()) + .args(args) + .env_clear() + .envs(std::env::vars().filter(|(k, _v)| !k.starts_with("PANTS_"))) + .stderr(Stdio::inherit()); - let result = cmd - .output() - .map_err(|e| format!("Problem running command {cmd:?}: {e}")) - .unwrap(); - assert_eq!(Some(0), result.status.code()); - assert_eq!( - fs::read_to_string( - build_root - .join("src") - .join("python") - .join("pants") - .join("VERSION") - ) - .unwrap(), - from_utf8(result.stdout.as_slice()).unwrap() - ); - (build_root, options_parser, pants_subprocessdir) + let result = cmd + .output() + .map_err(|e| format!("Problem running command {cmd:?}: {e}")) + .unwrap(); + assert_eq!(Some(0), result.status.code()); + assert_eq!( + fs::read_to_string( + build_root + .join("src") + .join("python") + .join("pants") + .join("VERSION") + ) + .unwrap(), + from_utf8(result.stdout.as_slice()).unwrap() + ); + (build_root, options_parser, pants_subprocessdir) } diff --git a/src/rust/engine/pantsd/src/pantsd_tests.rs b/src/rust/engine/pantsd/src/pantsd_tests.rs index 7bf4993d129..7bc4e12a551 100644 --- a/src/rust/engine/pantsd/src/pantsd_tests.rs +++ b/src/rust/engine/pantsd/src/pantsd_tests.rs @@ -6,28 +6,28 @@ use std::net::TcpStream; use crate::pantsd_testing::launch_pantsd; fn assert_connect(port: u16) { - assert!( - port >= 1024, - "Pantsd should never be running on a privileged port." 
- ); + assert!( + port >= 1024, + "Pantsd should never be running on a privileged port." + ); - let stream = TcpStream::connect(("0.0.0.0", port)).unwrap(); - assert_eq!(port, stream.peer_addr().unwrap().port()); + let stream = TcpStream::connect(("0.0.0.0", port)).unwrap(); + assert_eq!(port, stream.peer_addr().unwrap().port()); } #[test] fn test_address_integration() { - let (_, _, pants_subprocessdir) = launch_pantsd(); + let (_, _, pants_subprocessdir) = launch_pantsd(); - let pantsd_metadata = crate::Metadata::mount(&pants_subprocessdir).unwrap(); - let port = pantsd_metadata.port().unwrap(); - assert_connect(port); + let pantsd_metadata = crate::Metadata::mount(&pants_subprocessdir).unwrap(); + let port = pantsd_metadata.port().unwrap(); + assert_connect(port); } #[test] fn test_find_pantsd() { - let (build_root, options_parser, _tmpdir) = launch_pantsd(); + let (build_root, options_parser, _tmpdir) = launch_pantsd(); - let connection_settings = crate::find_pantsd(&build_root, &options_parser).unwrap(); - assert_connect(connection_settings.port); + let connection_settings = crate::find_pantsd(&build_root, &options_parser).unwrap(); + assert_connect(connection_settings.port); } diff --git a/src/rust/engine/process_execution/docker/src/docker.rs b/src/rust/engine/process_execution/docker/src/docker.rs index 1e1d169a79d..ac712a60f00 100644 --- a/src/rust/engine/process_execution/docker/src/docker.rs +++ b/src/rust/engine/process_execution/docker/src/docker.rs @@ -29,12 +29,12 @@ use task_executor::Executor; use workunit_store::{in_workunit, Metric, RunningWorkunit}; use process_execution::local::{ - apply_chroot, collect_child_outputs, create_sandbox, prepare_workdir, setup_run_sh_script, - CapturedWorkdir, ChildOutput, KeepSandboxes, + apply_chroot, collect_child_outputs, create_sandbox, prepare_workdir, setup_run_sh_script, + CapturedWorkdir, ChildOutput, KeepSandboxes, }; use process_execution::{ - Context, FallibleProcessResultWithPlatform, NamedCaches, Platform, Process, ProcessError, - ProcessExecutionStrategy, + Context, FallibleProcessResultWithPlatform, NamedCaches, Platform, Process, ProcessError, + ProcessExecutionStrategy, }; pub(crate) const SANDBOX_BASE_PATH_IN_CONTAINER: &str = "/pants-sandbox"; @@ -49,33 +49,33 @@ pub static DOCKER: Lazy = Lazy::new(DockerOnceCell::new); /// `CommandRunner` that executes processes using a local Docker client. 
pub struct CommandRunner<'a> { - store: Store, - executor: Executor, - docker: &'a DockerOnceCell, - work_dir_base: PathBuf, - immutable_inputs: ImmutableInputs, - keep_sandboxes: KeepSandboxes, - container_cache: ContainerCache<'a>, + store: Store, + executor: Executor, + docker: &'a DockerOnceCell, + work_dir_base: PathBuf, + immutable_inputs: ImmutableInputs, + keep_sandboxes: KeepSandboxes, + container_cache: ContainerCache<'a>, } #[derive(Clone)] pub struct DockerOnceCell { - cell: Arc>, + cell: Arc>, } impl DockerOnceCell { - pub fn new() -> Self { - Self { - cell: Arc::new(OnceCell::new()), + pub fn new() -> Self { + Self { + cell: Arc::new(OnceCell::new()), + } } - } - pub fn initialized(&self) -> bool { - self.cell.initialized() - } + pub fn initialized(&self) -> bool { + self.cell.initialized() + } - pub async fn get(&self) -> Result<&Docker, String> { - self + pub async fn get(&self) -> Result<&Docker, String> { + self .cell .get_or_try_init(async move { let docker = Docker::connect_with_local_defaults() @@ -102,7 +102,7 @@ impl DockerOnceCell { Ok(docker) }) .await - } + } } /// Represents a "scope" during which images will not be pulled again. This is usually associated @@ -111,165 +111,168 @@ impl DockerOnceCell { pub struct ImagePullScope(Arc); impl ImagePullScope { - pub fn new(build_id: &str) -> Self { - Self(Arc::new(build_id.to_string())) - } + pub fn new(build_id: &str) -> Self { + Self(Arc::new(build_id.to_string())) + } } #[derive(Default)] struct ImagePullCacheInner { - /// Map an "image pull scope" (usually a build ID) to another map which is used to debounce - /// image pull attempts made during that scope. The inner map goes from image name to a - /// `OnceCell` which ensures that only one pull for that image occurs at a time within the - /// relevant image pull scope. - cache: BTreeMap>>>, + /// Map an "image pull scope" (usually a build ID) to another map which is used to debounce + /// image pull attempts made during that scope. The inner map goes from image name to a + /// `OnceCell` which ensures that only one pull for that image occurs at a time within the + /// relevant image pull scope. + cache: BTreeMap>>>, } #[derive(Clone)] pub struct ImagePullCache { - /// Image pull cache and current build generation ID. - inner: Arc>, + /// Image pull cache and current build generation ID. 
+ inner: Arc>, } fn docker_platform_identifier(platform: &Platform) -> &'static str { - match platform { - Platform::Linux_x86_64 => "linux/amd64", - Platform::Linux_arm64 => "linux/arm64", - Platform::Macos_x86_64 => "darwin/amd64", - Platform::Macos_arm64 => "darwin/arm64", - } + match platform { + Platform::Linux_x86_64 => "linux/amd64", + Platform::Linux_arm64 => "linux/arm64", + Platform::Macos_x86_64 => "darwin/amd64", + Platform::Macos_arm64 => "darwin/arm64", + } } impl ImagePullCache { - pub fn new() -> Self { - Self { - inner: Arc::default(), + pub fn new() -> Self { + Self { + inner: Arc::default(), + } } - } - pub async fn pull_image( - &self, - docker: &Docker, - executor: &Executor, - image: &str, - platform: &Platform, - image_pull_scope: ImagePullScope, - image_pull_policy: ImagePullPolicy, - ) -> Result<(), String> { - let image_cell = { - let mut inner = self.inner.lock(); - - let scope = inner.cache.entry(image_pull_scope).or_default(); - - let cell = scope - .entry(image.to_string()) - .or_insert_with(|| Arc::new(OnceCell::new())); - - cell.clone() - }; + pub async fn pull_image( + &self, + docker: &Docker, + executor: &Executor, + image: &str, + platform: &Platform, + image_pull_scope: ImagePullScope, + image_pull_policy: ImagePullPolicy, + ) -> Result<(), String> { + let image_cell = { + let mut inner = self.inner.lock(); + + let scope = inner.cache.entry(image_pull_scope).or_default(); + + let cell = scope + .entry(image.to_string()) + .or_insert_with(|| Arc::new(OnceCell::new())); + + cell.clone() + }; - image_cell - .get_or_try_init(pull_image( - docker, - executor, - image, - platform, - image_pull_policy, - )) - .await?; + image_cell + .get_or_try_init(pull_image( + docker, + executor, + image, + platform, + image_pull_policy, + )) + .await?; - Ok(()) - } + Ok(()) + } } #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ImagePullPolicy { - Always, - IfMissing, - Never, - OnlyIfLatestOrMissing, + Always, + IfMissing, + Never, + OnlyIfLatestOrMissing, } async fn credentials_for_image( - executor: &Executor, - image: &str, + executor: &Executor, + image: &str, ) -> Result, String> { - // An image name has an optional domain component before the first `/`. While the grammar (linked - // below) seems to imply that the domain can be statically differentiated from the path, it's not - // clear how. So to confirm that it is a domain, we attempt to DNS resolve it. - // - // https://github.com/distribution/distribution/blob/e5d5810851d1f17a5070e9b6f940d8af98ea3c29/reference/reference.go#L4-L26 - let Some((server, _)) = image.split_once('/') else { - return Ok(None); - }; - let server = server.to_owned(); - - executor - .spawn_blocking( - move || { - // Resolve the server as a DNS name to confirm that it is actually a registry. - let Ok(_) = (server.as_ref(), 80) - .to_socket_addrs() - .or_else(|_| server.to_socket_addrs()) - else { - return Ok(None); - }; - - // TODO: https://github.com/keirlawson/docker_credential/issues/7 means that this will only - // work for credential helpers and credentials encoded directly in the docker config, - // rather than for general credStore implementations. 
- let credential = docker_credential::get_credential(&server) - .map_err(|e| format!("Failed to retrieve credentials for server `{server}`: {e}"))?; - - let bollard_credentials = match credential { - docker_credential::DockerCredential::IdentityToken(token) => DockerCredentials { - identitytoken: Some(token), - ..DockerCredentials::default() - }, - docker_credential::DockerCredential::UsernamePassword(username, password) => { - DockerCredentials { - username: Some(username), - password: Some(password), - ..DockerCredentials::default() - } - } - }; - - Ok(Some(bollard_credentials)) - }, - |e| Err(format!("Credentials task failed: {e}")), - ) - .await + // An image name has an optional domain component before the first `/`. While the grammar (linked + // below) seems to imply that the domain can be statically differentiated from the path, it's not + // clear how. So to confirm that it is a domain, we attempt to DNS resolve it. + // + // https://github.com/distribution/distribution/blob/e5d5810851d1f17a5070e9b6f940d8af98ea3c29/reference/reference.go#L4-L26 + let Some((server, _)) = image.split_once('/') else { + return Ok(None); + }; + let server = server.to_owned(); + + executor + .spawn_blocking( + move || { + // Resolve the server as a DNS name to confirm that it is actually a registry. + let Ok(_) = (server.as_ref(), 80) + .to_socket_addrs() + .or_else(|_| server.to_socket_addrs()) + else { + return Ok(None); + }; + + // TODO: https://github.com/keirlawson/docker_credential/issues/7 means that this will only + // work for credential helpers and credentials encoded directly in the docker config, + // rather than for general credStore implementations. + let credential = docker_credential::get_credential(&server).map_err(|e| { + format!("Failed to retrieve credentials for server `{server}`: {e}") + })?; + + let bollard_credentials = match credential { + docker_credential::DockerCredential::IdentityToken(token) => { + DockerCredentials { + identitytoken: Some(token), + ..DockerCredentials::default() + } + } + docker_credential::DockerCredential::UsernamePassword(username, password) => { + DockerCredentials { + username: Some(username), + password: Some(password), + ..DockerCredentials::default() + } + } + }; + + Ok(Some(bollard_credentials)) + }, + |e| Err(format!("Credentials task failed: {e}")), + ) + .await } /// Pull an image given its name and the image pull policy. This method is debounced by /// the "image pull cache" in the `CommandRunner`. async fn pull_image( - docker: &Docker, - executor: &Executor, - image: &str, - platform: &Platform, - policy: ImagePullPolicy, + docker: &Docker, + executor: &Executor, + image: &str, + platform: &Platform, + policy: ImagePullPolicy, ) -> Result<(), String> { - let has_latest_tag = { - if let Some((_, suffix)) = image.rsplit_once(':') { - suffix == "latest" - } else { - false - } - }; + let has_latest_tag = { + if let Some((_, suffix)) = image.rsplit_once(':') { + suffix == "latest" + } else { + false + } + }; - let image_exists = { - match docker.inspect_image(image).await { - Ok(_) => true, - Err(DockerError::DockerResponseServerError { - status_code: 404, .. - }) => false, - Err(err) => return Err(format!("Failed to inspect Docker image `{image}`: {err:?}")), - } - }; + let image_exists = { + match docker.inspect_image(image).await { + Ok(_) => true, + Err(DockerError::DockerResponseServerError { + status_code: 404, .. 
+ }) => false, + Err(err) => return Err(format!("Failed to inspect Docker image `{image}`: {err:?}")), + } + }; - let (do_pull, pull_reason) = match (policy, image_exists) { + let (do_pull, pull_reason) = match (policy, image_exists) { (ImagePullPolicy::Always, _) => { (true, "the image pull policy is set to \"always\"") }, @@ -290,165 +293,170 @@ async fn pull_image( _ => (false, "") }; - if do_pull { - in_workunit!( - "pull_docker_image", - Level::Info, - desc = Some(format!( - "Pulling Docker image `{image}` because {pull_reason}." - )), - |_workunit| async move { - let credentials = credentials_for_image(executor, image) - .await - .map_err(|e| format!("Failed to pull Docker image `{image}`: {e}"))?; - - let create_image_options = CreateImageOptions:: { - from_image: image.to_string(), - platform: docker_platform_identifier(platform).to_string(), - ..CreateImageOptions::default() - }; - - let mut result_stream = docker.create_image(Some(create_image_options), None, credentials); - while let Some(msg) = result_stream.next().await { - log::trace!("pull {}: {:?}", image, msg); - match msg { - Ok(msg) => match msg { - CreateImageInfo { - error: Some(error), .. - } => { - return Err(format!("Failed to pull Docker image `{image}`: {error}")); - } - CreateImageInfo { - status: Some(status), - .. - } => { - log::debug!("Docker pull status: {status}"); - } - // Ignore content in other event fields, namely `id`, `progress`, and `progress_detail`. - _ => (), - }, - Err(err) => return Err(format!("Failed to pull Docker image `{image}`: {err:?}")), - } - } - - Ok(()) - } - ) - .await?; - } + if do_pull { + in_workunit!( + "pull_docker_image", + Level::Info, + desc = Some(format!( + "Pulling Docker image `{image}` because {pull_reason}." + )), + |_workunit| async move { + let credentials = credentials_for_image(executor, image) + .await + .map_err(|e| format!("Failed to pull Docker image `{image}`: {e}"))?; + + let create_image_options = CreateImageOptions:: { + from_image: image.to_string(), + platform: docker_platform_identifier(platform).to_string(), + ..CreateImageOptions::default() + }; + + let mut result_stream = + docker.create_image(Some(create_image_options), None, credentials); + while let Some(msg) = result_stream.next().await { + log::trace!("pull {}: {:?}", image, msg); + match msg { + Ok(msg) => match msg { + CreateImageInfo { + error: Some(error), .. + } => { + return Err(format!( + "Failed to pull Docker image `{image}`: {error}" + )); + } + CreateImageInfo { + status: Some(status), + .. + } => { + log::debug!("Docker pull status: {status}"); + } + // Ignore content in other event fields, namely `id`, `progress`, and `progress_detail`. 
+ _ => (), + }, + Err(err) => { + return Err(format!("Failed to pull Docker image `{image}`: {err:?}")) + } + } + } + + Ok(()) + } + ) + .await?; + } - Ok(()) + Ok(()) } impl<'a> CommandRunner<'a> { - pub fn new( - store: Store, - executor: Executor, - docker: &'a DockerOnceCell, - image_pull_cache: &'a ImagePullCache, - work_dir_base: PathBuf, - immutable_inputs: ImmutableInputs, - keep_sandboxes: KeepSandboxes, - ) -> Result { - let container_cache = ContainerCache::new( - docker, - image_pull_cache, - executor.clone(), - &work_dir_base, - &immutable_inputs, - )?; - - Ok(CommandRunner { - store, - executor, - docker, - work_dir_base, - immutable_inputs, - keep_sandboxes, - container_cache, - }) - } + pub fn new( + store: Store, + executor: Executor, + docker: &'a DockerOnceCell, + image_pull_cache: &'a ImagePullCache, + work_dir_base: PathBuf, + immutable_inputs: ImmutableInputs, + keep_sandboxes: KeepSandboxes, + ) -> Result { + let container_cache = ContainerCache::new( + docker, + image_pull_cache, + executor.clone(), + &work_dir_base, + &immutable_inputs, + )?; + + Ok(CommandRunner { + store, + executor, + docker, + work_dir_base, + immutable_inputs, + keep_sandboxes, + container_cache, + }) + } } impl fmt::Debug for CommandRunner<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("docker::CommandRunner") - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("docker::CommandRunner") + .finish_non_exhaustive() + } } #[async_trait] impl<'a> process_execution::CommandRunner for CommandRunner<'a> { - async fn run( - &self, - context: Context, - _workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - let req_debug_repr = format!("{req:#?}"); - in_workunit!( - "run_local_process_via_docker", - req.level, - // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit - // renders at the Process's level. - desc = Some(req.description.clone()), - |workunit| async move { - let mut workdir = create_sandbox( - self.executor.clone(), - &self.work_dir_base, - &req.description, - self.keep_sandboxes, - )?; - - // Obtain ID of the base container in which to run the execution for this process. - let (container_id, named_caches) = { - let ProcessExecutionStrategy::Docker(image) = &req.execution_environment.strategy else { - return Err(ProcessError::Unclassified( - "The Docker execution strategy was not set on the Process, but \ + async fn run( + &self, + context: Context, + _workunit: &mut RunningWorkunit, + req: Process, + ) -> Result { + let req_debug_repr = format!("{req:#?}"); + in_workunit!( + "run_local_process_via_docker", + req.level, + // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit + // renders at the Process's level. + desc = Some(req.description.clone()), + |workunit| async move { + let mut workdir = create_sandbox( + self.executor.clone(), + &self.work_dir_base, + &req.description, + self.keep_sandboxes, + )?; + + // Obtain ID of the base container in which to run the execution for this process. + let (container_id, named_caches) = { + let ProcessExecutionStrategy::Docker(image) = + &req.execution_environment.strategy + else { + return Err(ProcessError::Unclassified( + "The Docker execution strategy was not set on the Process, but \ the Docker CommandRunner was used." 
- .to_owned(), - )); - }; - - self - .container_cache - .container_for_image( - image, - &req.execution_environment.platform, - &context.build_id, - ) - .await? - }; - - // Start working on a mutable version of the process. - let mut req = req; - - // Compute the absolute working directory within the container, and update the env to - // replace `{chroot}` placeholders with the path to the sandbox within the Docker container. - let working_dir = { - let sandbox_relpath = - workdir - .path() - .strip_prefix(&self.work_dir_base) - .map_err(|err| { - format!( + .to_owned(), + )); + }; + + self.container_cache + .container_for_image( + image, + &req.execution_environment.platform, + &context.build_id, + ) + .await? + }; + + // Start working on a mutable version of the process. + let mut req = req; + + // Compute the absolute working directory within the container, and update the env to + // replace `{chroot}` placeholders with the path to the sandbox within the Docker container. + let working_dir = { + let sandbox_relpath = workdir + .path() + .strip_prefix(&self.work_dir_base) + .map_err(|err| { + format!( "Internal error - base directory was not prefix of sandbox directory: {err}" ) - })?; - let sandbox_path_in_container = Path::new(&SANDBOX_BASE_PATH_IN_CONTAINER) + })?; + let sandbox_path_in_container = Path::new(&SANDBOX_BASE_PATH_IN_CONTAINER) .join(sandbox_relpath) .into_os_string() .into_string() .map_err(|s| { format!("Unable to convert sandbox path to string due to non UTF-8 characters: {s:?}") })?; - apply_chroot(&sandbox_path_in_container, &mut req); - log::trace!( - "sandbox_path_in_container = {:?}", - &sandbox_path_in_container - ); + apply_chroot(&sandbox_path_in_container, &mut req); + log::trace!( + "sandbox_path_in_container = {:?}", + &sandbox_path_in_container + ); - req + req .working_directory .as_ref() .map(|relpath| Path::new(&sandbox_path_in_container).join(relpath)) @@ -458,274 +466,274 @@ impl<'a> process_execution::CommandRunner for CommandRunner<'a> { .map_err(|s| { format!("Unable to convert working directory due to non UTF-8 characters: {s:?}") })? - }; - - // Prepare the workdir. - // DOCKER-NOTE: The input root will be bind mounted into the container. - let exclusive_spawn = prepare_workdir( - workdir.path().to_owned(), - &self.work_dir_base, - &req, - req.input_digests.inputs.clone(), - &self.store, - &named_caches, - &self.immutable_inputs, - Some(Path::new(NAMED_CACHES_BASE_PATH_IN_CONTAINER)), - Some(Path::new(IMMUTABLE_INPUTS_BASE_PATH_IN_CONTAINER)), + }; + + // Prepare the workdir. + // DOCKER-NOTE: The input root will be bind mounted into the container. + let exclusive_spawn = prepare_workdir( + workdir.path().to_owned(), + &self.work_dir_base, + &req, + req.input_digests.inputs.clone(), + &self.store, + &named_caches, + &self.immutable_inputs, + Some(Path::new(NAMED_CACHES_BASE_PATH_IN_CONTAINER)), + Some(Path::new(IMMUTABLE_INPUTS_BASE_PATH_IN_CONTAINER)), + ) + .await?; + + workunit.increment_counter(Metric::DockerExecutionRequests, 1); + + let res = self + .run_and_capture_workdir( + req.clone(), + context, + self.store.clone(), + self.executor.clone(), + workdir.path().to_owned(), + (container_id, working_dir), + exclusive_spawn, + ) + .map_err(|msg| { + // Processes that experience no infrastructure issues should result in an "Ok" return, + // potentially with an exit code that indicates that they failed (with more information + // on stderr). 
Actually failing at this level indicates a failure to start or otherwise + // interact with the process, which would generally be an infrastructure or implementation + // error (something missing from the sandbox, incorrect permissions, etc). + // + // Given that this is expected to be rare, we dump the entire process definition in the + // error. + ProcessError::Unclassified(format!( + "Failed to execute: {req_debug_repr}\n\n{msg}" + )) + }) + .await; + + match &res { + Ok(_) => workunit.increment_counter(Metric::DockerExecutionSuccesses, 1), + Err(_) => workunit.increment_counter(Metric::DockerExecutionErrors, 1), + } + + if self.keep_sandboxes == KeepSandboxes::Always + || self.keep_sandboxes == KeepSandboxes::OnFailure + && res.as_ref().map(|r| r.exit_code).unwrap_or(1) != 0 + { + workdir.keep(&req.description); + setup_run_sh_script( + workdir.path(), + &req.env, + &req.working_directory, + &req.argv, + workdir.path(), + )?; + } + + res + } ) - .await?; - - workunit.increment_counter(Metric::DockerExecutionRequests, 1); - - let res = self - .run_and_capture_workdir( - req.clone(), - context, - self.store.clone(), - self.executor.clone(), - workdir.path().to_owned(), - (container_id, working_dir), - exclusive_spawn, - ) - .map_err(|msg| { - // Processes that experience no infrastructure issues should result in an "Ok" return, - // potentially with an exit code that indicates that they failed (with more information - // on stderr). Actually failing at this level indicates a failure to start or otherwise - // interact with the process, which would generally be an infrastructure or implementation - // error (something missing from the sandbox, incorrect permissions, etc). - // - // Given that this is expected to be rare, we dump the entire process definition in the - // error. - ProcessError::Unclassified(format!("Failed to execute: {req_debug_repr}\n\n{msg}")) - }) - .await; - - match &res { - Ok(_) => workunit.increment_counter(Metric::DockerExecutionSuccesses, 1), - Err(_) => workunit.increment_counter(Metric::DockerExecutionErrors, 1), - } - - if self.keep_sandboxes == KeepSandboxes::Always - || self.keep_sandboxes == KeepSandboxes::OnFailure - && res.as_ref().map(|r| r.exit_code).unwrap_or(1) != 0 - { - workdir.keep(&req.description); - setup_run_sh_script( - workdir.path(), - &req.env, - &req.working_directory, - &req.argv, - workdir.path(), - )?; - } - - res - } - ) - .await - } + .await + } - async fn shutdown(&self) -> Result<(), String> { - self.container_cache.shutdown().await - } + async fn shutdown(&self) -> Result<(), String> { + self.container_cache.shutdown().await + } } #[async_trait] impl<'a> CapturedWorkdir for CommandRunner<'a> { - type WorkdirToken = (String, String); - - // TODO: This method currently violates the `Drop` constraint of `CapturedWorkdir`, because the - // Docker container is not necessarily killed when the returned value is Dropped. 
- // - // see https://github.com/pantsbuild/pants/issues/18210 - async fn run_in_workdir<'s, 'c, 'w, 'r>( - &'s self, - _context: &'c Context, - _workdir_path: &'w Path, - (container_id, working_dir): Self::WorkdirToken, - req: Process, - _exclusive_spawn: bool, - ) -> Result>, String> { - let docker = self.docker.get().await?; - - Command::new(req.argv) - .env(req.env) - .working_dir(working_dir) - .spawn(docker, container_id) - .await - } - - async fn prepare_workdir_for_capture( - &self, - _context: &Context, - _workdir_path: &Path, - (container_id, working_dir): Self::WorkdirToken, - req: &Process, - ) -> Result<(), String> { - // Docker on Linux will frequently produce root-owned output files in bind mounts, because we - // do not assume anything about the users that exist in the image. But Docker on macOS (at least - // version 14.6.2 using gRPC-FUSE filesystem virtualization) creates files in bind mounts as the - // user running Docker. See https://github.com/pantsbuild/pants/issues/18306. + type WorkdirToken = (String, String); + + // TODO: This method currently violates the `Drop` constraint of `CapturedWorkdir`, because the + // Docker container is not necessarily killed when the returned value is Dropped. // - // TODO: Changing permissions allows the files to be captured, but not for them to be removed. - // See https://github.com/pantsbuild/pants/issues/18329. - if matches!( - Platform::current()?, - Platform::Macos_x86_64 | Platform::Macos_arm64 - ) { - return Ok(()); + // see https://github.com/pantsbuild/pants/issues/18210 + async fn run_in_workdir<'s, 'c, 'w, 'r>( + &'s self, + _context: &'c Context, + _workdir_path: &'w Path, + (container_id, working_dir): Self::WorkdirToken, + req: Process, + _exclusive_spawn: bool, + ) -> Result>, String> { + let docker = self.docker.get().await?; + + Command::new(req.argv) + .env(req.env) + .working_dir(working_dir) + .spawn(docker, container_id) + .await } - let docker = self.docker.get().await?; - - let args = ["chmod", "a+r", "-R"] - .into_iter() - .map(OsStr::new) - .chain( - req - .output_files - .iter() - .chain(req.output_directories.iter()) - .map(|p| p.as_ref().as_os_str()), - ) - .map(|s| { - s.to_owned().into_string().map_err(|s| { - format!( - "Unable to convert output_files or output_directories due to \ - non UTF-8 characters: {s:?}" - ) - }) - }) - .collect::, _>>()?; + async fn prepare_workdir_for_capture( + &self, + _context: &Context, + _workdir_path: &Path, + (container_id, working_dir): Self::WorkdirToken, + req: &Process, + ) -> Result<(), String> { + // Docker on Linux will frequently produce root-owned output files in bind mounts, because we + // do not assume anything about the users that exist in the image. But Docker on macOS (at least + // version 14.6.2 using gRPC-FUSE filesystem virtualization) creates files in bind mounts as the + // user running Docker. See https://github.com/pantsbuild/pants/issues/18306. + // + // TODO: Changing permissions allows the files to be captured, but not for them to be removed. + // See https://github.com/pantsbuild/pants/issues/18329. + if matches!( + Platform::current()?, + Platform::Macos_x86_64 | Platform::Macos_arm64 + ) { + return Ok(()); + } - let (exit_code, stdout, stderr) = Command::new(args) - .working_dir(working_dir) - .output(docker, container_id) - .await?; + let docker = self.docker.get().await?; - // Failing processes may not create their output files, so we do not treat this as fatal. 
- if exit_code != 0 { - log::debug!( - "Failed to chmod process outputs in Docker container:\n\ + let args = ["chmod", "a+r", "-R"] + .into_iter() + .map(OsStr::new) + .chain( + req.output_files + .iter() + .chain(req.output_directories.iter()) + .map(|p| p.as_ref().as_os_str()), + ) + .map(|s| { + s.to_owned().into_string().map_err(|s| { + format!( + "Unable to convert output_files or output_directories due to \ + non UTF-8 characters: {s:?}" + ) + }) + }) + .collect::, _>>()?; + + let (exit_code, stdout, stderr) = Command::new(args) + .working_dir(working_dir) + .output(docker, container_id) + .await?; + + // Failing processes may not create their output files, so we do not treat this as fatal. + if exit_code != 0 { + log::debug!( + "Failed to chmod process outputs in Docker container:\n\ stdout:\n{}\n\ stderr:\n{}\n", - String::from_utf8_lossy(&stdout), - String::from_utf8_lossy(&stderr) - ); - } + String::from_utf8_lossy(&stdout), + String::from_utf8_lossy(&stderr) + ); + } - Ok(()) - } + Ok(()) + } } /// A loose clone of `std::process:Command` for Docker `exec`. struct Command(bollard::exec::CreateExecOptions); impl Command { - fn new(argv: Vec) -> Self { - Self(bollard::exec::CreateExecOptions { - cmd: Some(argv), - attach_stdout: Some(true), - attach_stderr: Some(true), - ..bollard::exec::CreateExecOptions::default() - }) - } - - fn working_dir(&mut self, working_dir: String) -> &mut Self { - self.0.working_dir = Some(working_dir); - self - } - - fn env>(&mut self, env: I) -> &mut Self { - self.0.env = Some( - env - .into_iter() - .map(|(key, value)| format!("{key}={value}")) - .collect(), - ); - self - } - - /// Execute the command that has been specified, and return a stream of ChildOutputs. - /// - /// NB: See the TODO on the `ChildOutput` definition. - async fn spawn<'a, 'b>( - &'a mut self, - docker: &'a Docker, - container_id: String, - ) -> Result>, String> { - log::trace!("creating execution with config: {:?}", self.0); - - let exec = docker - .create_exec::(&container_id, self.0.clone()) - .await - .map_err(|err| format!("Failed to create Docker execution in container: {err:?}"))?; - - log::trace!("created execution {}", &exec.id); + fn new(argv: Vec) -> Self { + Self(bollard::exec::CreateExecOptions { + cmd: Some(argv), + attach_stdout: Some(true), + attach_stderr: Some(true), + ..bollard::exec::CreateExecOptions::default() + }) + } - let exec_result = docker - .start_exec(&exec.id, None) - .await - .map_err(|err| format!("Failed to start Docker execution `{}`: {:?}", &exec.id, err))?; - let mut output_stream = if let StartExecResults::Attached { output, .. } = exec_result { - output.boxed() - } else { - panic!("Unexpected value returned from start_exec: {exec_result:?}"); - }; + fn working_dir(&mut self, working_dir: String) -> &mut Self { + self.0.working_dir = Some(working_dir); + self + } - log::trace!("started execution {}", &exec.id); + fn env>(&mut self, env: I) -> &mut Self { + self.0.env = Some( + env.into_iter() + .map(|(key, value)| format!("{key}={value}")) + .collect(), + ); + self + } - let exec_id = exec.id.to_owned(); - let docker = docker.to_owned(); + /// Execute the command that has been specified, and return a stream of ChildOutputs. + /// + /// NB: See the TODO on the `ChildOutput` definition. 
+ async fn spawn<'a, 'b>( + &'a mut self, + docker: &'a Docker, + container_id: String, + ) -> Result>, String> { + log::trace!("creating execution with config: {:?}", self.0); + + let exec = docker + .create_exec::(&container_id, self.0.clone()) + .await + .map_err(|err| format!("Failed to create Docker execution in container: {err:?}"))?; + + log::trace!("created execution {}", &exec.id); + + let exec_result = docker + .start_exec(&exec.id, None) + .await + .map_err(|err| format!("Failed to start Docker execution `{}`: {:?}", &exec.id, err))?; + let mut output_stream = if let StartExecResults::Attached { output, .. } = exec_result { + output.boxed() + } else { + panic!("Unexpected value returned from start_exec: {exec_result:?}"); + }; - let stream = async_stream::try_stream! { - // Read output from the execution. - while let Some(output_msg) = output_stream.next().await { - match output_msg { - Ok(LogOutput::StdOut { message }) => { - log::trace!("execution {} wrote {} bytes to stdout", &exec_id, message.len()); - yield ChildOutput::Stdout(message); - } - Ok(LogOutput::StdErr { message }) => { - log::trace!("execution {} wrote {} bytes to stderr", &exec_id, message.len()); - yield ChildOutput::Stderr(message); + log::trace!("started execution {}", &exec.id); + + let exec_id = exec.id.to_owned(); + let docker = docker.to_owned(); + + let stream = async_stream::try_stream! { + // Read output from the execution. + while let Some(output_msg) = output_stream.next().await { + match output_msg { + Ok(LogOutput::StdOut { message }) => { + log::trace!("execution {} wrote {} bytes to stdout", &exec_id, message.len()); + yield ChildOutput::Stdout(message); + } + Ok(LogOutput::StdErr { message }) => { + log::trace!("execution {} wrote {} bytes to stderr", &exec_id, message.len()); + yield ChildOutput::Stderr(message); + } + Ok(_) => (), + Err(err) => { + log::trace!("error while capturing output of execution {}: {:?}", &exec_id, err); + } } - Ok(_) => (), - Err(err) => { - log::trace!("error while capturing output of execution {}: {:?}", &exec_id, err); - } - } - } + } - let exec_metadata = docker - .inspect_exec(&exec_id) - .await - .map_err(|err| format!("Failed to inspect Docker execution `{}`: {:?}", &exec_id, err))?; + let exec_metadata = docker + .inspect_exec(&exec_id) + .await + .map_err(|err| format!("Failed to inspect Docker execution `{}`: {:?}", &exec_id, err))?; - let status_code = exec_metadata - .exit_code - .ok_or_else(|| format!("Inspected execution `{}` for exit status but status was missing.", &exec_id))?; + let status_code = exec_metadata + .exit_code + .ok_or_else(|| format!("Inspected execution `{}` for exit status but status was missing.", &exec_id))?; - log::trace!("execution {} exited with status code {}", &exec_id, status_code); + log::trace!("execution {} exited with status code {}", &exec_id, status_code); - yield ChildOutput::Exit(ExitCode(status_code as i32)); - }; + yield ChildOutput::Exit(ExitCode(status_code as i32)); + }; - Ok(stream.boxed()) - } + Ok(stream.boxed()) + } - /// Execute the command that has been specified, and return its exit code, stdout, and stderr. 
- async fn output( - &mut self, - docker: &Docker, - container_id: String, - ) -> Result<(i32, Bytes, Bytes), String> { - let child_outputs = self.spawn(docker, container_id).await?; - let mut stdout = BytesMut::with_capacity(8192); - let mut stderr = BytesMut::with_capacity(8192); - let exit_code = collect_child_outputs(&mut stdout, &mut stderr, child_outputs).await?; - Ok((exit_code, stdout.freeze(), stderr.freeze())) - } + /// Execute the command that has been specified, and return its exit code, stdout, and stderr. + async fn output( + &mut self, + docker: &Docker, + container_id: String, + ) -> Result<(i32, Bytes, Bytes), String> { + let child_outputs = self.spawn(docker, container_id).await?; + let mut stdout = BytesMut::with_capacity(8192); + let mut stderr = BytesMut::with_capacity(8192); + let exit_code = collect_child_outputs(&mut stdout, &mut stderr, child_outputs).await?; + Ok((exit_code, stdout.freeze(), stderr.freeze())) + } } /// Container ID and NamedCaches for that container. async_oncecell::OnceCell is used so that @@ -736,30 +744,32 @@ type CachedContainer = Arc>; /// Caches running containers so that build actions can be invoked by running "executions" /// within those cached containers. pub(crate) struct ContainerCache<'a> { - docker: &'a DockerOnceCell, - image_pull_cache: &'a ImagePullCache, - executor: Executor, - work_dir_base: String, - immutable_inputs_base_dir: String, - /// Cache that maps image name / platform to a cached container. - containers: Mutex>, -} - -impl<'a> ContainerCache<'a> { - pub fn new( docker: &'a DockerOnceCell, image_pull_cache: &'a ImagePullCache, executor: Executor, - work_dir_base: &Path, - immutable_inputs: &ImmutableInputs, - ) -> Result { - let work_dir_base = work_dir_base - .to_path_buf() - .into_os_string() - .into_string() - .map_err(|s| format!("Unable to convert workdir_path due to non UTF-8 characters: {s:?}"))?; + work_dir_base: String, + immutable_inputs_base_dir: String, + /// Cache that maps image name / platform to a cached container. + containers: Mutex>, +} + +impl<'a> ContainerCache<'a> { + pub fn new( + docker: &'a DockerOnceCell, + image_pull_cache: &'a ImagePullCache, + executor: Executor, + work_dir_base: &Path, + immutable_inputs: &ImmutableInputs, + ) -> Result { + let work_dir_base = work_dir_base + .to_path_buf() + .into_os_string() + .into_string() + .map_err(|s| { + format!("Unable to convert workdir_path due to non UTF-8 characters: {s:?}") + })?; - let immutable_inputs_base_dir = immutable_inputs + let immutable_inputs_base_dir = immutable_inputs .workdir() .to_path_buf() .into_os_string() @@ -768,255 +778,259 @@ impl<'a> ContainerCache<'a> { format!("Unable to convert immutable_inputs base dir due to non UTF-8 characters: {s:?}") })?; - Ok(Self { - docker, - image_pull_cache, - executor, - work_dir_base, - immutable_inputs_base_dir, - containers: Mutex::default(), - }) - } - - /// Creates a container, and creates (if necessary) and attaches a volume for image specific - /// (named) caches. - async fn make_container( - docker: Docker, - executor: Executor, - image_name: String, - platform: Platform, - image_pull_scope: ImagePullScope, - image_pull_cache: ImagePullCache, - work_dir_base: String, - immutable_inputs_base_dir: String, - ) -> Result { - // Pull the image. 
- image_pull_cache - .pull_image( - &docker, - &executor, - &image_name, - &platform, - image_pull_scope, - ImagePullPolicy::OnlyIfLatestOrMissing, - ) - .await?; - - let named_cache_volume_name = Self::maybe_make_named_cache_volume(&docker, &image_name) - .await - .map_err(|e| format!("Failed to create named cache volume for {image_name}: {e}"))?; - - let config = bollard::container::Config { - entrypoint: Some(vec!["/bin/sh".to_string()]), - host_config: Some(bollard::service::HostConfig { - binds: Some(vec![ - format!("{work_dir_base}:{SANDBOX_BASE_PATH_IN_CONTAINER}"), - format!("{named_cache_volume_name}:{NAMED_CACHES_BASE_PATH_IN_CONTAINER}",), - // DOCKER-TODO: Consider making this bind mount read-only. - format!("{immutable_inputs_base_dir}:{IMMUTABLE_INPUTS_BASE_PATH_IN_CONTAINER}"), - ]), - // The init process ensures that child processes are properly reaped. - init: Some(true), - ..bollard::service::HostConfig::default() - }), - image: Some(image_name.clone()), - tty: Some(true), - open_stdin: Some(true), - ..bollard::container::Config::default() - }; + Ok(Self { + docker, + image_pull_cache, + executor, + work_dir_base, + immutable_inputs_base_dir, + containers: Mutex::default(), + }) + } - log::trace!("creating cached container with config for image `{image_name}`: {config:?}",); + /// Creates a container, and creates (if necessary) and attaches a volume for image specific + /// (named) caches. + async fn make_container( + docker: Docker, + executor: Executor, + image_name: String, + platform: Platform, + image_pull_scope: ImagePullScope, + image_pull_cache: ImagePullCache, + work_dir_base: String, + immutable_inputs_base_dir: String, + ) -> Result { + // Pull the image. + image_pull_cache + .pull_image( + &docker, + &executor, + &image_name, + &platform, + image_pull_scope, + ImagePullPolicy::OnlyIfLatestOrMissing, + ) + .await?; + + let named_cache_volume_name = Self::maybe_make_named_cache_volume(&docker, &image_name) + .await + .map_err(|e| format!("Failed to create named cache volume for {image_name}: {e}"))?; + + let config = bollard::container::Config { + entrypoint: Some(vec!["/bin/sh".to_string()]), + host_config: Some(bollard::service::HostConfig { + binds: Some(vec![ + format!("{work_dir_base}:{SANDBOX_BASE_PATH_IN_CONTAINER}"), + format!("{named_cache_volume_name}:{NAMED_CACHES_BASE_PATH_IN_CONTAINER}",), + // DOCKER-TODO: Consider making this bind mount read-only. + format!( + "{immutable_inputs_base_dir}:{IMMUTABLE_INPUTS_BASE_PATH_IN_CONTAINER}" + ), + ]), + // The init process ensures that child processes are properly reaped. 
+ init: Some(true), + ..bollard::service::HostConfig::default() + }), + image: Some(image_name.clone()), + tty: Some(true), + open_stdin: Some(true), + ..bollard::container::Config::default() + }; - let create_options = CreateContainerOptions::<&str> { - name: "", - platform: Some(docker_platform_identifier(&platform)), - }; - let container = docker - .create_container::<&str, String>(Some(create_options), config) - .await - .map_err(|err| format!("Failed to create Docker container: {err:?}"))?; + log::trace!("creating cached container with config for image `{image_name}`: {config:?}",); - docker - .start_container::(&container.id, None) - .await - .map_err(|err| { - format!( - "Failed to start Docker container `{}` for image `{image_name}`: {err:?}", - &container.id - ) - })?; + let create_options = CreateContainerOptions::<&str> { + name: "", + platform: Some(docker_platform_identifier(&platform)), + }; + let container = docker + .create_container::<&str, String>(Some(create_options), config) + .await + .map_err(|err| format!("Failed to create Docker container: {err:?}"))?; + + docker + .start_container::(&container.id, None) + .await + .map_err(|err| { + format!( + "Failed to start Docker container `{}` for image `{image_name}`: {err:?}", + &container.id + ) + })?; - log::debug!( - "started container `{}` for image `{image_name}`", - &container.id, - ); + log::debug!( + "started container `{}` for image `{image_name}`", + &container.id, + ); - Ok(container.id) - } + Ok(container.id) + } - /// Creates a volume for named caches for the given image name. In production usage, the image - /// name will have been expanded to include its SHA256 fingerprint, so the named cache will be - /// dedicated to a particular image version. That is conservative, so we might consider making - /// it configurable whether the image version is attached in future releases. - async fn maybe_make_named_cache_volume( - docker: &Docker, - image_name: &str, - ) -> Result { - let image_hash = Digest::of_bytes(image_name.as_bytes()) - .hash - .to_hex() - .chars() - .take(12) - .collect::(); - let named_cache_volume_name = format!("pants-named-caches-{image_hash}"); - // TODO: Use a filter on volume name. - let volume_exists = docker - .list_volumes::<&str>(None) - .await? - .volumes - .map(|volumes| volumes.iter().any(|v| v.name == named_cache_volume_name)) - .unwrap_or(false); - if volume_exists { - return Ok(named_cache_volume_name); + /// Creates a volume for named caches for the given image name. In production usage, the image + /// name will have been expanded to include its SHA256 fingerprint, so the named cache will be + /// dedicated to a particular image version. That is conservative, so we might consider making + /// it configurable whether the image version is attached in future releases. + async fn maybe_make_named_cache_volume( + docker: &Docker, + image_name: &str, + ) -> Result { + let image_hash = Digest::of_bytes(image_name.as_bytes()) + .hash + .to_hex() + .chars() + .take(12) + .collect::(); + let named_cache_volume_name = format!("pants-named-caches-{image_hash}"); + // TODO: Use a filter on volume name. + let volume_exists = docker + .list_volumes::<&str>(None) + .await? 
+ .volumes + .map(|volumes| volumes.iter().any(|v| v.name == named_cache_volume_name)) + .unwrap_or(false); + if volume_exists { + return Ok(named_cache_volume_name); + } + + let mut labels = HashMap::new(); + labels.insert("image_name", image_name); + docker + .create_volume::<&str>(CreateVolumeOptions { + name: &named_cache_volume_name, + driver: "local", + labels, + ..CreateVolumeOptions::default() + }) + .await?; + + Ok(named_cache_volume_name) } - let mut labels = HashMap::new(); - labels.insert("image_name", image_name); - docker - .create_volume::<&str>(CreateVolumeOptions { - name: &named_cache_volume_name, - driver: "local", - labels, - ..CreateVolumeOptions::default() - }) - .await?; - - Ok(named_cache_volume_name) - } - - async fn make_named_cache_directory( - docker: Docker, - container_id: String, - directory: PathBuf, - ) -> Result<(), String> { - let directory = directory.into_os_string().into_string().map_err(|s| { - format!("Unable to convert named cache path to string due to non UTF-8 characters: {s:?}") - })?; - let (exit_code, stdout, stderr) = Command::new(vec![ - "mkdir".to_owned(), - "-p".to_owned(), - directory.to_owned(), - ]) - .output(&docker, container_id) - .await?; - - if exit_code == 0 { - Ok(()) - } else { - Err(format!( - "Failed to create parent directory for named cache in Docker container:\n\ + async fn make_named_cache_directory( + docker: Docker, + container_id: String, + directory: PathBuf, + ) -> Result<(), String> { + let directory = directory.into_os_string().into_string().map_err(|s| { + format!( + "Unable to convert named cache path to string due to non UTF-8 characters: {s:?}" + ) + })?; + let (exit_code, stdout, stderr) = Command::new(vec![ + "mkdir".to_owned(), + "-p".to_owned(), + directory.to_owned(), + ]) + .output(&docker, container_id) + .await?; + + if exit_code == 0 { + Ok(()) + } else { + Err(format!( + "Failed to create parent directory for named cache in Docker container:\n\ stdout:\n{}\n\ stderr:\n{}\n", - String::from_utf8_lossy(&stdout), - String::from_utf8_lossy(&stderr) - )) + String::from_utf8_lossy(&stdout), + String::from_utf8_lossy(&stderr) + )) + } } - } - - /// Return the container ID and NamedCaches for a container running `image_name` for use as a place - /// to invoke build actions as executions within the cached container. 
- pub async fn container_for_image( - &self, - image_name: &str, - platform: &Platform, - build_generation: &str, - ) -> Result<(String, NamedCaches), String> { - let docker = self.docker.get().await?.clone(); - let executor = self.executor.clone(); - - let container_id_cell = { - let mut containers = self.containers.lock(); - let cell = containers - .entry((image_name.to_string(), *platform)) - .or_insert_with(|| Arc::new(OnceCell::new())); - cell.clone() - }; - - let work_dir_base = self.work_dir_base.clone(); - let immutable_inputs_base_dir = self.immutable_inputs_base_dir.clone(); - let image_pull_scope = ImagePullScope::new(build_generation); - let container_id = container_id_cell - .get_or_try_init(async move { - let container_id = Self::make_container( - docker.clone(), - executor, - image_name.to_string(), - *platform, - image_pull_scope, - self.image_pull_cache.clone(), - work_dir_base, - immutable_inputs_base_dir, - ) - .await?; - - let named_caches = { - let docker = docker.to_owned(); - let container_id = container_id.clone(); - NamedCaches::new(NAMED_CACHES_BASE_PATH_IN_CONTAINER.into(), move |dst| { - ContainerCache::make_named_cache_directory( - docker.clone(), - container_id.clone(), - dst.to_owned(), - ) - .boxed() - }) + /// Return the container ID and NamedCaches for a container running `image_name` for use as a place + /// to invoke build actions as executions within the cached container. + pub async fn container_for_image( + &self, + image_name: &str, + platform: &Platform, + build_generation: &str, + ) -> Result<(String, NamedCaches), String> { + let docker = self.docker.get().await?.clone(); + let executor = self.executor.clone(); + + let container_id_cell = { + let mut containers = self.containers.lock(); + let cell = containers + .entry((image_name.to_string(), *platform)) + .or_insert_with(|| Arc::new(OnceCell::new())); + cell.clone() }; - Ok::<_, String>((container_id, named_caches)) - }) - .await?; - - Ok(container_id.to_owned()) - } - - pub async fn shutdown(&self) -> Result<(), String> { - // Skip shutting down if Docker was never used in the first place. - if self.containers.lock().is_empty() { - return Ok(()); + let work_dir_base = self.work_dir_base.clone(); + let immutable_inputs_base_dir = self.immutable_inputs_base_dir.clone(); + let image_pull_scope = ImagePullScope::new(build_generation); + + let container_id = container_id_cell + .get_or_try_init(async move { + let container_id = Self::make_container( + docker.clone(), + executor, + image_name.to_string(), + *platform, + image_pull_scope, + self.image_pull_cache.clone(), + work_dir_base, + immutable_inputs_base_dir, + ) + .await?; + + let named_caches = { + let docker = docker.to_owned(); + let container_id = container_id.clone(); + NamedCaches::new(NAMED_CACHES_BASE_PATH_IN_CONTAINER.into(), move |dst| { + ContainerCache::make_named_cache_directory( + docker.clone(), + container_id.clone(), + dst.to_owned(), + ) + .boxed() + }) + }; + + Ok::<_, String>((container_id, named_caches)) + }) + .await?; + + Ok(container_id.to_owned()) } - let docker = match self.docker.get().await { - Ok(d) => d, - Err(err) => { - return Err(format!( - "Failed to get Docker connection during container removal: {err}" - )) - } - }; + pub async fn shutdown(&self) -> Result<(), String> { + // Skip shutting down if Docker was never used in the first place. 
+ if self.containers.lock().is_empty() { + return Ok(()); + } - #[allow(clippy::needless_collect)] - // allow is necessary otherwise will get "temporary value dropped while borrowed" error - let container_ids = self - .containers - .lock() - .values() - .flat_map(|v| v.get()) - .cloned() - .collect::>(); - - let removal_futures = container_ids.into_iter().map(|(id, _)| async move { - let remove_options = RemoveContainerOptions { - force: true, - ..RemoveContainerOptions::default() - }; - docker - .remove_container(&id, Some(remove_options)) - .await - .map_err(|err| format!("Failed to remove Docker container `{id}`: {err:?}")) - }); + let docker = match self.docker.get().await { + Ok(d) => d, + Err(err) => { + return Err(format!( + "Failed to get Docker connection during container removal: {err}" + )) + } + }; - futures::future::try_join_all(removal_futures).await?; - Ok(()) - } + #[allow(clippy::needless_collect)] + // allow is necessary otherwise will get "temporary value dropped while borrowed" error + let container_ids = self + .containers + .lock() + .values() + .flat_map(|v| v.get()) + .cloned() + .collect::>(); + + let removal_futures = container_ids.into_iter().map(|(id, _)| async move { + let remove_options = RemoveContainerOptions { + force: true, + ..RemoveContainerOptions::default() + }; + docker + .remove_container(&id, Some(remove_options)) + .await + .map_err(|err| format!("Failed to remove Docker container `{id}`: {err:?}")) + }); + + futures::future::try_join_all(removal_futures).await?; + Ok(()) + } } diff --git a/src/rust/engine/process_execution/docker/src/docker_tests.rs b/src/rust/engine/process_execution/docker/src/docker_tests.rs index 093c3249e37..f8a9e18e9f5 100644 --- a/src/rust/engine/process_execution/docker/src/docker_tests.rs +++ b/src/rust/engine/process_execution/docker/src/docker_tests.rs @@ -17,8 +17,8 @@ use workunit_store::{RunningWorkunit, WorkunitStore}; use crate::docker::{DockerOnceCell, ImagePullCache, SANDBOX_BASE_PATH_IN_CONTAINER}; use process_execution::local::KeepSandboxes; use process_execution::{ - local, CacheName, CommandRunner, Context, FallibleProcessResultWithPlatform, InputDigests, - Platform, Process, ProcessError, + local, CacheName, CommandRunner, Context, FallibleProcessResultWithPlatform, InputDigests, + Platform, Process, ProcessError, }; /// Docker image to use for most tests in this file. @@ -29,773 +29,774 @@ const SH_PATH: &str = "/bin/sh"; #[derive(PartialEq, Debug)] struct LocalTestResult { - original: FallibleProcessResultWithPlatform, - stdout_bytes: Vec, - stderr_bytes: Vec, + original: FallibleProcessResultWithPlatform, + stdout_bytes: Vec, + stderr_bytes: Vec, } /// Skips a test if Docker is not available in macOS CI. macro_rules! 
skip_if_no_docker_available_in_macos_ci { - () => {{ - let docker = match Docker::connect_with_local_defaults() { - Ok(docker) => docker, - Err(err) => { - if cfg!(target_os = "macos") && env::var_os("GITHUB_ACTIONS").is_some() { - println!("Skipping test due to Docker not being available: {:?}", err); - return; - } else { - panic!("Docker should have been available for this test: {:?}", err); + () => {{ + let docker = match Docker::connect_with_local_defaults() { + Ok(docker) => docker, + Err(err) => { + if cfg!(target_os = "macos") && env::var_os("GITHUB_ACTIONS").is_some() { + println!("Skipping test due to Docker not being available: {:?}", err); + return; + } else { + panic!("Docker should have been available for this test: {:?}", err); + } + } + }; + + let ping_response = docker.ping().await; + if ping_response.is_err() { + if cfg!(target_os = "macos") && env::var_os("GITHUB_ACTIONS").is_some() { + println!( + "Skipping test due to Docker not being available: {:?}", + ping_response + ); + return; + } else { + panic!( + "Docker should have been available for this test: {:?}", + ping_response + ); + } } - } - }; - - let ping_response = docker.ping().await; - if ping_response.is_err() { - if cfg!(target_os = "macos") && env::var_os("GITHUB_ACTIONS").is_some() { - println!( - "Skipping test due to Docker not being available: {:?}", - ping_response - ); - return; - } else { - panic!( - "Docker should have been available for this test: {:?}", - ping_response - ); - } - } - }}; + }}; } fn platform_for_tests() -> Result { - Platform::current().map(|platform| match platform { - Platform::Macos_arm64 => Platform::Linux_arm64, - Platform::Macos_x86_64 => Platform::Linux_x86_64, - p => p, - }) + Platform::current().map(|platform| match platform { + Platform::Macos_arm64 => Platform::Linux_arm64, + Platform::Macos_x86_64 => Platform::Linux_x86_64, + p => p, + }) } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[cfg(unix)] async fn runner_errors_if_docker_image_not_set() { - skip_if_no_docker_available_in_macos_ci!(); - - // Because `docker_image` is set but it does not exist, this process should fail. - let err = run_command_via_docker( - Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])) - .docker("does-not-exist:latest".to_owned()), - ) - .await - .unwrap_err(); - if let ProcessError::Unclassified(msg) = err { - assert!(msg.contains("Failed to pull Docker image")); - } else { - panic!("unexpected value: {err:?}") - } - - // Otherwise, if docker_image is not set, use the local runner. - let err = run_command_via_docker(Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"]))) + skip_if_no_docker_available_in_macos_ci!(); + + // Because `docker_image` is set but it does not exist, this process should fail. + let err = run_command_via_docker( + Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])) + .docker("does-not-exist:latest".to_owned()), + ) .await .unwrap_err(); - if let ProcessError::Unclassified(msg) = &err { - assert!( + if let ProcessError::Unclassified(msg) = err { + assert!(msg.contains("Failed to pull Docker image")); + } else { + panic!("unexpected value: {err:?}") + } + + // Otherwise, if docker_image is not set, use the local runner. 
+ let err = run_command_via_docker(Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"]))) + .await + .unwrap_err(); + if let ProcessError::Unclassified(msg) = &err { + assert!( msg.contains("The Docker execution strategy was not set on the Process, but the Docker CommandRunner was used") ); - } else { - panic!("unexpected value: {err:?}") - } + } else { + panic!("unexpected value: {err:?}") + } } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[cfg(unix)] async fn stdout() { - skip_if_no_docker_available_in_macos_ci!(); - let result = run_command_via_docker( - Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])).docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + skip_if_no_docker_available_in_macos_ci!(); + let result = run_command_via_docker( + Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])).docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[cfg(unix)] async fn stdout_and_stderr_and_exit_code() { - skip_if_no_docker_available_in_macos_ci!(); - let result = run_command_via_docker( - Process::new(owned_string_vec(&[ - SH_PATH, - "-c", - "echo -n foo ; echo >&2 -n bar ; exit 1", - ])) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "bar".as_bytes()); - assert_eq!(result.original.exit_code, 1); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + skip_if_no_docker_available_in_macos_ci!(); + let result = run_command_via_docker( + Process::new(owned_string_vec(&[ + SH_PATH, + "-c", + "echo -n foo ; echo >&2 -n bar ; exit 1", + ])) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + assert_eq!(result.stderr_bytes, "bar".as_bytes()); + assert_eq!(result.original.exit_code, 1); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[cfg(unix)] async fn capture_exit_code_signal() { - skip_if_no_docker_available_in_macos_ci!(); - - // Launch a process that kills itself with a signal. - let result = run_command_via_docker( - Process::new(owned_string_vec(&[SH_PATH, "-c", "kill $$"])).docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - // DOCKER-TODO: Figure out a way to capture the signal from the container. Docker does not - // seem to make that available. The `143` code comes from the init process in the container. - // assert_eq!(result.original.exit_code, -15); - assert_eq!(result.original.exit_code, 143); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + skip_if_no_docker_available_in_macos_ci!(); + + // Launch a process that kills itself with a signal. 
+ let result = run_command_via_docker(
+ Process::new(owned_string_vec(&[SH_PATH, "-c", "kill $$"])).docker(IMAGE.to_owned()),
+ )
+ .await
+ .unwrap();
+
+ assert_eq!(result.stdout_bytes, "".as_bytes());
+ assert_eq!(result.stderr_bytes, "".as_bytes());
+ // DOCKER-TODO: Figure out a way to capture the signal from the container. Docker does not
+ // seem to make that available. The `143` code comes from the init process in the container.
+ // assert_eq!(result.original.exit_code, -15);
+ assert_eq!(result.original.exit_code, 143);
+ assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST);
}

fn extract_env(
- content: Vec<u8>,
- exclude_keys: &[&str],
+ content: Vec<u8>,
+ exclude_keys: &[&str],
) -> Result<BTreeMap<String, String>, String> {
- let content =
- String::from_utf8(content).map_err(|_| "Invalid UTF-8 in env output".to_string())?;
- let result = content
- .split('\n')
- .filter(|line| !line.is_empty())
- .map(|line| line.splitn(2, '='))
- .map(|mut parts| {
- (
- parts.next().unwrap().to_string(),
- parts.next().unwrap_or("").to_string(),
- )
- })
- .filter(|x| !exclude_keys.iter().any(|&k| k == x.0))
- .collect();
- Ok(result)
+ let content =
+ String::from_utf8(content).map_err(|_| "Invalid UTF-8 in env output".to_string())?;
+ let result = content
+ .split('\n')
+ .filter(|line| !line.is_empty())
+ .map(|line| line.splitn(2, '='))
+ .map(|mut parts| {
+ (
+ parts.next().unwrap().to_string(),
+ parts.next().unwrap_or("").to_string(),
+ )
+ })
+ .filter(|x| !exclude_keys.iter().any(|&k| k == x.0))
+ .collect();
+ Ok(result)
}

#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[cfg(unix)]
async fn env() {
- skip_if_no_docker_available_in_macos_ci!();
-
- let mut env: BTreeMap<String, String> = BTreeMap::new();
- env.insert("FOO".to_string(), "foo".to_string());
- env.insert("BAR".to_string(), "not foo".to_string());
-
- let result = run_command_via_docker(
- Process::new(owned_string_vec(&["/bin/env"]))
- .env(env.clone())
- .docker(IMAGE.to_owned()),
- )
- .await
- .unwrap();
-
- let exclude_keys = &["PATH", "HOME", "HOSTNAME"];
- let got_env = extract_env(result.stdout_bytes, exclude_keys).unwrap();
- assert_eq!(env, got_env);
+ skip_if_no_docker_available_in_macos_ci!();
+
+ let mut env: BTreeMap<String, String> = BTreeMap::new();
+ env.insert("FOO".to_string(), "foo".to_string());
+ env.insert("BAR".to_string(), "not foo".to_string());
+
+ let result = run_command_via_docker(
+ Process::new(owned_string_vec(&["/bin/env"]))
+ .env(env.clone())
+ .docker(IMAGE.to_owned()),
+ )
+ .await
+ .unwrap();
+
+ let exclude_keys = &["PATH", "HOME", "HOSTNAME"];
+ let got_env = extract_env(result.stdout_bytes, exclude_keys).unwrap();
+ assert_eq!(env, got_env);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[cfg(unix)]
async fn env_is_deterministic() {
- skip_if_no_docker_available_in_macos_ci!();
+ skip_if_no_docker_available_in_macos_ci!();
+
+ fn make_request() -> Process {
+ let mut env = BTreeMap::new();
+ env.insert("FOO".to_string(), "foo".to_string());
+ env.insert("BAR".to_string(), "not foo".to_string());
+ Process::new(owned_string_vec(&["/bin/env"]))
+ .env(env)
+ .docker(IMAGE.to_owned())
+ }

- fn make_request() -> Process {
- let mut env = BTreeMap::new();
- env.insert("FOO".to_string(), "foo".to_string());
- env.insert("BAR".to_string(), "not foo".to_string());
- Process::new(owned_string_vec(&["/bin/env"]))
- .env(env)
- .docker(IMAGE.to_owned())
- }
-
- let result1 = run_command_via_docker(make_request()).await.unwrap();
- let result2 = run_command_via_docker(make_request()).await.unwrap();
-
let exclude_keys = &["PATH", "HOME", "HOSTNAME"]; - let env1 = extract_env(result1.stdout_bytes, exclude_keys).unwrap(); - let env2 = extract_env(result2.stdout_bytes, exclude_keys).unwrap(); - assert_eq!(env1, env2); + let result1 = run_command_via_docker(make_request()).await.unwrap(); + let result2 = run_command_via_docker(make_request()).await.unwrap(); + + let exclude_keys = &["PATH", "HOME", "HOSTNAME"]; + let env1 = extract_env(result1.stdout_bytes, exclude_keys).unwrap(); + let env2 = extract_env(result2.stdout_bytes, exclude_keys).unwrap(); + assert_eq!(env1, env2); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn binary_not_found() { - skip_if_no_docker_available_in_macos_ci!(); - - // Use `xyzzy` as a command that should not exist. - let result = run_command_via_docker( - Process::new(owned_string_vec(&["xyzzy", "-n", "foo"])).docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - let stdout = String::from_utf8(result.stdout_bytes).unwrap(); - assert!(stdout.contains("exec failed")); + skip_if_no_docker_available_in_macos_ci!(); + + // Use `xyzzy` as a command that should not exist. + let result = run_command_via_docker( + Process::new(owned_string_vec(&["xyzzy", "-n", "foo"])).docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + let stdout = String::from_utf8(result.stdout_bytes).unwrap(); + assert!(stdout.contains("exec failed")); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_files_none() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(owned_string_vec(&[SH_PATH, "-c", "exit 0"])).docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(owned_string_vec(&[SH_PATH, "-c", "exit 0"])).docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_files_one() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!("echo -n {} > roland.ext", TestData::roland().string()), - ]) - .output_files(relative_paths(&["roland.ext"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!("echo -n {} > roland.ext", TestData::roland().string()), + ]) + .output_files(relative_paths(&["roland.ext"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + 
TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_dirs() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!( - "/bin/mkdir cats && echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", - TestData::roland().string(), - TestData::catnip().string() - ), - ]) - .output_files(relative_paths(&["treats.ext"]).collect()) - .output_directories(relative_paths(&["cats"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::recursive().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!( + "/bin/mkdir cats && echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", + TestData::roland().string(), + TestData::catnip().string() + ), + ]) + .output_files(relative_paths(&["treats.ext"]).collect()) + .output_directories(relative_paths(&["cats"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::recursive().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_files_many() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!( - "echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", - TestData::roland().string(), - TestData::catnip().string() - ), - ]) - .output_files(relative_paths(&["cats/roland.ext", "treats.ext"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::recursive().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!( + "echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", + TestData::roland().string(), + TestData::catnip().string() + ), + ]) + .output_files(relative_paths(&["cats/roland.ext", "treats.ext"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::recursive().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_files_execution_failure() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!( - "echo -n {} > roland.ext ; exit 1", - TestData::roland().string() - ), - ]) - .output_files(relative_paths(&["roland.ext"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, 
"".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 1); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!( + "echo -n {} > roland.ext ; exit 1", + TestData::roland().string() + ), + ]) + .output_files(relative_paths(&["roland.ext"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 1); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_files_partial_output() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!("echo -n {} > roland.ext", TestData::roland().string()), - ]) - .output_files( - relative_paths(&["roland.ext", "susannah"]) - .into_iter() - .collect(), + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!("echo -n {} > roland.ext", TestData::roland().string()), + ]) + .output_files( + relative_paths(&["roland.ext", "susannah"]) + .into_iter() + .collect(), + ) + .docker(IMAGE.to_owned()), ) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_overlapping_file_and_dir() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!("echo -n {} > cats/roland.ext", TestData::roland().string()), - ]) - .output_files(relative_paths(&["cats/roland.ext"]).collect()) - .output_directories(relative_paths(&["cats"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::nested().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!("echo -n {} > cats/roland.ext", TestData::roland().string()), + ]) + .output_files(relative_paths(&["cats/roland.ext"]).collect()) + .output_directories(relative_paths(&["cats"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + 
TestDirectory::nested().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn append_only_cache_created() { - skip_if_no_docker_available_in_macos_ci!(); - - let name = "geo"; - let dest_base = ".cache"; - let cache_name = CacheName::new(name.to_owned()).unwrap(); - let cache_dest = RelativePath::new(format!("{dest_base}/{name}")).unwrap(); - let result = run_command_via_docker( - Process::new(owned_string_vec(&["/bin/ls", dest_base])) - .append_only_caches(vec![(cache_name, cache_dest)].into_iter().collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, format!("{name}\n").as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + skip_if_no_docker_available_in_macos_ci!(); + + let name = "geo"; + let dest_base = ".cache"; + let cache_name = CacheName::new(name.to_owned()).unwrap(); + let cache_dest = RelativePath::new(format!("{dest_base}/{name}")).unwrap(); + let result = run_command_via_docker( + Process::new(owned_string_vec(&["/bin/ls", dest_base])) + .append_only_caches(vec![(cache_name, cache_dest)].into_iter().collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, format!("{name}\n").as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] #[cfg(unix)] async fn test_apply_chroot() { - let mut env: BTreeMap = BTreeMap::new(); - env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); + let mut env: BTreeMap = BTreeMap::new(); + env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); - let work_dir = TempDir::new().unwrap(); - let mut req = Process::new(owned_string_vec(&["/usr/bin/env"])) - .env(env.clone()) - .docker(IMAGE.to_owned()); - local::apply_chroot(work_dir.path().to_str().unwrap(), &mut req); + let work_dir = TempDir::new().unwrap(); + let mut req = Process::new(owned_string_vec(&["/usr/bin/env"])) + .env(env.clone()) + .docker(IMAGE.to_owned()); + local::apply_chroot(work_dir.path().to_str().unwrap(), &mut req); - let path = format!("/usr/bin:{}/bin", work_dir.path().to_str().unwrap()); + let path = format!("/usr/bin:{}/bin", work_dir.path().to_str().unwrap()); - assert_eq!(&path, req.env.get(&"PATH".to_string()).unwrap()); + assert_eq!(&path, req.env.get(&"PATH".to_string()).unwrap()); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn test_chroot_placeholder() { - skip_if_no_docker_available_in_macos_ci!(); - - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let mut env: BTreeMap = BTreeMap::new(); - env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); - - let work_tmpdir = TempDir::new().unwrap(); - let work_root = work_tmpdir.path().to_owned(); - - let result = run_command_via_docker_in_dir( - Process::new(vec!["/bin/env".to_owned()]) - .env(env.clone()) - .docker(IMAGE.to_owned()), - work_root.clone(), - KeepSandboxes::Always, - &mut workunit, - None, - None, - ) - .await - .unwrap(); - - let got_env = extract_env(result.stdout_bytes, &[]).unwrap(); - let path = format!("/usr/bin:{SANDBOX_BASE_PATH_IN_CONTAINER}"); - assert!(got_env.get(&"PATH".to_string()).unwrap().starts_with(&path)); - assert!(got_env.get(&"PATH".to_string()).unwrap().ends_with("/bin")); + 
skip_if_no_docker_available_in_macos_ci!(); + + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let mut env: BTreeMap = BTreeMap::new(); + env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); + + let work_tmpdir = TempDir::new().unwrap(); + let work_root = work_tmpdir.path().to_owned(); + + let result = run_command_via_docker_in_dir( + Process::new(vec!["/bin/env".to_owned()]) + .env(env.clone()) + .docker(IMAGE.to_owned()), + work_root.clone(), + KeepSandboxes::Always, + &mut workunit, + None, + None, + ) + .await + .unwrap(); + + let got_env = extract_env(result.stdout_bytes, &[]).unwrap(); + let path = format!("/usr/bin:{SANDBOX_BASE_PATH_IN_CONTAINER}"); + assert!(got_env.get(&"PATH".to_string()).unwrap().starts_with(&path)); + assert!(got_env.get(&"PATH".to_string()).unwrap().ends_with("/bin")); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn all_containing_directories_for_outputs_are_created() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!( - // mkdir would normally fail, since birds/ doesn't yet exist, as would echo, since cats/ - // does not exist, but we create the containing directories for all outputs before the - // process executes. - "/bin/mkdir birds/falcons && echo -n {} > cats/roland.ext", - TestData::roland().string() - ), - ]) - .output_files(relative_paths(&["cats/roland.ext"]).collect()) - .output_directories(relative_paths(&["birds/falcons"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::nested_dir_and_file().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!( + // mkdir would normally fail, since birds/ doesn't yet exist, as would echo, since cats/ + // does not exist, but we create the containing directories for all outputs before the + // process executes. + "/bin/mkdir birds/falcons && echo -n {} > cats/roland.ext", + TestData::roland().string() + ), + ]) + .output_files(relative_paths(&["cats/roland.ext"]).collect()) + .output_directories(relative_paths(&["birds/falcons"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::nested_dir_and_file().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn outputs_readable_only_by_container_user_are_captured() { - skip_if_no_docker_available_in_macos_ci!(); + skip_if_no_docker_available_in_macos_ci!(); - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - format!( + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + format!( // Ensure that files are only readable by the container user (which on Linux would usually // mean that a non-root user outside the container would not have access). 
"/bin/mkdir birds/falcons && echo -n {} > cats/roland.ext && chmod o-r -R birds cats", TestData::roland().string() ), - ]) - .output_files(relative_paths(&["cats/roland.ext"]).collect()) - .output_directories(relative_paths(&["birds/falcons"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::nested_dir_and_file().directory_digest() - ); + ]) + .output_files(relative_paths(&["cats/roland.ext"]).collect()) + .output_directories(relative_paths(&["birds/falcons"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::nested_dir_and_file().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn output_empty_dir() { - skip_if_no_docker_available_in_macos_ci!(); - - let result = run_command_via_docker( - Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - "/bin/mkdir falcons".to_string(), - ]) - .output_directories(relative_paths(&["falcons"]).collect()) - .docker(IMAGE.to_owned()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_falcons_dir().directory_digest() - ); + skip_if_no_docker_available_in_macos_ci!(); + + let result = run_command_via_docker( + Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + "/bin/mkdir falcons".to_string(), + ]) + .output_directories(relative_paths(&["falcons"]).collect()) + .docker(IMAGE.to_owned()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_falcons_dir().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn timeout() { - skip_if_no_docker_available_in_macos_ci!(); - - let argv = vec![ - SH_PATH.to_string(), - "-c".to_owned(), - "/bin/echo -n 'Calculating...'; /bin/sleep 5; /bin/echo -n 'European Burmese'".to_string(), - ]; - - let mut process = Process::new(argv).docker(IMAGE.to_owned()); - process.timeout = Some(Duration::from_millis(500)); - process.description = "sleepy-cat".to_string(); - - let result = run_command_via_docker(process).await.unwrap(); - - assert_eq!(result.original.exit_code, -15); - let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); - let stderr = String::from_utf8(result.stderr_bytes.to_vec()).unwrap(); - assert!(&stdout.contains("Calculating...")); - assert!(&stderr.contains("Exceeded timeout")); - assert!(&stderr.contains("sleepy-cat")); + skip_if_no_docker_available_in_macos_ci!(); + + let argv = vec![ + SH_PATH.to_string(), + "-c".to_owned(), + "/bin/echo -n 'Calculating...'; /bin/sleep 5; /bin/echo -n 'European Burmese'".to_string(), + ]; + + let mut process = Process::new(argv).docker(IMAGE.to_owned()); + process.timeout = Some(Duration::from_millis(500)); + process.description = "sleepy-cat".to_string(); + + let result = 
run_command_via_docker(process).await.unwrap(); + + assert_eq!(result.original.exit_code, -15); + let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); + let stderr = String::from_utf8(result.stderr_bytes.to_vec()).unwrap(); + assert!(&stdout.contains("Calculating...")); + assert!(&stderr.contains("Exceeded timeout")); + assert!(&stderr.contains("sleepy-cat")); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn working_directory() { - skip_if_no_docker_available_in_macos_ci!(); - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - - // Prepare the store to contain /cats/roland.ext, because the EPR needs to materialize it and - // then run from the ./cats directory. - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) - .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::nested().directory(), true) + skip_if_no_docker_available_in_macos_ci!(); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + // Prepare the store to contain /cats/roland.ext, because the EPR needs to materialize it and + // then run from the ./cats directory. + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::nested().directory(), true) + .await + .expect("Error saving directory"); + + let work_dir = TempDir::new().unwrap(); + + let mut process = Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + "/bin/ls".to_string(), + ]) + .docker(IMAGE.to_owned()); + process.working_directory = Some(RelativePath::new("cats").unwrap()); + process.output_directories = relative_paths(&["roland.ext"]).collect::>(); + process.input_digests = + InputDigests::with_input_files(TestDirectory::nested().directory_digest()); + process.timeout = Some(Duration::from_secs(1)); + process.description = "confused-cat".to_string(); + + let result = run_command_via_docker_in_dir( + process, + work_dir.path().to_owned(), + KeepSandboxes::Never, + &mut workunit, + Some(store), + Some(executor), + ) .await - .expect("Error saving directory"); - - let work_dir = TempDir::new().unwrap(); - - let mut process = Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - "/bin/ls".to_string(), - ]) - .docker(IMAGE.to_owned()); - process.working_directory = Some(RelativePath::new("cats").unwrap()); - process.output_directories = relative_paths(&["roland.ext"]).collect::>(); - process.input_digests = - InputDigests::with_input_files(TestDirectory::nested().directory_digest()); - process.timeout = Some(Duration::from_secs(1)); - process.description = "confused-cat".to_string(); - - let result = run_command_via_docker_in_dir( - process, - work_dir.path().to_owned(), - KeepSandboxes::Never, - &mut workunit, - Some(store), - Some(executor), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "roland.ext\n".as_bytes()); - 
assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + .unwrap(); + + assert_eq!(result.stdout_bytes, "roland.ext\n".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn immutable_inputs() { - skip_if_no_docker_available_in_macos_ci!(); - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) + skip_if_no_docker_available_in_macos_ci!(); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::containing_falcons_dir().directory(), true) + .await + .expect("Error saving directory"); + + let work_dir = TempDir::new().unwrap(); + + let mut process = Process::new(vec![ + SH_PATH.to_string(), + "-c".to_owned(), + "/bin/ls".to_string(), + ]) + .docker(IMAGE.to_owned()); + process.input_digests = InputDigests::new( + &store, + TestDirectory::containing_falcons_dir().directory_digest(), + { + let mut map = BTreeMap::new(); + map.insert( + RelativePath::new("cats").unwrap(), + TestDirectory::containing_roland().directory_digest(), + ); + map + }, + BTreeSet::default(), + ) .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::containing_falcons_dir().directory(), true) + .unwrap(); + process.timeout = Some(Duration::from_secs(1)); + process.description = "confused-cat".to_string(); + + let result = run_command_via_docker_in_dir( + process, + work_dir.path().to_owned(), + KeepSandboxes::Never, + &mut workunit, + Some(store), + Some(executor), + ) .await - .expect("Error saving directory"); - - let work_dir = TempDir::new().unwrap(); - - let mut process = Process::new(vec![ - SH_PATH.to_string(), - "-c".to_owned(), - "/bin/ls".to_string(), - ]) - .docker(IMAGE.to_owned()); - process.input_digests = InputDigests::new( - &store, - TestDirectory::containing_falcons_dir().directory_digest(), - { - let mut map = BTreeMap::new(); - map.insert( - RelativePath::new("cats").unwrap(), - TestDirectory::containing_roland().directory_digest(), - ); - map - }, - BTreeSet::default(), - ) - .await - .unwrap(); - process.timeout = Some(Duration::from_secs(1)); - process.description = "confused-cat".to_string(); - - let result = run_command_via_docker_in_dir( - process, - work_dir.path().to_owned(), - KeepSandboxes::Never, - &mut workunit, - Some(store), - Some(executor), - ) - .await - .unwrap(); - - let stdout_lines = std::str::from_utf8(&result.stdout_bytes) - .unwrap() - .lines() - 
.collect::>(); - assert_eq!(stdout_lines, hashset! {"falcons", "cats"}); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); + .unwrap(); + + let stdout_lines = std::str::from_utf8(&result.stdout_bytes) + .unwrap() + .lines() + .collect::>(); + assert_eq!(stdout_lines, hashset! {"falcons", "cats"}); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); } async fn run_command_via_docker_in_dir( - mut req: Process, - dir: PathBuf, - cleanup: KeepSandboxes, - workunit: &mut RunningWorkunit, - store: Option, - executor: Option, + mut req: Process, + dir: PathBuf, + cleanup: KeepSandboxes, + workunit: &mut RunningWorkunit, + store: Option, + executor: Option, ) -> Result { - req.execution_environment.platform = platform_for_tests().map_err(ProcessError::Unclassified)?; - let store_dir = TempDir::new().unwrap(); - let executor = executor.unwrap_or_else(task_executor::Executor::new); - let store = - store.unwrap_or_else(|| Store::local_only(executor.clone(), store_dir.path()).unwrap()); - - let root = TempDir::new().unwrap(); - let root_path = root.path().to_owned(); - - let immutable_inputs = ImmutableInputs::new(store.clone(), &root_path).unwrap(); - - let docker = Box::new(DockerOnceCell::new()); - let image_pull_cache = Box::new(ImagePullCache::new()); - let runner = crate::docker::CommandRunner::new( - store.clone(), - executor.clone(), - &docker, - &image_pull_cache, - dir.clone(), - immutable_inputs, - cleanup, - )?; - let result: Result<_, ProcessError> = async { - let original = runner.run(Context::default(), workunit, req).await?; - let stdout_bytes = store - .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) - .await?; - let stderr_bytes = store - .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) - .await?; - Ok((original, stdout_bytes, stderr_bytes)) - } - .await; - let (original, stdout_bytes, stderr_bytes) = result?; - runner.shutdown().await?; - Ok(LocalTestResult { - original, - stdout_bytes, - stderr_bytes, - }) + req.execution_environment.platform = + platform_for_tests().map_err(ProcessError::Unclassified)?; + let store_dir = TempDir::new().unwrap(); + let executor = executor.unwrap_or_else(task_executor::Executor::new); + let store = + store.unwrap_or_else(|| Store::local_only(executor.clone(), store_dir.path()).unwrap()); + + let root = TempDir::new().unwrap(); + let root_path = root.path().to_owned(); + + let immutable_inputs = ImmutableInputs::new(store.clone(), &root_path).unwrap(); + + let docker = Box::new(DockerOnceCell::new()); + let image_pull_cache = Box::new(ImagePullCache::new()); + let runner = crate::docker::CommandRunner::new( + store.clone(), + executor.clone(), + &docker, + &image_pull_cache, + dir.clone(), + immutable_inputs, + cleanup, + )?; + let result: Result<_, ProcessError> = async { + let original = runner.run(Context::default(), workunit, req).await?; + let stdout_bytes = store + .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) + .await?; + let stderr_bytes = store + .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) + .await?; + Ok((original, stdout_bytes, stderr_bytes)) + } + .await; + let (original, stdout_bytes, stderr_bytes) = result?; + runner.shutdown().await?; + Ok(LocalTestResult { + original, + stdout_bytes, + stderr_bytes, + }) } async fn run_command_via_docker(req: Process) -> Result { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let work_dir = 
TempDir::new().unwrap(); - let work_dir_path = work_dir.path().to_owned(); - run_command_via_docker_in_dir( - req, - work_dir_path, - KeepSandboxes::Never, - &mut workunit, - None, - None, - ) - .await + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let work_dir = TempDir::new().unwrap(); + let work_dir_path = work_dir.path().to_owned(); + run_command_via_docker_in_dir( + req, + work_dir_path, + KeepSandboxes::Never, + &mut workunit, + None, + None, + ) + .await } diff --git a/src/rust/engine/process_execution/docker/src/lib.rs b/src/rust/engine/process_execution/docker/src/lib.rs index b11320427fb..abec12a09c0 100644 --- a/src/rust/engine/process_execution/docker/src/lib.rs +++ b/src/rust/engine/process_execution/docker/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] diff --git a/src/rust/engine/process_execution/pe_nailgun/src/lib.rs b/src/rust/engine/process_execution/pe_nailgun/src/lib.rs index 6ea8afbc5df..b369428679f 100644 --- a/src/rust/engine/process_execution/pe_nailgun/src/lib.rs +++ b/src/rust/engine/process_execution/pe_nailgun/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -42,7 +42,7 @@ use workunit_store::{in_workunit, Metric, RunningWorkunit}; use process_execution::local::{prepare_workdir, CapturedWorkdir, ChildOutput}; use process_execution::{ - Context, FallibleProcessResultWithPlatform, InputDigests, NamedCaches, Process, ProcessError, + Context, FallibleProcessResultWithPlatform, InputDigests, NamedCaches, Process, ProcessError, }; #[cfg(test)] @@ -62,45 +62,45 @@ static NAILGUN_MAIN_CLASS: &str = "com.martiansoftware.nailgun.NGServer"; static ARGS_TO_START_NAILGUN: [&str; 1] = [":0"]; fn construct_nailgun_server_request( - client_request: Process, - input_digests: InputDigests, - nailgun_name: &str, - args_for_the_jvm: Vec, + client_request: Process, + input_digests: InputDigests, + nailgun_name: &str, + args_for_the_jvm: Vec, ) -> Process { - let mut full_args = args_for_the_jvm; - full_args.push(NAILGUN_MAIN_CLASS.to_string()); - full_args.extend(ARGS_TO_START_NAILGUN.iter().map(|&a| a.to_string())); + let mut full_args = args_for_the_jvm; + full_args.push(NAILGUN_MAIN_CLASS.to_string()); + full_args.extend(ARGS_TO_START_NAILGUN.iter().map(|&a| a.to_string())); - Process { - argv: full_args, - input_digests, - output_files: BTreeSet::new(), - output_directories: BTreeSet::new(), - timeout: None, - description: format!("nailgun server for {nailgun_name}"), - level: log::Level::Info, - execution_slot_variable: None, - env: client_request.env, - append_only_caches: client_request.append_only_caches, - ..client_request - } + Process { + argv: full_args, + input_digests, + output_files: BTreeSet::new(), + output_directories: BTreeSet::new(), + timeout: None, + description: format!("nailgun server for {nailgun_name}"), + level: log::Level::Info, + execution_slot_variable: None, + env: client_request.env, + append_only_caches: client_request.append_only_caches, + ..client_request + } } fn construct_nailgun_client_request( - original_req: Process, - input_digests: InputDigests, - client_main_class: String, - mut client_args: Vec, + original_req: Process, + input_digests: InputDigests, + client_main_class: String, + mut client_args: Vec, ) -> Process { - client_args.insert(0, client_main_class); - Process { - argv: client_args, - jdk_home: None, - input_digests, - // The append_only_caches are created and preserved by the server. - append_only_caches: BTreeMap::new(), - ..original_req - } + client_args.insert(0, client_main_class); + Process { + argv: client_args, + jdk_home: None, + input_digests, + // The append_only_caches are created and preserved by the server. + append_only_caches: BTreeMap::new(), + ..original_req + } } /// @@ -112,195 +112,199 @@ fn construct_nailgun_client_request( /// Otherwise, it will just delegate to the regular local runner. 
/// pub struct CommandRunner { - nailgun_pool: NailgunPool, - store: Store, - executor: Executor, - named_caches: NamedCaches, - immutable_inputs: ImmutableInputs, -} - -impl CommandRunner { - pub fn new( - workdir_base: PathBuf, + nailgun_pool: NailgunPool, store: Store, executor: Executor, named_caches: NamedCaches, immutable_inputs: ImmutableInputs, - nailgun_pool_size: usize, - ) -> Self { - CommandRunner { - nailgun_pool: NailgunPool::new( - workdir_base, - nailgun_pool_size, - store.clone(), - executor.clone(), - ), - store, - executor, - named_caches, - immutable_inputs, +} + +impl CommandRunner { + pub fn new( + workdir_base: PathBuf, + store: Store, + executor: Executor, + named_caches: NamedCaches, + immutable_inputs: ImmutableInputs, + nailgun_pool_size: usize, + ) -> Self { + CommandRunner { + nailgun_pool: NailgunPool::new( + workdir_base, + nailgun_pool_size, + store.clone(), + executor.clone(), + ), + store, + executor, + named_caches, + immutable_inputs, + } } - } - fn calculate_nailgun_name(main_class: &str) -> String { - format!("nailgun_server_{main_class}") - } + fn calculate_nailgun_name(main_class: &str) -> String { + format!("nailgun_server_{main_class}") + } } impl Debug for CommandRunner { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("nailgun::CommandRunner") - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("nailgun::CommandRunner") + .finish_non_exhaustive() + } } #[async_trait] impl process_execution::CommandRunner for CommandRunner { - async fn run( - &self, - context: Context, - _workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - debug!("Running request under nailgun:\n {:?}", req); + async fn run( + &self, + context: Context, + _workunit: &mut RunningWorkunit, + req: Process, + ) -> Result { + debug!("Running request under nailgun:\n {:?}", req); - in_workunit!( - "run_nailgun_process", - // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit - // renders at the Process's level. - req.level, - desc = Some(req.description.clone()), - |workunit| async move { - workunit.increment_counter(Metric::LocalExecutionRequests, 1); + in_workunit!( + "run_nailgun_process", + // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit + // renders at the Process's level. + req.level, + desc = Some(req.description.clone()), + |workunit| async move { + workunit.increment_counter(Metric::LocalExecutionRequests, 1); - // Separate argument lists, to form distinct EPRs for - // 1. starting the nailgun server - // 2. running the client against it - let ParsedJVMCommandLines { - nailgun_args, - client_args, - client_main_class, - .. - } = ParsedJVMCommandLines::parse_command_lines(&req.argv) - .map_err(ProcessError::Unclassified)?; + // Separate argument lists, to form distinct EPRs for + // 1. starting the nailgun server + // 2. running the client against it + let ParsedJVMCommandLines { + nailgun_args, + client_args, + client_main_class, + .. 
+ } = ParsedJVMCommandLines::parse_command_lines(&req.argv) + .map_err(ProcessError::Unclassified)?; - let nailgun_name = CommandRunner::calculate_nailgun_name(&client_main_class); - let (client_input_digests, server_input_digests) = - req.input_digests.nailgun_client_and_server(); - let client_req = construct_nailgun_client_request( - req.clone(), - client_input_digests, - client_main_class, - client_args, - ); - let server_req = - construct_nailgun_server_request(req, server_input_digests, &nailgun_name, nailgun_args); - trace!("Running request under nailgun:\n {:#?}", &client_req); + let nailgun_name = CommandRunner::calculate_nailgun_name(&client_main_class); + let (client_input_digests, server_input_digests) = + req.input_digests.nailgun_client_and_server(); + let client_req = construct_nailgun_client_request( + req.clone(), + client_input_digests, + client_main_class, + client_args, + ); + let server_req = construct_nailgun_server_request( + req, + server_input_digests, + &nailgun_name, + nailgun_args, + ); + trace!("Running request under nailgun:\n {:#?}", &client_req); - // Get an instance of a nailgun server for this fingerprint, and then run in its directory. - let mut nailgun_process = self - .nailgun_pool - .acquire(server_req, &self.named_caches, &self.immutable_inputs) - .await - .map_err(|e| e.enrich("Failed to connect to nailgun"))?; + // Get an instance of a nailgun server for this fingerprint, and then run in its directory. + let mut nailgun_process = self + .nailgun_pool + .acquire(server_req, &self.named_caches, &self.immutable_inputs) + .await + .map_err(|e| e.enrich("Failed to connect to nailgun"))?; - // Prepare the workdir. - let exclusive_spawn = prepare_workdir( - nailgun_process.workdir_path().to_owned(), - self.nailgun_pool.workdir_base(), - &client_req, - client_req.input_digests.inputs.clone(), - &self.store, - &self.named_caches, - &self.immutable_inputs, - None, - None, - ) - .await?; + // Prepare the workdir. + let exclusive_spawn = prepare_workdir( + nailgun_process.workdir_path().to_owned(), + self.nailgun_pool.workdir_base(), + &client_req, + client_req.input_digests.inputs.clone(), + &self.store, + &self.named_caches, + &self.immutable_inputs, + None, + None, + ) + .await?; - let res = self - .run_and_capture_workdir( - client_req, - context, - self.store.clone(), - self.executor.clone(), - nailgun_process.workdir_path().to_owned(), - (nailgun_process.name().to_owned(), nailgun_process.address()), - exclusive_spawn, - ) - .await; + let res = self + .run_and_capture_workdir( + client_req, + context, + self.store.clone(), + self.executor.clone(), + nailgun_process.workdir_path().to_owned(), + (nailgun_process.name().to_owned(), nailgun_process.address()), + exclusive_spawn, + ) + .await; - // NB: We explicitly release the BorrowedNailgunProcess, because when it is Dropped without - // release, it assumes that it has been canceled and kills the server. - nailgun_process.release().await?; + // NB: We explicitly release the BorrowedNailgunProcess, because when it is Dropped without + // release, it assumes that it has been canceled and kills the server. + nailgun_process.release().await?; - Ok(res?) - } - ) - .await - } + Ok(res?) 
+ } + ) + .await + } - async fn shutdown(&self) -> Result<(), String> { - Ok(()) - } + async fn shutdown(&self) -> Result<(), String> { + Ok(()) + } } #[async_trait] impl CapturedWorkdir for CommandRunner { - type WorkdirToken = (String, SocketAddr); + type WorkdirToken = (String, SocketAddr); - async fn run_in_workdir<'s, 'c, 'w, 'r>( - &'s self, - _context: &'c Context, - workdir_path: &'w Path, - workdir_token: Self::WorkdirToken, - req: Process, - _exclusive_spawn: bool, - ) -> Result>, String> { - let client_workdir = if let Some(working_directory) = &req.working_directory { - workdir_path.join(working_directory) - } else { - workdir_path.to_path_buf() - }; + async fn run_in_workdir<'s, 'c, 'w, 'r>( + &'s self, + _context: &'c Context, + workdir_path: &'w Path, + workdir_token: Self::WorkdirToken, + req: Process, + _exclusive_spawn: bool, + ) -> Result>, String> { + let client_workdir = if let Some(working_directory) = &req.working_directory { + workdir_path.join(working_directory) + } else { + workdir_path.to_path_buf() + }; - let (name, addr) = workdir_token; - debug!("Connected to nailgun instance {} at {}...", name, addr); - let mut child = { - // Run the client request in the nailgun we have active. - let cmd = Command { - command: req.argv[0].clone(), - args: req.argv[1..].to_vec(), - env: req - .env - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(), - working_dir: client_workdir, - }; - TcpStream::connect(addr) - .and_then(move |stream| { - nails::client::handle_connection(nails::Config::default(), stream, cmd, async { - let (_stdin_write, stdin_read) = child_channel::(); - stdin_read - }) - }) - .map_err(|e| format!("Error communicating with nailgun server: {e}")) - .await? - }; + let (name, addr) = workdir_token; + debug!("Connected to nailgun instance {} at {}...", name, addr); + let mut child = { + // Run the client request in the nailgun we have active. + let cmd = Command { + command: req.argv[0].clone(), + args: req.argv[1..].to_vec(), + env: req + .env + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(), + working_dir: client_workdir, + }; + TcpStream::connect(addr) + .and_then(move |stream| { + nails::client::handle_connection(nails::Config::default(), stream, cmd, async { + let (_stdin_write, stdin_read) = child_channel::(); + stdin_read + }) + }) + .map_err(|e| format!("Error communicating with nailgun server: {e}")) + .await? 
+ }; - let output_stream = child - .output_stream - .take() - .unwrap() - .map(|output| match output { - execution::ChildOutput::Stdout(bytes) => Ok(ChildOutput::Stdout(bytes)), - execution::ChildOutput::Stderr(bytes) => Ok(ChildOutput::Stderr(bytes)), - }); - let exit_code = child - .wait() - .map_ok(ChildOutput::Exit) - .map_err(|e| format!("Error communicating with nailgun server: {e}")); + let output_stream = child + .output_stream + .take() + .unwrap() + .map(|output| match output { + execution::ChildOutput::Stdout(bytes) => Ok(ChildOutput::Stdout(bytes)), + execution::ChildOutput::Stderr(bytes) => Ok(ChildOutput::Stderr(bytes)), + }); + let exit_code = child + .wait() + .map_ok(ChildOutput::Exit) + .map_err(|e| format!("Error communicating with nailgun server: {e}")); - Ok(futures::stream::select(output_stream, exit_code.into_stream()).boxed()) - } + Ok(futures::stream::select(output_stream, exit_code.into_stream()).boxed()) + } } diff --git a/src/rust/engine/process_execution/pe_nailgun/src/nailgun_pool.rs b/src/rust/engine/process_execution/pe_nailgun/src/nailgun_pool.rs index 90fad7a9a4f..f78791fe281 100644 --- a/src/rust/engine/process_execution/pe_nailgun/src/nailgun_pool.rs +++ b/src/rust/engine/process_execution/pe_nailgun/src/nailgun_pool.rs @@ -28,22 +28,22 @@ use process_execution::local::prepare_workdir; use process_execution::{NamedCaches, Process, ProcessError}; lazy_static! { - static ref NAILGUN_PORT_REGEX: Regex = Regex::new(r".*\s+port\s+(\d+)\.$").unwrap(); + static ref NAILGUN_PORT_REGEX: Regex = Regex::new(r".*\s+port\s+(\d+)\.$").unwrap(); } struct PoolEntry { - fingerprint: NailgunProcessFingerprint, - last_used: Instant, - // Because `NailgunProcess` instances are started outside of the NailgunPool's lock, the inner - // instance is an `Option`. But since they are started eagerly by the task that adds them to the - // pool, any acquirer that encounters an empty instance here can assume that it died while - // starting, and re-create it. - // - // This uses a `Mutex>` rather than something like `DoubleCheckedCell` because the - // outer `Mutex` is used to track while the `NailgunProcess` is in use. - // - // See also: `NailgunProcessRef`. - process: Arc>>, + fingerprint: NailgunProcessFingerprint, + last_used: Instant, + // Because `NailgunProcess` instances are started outside of the NailgunPool's lock, the inner + // instance is an `Option`. But since they are started eagerly by the task that adds them to the + // pool, any acquirer that encounters an empty instance here can assume that it died while + // starting, and re-create it. + // + // This uses a `Mutex>` rather than something like `DoubleCheckedCell` because the + // outer `Mutex` is used to track while the `NailgunProcess` is in use. + // + // See also: `NailgunProcessRef`. 
+ process: Arc>>, } pub type Port = u16; @@ -57,199 +57,201 @@ pub type Port = u16; /// #[derive(Clone)] pub struct NailgunPool { - workdir_base: PathBuf, - size: usize, - sema: Arc, - store: Store, - executor: Executor, - processes: Arc>>, + workdir_base: PathBuf, + size: usize, + sema: Arc, + store: Store, + executor: Executor, + processes: Arc>>, } impl NailgunPool { - pub fn new(workdir_base: PathBuf, size: usize, store: Store, executor: Executor) -> Self { - NailgunPool { - workdir_base, - size, - sema: Arc::new(Semaphore::new(size)), - store, - executor, - processes: Arc::default(), + pub fn new(workdir_base: PathBuf, size: usize, store: Store, executor: Executor) -> Self { + NailgunPool { + workdir_base, + size, + sema: Arc::new(Semaphore::new(size)), + store, + executor, + processes: Arc::default(), + } } - } - - pub fn workdir_base(&self) -> &Path { - &self.workdir_base - } - - /// - /// Given a name and a `Process` configuration, return a port of a nailgun server running - /// under that name and configuration. - /// - /// If the server is not running, or if it's running with a different configuration, - /// this code will start a new server as a side effect. - /// - pub async fn acquire( - &self, - server_process: Process, - named_caches: &NamedCaches, - immutable_inputs: &ImmutableInputs, - ) -> Result { - let name = server_process.description.clone(); - let requested_fingerprint = - NailgunProcessFingerprint::new(name.clone(), &server_process, &self.store).await?; - let semaphore_acquisition = self.sema.clone().acquire_owned(); - let permit = in_workunit!( - "acquire_nailgun_process", - // TODO: See also `acquire_command_runner_slot` in `bounded::CommandRunner`. - // https://github.com/pantsbuild/pants/issues/14680 - Level::Debug, - |workunit| async move { - let _blocking_token = workunit.blocking(); - semaphore_acquisition - .await - .expect("Semaphore should not have been closed.") - } - ) - .await; - - let mut process_ref = { - let mut processes = self.processes.lock().await; - - // Start by seeing whether there are any idle processes with a matching fingerprint. - if let Some((_idx, process)) = Self::find_usable(&mut processes, &requested_fingerprint)? { - return Ok(BorrowedNailgunProcess::new(process, permit)); - } - - // There wasn't a matching, valid, available process. We need to start one. - if processes.len() >= self.size { - // Find the oldest idle non-matching process and remove it. - let idx = Self::find_lru_idle(&mut processes)?.ok_or_else(|| { - // NB: We've acquired a semaphore permit, so this should be impossible. - "No idle slots in nailgun pool.".to_owned() - })?; - processes.swap_remove(idx); - } - - // Add a new entry for the process, and immediately acquire its mutex, but wait to spawn it - // until we're outside the pool's mutex. - let process = Arc::new(Mutex::new(None)); - processes.push(PoolEntry { - fingerprint: requested_fingerprint.clone(), - last_used: Instant::now(), - process: process.clone(), - }); - process.lock_arc().await - }; - - // Now that we're outside the pool's mutex, spawn and return the process. - *process_ref = Some( - NailgunProcess::start_new( - name.clone(), - server_process, - &self.workdir_base, - &self.store, - self.executor.clone(), - named_caches, - immutable_inputs, - requested_fingerprint, - ) - .await?, - ); + pub fn workdir_base(&self) -> &Path { + &self.workdir_base + } + + /// + /// Given a name and a `Process` configuration, return a port of a nailgun server running + /// under that name and configuration. 
+ /// + /// If the server is not running, or if it's running with a different configuration, + /// this code will start a new server as a side effect. + /// + pub async fn acquire( + &self, + server_process: Process, + named_caches: &NamedCaches, + immutable_inputs: &ImmutableInputs, + ) -> Result { + let name = server_process.description.clone(); + let requested_fingerprint = + NailgunProcessFingerprint::new(name.clone(), &server_process, &self.store).await?; + let semaphore_acquisition = self.sema.clone().acquire_owned(); + let permit = in_workunit!( + "acquire_nailgun_process", + // TODO: See also `acquire_command_runner_slot` in `bounded::CommandRunner`. + // https://github.com/pantsbuild/pants/issues/14680 + Level::Debug, + |workunit| async move { + let _blocking_token = workunit.blocking(); + semaphore_acquisition + .await + .expect("Semaphore should not have been closed.") + } + ) + .await; + + let mut process_ref = { + let mut processes = self.processes.lock().await; + + // Start by seeing whether there are any idle processes with a matching fingerprint. + if let Some((_idx, process)) = + Self::find_usable(&mut processes, &requested_fingerprint)? + { + return Ok(BorrowedNailgunProcess::new(process, permit)); + } + + // There wasn't a matching, valid, available process. We need to start one. + if processes.len() >= self.size { + // Find the oldest idle non-matching process and remove it. + let idx = Self::find_lru_idle(&mut processes)?.ok_or_else(|| { + // NB: We've acquired a semaphore permit, so this should be impossible. + "No idle slots in nailgun pool.".to_owned() + })?; + + processes.swap_remove(idx); + } + + // Add a new entry for the process, and immediately acquire its mutex, but wait to spawn it + // until we're outside the pool's mutex. + let process = Arc::new(Mutex::new(None)); + processes.push(PoolEntry { + fingerprint: requested_fingerprint.clone(), + last_used: Instant::now(), + process: process.clone(), + }); + process.lock_arc().await + }; + + // Now that we're outside the pool's mutex, spawn and return the process. + *process_ref = Some( + NailgunProcess::start_new( + name.clone(), + server_process, + &self.workdir_base, + &self.store, + self.executor.clone(), + named_caches, + immutable_inputs, + requested_fingerprint, + ) + .await?, + ); - Ok(BorrowedNailgunProcess::new(process_ref, permit)) - } - - /// - /// Find a usable process in the pool that matches the given fingerprint. - /// - fn find_usable( - pool_entries: &mut Vec, - fingerprint: &NailgunProcessFingerprint, - ) -> Result, String> { - let mut dead_processes = Vec::new(); - for (idx, pool_entry) in pool_entries.iter_mut().enumerate() { - if &pool_entry.fingerprint != fingerprint { - continue; - } - - match Self::try_use(pool_entry)? { - TryUse::Usable(process) => return Ok(Some((idx, process))), - TryUse::Dead => dead_processes.push(idx), - TryUse::Busy => continue, - } + Ok(BorrowedNailgunProcess::new(process_ref, permit)) } - // NB: We'll only prune dead processes if we don't find a live match, but that's fine. - for dead_process_idx in dead_processes.into_iter().rev() { - pool_entries.swap_remove(dead_process_idx); + + /// + /// Find a usable process in the pool that matches the given fingerprint. 
+ /// + fn find_usable( + pool_entries: &mut Vec, + fingerprint: &NailgunProcessFingerprint, + ) -> Result, String> { + let mut dead_processes = Vec::new(); + for (idx, pool_entry) in pool_entries.iter_mut().enumerate() { + if &pool_entry.fingerprint != fingerprint { + continue; + } + + match Self::try_use(pool_entry)? { + TryUse::Usable(process) => return Ok(Some((idx, process))), + TryUse::Dead => dead_processes.push(idx), + TryUse::Busy => continue, + } + } + // NB: We'll only prune dead processes if we don't find a live match, but that's fine. + for dead_process_idx in dead_processes.into_iter().rev() { + pool_entries.swap_remove(dead_process_idx); + } + Ok(None) } - Ok(None) - } - - /// - /// Find the least recently used idle (but not necessarily usable) process in the pool. - /// - fn find_lru_idle(pool_entries: &mut [PoolEntry]) -> Result, String> { - // 24 hours of clock skew would be surprising? - let mut lru_age = Instant::now() + Duration::from_secs(60 * 60 * 24); - let mut lru = None; - for (idx, pool_entry) in pool_entries.iter_mut().enumerate() { - if pool_entry.process.try_lock_arc().is_some() && pool_entry.last_used < lru_age { - lru = Some(idx); - lru_age = pool_entry.last_used; - } + + /// + /// Find the least recently used idle (but not necessarily usable) process in the pool. + /// + fn find_lru_idle(pool_entries: &mut [PoolEntry]) -> Result, String> { + // 24 hours of clock skew would be surprising? + let mut lru_age = Instant::now() + Duration::from_secs(60 * 60 * 24); + let mut lru = None; + for (idx, pool_entry) in pool_entries.iter_mut().enumerate() { + if pool_entry.process.try_lock_arc().is_some() && pool_entry.last_used < lru_age { + lru = Some(idx); + lru_age = pool_entry.last_used; + } + } + Ok(lru) } - Ok(lru) - } - - fn try_use(pool_entry: &mut PoolEntry) -> Result { - let mut process_guard = if let Some(process_guard) = pool_entry.process.try_lock_arc() { - process_guard - } else { - return Ok(TryUse::Busy); - }; - let process = if let Some(process) = process_guard.as_mut() { - process - } else { - return Ok(TryUse::Dead); - }; - - pool_entry.last_used = Instant::now(); - - debug!( - "Checking if nailgun server {} is still alive at port {}...", - process.name, process.port - ); - // Check if it's alive using the handle. - let status = process - .handle - .try_wait() - .map_err(|e| format!("Error getting the process status from nailgun: {e}"))?; - match status { - None => { - // Process hasn't exited yet. + fn try_use(pool_entry: &mut PoolEntry) -> Result { + let mut process_guard = if let Some(process_guard) = pool_entry.process.try_lock_arc() { + process_guard + } else { + return Ok(TryUse::Busy); + }; + let process = if let Some(process) = process_guard.as_mut() { + process + } else { + return Ok(TryUse::Dead); + }; + + pool_entry.last_used = Instant::now(); + debug!( - "Found nailgun process {}, with fingerprint {:?}", - process.name, process.fingerprint + "Checking if nailgun server {} is still alive at port {}...", + process.name, process.port ); - Ok(TryUse::Usable(process_guard)) - } - Some(status) => { - // The process has exited with some exit code: restart it. - if status.signal() != Some(9) { - // TODO: BorrowedNailgunProcess cancellation uses `kill` currently, so we avoid warning - // for that. In future it would be nice to find a better cancellation strategy. - log::warn!( - "The nailgun server for {} exited with {}.", - process.name, - status - ); + + // Check if it's alive using the handle. 
+ let status = process + .handle + .try_wait() + .map_err(|e| format!("Error getting the process status from nailgun: {e}"))?; + match status { + None => { + // Process hasn't exited yet. + debug!( + "Found nailgun process {}, with fingerprint {:?}", + process.name, process.fingerprint + ); + Ok(TryUse::Usable(process_guard)) + } + Some(status) => { + // The process has exited with some exit code: restart it. + if status.signal() != Some(9) { + // TODO: BorrowedNailgunProcess cancellation uses `kill` currently, so we avoid warning + // for that. In future it would be nice to find a better cancellation strategy. + log::warn!( + "The nailgun server for {} exited with {}.", + process.name, + status + ); + } + Ok(TryUse::Dead) + } } - Ok(TryUse::Dead) - } } - } } /// A borrowed `PoolEntry::process` which has already been validated to be present: see those docs. @@ -259,166 +261,166 @@ impl NailgunPool { type NailgunProcessRef = MutexGuardArc>; enum TryUse { - Usable(NailgunProcessRef), - Busy, - Dead, + Usable(NailgunProcessRef), + Busy, + Dead, } /// Representation of a running nailgun server. pub struct NailgunProcess { - pub name: String, - fingerprint: NailgunProcessFingerprint, - workdir: TempDir, - workdir_include_names: HashSet, - port: Port, - executor: task_executor::Executor, - handle: std::process::Child, + pub name: String, + fingerprint: NailgunProcessFingerprint, + workdir: TempDir, + workdir_include_names: HashSet, + port: Port, + executor: task_executor::Executor, + handle: std::process::Child, } /// Spawn a nailgun process, and read its port from stdout. /// /// NB: Uses blocking APIs, so should be backgrounded on an executor. fn spawn_and_read_port( - process: Process, - workdir: PathBuf, + process: Process, + workdir: PathBuf, ) -> Result<(std::process::Child, Port), String> { - let cmd = process.argv[0].clone(); - // TODO: This is an expensive operation, and thus we info! it. - // If it becomes annoying, we can downgrade the logging to just debug! - info!( - "Starting new nailgun server with cmd: {:?}, args {:?}, in cwd {}", - cmd, - &process.argv[1..], - workdir.display() - ); - - let mut child = std::process::Command::new(&cmd) - .args(&process.argv[1..]) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .env_clear() - .envs(&process.env) - .current_dir(&workdir) - .spawn() - .map_err(|e| { - format!( - "Failed to create child handle for nailgun with cmd: {} options {:#?}: {}", - &cmd, &process, e - ) - })?; - - let stdout = child - .stdout - .as_mut() - .ok_or_else(|| "No stdout found!".to_string()); - let port_line = stdout - .and_then(|stdout| { - let reader = io::BufReader::new(stdout); - reader - .lines() - .next() - .ok_or_else(|| "There is no line ready in the child's output".to_string()) - }) - .and_then(|res| res.map_err(|e| format!("Failed to read stdout from nailgun: {e}"))); - - // If we failed to read a port line and the child has exited, report that. - if port_line.is_err() { - if let Some(exit_status) = child.try_wait().map_err(|e| e.to_string())? { - let mut stderr = String::new(); - child - .stderr - .take() - .unwrap() - .read_to_string(&mut stderr) - .map_err(|e| e.to_string())?; - return Err(format!( - "Nailgun failed to start: exited with {exit_status}, stderr:\n{stderr}" - )); + let cmd = process.argv[0].clone(); + // TODO: This is an expensive operation, and thus we info! it. + // If it becomes annoying, we can downgrade the logging to just debug! 
+ info!( + "Starting new nailgun server with cmd: {:?}, args {:?}, in cwd {}", + cmd, + &process.argv[1..], + workdir.display() + ); + + let mut child = std::process::Command::new(&cmd) + .args(&process.argv[1..]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .env_clear() + .envs(&process.env) + .current_dir(&workdir) + .spawn() + .map_err(|e| { + format!( + "Failed to create child handle for nailgun with cmd: {} options {:#?}: {}", + &cmd, &process, e + ) + })?; + + let stdout = child + .stdout + .as_mut() + .ok_or_else(|| "No stdout found!".to_string()); + let port_line = stdout + .and_then(|stdout| { + let reader = io::BufReader::new(stdout); + reader + .lines() + .next() + .ok_or_else(|| "There is no line ready in the child's output".to_string()) + }) + .and_then(|res| res.map_err(|e| format!("Failed to read stdout from nailgun: {e}"))); + + // If we failed to read a port line and the child has exited, report that. + if port_line.is_err() { + if let Some(exit_status) = child.try_wait().map_err(|e| e.to_string())? { + let mut stderr = String::new(); + child + .stderr + .take() + .unwrap() + .read_to_string(&mut stderr) + .map_err(|e| e.to_string())?; + return Err(format!( + "Nailgun failed to start: exited with {exit_status}, stderr:\n{stderr}" + )); + } } - } - let port_line = port_line?; - - let port_str = &NAILGUN_PORT_REGEX - .captures_iter(&port_line) - .next() - .ok_or_else(|| format!("Output for nailgun server was unexpected:\n{port_line:?}"))?[1]; - let port = port_str - .parse::() - .map_err(|e| format!("Error parsing nailgun port {port_str}: {e}"))?; - - Ok((child, port)) + let port_line = port_line?; + + let port_str = &NAILGUN_PORT_REGEX + .captures_iter(&port_line) + .next() + .ok_or_else(|| format!("Output for nailgun server was unexpected:\n{port_line:?}"))?[1]; + let port = port_str + .parse::() + .map_err(|e| format!("Error parsing nailgun port {port_str}: {e}"))?; + + Ok((child, port)) } impl NailgunProcess { - async fn start_new( - name: String, - startup_options: Process, - workdir_base: &Path, - store: &Store, - executor: Executor, - named_caches: &NamedCaches, - immutable_inputs: &ImmutableInputs, - nailgun_server_fingerprint: NailgunProcessFingerprint, - ) -> Result { - let workdir = tempfile::Builder::new() - .prefix("pants-sandbox-") - .tempdir_in(workdir_base) - .map_err(|err| format!("Error making tempdir for nailgun server: {err:?}"))?; - - // Prepare the workdir, and then list it to identify the base set of names which should be - // preserved across runs. TODO: This is less efficient than computing the set of names - // directly from the Process (or returning them from `prepare_workdir`), but it's also much - // simpler. - prepare_workdir( - workdir.path().to_owned(), - workdir_base, - &startup_options, - startup_options.input_digests.inputs.clone(), - store, - named_caches, - immutable_inputs, - None, - None, - ) - .await?; - let workdir_include_names = list_workdir(workdir.path()).await?; - - // Spawn the process and read its port from stdout. 
- let (child, port) = executor - .spawn_blocking( - { - let workdir = workdir.path().to_owned(); - move || spawn_and_read_port(startup_options, workdir) - }, - |e| Err(format!("Nailgun spawn task failed: {e}")), - ) - .await?; - debug!( - "Created nailgun server process with pid {} and port {}", - child.id(), - port - ); + async fn start_new( + name: String, + startup_options: Process, + workdir_base: &Path, + store: &Store, + executor: Executor, + named_caches: &NamedCaches, + immutable_inputs: &ImmutableInputs, + nailgun_server_fingerprint: NailgunProcessFingerprint, + ) -> Result { + let workdir = tempfile::Builder::new() + .prefix("pants-sandbox-") + .tempdir_in(workdir_base) + .map_err(|err| format!("Error making tempdir for nailgun server: {err:?}"))?; + + // Prepare the workdir, and then list it to identify the base set of names which should be + // preserved across runs. TODO: This is less efficient than computing the set of names + // directly from the Process (or returning them from `prepare_workdir`), but it's also much + // simpler. + prepare_workdir( + workdir.path().to_owned(), + workdir_base, + &startup_options, + startup_options.input_digests.inputs.clone(), + store, + named_caches, + immutable_inputs, + None, + None, + ) + .await?; + let workdir_include_names = list_workdir(workdir.path()).await?; + + // Spawn the process and read its port from stdout. + let (child, port) = executor + .spawn_blocking( + { + let workdir = workdir.path().to_owned(); + move || spawn_and_read_port(startup_options, workdir) + }, + |e| Err(format!("Nailgun spawn task failed: {e}")), + ) + .await?; + debug!( + "Created nailgun server process with pid {} and port {}", + child.id(), + port + ); - Ok(NailgunProcess { - port, - fingerprint: nailgun_server_fingerprint, - workdir, - workdir_include_names, - name, - executor, - handle: child, - }) - } + Ok(NailgunProcess { + port, + fingerprint: nailgun_server_fingerprint, + workdir, + workdir_include_names, + name, + executor, + handle: child, + }) + } } impl Drop for NailgunProcess { - fn drop(&mut self) { - debug!("Exiting nailgun server process {:?}", self.name); - if self.handle.kill().is_ok() { - // NB: This is blocking, but should be a short wait in general. - let _ = self.handle.wait(); + fn drop(&mut self) { + debug!("Exiting nailgun server process {:?}", self.name); + if self.handle.kill().is_ok() { + // NB: This is blocking, but should be a short wait in general. + let _ = self.handle.wait(); + } } - } } /// The fingerprint of an nailgun server process. 
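The `spawn_and_read_port` helper above blocks on the server's first stdout line and pulls the port out of it with `NAILGUN_PORT_REGEX`. A minimal sketch of that extraction, using a hypothetical banner line (only the trailing ` port <digits>.` suffix that the regex requires is taken from the code; the rest of the wording is made up):

// Illustrative only: any first line ending in " port <digits>." satisfies
// NAILGUN_PORT_REGEX; the banner text itself is invented for this sketch.
let port_line = "NGServer 1.0.0 started on all interfaces, port 53421.";
let captures = NAILGUN_PORT_REGEX
    .captures_iter(port_line)
    .next()
    .expect("banner should end with ` port <N>.`");
let port: Port = captures[1].parse().expect("captured digits fit in a u16");
assert_eq!(port, 53421);

If that first line does not match, or the child exits before printing it, the surrounding code falls through to the "Output for nailgun server was unexpected" and "Nailgun failed to start" error paths shown above.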
@@ -428,19 +430,19 @@ impl Drop for NailgunProcess { /// - The path to the jdk #[derive(Clone, Hash, PartialEq, Eq, Debug)] struct NailgunProcessFingerprint { - pub name: String, - pub fingerprint: Fingerprint, + pub name: String, + pub fingerprint: Fingerprint, } impl NailgunProcessFingerprint { - pub async fn new(name: String, nailgun_req: &Process, store: &Store) -> Result { - let nailgun_req_digest = - process_execution::get_digest(nailgun_req, None, None, store, None).await; - Ok(NailgunProcessFingerprint { - name, - fingerprint: nailgun_req_digest.hash, - }) - } + pub async fn new(name: String, nailgun_req: &Process, store: &Store) -> Result { + let nailgun_req_digest = + process_execution::get_digest(nailgun_req, None, None, store, None).await; + Ok(NailgunProcessFingerprint { + name, + fingerprint: nailgun_req_digest.hash, + }) + } } /// @@ -450,115 +452,115 @@ impl NailgunProcessFingerprint { pub struct BorrowedNailgunProcess(Option, OwnedSemaphorePermit); impl BorrowedNailgunProcess { - fn new(process: NailgunProcessRef, permit: OwnedSemaphorePermit) -> Self { - assert!(process.is_some()); - Self(Some(process), permit) - } - - pub fn name(&self) -> &str { - &self.0.as_ref().unwrap().as_ref().unwrap().name - } - - pub fn port(&self) -> u16 { - self.0.as_ref().unwrap().as_ref().unwrap().port - } - - pub fn address(&self) -> SocketAddr { - SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), self.port()) - } - - pub fn workdir_path(&self) -> &Path { - self.0.as_ref().unwrap().as_ref().unwrap().workdir.path() - } - - /// - /// Return the NailgunProcess to the pool. - /// - /// Clears the working directory for the process before returning it. - /// - pub async fn release(&mut self) -> Result<(), String> { - let process = self - .0 - .as_ref() - .expect("release may only be called once.") - .as_ref() - .unwrap(); - - clear_workdir( - &process.executor, - process.workdir.path(), - &process.workdir_include_names, - ) - .await?; - - // Once we've successfully cleaned up, remove the process. - let _ = self.0.take(); - Ok(()) - } + fn new(process: NailgunProcessRef, permit: OwnedSemaphorePermit) -> Self { + assert!(process.is_some()); + Self(Some(process), permit) + } + + pub fn name(&self) -> &str { + &self.0.as_ref().unwrap().as_ref().unwrap().name + } + + pub fn port(&self) -> u16 { + self.0.as_ref().unwrap().as_ref().unwrap().port + } + + pub fn address(&self) -> SocketAddr { + SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), self.port()) + } + + pub fn workdir_path(&self) -> &Path { + self.0.as_ref().unwrap().as_ref().unwrap().workdir.path() + } + + /// + /// Return the NailgunProcess to the pool. + /// + /// Clears the working directory for the process before returning it. + /// + pub async fn release(&mut self) -> Result<(), String> { + let process = self + .0 + .as_ref() + .expect("release may only be called once.") + .as_ref() + .unwrap(); + + clear_workdir( + &process.executor, + process.workdir.path(), + &process.workdir_include_names, + ) + .await?; + + // Once we've successfully cleaned up, remove the process. + let _ = self.0.take(); + Ok(()) + } } impl Drop for BorrowedNailgunProcess { - fn drop(&mut self) { - if let Some(mut process) = self.0.take() { - // Kill the process, but rely on the pool to notice that it is dead and restart it. - debug!( - "Killing nailgun process {:?} due to cancellation.", - process.as_ref().unwrap().name - ); - if process.as_mut().unwrap().handle.kill().is_ok() { - // NB: This is blocking, but should be a short wait in general. 
- let _ = process.as_mut().unwrap().handle.wait(); - } + fn drop(&mut self) { + if let Some(mut process) = self.0.take() { + // Kill the process, but rely on the pool to notice that it is dead and restart it. + debug!( + "Killing nailgun process {:?} due to cancellation.", + process.as_ref().unwrap().name + ); + if process.as_mut().unwrap().handle.kill().is_ok() { + // NB: This is blocking, but should be a short wait in general. + let _ = process.as_mut().unwrap().handle.wait(); + } + } } - } } async fn clear_workdir( - executor: &Executor, - workdir: &Path, - exclude_names: &HashSet, + executor: &Executor, + workdir: &Path, + exclude_names: &HashSet, ) -> Result<(), String> { - // Move all content into a temporary directory. - let garbage_dir = tempfile::Builder::new() - .prefix("pants-sandbox-") - .tempdir_in(workdir.parent().unwrap()) - .map_err(|err| format!("Error making garbage directory for nailgun cleanup: {err:?}"))?; - let moves = list_workdir(workdir) - .await? - .into_iter() - .filter(|n| !exclude_names.contains(n)) - .map(|name| async { - tokio::fs::rename(workdir.join(&name), garbage_dir.path().join(&name)) - .await - .map_err(|e| { - format!( - "Failed to move {} to garbage: {}", - workdir.join(name).display(), - e - ) + // Move all content into a temporary directory. + let garbage_dir = tempfile::Builder::new() + .prefix("pants-sandbox-") + .tempdir_in(workdir.parent().unwrap()) + .map_err(|err| format!("Error making garbage directory for nailgun cleanup: {err:?}"))?; + let moves = list_workdir(workdir) + .await? + .into_iter() + .filter(|n| !exclude_names.contains(n)) + .map(|name| async { + tokio::fs::rename(workdir.join(&name), garbage_dir.path().join(&name)) + .await + .map_err(|e| { + format!( + "Failed to move {} to garbage: {}", + workdir.join(name).display(), + e + ) + }) }) - }) - .collect::>(); - future::try_join_all(moves).await?; + .collect::>(); + future::try_join_all(moves).await?; - // And drop it in the background. - let fut = executor.native_spawn_blocking(move || std::mem::drop(garbage_dir)); - drop(fut); + // And drop it in the background. + let fut = executor.native_spawn_blocking(move || std::mem::drop(garbage_dir)); + drop(fut); - Ok(()) + Ok(()) } async fn list_workdir(workdir: &Path) -> Result, String> { - let mut dir_entries = tokio::fs::read_dir(workdir) - .await - .map_err(|e| format!("Failed to read nailgun process directory: {e}"))?; - let mut names = HashSet::new(); - while let Some(dir_entry) = dir_entries - .next_entry() - .await - .map_err(|e| format!("Failed to read entry in nailgun process directory: {e}"))? - { - names.insert(dir_entry.file_name()); - } - Ok(names) + let mut dir_entries = tokio::fs::read_dir(workdir) + .await + .map_err(|e| format!("Failed to read nailgun process directory: {e}"))?; + let mut names = HashSet::new(); + while let Some(dir_entry) = dir_entries + .next_entry() + .await + .map_err(|e| format!("Failed to read entry in nailgun process directory: {e}"))? + { + names.insert(dir_entry.file_name()); + } + Ok(names) } diff --git a/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines.rs b/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines.rs index 361eaa34c21..ffd1ee6688a 100644 --- a/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines.rs +++ b/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines.rs @@ -8,115 +8,111 @@ use std::slice::Iter; /// to allow for deep fingerprinting. 
#[derive(PartialEq, Eq, Debug)] pub struct ParsedJVMCommandLines { - pub nailgun_args: Vec, - pub client_args: Vec, - pub client_main_class: String, + pub nailgun_args: Vec, + pub client_args: Vec, + pub client_main_class: String, } impl ParsedJVMCommandLines { - /// - /// Given a list of args that one would likely pass to a java call, - /// we automatically split it to generate two argument lists: - /// - nailgun arguments: The list of arguments needed to start the nailgun server. - /// These arguments include everything in the arg list up to (but not including) the main class. - /// These arguments represent roughly JVM options (-Xmx...), and the classpath (-cp ...). - /// - /// - client arguments: The list of arguments that will be used to run the jvm program under nailgun. - /// These arguments can be thought of as "passthrough args" that are sent to the jvm via the nailgun client. - /// These arguments include everything starting from the main class. - /// - /// We assume that: - /// - Every args list has a main class. - /// - There is exactly one argument that doesn't begin with a `-` in the command line before the main class, - /// and it's the value of the classpath (i.e. `-cp scala-library.jar`). - /// - /// We think these assumptions are valid as per: https://github.com/pantsbuild/pants/issues/8387 - /// - pub fn parse_command_lines(args: &[String]) -> Result { - let mut args_to_consume = args.iter(); + /// + /// Given a list of args that one would likely pass to a java call, + /// we automatically split it to generate two argument lists: + /// - nailgun arguments: The list of arguments needed to start the nailgun server. + /// These arguments include everything in the arg list up to (but not including) the main class. + /// These arguments represent roughly JVM options (-Xmx...), and the classpath (-cp ...). + /// + /// - client arguments: The list of arguments that will be used to run the jvm program under nailgun. + /// These arguments can be thought of as "passthrough args" that are sent to the jvm via the nailgun client. + /// These arguments include everything starting from the main class. + /// + /// We assume that: + /// - Every args list has a main class. + /// - There is exactly one argument that doesn't begin with a `-` in the command line before the main class, + /// and it's the value of the classpath (i.e. `-cp scala-library.jar`). 
+ /// + /// We think these assumptions are valid as per: https://github.com/pantsbuild/pants/issues/8387 + /// + pub fn parse_command_lines(args: &[String]) -> Result { + let mut args_to_consume = args.iter(); - let nailgun_args_before_classpath = Self::parse_to_classpath(&mut args_to_consume)?; - let (classpath_flag, classpath_value) = Self::parse_classpath(&mut args_to_consume)?; - let nailgun_args_after_classpath = Self::parse_jvm_args(&mut args_to_consume)?; - let main_class = Self::parse_main_class(&mut args_to_consume)?; - let client_args = Self::parse_to_end(&mut args_to_consume)?; + let nailgun_args_before_classpath = Self::parse_to_classpath(&mut args_to_consume)?; + let (classpath_flag, classpath_value) = Self::parse_classpath(&mut args_to_consume)?; + let nailgun_args_after_classpath = Self::parse_jvm_args(&mut args_to_consume)?; + let main_class = Self::parse_main_class(&mut args_to_consume)?; + let client_args = Self::parse_to_end(&mut args_to_consume)?; - if args_to_consume.clone().peekable().peek().is_some() { - return Err(format!( - "Malformed command line: There are still arguments to consume: {:?}", - &args_to_consume - )); - } + if args_to_consume.clone().peekable().peek().is_some() { + return Err(format!( + "Malformed command line: There are still arguments to consume: {:?}", + &args_to_consume + )); + } - let mut nailgun_args = nailgun_args_before_classpath; - nailgun_args.push(classpath_flag); - nailgun_args.push(classpath_value); - nailgun_args.extend(nailgun_args_after_classpath); + let mut nailgun_args = nailgun_args_before_classpath; + nailgun_args.push(classpath_flag); + nailgun_args.push(classpath_value); + nailgun_args.extend(nailgun_args_after_classpath); - Ok(ParsedJVMCommandLines { - nailgun_args, - client_args, - client_main_class: main_class, - }) - } + Ok(ParsedJVMCommandLines { + nailgun_args, + client_args, + client_main_class: main_class, + }) + } - fn parse_to_classpath(args_to_consume: &mut Iter) -> Result, String> { - Ok( - args_to_consume - .take_while_ref(|elem| !ParsedJVMCommandLines::is_classpath_flag(elem)) - .cloned() - .collect(), - ) - } + fn parse_to_classpath(args_to_consume: &mut Iter) -> Result, String> { + Ok(args_to_consume + .take_while_ref(|elem| !ParsedJVMCommandLines::is_classpath_flag(elem)) + .cloned() + .collect()) + } - fn parse_classpath(args_to_consume: &mut Iter) -> Result<(String, String), String> { - let classpath_flag = args_to_consume - .next() - .filter(|e| ParsedJVMCommandLines::is_classpath_flag(e)) - .ok_or_else(|| "No classpath flag found.".to_string()) - .map(|e| e.clone())?; + fn parse_classpath(args_to_consume: &mut Iter) -> Result<(String, String), String> { + let classpath_flag = args_to_consume + .next() + .filter(|e| ParsedJVMCommandLines::is_classpath_flag(e)) + .ok_or_else(|| "No classpath flag found.".to_string()) + .map(|e| e.clone())?; - let classpath_value = args_to_consume - .next() - .ok_or_else(|| "No classpath value found!".to_string()) - .and_then(|elem| { - if ParsedJVMCommandLines::is_flag(elem) { - Err(format!("Classpath value has incorrect formatting {elem}.")) - } else { - Ok(elem) - } - })? - .clone(); + let classpath_value = args_to_consume + .next() + .ok_or_else(|| "No classpath value found!".to_string()) + .and_then(|elem| { + if ParsedJVMCommandLines::is_flag(elem) { + Err(format!("Classpath value has incorrect formatting {elem}.")) + } else { + Ok(elem) + } + })? 
+ .clone(); - Ok((classpath_flag, classpath_value)) - } + Ok((classpath_flag, classpath_value)) + } - fn parse_jvm_args(args_to_consume: &mut Iter) -> Result, String> { - Ok( - args_to_consume - .take_while_ref(|elem| ParsedJVMCommandLines::is_flag(elem)) - .cloned() - .collect(), - ) - } + fn parse_jvm_args(args_to_consume: &mut Iter) -> Result, String> { + Ok(args_to_consume + .take_while_ref(|elem| ParsedJVMCommandLines::is_flag(elem)) + .cloned() + .collect()) + } - fn parse_main_class(args_to_consume: &mut Iter) -> Result { - args_to_consume - .next() - .filter(|e| !ParsedJVMCommandLines::is_flag(e)) - .ok_or_else(|| "No main class provided.".to_string()) - .map(|e| e.clone()) - } + fn parse_main_class(args_to_consume: &mut Iter) -> Result { + args_to_consume + .next() + .filter(|e| !ParsedJVMCommandLines::is_flag(e)) + .ok_or_else(|| "No main class provided.".to_string()) + .map(|e| e.clone()) + } - fn parse_to_end(args_to_consume: &mut Iter) -> Result, String> { - Ok(args_to_consume.cloned().collect()) - } + fn parse_to_end(args_to_consume: &mut Iter) -> Result, String> { + Ok(args_to_consume.cloned().collect()) + } - fn is_flag(arg: &str) -> bool { - arg.starts_with('-') || arg.starts_with('@') - } + fn is_flag(arg: &str) -> bool { + arg.starts_with('-') || arg.starts_with('@') + } - fn is_classpath_flag(arg: &str) -> bool { - arg == "-cp" || arg == "-classpath" - } + fn is_classpath_flag(arg: &str) -> bool { + arg == "-cp" || arg == "-classpath" + } } diff --git a/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines_tests.rs b/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines_tests.rs index 8e506858193..d1d58f51963 100644 --- a/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines_tests.rs +++ b/src/rust/engine/process_execution/pe_nailgun/src/parsed_jvm_command_lines_tests.rs @@ -5,185 +5,186 @@ use crate::parsed_jvm_command_lines::ParsedJVMCommandLines; // TODO we should be able to use https://docs.rs/crate/derive_builder/0.8.0 #[derive(Debug)] struct CLIBuilder { - jdk: Option, - args_before_classpath: Vec, - classpath_flag: Option, - classpath_value: Option, - args_after_classpath: Vec, - main_class: Option, - client_args: Vec, + jdk: Option, + args_before_classpath: Vec, + classpath_flag: Option, + classpath_value: Option, + args_after_classpath: Vec, + main_class: Option, + client_args: Vec, } impl CLIBuilder { - pub fn empty() -> CLIBuilder { - CLIBuilder { - jdk: None, - args_before_classpath: vec![], - classpath_flag: None, - classpath_value: None, - args_after_classpath: vec![], - main_class: None, - client_args: vec![], + pub fn empty() -> CLIBuilder { + CLIBuilder { + jdk: None, + args_before_classpath: vec![], + classpath_flag: None, + classpath_value: None, + args_after_classpath: vec![], + main_class: None, + client_args: vec![], + } } - } - - pub fn build(&self) -> CLIBuilder { - CLIBuilder { - jdk: self.jdk.clone(), - args_before_classpath: self.args_before_classpath.clone(), - classpath_flag: self.classpath_flag.clone(), - classpath_value: self.classpath_value.clone(), - args_after_classpath: self.args_after_classpath.clone(), - main_class: self.main_class.clone(), - client_args: self.client_args.clone(), + + pub fn build(&self) -> CLIBuilder { + CLIBuilder { + jdk: self.jdk.clone(), + args_before_classpath: self.args_before_classpath.clone(), + classpath_flag: self.classpath_flag.clone(), + classpath_value: self.classpath_value.clone(), + args_after_classpath: self.args_after_classpath.clone(), + 
main_class: self.main_class.clone(), + client_args: self.client_args.clone(), + } + } + + pub fn with_jdk(&mut self) -> &mut CLIBuilder { + self.jdk = Some(".jdk/bin/java".to_string()); + self + } + + pub fn with_nailgun_args(&mut self) -> &mut CLIBuilder { + self.args_before_classpath = vec!["-Xmx4g".to_string()]; + self.args_after_classpath = vec!["-Xmx4g".to_string()]; + self + } + + pub fn with_classpath(&mut self) -> &mut CLIBuilder { + self.with_classpath_flag().with_classpath_value() + } + + pub fn with_classpath_flag(&mut self) -> &mut CLIBuilder { + self.classpath_flag = Some("-cp".to_string()); + self + } + + pub fn with_classpath_value(&mut self) -> &mut CLIBuilder { + self.classpath_value = Some("scala-compiler.jar:scala-library.jar".to_string()); + self + } + + pub fn with_main_class(&mut self) -> &mut CLIBuilder { + self.main_class = Some("org.pantsbuild.zinc.compiler.Main".to_string()); + self + } + + pub fn with_client_args(&mut self) -> &mut CLIBuilder { + self.client_args = vec!["-some-arg-for-zinc".to_string(), "@argfile".to_string()]; + self } - } - - pub fn with_jdk(&mut self) -> &mut CLIBuilder { - self.jdk = Some(".jdk/bin/java".to_string()); - self - } - - pub fn with_nailgun_args(&mut self) -> &mut CLIBuilder { - self.args_before_classpath = vec!["-Xmx4g".to_string()]; - self.args_after_classpath = vec!["-Xmx4g".to_string()]; - self - } - - pub fn with_classpath(&mut self) -> &mut CLIBuilder { - self.with_classpath_flag().with_classpath_value() - } - - pub fn with_classpath_flag(&mut self) -> &mut CLIBuilder { - self.classpath_flag = Some("-cp".to_string()); - self - } - - pub fn with_classpath_value(&mut self) -> &mut CLIBuilder { - self.classpath_value = Some("scala-compiler.jar:scala-library.jar".to_string()); - self - } - - pub fn with_main_class(&mut self) -> &mut CLIBuilder { - self.main_class = Some("org.pantsbuild.zinc.compiler.Main".to_string()); - self - } - - pub fn with_client_args(&mut self) -> &mut CLIBuilder { - self.client_args = vec!["-some-arg-for-zinc".to_string(), "@argfile".to_string()]; - self - } - - pub fn with_everything() -> CLIBuilder { - CLIBuilder::empty() - .with_jdk() - .with_nailgun_args() - .with_classpath() - .with_main_class() - .with_client_args() - .build() - } - - pub fn render_to_full_cli(&self) -> Vec { - let mut cli = vec![]; - cli.extend(self.jdk.clone()); - cli.extend(self.args_before_classpath.clone()); - cli.extend(self.classpath_flag.clone()); - cli.extend(self.classpath_value.clone()); - cli.extend(self.args_after_classpath.clone()); - cli.extend(self.main_class.clone()); - cli.extend(self.client_args.clone()); - cli - } - - pub fn render_to_parsed_args(&self) -> ParsedJVMCommandLines { - let mut nailgun_args: Vec = self.jdk.iter().cloned().collect(); - nailgun_args.extend(self.args_before_classpath.clone()); - nailgun_args.extend(self.classpath_flag.clone()); - nailgun_args.extend(self.classpath_value.clone()); - nailgun_args.extend(self.args_after_classpath.clone()); - ParsedJVMCommandLines { - nailgun_args: nailgun_args, - client_args: self.client_args.clone(), - client_main_class: self.main_class.clone().unwrap(), + + pub fn with_everything() -> CLIBuilder { + CLIBuilder::empty() + .with_jdk() + .with_nailgun_args() + .with_classpath() + .with_main_class() + .with_client_args() + .build() + } + + pub fn render_to_full_cli(&self) -> Vec { + let mut cli = vec![]; + cli.extend(self.jdk.clone()); + cli.extend(self.args_before_classpath.clone()); + cli.extend(self.classpath_flag.clone()); + 
cli.extend(self.classpath_value.clone()); + cli.extend(self.args_after_classpath.clone()); + cli.extend(self.main_class.clone()); + cli.extend(self.client_args.clone()); + cli + } + + pub fn render_to_parsed_args(&self) -> ParsedJVMCommandLines { + let mut nailgun_args: Vec = self.jdk.iter().cloned().collect(); + nailgun_args.extend(self.args_before_classpath.clone()); + nailgun_args.extend(self.classpath_flag.clone()); + nailgun_args.extend(self.classpath_value.clone()); + nailgun_args.extend(self.args_after_classpath.clone()); + ParsedJVMCommandLines { + nailgun_args: nailgun_args, + client_args: self.client_args.clone(), + client_main_class: self.main_class.clone().unwrap(), + } } - } } #[test] fn parses_correctly_formatted_cli() { - let correctly_formatted_cli = CLIBuilder::with_everything(); + let correctly_formatted_cli = CLIBuilder::with_everything(); - let parse_result = - ParsedJVMCommandLines::parse_command_lines(&correctly_formatted_cli.render_to_full_cli()); + let parse_result = + ParsedJVMCommandLines::parse_command_lines(&correctly_formatted_cli.render_to_full_cli()); - assert_eq!( - parse_result, - Ok(correctly_formatted_cli.render_to_parsed_args()) - ) + assert_eq!( + parse_result, + Ok(correctly_formatted_cli.render_to_parsed_args()) + ) } #[test] fn parses_cli_without_jvm_args() { - let cli_without_jvm_args = CLIBuilder::empty() - .with_jdk() - .with_classpath() - .with_main_class() - .with_client_args() - .build(); - - let parse_result = - ParsedJVMCommandLines::parse_command_lines(&cli_without_jvm_args.render_to_full_cli()); - - assert_eq!( - parse_result, - Ok(cli_without_jvm_args.render_to_parsed_args()) - ) + let cli_without_jvm_args = CLIBuilder::empty() + .with_jdk() + .with_classpath() + .with_main_class() + .with_client_args() + .build(); + + let parse_result = + ParsedJVMCommandLines::parse_command_lines(&cli_without_jvm_args.render_to_full_cli()); + + assert_eq!( + parse_result, + Ok(cli_without_jvm_args.render_to_parsed_args()) + ) } #[test] fn fails_to_parse_cli_without_main_class() { - let cli_without_main_class = CLIBuilder::empty() - .with_jdk() - .with_classpath() - .with_client_args() - .build(); + let cli_without_main_class = CLIBuilder::empty() + .with_jdk() + .with_classpath() + .with_client_args() + .build(); - let parse_result = - ParsedJVMCommandLines::parse_command_lines(&cli_without_main_class.render_to_full_cli()); + let parse_result = + ParsedJVMCommandLines::parse_command_lines(&cli_without_main_class.render_to_full_cli()); - assert_eq!(parse_result, Err("No main class provided.".to_string())) + assert_eq!(parse_result, Err("No main class provided.".to_string())) } #[test] fn fails_to_parse_cli_without_classpath() { - let cli_without_classpath = CLIBuilder::empty() - .with_jdk() - .with_nailgun_args() - .with_main_class() - .with_client_args() - .build(); + let cli_without_classpath = CLIBuilder::empty() + .with_jdk() + .with_nailgun_args() + .with_main_class() + .with_client_args() + .build(); - let parse_result = - ParsedJVMCommandLines::parse_command_lines(&cli_without_classpath.render_to_full_cli()); + let parse_result = + ParsedJVMCommandLines::parse_command_lines(&cli_without_classpath.render_to_full_cli()); - assert_eq!(parse_result, Err("No classpath flag found.".to_string())) + assert_eq!(parse_result, Err("No classpath flag found.".to_string())) } #[test] fn fails_to_parse_cli_without_classpath_value() { - let cli_without_classpath_value = CLIBuilder::empty() - .with_jdk() - .with_classpath_flag() - .with_nailgun_args() - 
.with_main_class() - .build(); - - let parse_result = - ParsedJVMCommandLines::parse_command_lines(&cli_without_classpath_value.render_to_full_cli()); - - assert_eq!( - parse_result, - Err("Classpath value has incorrect formatting -Xmx4g.".to_string()) - ) + let cli_without_classpath_value = CLIBuilder::empty() + .with_jdk() + .with_classpath_flag() + .with_nailgun_args() + .with_main_class() + .build(); + + let parse_result = ParsedJVMCommandLines::parse_command_lines( + &cli_without_classpath_value.render_to_full_cli(), + ); + + assert_eq!( + parse_result, + Err("Classpath value has incorrect formatting -Xmx4g.".to_string()) + ) } diff --git a/src/rust/engine/process_execution/pe_nailgun/src/tests.rs b/src/rust/engine/process_execution/pe_nailgun/src/tests.rs index 7148ee9c26c..e9af467413e 100644 --- a/src/rust/engine/process_execution/pe_nailgun/src/tests.rs +++ b/src/rust/engine/process_execution/pe_nailgun/src/tests.rs @@ -12,52 +12,52 @@ use crate::NailgunPool; use crate::{NamedCaches, Process}; fn pool(size: usize) -> (NailgunPool, NamedCaches, ImmutableInputs, TempDir) { - let _ = WorkunitStore::setup_for_tests(); - let base_dir = TempDir::new().unwrap(); - let named_caches_dir = base_dir.path().join("named"); - let store_dir = base_dir.path().join("store"); - let executor = Executor::new(); - let store = Store::local_only(executor.clone(), &store_dir).unwrap(); - - let pool = NailgunPool::new(base_dir.path().to_owned(), size, store.clone(), executor); - ( - pool, - NamedCaches::new_local(named_caches_dir), - ImmutableInputs::new(store, base_dir.path()).unwrap(), - base_dir, - ) + let _ = WorkunitStore::setup_for_tests(); + let base_dir = TempDir::new().unwrap(); + let named_caches_dir = base_dir.path().join("named"); + let store_dir = base_dir.path().join("store"); + let executor = Executor::new(); + let store = Store::local_only(executor.clone(), &store_dir).unwrap(); + + let pool = NailgunPool::new(base_dir.path().to_owned(), size, store.clone(), executor); + ( + pool, + NamedCaches::new_local(named_caches_dir), + ImmutableInputs::new(store, base_dir.path()).unwrap(), + base_dir, + ) } async fn run(pool: &(NailgunPool, NamedCaches, ImmutableInputs, TempDir), port: u16) -> PathBuf { - let mut p = pool - .0 - .acquire( - Process::new(owned_string_vec(&[ - "/bin/bash", - "-c", - &format!("echo Mock port {port}.; sleep 10"), - ])), - &pool.1, - &pool.2, - ) - .await - .unwrap(); - assert_eq!(port, p.port()); - let workdir = p.workdir_path().to_owned(); - p.release().await.unwrap(); - workdir + let mut p = pool + .0 + .acquire( + Process::new(owned_string_vec(&[ + "/bin/bash", + "-c", + &format!("echo Mock port {port}.; sleep 10"), + ])), + &pool.1, + &pool.2, + ) + .await + .unwrap(); + assert_eq!(port, p.port()); + let workdir = p.workdir_path().to_owned(); + p.release().await.unwrap(); + workdir } #[tokio::test] async fn acquire() { - let pool = pool(1); + let pool = pool(1); - // Sequential calls with the same fingerprint reuse the entry. - let workdir_one = run(&pool, 100).await; - let workdir_two = run(&pool, 100).await; - assert_eq!(workdir_one, workdir_two); + // Sequential calls with the same fingerprint reuse the entry. + let workdir_one = run(&pool, 100).await; + let workdir_two = run(&pool, 100).await; + assert_eq!(workdir_one, workdir_two); - // A call with a different fingerprint launches in a new workdir and succeeds. 
- let workdir_three = run(&pool, 200).await; - assert_ne!(workdir_two, workdir_three); + // A call with a different fingerprint launches in a new workdir and succeeds. + let workdir_three = run(&pool, 200).await; + assert_ne!(workdir_two, workdir_three); } diff --git a/src/rust/engine/process_execution/remote/src/lib.rs b/src/rust/engine/process_execution/remote/src/lib.rs index a851b818c2d..e355849bbb8 100644 --- a/src/rust/engine/process_execution/remote/src/lib.rs +++ b/src/rust/engine/process_execution/remote/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] diff --git a/src/rust/engine/process_execution/remote/src/remote.rs b/src/rust/engine/process_execution/remote/src/remote.rs index a24516473a6..a142f7dde0f 100644 --- a/src/rust/engine/process_execution/remote/src/remote.rs +++ b/src/rust/engine/process_execution/remote/src/remote.rs @@ -16,14 +16,14 @@ use log::{debug, trace, warn, Level}; use prost::Message; use protos::gen::build::bazel::remote::execution::v2 as remexec; use protos::gen::google::longrunning::{ - operations_client::OperationsClient, CancelOperationRequest, Operation, + operations_client::OperationsClient, CancelOperationRequest, Operation, }; use protos::gen::google::rpc::{PreconditionFailure, Status as StatusProto}; use rand::{thread_rng, Rng}; use remexec::{ - capabilities_client::CapabilitiesClient, execution_client::ExecutionClient, - execution_stage::Value as ExecutionStageValue, Action, Command, ExecuteRequest, ExecuteResponse, - ExecutedActionMetadata, ServerCapabilities, WaitExecutionRequest, + capabilities_client::CapabilitiesClient, execution_client::ExecutionClient, + execution_stage::Value as ExecutionStageValue, Action, Command, ExecuteRequest, + ExecuteResponse, ExecutedActionMetadata, ServerCapabilities, WaitExecutionRequest, }; use tonic::{Code, Request, Status}; @@ -37,34 +37,34 @@ use remote_provider_reapi::apply_headers; use store::{Store, StoreError}; use task_executor::Executor; use workunit_store::{ - in_workunit, Metric, ObservationMetric, RunId, RunningWorkunit, SpanId, UserMetadataItem, - WorkunitMetadata, WorkunitStore, + in_workunit, Metric, ObservationMetric, RunId, RunningWorkunit, SpanId, UserMetadataItem, + WorkunitMetadata, WorkunitStore, }; use process_execution::{ - make_execute_request, populate_fallible_execution_result, Context, EntireExecuteRequest, - FallibleProcessResultWithPlatform, Process, ProcessError, ProcessExecutionEnvironment, - ProcessResultMetadata, 
ProcessResultSource, + make_execute_request, populate_fallible_execution_result, Context, EntireExecuteRequest, + FallibleProcessResultWithPlatform, Process, ProcessError, ProcessExecutionEnvironment, + ProcessResultMetadata, ProcessResultSource, }; #[derive(Debug)] pub enum OperationOrStatus { - Operation(Operation), - Status(StatusProto), + Operation(Operation), + Status(StatusProto), } #[derive(Debug, PartialEq, Eq)] pub enum ExecutionError { - Fatal(ProcessError), - // Digests are Files and Directories which have been reported to be missing remotely (unlike - // `{Process,Store}Error::MissingDigest`, which indicates that a digest doesn't exist anywhere - // in the configured Stores). May be incomplete. - MissingRemoteDigests(Vec), - // The server indicated that the request hit a timeout. Generally this is the timeout that the - // client has pushed down on the ExecutionRequest. - Timeout, - // String is the error message. - Retryable(String), + Fatal(ProcessError), + // Digests are Files and Directories which have been reported to be missing remotely (unlike + // `{Process,Store}Error::MissingDigest`, which indicates that a digest doesn't exist anywhere + // in the configured Stores). May be incomplete. + MissingRemoteDigests(Vec), + // The server indicated that the request hit a timeout. Generally this is the timeout that the + // client has pushed down on the ExecutionRequest. + Timeout, + // String is the error message. + Retryable(String), } /// Implementation of CommandRunner that runs a command via the Bazel Remote Execution API @@ -84,989 +84,1009 @@ pub enum ExecutionError { /// optimizations to shave off a round-trip in the future. #[derive(Clone)] pub struct CommandRunner { - instance_name: Option, - process_cache_namespace: Option, - append_only_caches_base_path: Option, - store: Store, - executor: Executor, - execution_client: Arc>, - operations_client: Arc>, - overall_deadline: Duration, - retry_interval_duration: Duration, - capabilities_cell: Arc>, - capabilities_client: Arc>, + instance_name: Option, + process_cache_namespace: Option, + append_only_caches_base_path: Option, + store: Store, + executor: Executor, + execution_client: Arc>, + operations_client: Arc>, + overall_deadline: Duration, + retry_interval_duration: Duration, + capabilities_cell: Arc>, + capabilities_client: Arc>, } enum StreamOutcome { - Complete(OperationOrStatus), - StreamClosed, + Complete(OperationOrStatus), + StreamClosed, } enum OperationStreamItem { - Running(ExecutionStageValue), - Outcome(StreamOutcome), + Running(ExecutionStageValue), + Outcome(StreamOutcome), } /// A single remote Operation, with a `Drop` implementation to cancel the work if our client goes /// away. struct RunningOperation { - name: Option, - operations_client: Arc>, - executor: Executor, - process_level: Level, - process_description: String, -} - -impl RunningOperation { - fn new( + name: Option, operations_client: Arc>, executor: Executor, process_level: Level, process_description: String, - ) -> Self { - Self { - name: None, - operations_client, - executor, - process_level, - process_description, +} + +impl RunningOperation { + fn new( + operations_client: Arc>, + executor: Executor, + process_level: Level, + process_description: String, + ) -> Self { + Self { + name: None, + operations_client, + executor, + process_level, + process_description, + } } - } - /// Marks the operation completed, which will avoid attempts to cancel it when this struct is - /// dropped. 
- fn completed(&mut self) { - let _ = self.name.take(); - } + /// Marks the operation completed, which will avoid attempts to cancel it when this struct is + /// dropped. + fn completed(&mut self) { + let _ = self.name.take(); + } } impl Drop for RunningOperation { - fn drop(&mut self) { - if let Some(operation_name) = self.name.take() { - debug!("Canceling remote operation {operation_name}"); - let mut operations_client = self.operations_client.as_ref().clone(); - let fut = self.executor.native_spawn(async move { - operations_client - .cancel_operation(CancelOperationRequest { - name: operation_name, - }) - .await - }); - drop(fut); + fn drop(&mut self) { + if let Some(operation_name) = self.name.take() { + debug!("Canceling remote operation {operation_name}"); + let mut operations_client = self.operations_client.as_ref().clone(); + let fut = self.executor.native_spawn(async move { + operations_client + .cancel_operation(CancelOperationRequest { + name: operation_name, + }) + .await + }); + drop(fut); + } } - } } impl CommandRunner { - /// Construct a new CommandRunner - pub async fn new( - execution_address: &str, - instance_name: Option, - process_cache_namespace: Option, - append_only_caches_base_path: Option, - tls_config: grpc_util::tls::Config, - headers: BTreeMap, - store: Store, - executor: Executor, - overall_deadline: Duration, - retry_interval_duration: Duration, - execution_concurrency_limit: usize, - capabilities_cell_opt: Option>>, - ) -> Result { - let needs_tls = execution_address.starts_with("https://"); - - let tls_client_config: Option<_> = needs_tls.then(|| tls_config.try_into()).transpose()?; - - let execution_endpoint = - grpc_util::create_channel(execution_address, tls_client_config.as_ref()).await?; - - let execution_http_headers = headers_to_http_header_map(&headers)?; - let execution_channel = layered_service( - execution_endpoint, - execution_concurrency_limit, - execution_http_headers, - None, - ); - let execution_client = Arc::new(ExecutionClient::new(execution_channel.clone())); - let operations_client = Arc::new(OperationsClient::new(execution_channel.clone())); - let capabilities_client = Arc::new(CapabilitiesClient::new(execution_channel)); - - let command_runner = CommandRunner { - instance_name, - process_cache_namespace, - append_only_caches_base_path, - execution_client, - operations_client, - store, - executor, - overall_deadline, - retry_interval_duration, - capabilities_cell: capabilities_cell_opt.unwrap_or_else(|| Arc::new(OnceCell::new())), - capabilities_client, - }; - - Ok(command_runner) - } + /// Construct a new CommandRunner + pub async fn new( + execution_address: &str, + instance_name: Option, + process_cache_namespace: Option, + append_only_caches_base_path: Option, + tls_config: grpc_util::tls::Config, + headers: BTreeMap, + store: Store, + executor: Executor, + overall_deadline: Duration, + retry_interval_duration: Duration, + execution_concurrency_limit: usize, + capabilities_cell_opt: Option>>, + ) -> Result { + let needs_tls = execution_address.starts_with("https://"); + + let tls_client_config: Option<_> = needs_tls.then(|| tls_config.try_into()).transpose()?; + + let execution_endpoint = + grpc_util::create_channel(execution_address, tls_client_config.as_ref()).await?; + + let execution_http_headers = headers_to_http_header_map(&headers)?; + let execution_channel = layered_service( + execution_endpoint, + execution_concurrency_limit, + execution_http_headers, + None, + ); + let execution_client = 
Arc::new(ExecutionClient::new(execution_channel.clone())); + let operations_client = Arc::new(OperationsClient::new(execution_channel.clone())); + let capabilities_client = Arc::new(CapabilitiesClient::new(execution_channel)); + + let command_runner = CommandRunner { + instance_name, + process_cache_namespace, + append_only_caches_base_path, + execution_client, + operations_client, + store, + executor, + overall_deadline, + retry_interval_duration, + capabilities_cell: capabilities_cell_opt.unwrap_or_else(|| Arc::new(OnceCell::new())), + capabilities_client, + }; - async fn get_capabilities(&self) -> Result<&remexec::ServerCapabilities, String> { - let capabilities_fut = async { - let mut request = remexec::GetCapabilitiesRequest::default(); - if let Some(s) = self.instance_name.as_ref() { - request.instance_name = s.clone(); - } + Ok(command_runner) + } - let request = apply_headers(Request::new(request), ""); + async fn get_capabilities(&self) -> Result<&remexec::ServerCapabilities, String> { + let capabilities_fut = async { + let mut request = remexec::GetCapabilitiesRequest::default(); + if let Some(s) = self.instance_name.as_ref() { + request.instance_name = s.clone(); + } - let mut client = self.capabilities_client.as_ref().clone(); - client - .get_capabilities(request) - .await - .map(|r| r.into_inner()) - .map_err(status_to_str) - }; + let request = apply_headers(Request::new(request), ""); - self - .capabilities_cell - .get_or_try_init(capabilities_fut) - .await - } + let mut client = self.capabilities_client.as_ref().clone(); + client + .get_capabilities(request) + .await + .map(|r| r.into_inner()) + .map_err(status_to_str) + }; - async fn wait_on_operation_stream_item( - stream: &mut S, - context: &Context, - running_operation: &mut RunningOperation, - start_time_opt: &mut Option, - ) -> OperationStreamItem - where - S: Stream> + Unpin, - { - let item = stream.next().await; - - if let Some(start_time) = start_time_opt.take() { - let timing: Result = Instant::now() - .duration_since(start_time) - .as_micros() - .try_into(); - if let Ok(obs) = timing { - context.workunit_store.record_observation( - ObservationMetric::RemoteExecutionRPCFirstResponseTimeMicros, - obs, - ); - } + self.capabilities_cell + .get_or_try_init(capabilities_fut) + .await } - match item { - Some(Ok(operation)) => { - trace!( - "wait_on_operation_stream (build_id={}): got operation: {:?}", - &context.build_id, - &operation - ); - - // Extract the operation name. - // Note: protobuf can return empty string for an empty field so convert empty strings - // to None. - running_operation.name = Some(operation.name.clone()).filter(|s| !s.trim().is_empty()); - - if operation.done { - // Continue monitoring if the operation is not complete. - OperationStreamItem::Outcome(StreamOutcome::Complete(OperationOrStatus::Operation( - operation, - ))) - } else { - // Otherwise, return to the main loop with the operation as the result. - OperationStreamItem::Running( - Self::maybe_extract_execution_stage(&operation).unwrap_or(ExecutionStageValue::Unknown), - ) - } - } - - Some(Err(err)) => { - debug!("wait_on_operation_stream: got error: {:?}", err); - let status_proto = StatusProto { - code: err.code() as i32, - message: err.message().to_string(), - ..StatusProto::default() - }; - OperationStreamItem::Outcome(StreamOutcome::Complete(OperationOrStatus::Status( - status_proto, - ))) - } - - None => { - // Stream disconnected unexpectedly. 
- debug!("wait_on_operation_stream: unexpected disconnect from RE server"); - OperationStreamItem::Outcome(StreamOutcome::StreamClosed) - } - } - } - - /// Monitors the operation stream returned by the REv2 Execute and WaitExecution methods. - /// Outputs progress reported by the server and returns the next actionable operation - /// or gRPC status back to the main loop (plus the operation name so the main loop can - /// reconnect). - async fn wait_on_operation_stream( - mut stream: S, - context: &Context, - running_operation: &mut RunningOperation, - ) -> StreamOutcome - where - S: Stream> + Unpin + Send, - { - let mut start_time_opt = Some(Instant::now()); - - trace!( - "wait_on_operation_stream (build_id={}): monitoring stream", - &context.build_id - ); - - // If the server returns an `ExecutionStage` other than `Unknown`, then we assume that it - // implements reporting when the operation actually begins `Executing` (as opposed to being - // `Queued`, etc), and will wait to create a workunit until we see the `Executing` stage. - // - // We start by consuming the prefix of the stream before we receive an `Executing` or `Unknown` stage. - loop { - match Self::wait_on_operation_stream_item( - &mut stream, - context, - running_operation, - &mut start_time_opt, - ) - .await - { - OperationStreamItem::Running( - ExecutionStageValue::Unknown | ExecutionStageValue::Executing, - ) => { - // Either the server doesn't know how to report the stage, or the operation has - // actually begun executing serverside: proceed to the suffix. - break; - } - OperationStreamItem::Running(_) => { - // The operation has not reached an ExecutionStage that we recognize as - // "executing" (likely: it is queued, doing a cache lookup, etc): keep waiting. - continue; + async fn wait_on_operation_stream_item( + stream: &mut S, + context: &Context, + running_operation: &mut RunningOperation, + start_time_opt: &mut Option, + ) -> OperationStreamItem + where + S: Stream> + Unpin, + { + let item = stream.next().await; + + if let Some(start_time) = start_time_opt.take() { + let timing: Result = Instant::now() + .duration_since(start_time) + .as_micros() + .try_into(); + if let Ok(obs) = timing { + context.workunit_store.record_observation( + ObservationMetric::RemoteExecutionRPCFirstResponseTimeMicros, + obs, + ); + } } - OperationStreamItem::Outcome(outcome) => return outcome, - } - } - // Start a workunit to represent the execution of the work, and consume the rest of the stream. - in_workunit!( - "run_remote_process", - // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit - // renders at the Process's level. - running_operation.process_level, - desc = Some(running_operation.process_description.clone()), - |_workunit| async move { - loop { - match Self::wait_on_operation_stream_item( - &mut stream, - context, - running_operation, - &mut start_time_opt, - ) - .await - { - OperationStreamItem::Running( - ExecutionStageValue::Queued | ExecutionStageValue::CacheCheck, - ) => { - // The server must have cancelled and requeued the work: although this isn't an error - // per-se, it is much easier for us to re-open the stream than to treat this as a - // nested loop. In particular: - // 1. we can't break/continue out of a workunit - // 2. 
the stream needs to move into the workunit, and can't move back out - break StreamOutcome::StreamClosed; + match item { + Some(Ok(operation)) => { + trace!( + "wait_on_operation_stream (build_id={}): got operation: {:?}", + &context.build_id, + &operation + ); + + // Extract the operation name. + // Note: protobuf can return empty string for an empty field so convert empty strings + // to None. + running_operation.name = + Some(operation.name.clone()).filter(|s| !s.trim().is_empty()); + + if operation.done { + // Continue monitoring if the operation is not complete. + OperationStreamItem::Outcome(StreamOutcome::Complete( + OperationOrStatus::Operation(operation), + )) + } else { + // Otherwise, return to the main loop with the operation as the result. + OperationStreamItem::Running( + Self::maybe_extract_execution_stage(&operation) + .unwrap_or(ExecutionStageValue::Unknown), + ) + } } - OperationStreamItem::Running(_) => { - // The operation is still running. - continue; + + Some(Err(err)) => { + debug!("wait_on_operation_stream: got error: {:?}", err); + let status_proto = StatusProto { + code: err.code() as i32, + message: err.message().to_string(), + ..StatusProto::default() + }; + OperationStreamItem::Outcome(StreamOutcome::Complete(OperationOrStatus::Status( + status_proto, + ))) } - OperationStreamItem::Outcome(outcome) => break outcome, - } - } - } - ) - .await - } - - // Store the remote timings into the workunit store. - fn save_workunit_timings( - &self, - execute_response: &ExecuteResponse, - metadata: &ExecutedActionMetadata, - ) { - let workunit_thread_handle = workunit_store::expect_workunit_store_handle(); - let workunit_store = workunit_thread_handle.store; - let parent_id = workunit_thread_handle.parent_id; - let result_cached = execute_response.cached_result; - - if let (Some(queued_timestamp), Some(worker_start_timestamp)) = ( - metadata.queued_timestamp.as_ref(), - metadata.worker_start_timestamp.as_ref(), - ) { - let span_result = - TimeSpan::from_start_and_end(queued_timestamp, worker_start_timestamp, "remote queue"); - match span_result { - Ok(time_span) => maybe_add_workunit( - result_cached, - "remote execution action scheduling", - Level::Trace, - time_span, - parent_id, - &workunit_store, - WorkunitMetadata::default(), - ), - Err(s) => warn!("{}", s), - } - } - if let (Some(input_fetch_start_timestamp), Some(input_fetch_completed_timestamp)) = ( - metadata.input_fetch_start_timestamp.as_ref(), - metadata.input_fetch_completed_timestamp.as_ref(), - ) { - let span_result = TimeSpan::from_start_and_end( - input_fetch_start_timestamp, - input_fetch_completed_timestamp, - "remote input fetch", - ); - match span_result { - Ok(time_span) => maybe_add_workunit( - result_cached, - "remote execution worker input fetching", - Level::Trace, - time_span, - parent_id, - &workunit_store, - WorkunitMetadata::default(), - ), - Err(s) => warn!("{}", s), - } + None => { + // Stream disconnected unexpectedly. 
+ debug!("wait_on_operation_stream: unexpected disconnect from RE server"); + OperationStreamItem::Outcome(StreamOutcome::StreamClosed) + } + } } - if let (Some(execution_start_timestamp), Some(execution_completed_timestamp)) = ( - metadata.execution_start_timestamp.as_ref(), - metadata.execution_completed_timestamp.as_ref(), - ) { - let span_result = TimeSpan::from_start_and_end( - execution_start_timestamp, - execution_completed_timestamp, - "remote execution", - ); - match span_result { - Ok(time_span) => maybe_add_workunit( - result_cached, - "remote execution worker command executing", - Level::Trace, - time_span, - parent_id, - &workunit_store, - WorkunitMetadata::default(), - ), - Err(s) => warn!("{}", s), - } - } + /// Monitors the operation stream returned by the REv2 Execute and WaitExecution methods. + /// Outputs progress reported by the server and returns the next actionable operation + /// or gRPC status back to the main loop (plus the operation name so the main loop can + /// reconnect). + async fn wait_on_operation_stream( + mut stream: S, + context: &Context, + running_operation: &mut RunningOperation, + ) -> StreamOutcome + where + S: Stream> + Unpin + Send, + { + let mut start_time_opt = Some(Instant::now()); - if let (Some(output_upload_start_timestamp), Some(output_upload_completed_timestamp)) = ( - metadata.output_upload_start_timestamp.as_ref(), - metadata.output_upload_completed_timestamp.as_ref(), - ) { - let span_result = TimeSpan::from_start_and_end( - output_upload_start_timestamp, - output_upload_completed_timestamp, - "remote output store", - ); - match span_result { - Ok(time_span) => maybe_add_workunit( - result_cached, - "remote execution worker output uploading", - Level::Trace, - time_span, - parent_id, - &workunit_store, - WorkunitMetadata::default(), - ), - Err(s) => warn!("{}", s), - } - } - } + trace!( + "wait_on_operation_stream (build_id={}): monitoring stream", + &context.build_id + ); - fn extract_missing_digests(&self, precondition_failure: &PreconditionFailure) -> ExecutionError { - let mut missing_digests = Vec::with_capacity(precondition_failure.violations.len()); + // If the server returns an `ExecutionStage` other than `Unknown`, then we assume that it + // implements reporting when the operation actually begins `Executing` (as opposed to being + // `Queued`, etc), and will wait to create a workunit until we see the `Executing` stage. + // + // We start by consuming the prefix of the stream before we receive an `Executing` or `Unknown` stage. + loop { + match Self::wait_on_operation_stream_item( + &mut stream, + context, + running_operation, + &mut start_time_opt, + ) + .await + { + OperationStreamItem::Running( + ExecutionStageValue::Unknown | ExecutionStageValue::Executing, + ) => { + // Either the server doesn't know how to report the stage, or the operation has + // actually begun executing serverside: proceed to the suffix. + break; + } + OperationStreamItem::Running(_) => { + // The operation has not reached an ExecutionStage that we recognize as + // "executing" (likely: it is queued, doing a cache lookup, etc): keep waiting. 
+ continue; + } + OperationStreamItem::Outcome(outcome) => return outcome, + } + } - for violation in &precondition_failure.violations { - if violation.r#type != "MISSING" { - return ExecutionError::Fatal( - format!("Unknown PreconditionFailure violation: {violation:?}").into(), - ); - } - - let parts: Vec<_> = violation.subject.split('/').collect(); - if parts.len() != 3 || parts[0] != "blobs" { - return ExecutionError::Fatal( - format!( - "Received FailedPrecondition MISSING but didn't recognize subject {}", - violation.subject - ) - .into(), - ); - } + // Start a workunit to represent the execution of the work, and consume the rest of the stream. + in_workunit!( + "run_remote_process", + // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit + // renders at the Process's level. + running_operation.process_level, + desc = Some(running_operation.process_description.clone()), + |_workunit| async move { + loop { + match Self::wait_on_operation_stream_item( + &mut stream, + context, + running_operation, + &mut start_time_opt, + ) + .await + { + OperationStreamItem::Running( + ExecutionStageValue::Queued | ExecutionStageValue::CacheCheck, + ) => { + // The server must have cancelled and requeued the work: although this isn't an error + // per-se, it is much easier for us to re-open the stream than to treat this as a + // nested loop. In particular: + // 1. we can't break/continue out of a workunit + // 2. the stream needs to move into the workunit, and can't move back out + break StreamOutcome::StreamClosed; + } + OperationStreamItem::Running(_) => { + // The operation is still running. + continue; + } + OperationStreamItem::Outcome(outcome) => break outcome, + } + } + } + ) + .await + } - let fingerprint = match Fingerprint::from_hex_string(parts[1]) { - Ok(f) => f, - Err(e) => { - return ExecutionError::Fatal( - format!("Bad digest in missing blob: {}: {}", parts[1], e).into(), - ) + // Store the remote timings into the workunit store. 
+ fn save_workunit_timings( + &self, + execute_response: &ExecuteResponse, + metadata: &ExecutedActionMetadata, + ) { + let workunit_thread_handle = workunit_store::expect_workunit_store_handle(); + let workunit_store = workunit_thread_handle.store; + let parent_id = workunit_thread_handle.parent_id; + let result_cached = execute_response.cached_result; + + if let (Some(queued_timestamp), Some(worker_start_timestamp)) = ( + metadata.queued_timestamp.as_ref(), + metadata.worker_start_timestamp.as_ref(), + ) { + let span_result = TimeSpan::from_start_and_end( + queued_timestamp, + worker_start_timestamp, + "remote queue", + ); + match span_result { + Ok(time_span) => maybe_add_workunit( + result_cached, + "remote execution action scheduling", + Level::Trace, + time_span, + parent_id, + &workunit_store, + WorkunitMetadata::default(), + ), + Err(s) => warn!("{}", s), + } } - }; - let size = match parts[2].parse::() { - Ok(s) => s, - Err(e) => { - return ExecutionError::Fatal( - format!("Missing blob had bad size: {}: {}", parts[2], e).into(), - ) + if let (Some(input_fetch_start_timestamp), Some(input_fetch_completed_timestamp)) = ( + metadata.input_fetch_start_timestamp.as_ref(), + metadata.input_fetch_completed_timestamp.as_ref(), + ) { + let span_result = TimeSpan::from_start_and_end( + input_fetch_start_timestamp, + input_fetch_completed_timestamp, + "remote input fetch", + ); + match span_result { + Ok(time_span) => maybe_add_workunit( + result_cached, + "remote execution worker input fetching", + Level::Trace, + time_span, + parent_id, + &workunit_store, + WorkunitMetadata::default(), + ), + Err(s) => warn!("{}", s), + } } - }; - missing_digests.push(Digest::new(fingerprint, size)); - } + if let (Some(execution_start_timestamp), Some(execution_completed_timestamp)) = ( + metadata.execution_start_timestamp.as_ref(), + metadata.execution_completed_timestamp.as_ref(), + ) { + let span_result = TimeSpan::from_start_and_end( + execution_start_timestamp, + execution_completed_timestamp, + "remote execution", + ); + match span_result { + Ok(time_span) => maybe_add_workunit( + result_cached, + "remote execution worker command executing", + Level::Trace, + time_span, + parent_id, + &workunit_store, + WorkunitMetadata::default(), + ), + Err(s) => warn!("{}", s), + } + } - if missing_digests.is_empty() { - return ExecutionError::Fatal( - "Error from remote execution: FailedPrecondition, but no details" - .to_owned() - .into(), - ); + if let (Some(output_upload_start_timestamp), Some(output_upload_completed_timestamp)) = ( + metadata.output_upload_start_timestamp.as_ref(), + metadata.output_upload_completed_timestamp.as_ref(), + ) { + let span_result = TimeSpan::from_start_and_end( + output_upload_start_timestamp, + output_upload_completed_timestamp, + "remote output store", + ); + match span_result { + Ok(time_span) => maybe_add_workunit( + result_cached, + "remote execution worker output uploading", + Level::Trace, + time_span, + parent_id, + &workunit_store, + WorkunitMetadata::default(), + ), + Err(s) => warn!("{}", s), + } + } } - ExecutionError::MissingRemoteDigests(missing_digests) - } + fn extract_missing_digests( + &self, + precondition_failure: &PreconditionFailure, + ) -> ExecutionError { + let mut missing_digests = Vec::with_capacity(precondition_failure.violations.len()); - /// If set, extract `ExecuteOperationMetadata` from the `Operation`. 
- fn maybe_extract_execution_stage(operation: &Operation) -> Option { - let metadata = operation.metadata.as_ref()?; + for violation in &precondition_failure.violations { + if violation.r#type != "MISSING" { + return ExecutionError::Fatal( + format!("Unknown PreconditionFailure violation: {violation:?}").into(), + ); + } - let eom = remexec::ExecuteOperationMetadata::decode(&metadata.value[..]) - .map(Some) - .unwrap_or_else(|e| { - log::warn!("Invalid ExecuteOperationMetadata from server: {e:?}"); - None - })?; + let parts: Vec<_> = violation.subject.split('/').collect(); + if parts.len() != 3 || parts[0] != "blobs" { + return ExecutionError::Fatal( + format!( + "Received FailedPrecondition MISSING but didn't recognize subject {}", + violation.subject + ) + .into(), + ); + } - ExecutionStageValue::from_i32(eom.stage) - } + let fingerprint = match Fingerprint::from_hex_string(parts[1]) { + Ok(f) => f, + Err(e) => { + return ExecutionError::Fatal( + format!("Bad digest in missing blob: {}: {}", parts[1], e).into(), + ) + } + }; + + let size = match parts[2].parse::() { + Ok(s) => s, + Err(e) => { + return ExecutionError::Fatal( + format!("Missing blob had bad size: {}: {}", parts[2], e).into(), + ) + } + }; + + missing_digests.push(Digest::new(fingerprint, size)); + } - // pub(crate) for testing - pub(crate) async fn extract_execute_response( - &self, - run_id: RunId, - environment: ProcessExecutionEnvironment, - operation_or_status: OperationOrStatus, - ) -> Result { - trace!("Got operation response: {:?}", operation_or_status); - - let status = match operation_or_status { - OperationOrStatus::Operation(operation) => { - assert!(operation.done, "operation was not marked done"); - - use protos::gen::google::longrunning::operation::Result as OperationResult; - let execute_response = match operation.result { - Some(OperationResult::Response(response_any)) => { - remexec::ExecuteResponse::decode(&response_any.value[..]).map_err(|e| { - ExecutionError::Fatal(format!("Invalid ExecuteResponse: {e:?}").into()) - })? - } - Some(OperationResult::Error(rpc_status)) => { - // Infrastructure error. Retry it. - let msg = format_error(&rpc_status); - debug!("got operation error for runid {:?}: {}", &run_id, &msg); - return Err(ExecutionError::Retryable(msg)); - } - None => { - return Err(ExecutionError::Fatal( - "Operation finished but no response supplied" - .to_owned() - .into(), - )); - } - }; + if missing_digests.is_empty() { + return ExecutionError::Fatal( + "Error from remote execution: FailedPrecondition, but no details" + .to_owned() + .into(), + ); + } - debug!("Got (nested) execute response: {:?}", execute_response); + ExecutionError::MissingRemoteDigests(missing_digests) + } - if let Some(ref metadata) = execute_response - .result - .as_ref() - .and_then(|ar| ar.execution_metadata.clone()) - { - self.save_workunit_timings(&execute_response, metadata); - } + /// If set, extract `ExecuteOperationMetadata` from the `Operation`. 
+ fn maybe_extract_execution_stage(operation: &Operation) -> Option { + let metadata = operation.metadata.as_ref()?; - let rpc_status = execute_response.status.unwrap_or_default(); - if rpc_status.code == Code::Ok as i32 { - let action_result = if let Some(ref action_result) = execute_response.result { - action_result - } else { - warn!("REv2 protocol violation: action result not set"); - return Err(ExecutionError::Fatal( - "REv2 protocol violation: action result not set" - .to_owned() - .into(), - )); - }; - - return populate_fallible_execution_result( - self.store.clone(), - run_id, - action_result, - false, - if execute_response.cached_result { - ProcessResultSource::HitRemotely - } else { - ProcessResultSource::Ran - }, - environment, - ) - .await - .map_err(|e| ExecutionError::Fatal(e.into())); - } + let eom = remexec::ExecuteOperationMetadata::decode(&metadata.value[..]) + .map(Some) + .unwrap_or_else(|e| { + log::warn!("Invalid ExecuteOperationMetadata from server: {e:?}"); + None + })?; - rpc_status - } - OperationOrStatus::Status(status) => status, - }; + ExecutionStageValue::from_i32(eom.stage) + } - match Code::from_i32(status.code) { - Code::Ok => unreachable!(), - - Code::DeadlineExceeded => Err(ExecutionError::Timeout), - - Code::FailedPrecondition => { - let details = if status.details.is_empty() { - return Err(ExecutionError::Fatal(status.message.into())); - } else if status.details.len() > 1 { - // TODO(tonic): Should we be able to handle multiple details protos? - return Err(ExecutionError::Fatal( - "too many detail protos for precondition failure" - .to_owned() - .into(), - )); - } else { - &status.details[0] + // pub(crate) for testing + pub(crate) async fn extract_execute_response( + &self, + run_id: RunId, + environment: ProcessExecutionEnvironment, + operation_or_status: OperationOrStatus, + ) -> Result { + trace!("Got operation response: {:?}", operation_or_status); + + let status = match operation_or_status { + OperationOrStatus::Operation(operation) => { + assert!(operation.done, "operation was not marked done"); + + use protos::gen::google::longrunning::operation::Result as OperationResult; + let execute_response = match operation.result { + Some(OperationResult::Response(response_any)) => { + remexec::ExecuteResponse::decode(&response_any.value[..]).map_err(|e| { + ExecutionError::Fatal(format!("Invalid ExecuteResponse: {e:?}").into()) + })? + } + Some(OperationResult::Error(rpc_status)) => { + // Infrastructure error. Retry it. 
+ let msg = format_error(&rpc_status); + debug!("got operation error for runid {:?}: {}", &run_id, &msg); + return Err(ExecutionError::Retryable(msg)); + } + None => { + return Err(ExecutionError::Fatal( + "Operation finished but no response supplied" + .to_owned() + .into(), + )); + } + }; + + debug!("Got (nested) execute response: {:?}", execute_response); + + if let Some(ref metadata) = execute_response + .result + .as_ref() + .and_then(|ar| ar.execution_metadata.clone()) + { + self.save_workunit_timings(&execute_response, metadata); + } + + let rpc_status = execute_response.status.unwrap_or_default(); + if rpc_status.code == Code::Ok as i32 { + let action_result = if let Some(ref action_result) = execute_response.result { + action_result + } else { + warn!("REv2 protocol violation: action result not set"); + return Err(ExecutionError::Fatal( + "REv2 protocol violation: action result not set" + .to_owned() + .into(), + )); + }; + + return populate_fallible_execution_result( + self.store.clone(), + run_id, + action_result, + false, + if execute_response.cached_result { + ProcessResultSource::HitRemotely + } else { + ProcessResultSource::Ran + }, + environment, + ) + .await + .map_err(|e| ExecutionError::Fatal(e.into())); + } + + rpc_status + } + OperationOrStatus::Status(status) => status, }; - let full_name = format!("type.googleapis.com/{}", "google.rpc.PreconditionFailure"); - if details.type_url != full_name { - return Err(ExecutionError::Fatal( - format!( + match Code::from_i32(status.code) { + Code::Ok => unreachable!(), + + Code::DeadlineExceeded => Err(ExecutionError::Timeout), + + Code::FailedPrecondition => { + let details = if status.details.is_empty() { + return Err(ExecutionError::Fatal(status.message.into())); + } else if status.details.len() > 1 { + // TODO(tonic): Should we be able to handle multiple details protos? + return Err(ExecutionError::Fatal( + "too many detail protos for precondition failure" + .to_owned() + .into(), + )); + } else { + &status.details[0] + }; + + let full_name = format!("type.googleapis.com/{}", "google.rpc.PreconditionFailure"); + if details.type_url != full_name { + return Err(ExecutionError::Fatal( + format!( "Received PreconditionFailure, but didn't know how to resolve it: {}, protobuf type {}", status.message, details.type_url ) - .into(), - )); - } + .into(), + )); + } + + // Decode the precondition failure. + let precondition_failure = PreconditionFailure::decode(Cursor::new(&details.value)) + .map_err(|e| { + ExecutionError::Fatal( + format!("Error deserializing PreconditionFailure proto: {e:?}").into(), + ) + })?; + + Err(self.extract_missing_digests(&precondition_failure)) + } - // Decode the precondition failure. 
- let precondition_failure = PreconditionFailure::decode(Cursor::new(&details.value)) - .map_err(|e| { - ExecutionError::Fatal( - format!("Error deserializing PreconditionFailure proto: {e:?}").into(), - ) - })?; - - Err(self.extract_missing_digests(&precondition_failure)) - } - - Code::Aborted - | Code::Internal - | Code::ResourceExhausted - | Code::Unavailable - | Code::Unknown => Err(ExecutionError::Retryable(status.message)), - code => Err(ExecutionError::Fatal( - format!( - "Error from remote execution: {:?}: {:?}", - code, status.message, - ) - .into(), - )), + Code::Aborted + | Code::Internal + | Code::ResourceExhausted + | Code::Unavailable + | Code::Unknown => Err(ExecutionError::Retryable(status.message)), + code => Err(ExecutionError::Fatal( + format!( + "Error from remote execution: {:?}: {:?}", + code, status.message, + ) + .into(), + )), + } } - } - - // Main loop: This function connects to the RE server and submits the given remote execution - // request via the REv2 Execute method. It then monitors the operation stream until the - // request completes. It will reconnect using the REv2 WaitExecution method if the connection - // is dropped. - // - // The `run` method on CommandRunner uses this function to implement the bulk of the - // processing for remote execution requests. The `run` method wraps the call with the method - // with an overall deadline timeout. - async fn run_execute_request( - &self, - execute_request: ExecuteRequest, - process: Process, - context: &Context, - workunit: &mut RunningWorkunit, - ) -> Result { - const MAX_RETRIES: u32 = 5; - const MAX_BACKOFF_DURATION: Duration = Duration::from_secs(10); - - let start_time = Instant::now(); - - let mut running_operation = RunningOperation::new( - self.operations_client.clone(), - self.executor.clone(), - process.level, - process.description.clone(), - ); - let mut num_retries = 0; - - loop { - // If we are currently retrying a request, then delay using an exponential backoff. - if num_retries > 0 { - workunit.increment_counter(Metric::RemoteExecutionRPCRetries, 1); - - let multiplier = thread_rng().gen_range(0..2_u32.pow(num_retries) + 1); - let sleep_time = self.retry_interval_duration * multiplier; - let sleep_time = sleep_time.min(MAX_BACKOFF_DURATION); - debug!("delaying {:?} before retry", sleep_time); - tokio::time::sleep(sleep_time).await; - } - - let rpc_result = match running_operation.name { - None => { - // The request has not been submitted yet. Submit the request using the REv2 - // Execute method. - debug!( + + // Main loop: This function connects to the RE server and submits the given remote execution + // request via the REv2 Execute method. It then monitors the operation stream until the + // request completes. It will reconnect using the REv2 WaitExecution method if the connection + // is dropped. + // + // The `run` method on CommandRunner uses this function to implement the bulk of the + // processing for remote execution requests. The `run` method wraps the call with the method + // with an overall deadline timeout. 
+ async fn run_execute_request( + &self, + execute_request: ExecuteRequest, + process: Process, + context: &Context, + workunit: &mut RunningWorkunit, + ) -> Result { + const MAX_RETRIES: u32 = 5; + const MAX_BACKOFF_DURATION: Duration = Duration::from_secs(10); + + let start_time = Instant::now(); + + let mut running_operation = RunningOperation::new( + self.operations_client.clone(), + self.executor.clone(), + process.level, + process.description.clone(), + ); + let mut num_retries = 0; + + loop { + // If we are currently retrying a request, then delay using an exponential backoff. + if num_retries > 0 { + workunit.increment_counter(Metric::RemoteExecutionRPCRetries, 1); + + let multiplier = thread_rng().gen_range(0..2_u32.pow(num_retries) + 1); + let sleep_time = self.retry_interval_duration * multiplier; + let sleep_time = sleep_time.min(MAX_BACKOFF_DURATION); + debug!("delaying {:?} before retry", sleep_time); + tokio::time::sleep(sleep_time).await; + } + + let rpc_result = match running_operation.name { + None => { + // The request has not been submitted yet. Submit the request using the REv2 + // Execute method. + debug!( "no current operation: submitting execute request: build_id={}; execute_request={:?}", context.build_id, &execute_request ); - workunit.increment_counter(Metric::RemoteExecutionRPCExecute, 1); - let mut client = self.execution_client.as_ref().clone(); - let request = apply_headers(Request::new(execute_request.clone()), &context.build_id); - client.execute(request).await - } - - Some(ref operation_name) => { - // The request has been submitted already. Reconnect to the status stream - // using the REv2 WaitExecution method. - debug!( + workunit.increment_counter(Metric::RemoteExecutionRPCExecute, 1); + let mut client = self.execution_client.as_ref().clone(); + let request = + apply_headers(Request::new(execute_request.clone()), &context.build_id); + client.execute(request).await + } + + Some(ref operation_name) => { + // The request has been submitted already. Reconnect to the status stream + // using the REv2 WaitExecution method. + debug!( "existing operation: reconnecting to operation stream: build_id={}; operation_name={}", context.build_id, operation_name ); - workunit.increment_counter(Metric::RemoteExecutionRPCWaitExecution, 1); - let wait_execution_request = WaitExecutionRequest { - name: operation_name.to_owned(), - }; - let mut client = self.execution_client.as_ref().clone(); - let request = apply_headers(Request::new(wait_execution_request), &context.build_id); - client.wait_execution(request).await - } - }; - - // Take action based on whether we received an output stream or whether there is an - // error to resolve. - let actionable_result = match rpc_result { - Ok(operation_stream_response) => { - // Monitor the operation stream until there is an actionable operation - // or status to interpret. - let operation_stream = operation_stream_response.into_inner(); - let stream_outcome = - Self::wait_on_operation_stream(operation_stream, context, &mut running_operation).await; - - match stream_outcome { - StreamOutcome::Complete(status) => { - trace!( - "wait_on_operation_stream (build_id={}) returned completion={:?}", - context.build_id, - status - ); - // We completed this operation. 
- running_operation.completed(); - status - } - StreamOutcome::StreamClosed => { - trace!( - "wait_on_operation_stream (build_id={}) returned stream close, \ + workunit.increment_counter(Metric::RemoteExecutionRPCWaitExecution, 1); + let wait_execution_request = WaitExecutionRequest { + name: operation_name.to_owned(), + }; + let mut client = self.execution_client.as_ref().clone(); + let request = + apply_headers(Request::new(wait_execution_request), &context.build_id); + client.wait_execution(request).await + } + }; + + // Take action based on whether we received an output stream or whether there is an + // error to resolve. + let actionable_result = match rpc_result { + Ok(operation_stream_response) => { + // Monitor the operation stream until there is an actionable operation + // or status to interpret. + let operation_stream = operation_stream_response.into_inner(); + let stream_outcome = Self::wait_on_operation_stream( + operation_stream, + context, + &mut running_operation, + ) + .await; + + match stream_outcome { + StreamOutcome::Complete(status) => { + trace!( + "wait_on_operation_stream (build_id={}) returned completion={:?}", + context.build_id, + status + ); + // We completed this operation. + running_operation.completed(); + status + } + StreamOutcome::StreamClosed => { + trace!( + "wait_on_operation_stream (build_id={}) returned stream close, \ will retry operation_name={:?}", - context.build_id, - running_operation.name - ); - - // Check if the number of request attempts sent thus far have exceeded the number - // of retries allowed since the last successful connection. (There is no point in - // continually submitting a request if ultimately futile.) - if num_retries >= MAX_RETRIES { - workunit.increment_counter(Metric::RemoteExecutionRPCErrors, 1); - return Err( + context.build_id, + running_operation.name + ); + + // Check if the number of request attempts sent thus far have exceeded the number + // of retries allowed since the last successful connection. (There is no point in + // continually submitting a request if ultimately futile.) + if num_retries >= MAX_RETRIES { + workunit.increment_counter(Metric::RemoteExecutionRPCErrors, 1); + return Err( "Too many failures from server. The last event was the server disconnecting with no error given.".to_owned().into(), ); - } else { - // Increment the retry counter and allow loop to retry. - num_retries += 1; - } - - // Iterate the loop to reconnect to the operation. - continue; + } else { + // Increment the retry counter and allow loop to retry. + num_retries += 1; + } + + // Iterate the loop to reconnect to the operation. + continue; + } + } + } + Err(status) => { + let status_proto = StatusProto { + code: status.code() as i32, + message: status.message().to_owned(), + ..StatusProto::default() + }; + // `OperationOrStatus` always represents a completed operation, so this operation + // is completed. + running_operation.completed(); + OperationOrStatus::Status(status_proto) + } + }; + + match self + .extract_execute_response( + context.run_id, + process.execution_environment.clone(), + actionable_result, + ) + .await + { + Ok(result) => return Ok(result), + Err(err) => match err { + ExecutionError::Fatal(e) => { + workunit.increment_counter(Metric::RemoteExecutionRPCErrors, 1); + return Err(e); + } + ExecutionError::Retryable(e) => { + // Check if the number of request attempts sent thus far have exceeded the number + // of retries allowed since the last successful connection. 
(There is no point in + // continually submitting a request if ultimately futile.) + trace!("retryable error: {}", e); + if num_retries >= MAX_RETRIES { + workunit.increment_counter(Metric::RemoteExecutionRPCErrors, 1); + return Err(format!( + "Too many failures from server. The last error was: {e}" + ) + .into()); + } else { + // Increment the retry counter and allow loop to retry. + num_retries += 1; + } + } + ExecutionError::MissingRemoteDigests(missing_digests) => { + trace!( + "Server reported missing digests; trying to upload: {:?}", + missing_digests, + ); + + let _ = self + .store + .ensure_remote_has_recursive(missing_digests) + .await?; + } + ExecutionError::Timeout => { + workunit.increment_counter(Metric::RemoteExecutionTimeouts, 1); + let result = populate_fallible_execution_result_for_timeout( + &self.store, + context, + &process.description, + process.timeout, + start_time.elapsed(), + process.execution_environment, + ) + .await?; + return Ok(result); + } + }, } - } } - Err(status) => { - let status_proto = StatusProto { - code: status.code() as i32, - message: status.message().to_owned(), - ..StatusProto::default() - }; - // `OperationOrStatus` always represents a completed operation, so this operation - // is completed. - running_operation.completed(); - OperationOrStatus::Status(status_proto) - } - }; - - match self - .extract_execute_response( - context.run_id, - process.execution_environment.clone(), - actionable_result, - ) - .await - { - Ok(result) => return Ok(result), - Err(err) => match err { - ExecutionError::Fatal(e) => { - workunit.increment_counter(Metric::RemoteExecutionRPCErrors, 1); - return Err(e); - } - ExecutionError::Retryable(e) => { - // Check if the number of request attempts sent thus far have exceeded the number - // of retries allowed since the last successful connection. (There is no point in - // continually submitting a request if ultimately futile.) - trace!("retryable error: {}", e); - if num_retries >= MAX_RETRIES { - workunit.increment_counter(Metric::RemoteExecutionRPCErrors, 1); - return Err(format!("Too many failures from server. The last error was: {e}").into()); - } else { - // Increment the retry counter and allow loop to retry. - num_retries += 1; - } - } - ExecutionError::MissingRemoteDigests(missing_digests) => { - trace!( - "Server reported missing digests; trying to upload: {:?}", - missing_digests, - ); - - let _ = self - .store - .ensure_remote_has_recursive(missing_digests) - .await?; - } - ExecutionError::Timeout => { - workunit.increment_counter(Metric::RemoteExecutionTimeouts, 1); - let result = populate_fallible_execution_result_for_timeout( - &self.store, - context, - &process.description, - process.timeout, - start_time.elapsed(), - process.execution_environment, - ) - .await?; - return Ok(result); - } - }, - } } - } } impl Debug for CommandRunner { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("remote::CommandRunner") - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("remote::CommandRunner") + .finish_non_exhaustive() + } } #[async_trait] impl process_execution::CommandRunner for CommandRunner { - /// Run the given Process via the Remote Execution API. - async fn run( - &self, - context: Context, - _workunit: &mut RunningWorkunit, - request: Process, - ) -> Result { - // Retrieve capabilities for this server. 
- let capabilities = self.get_capabilities().await?; - trace!("RE capabilities: {:?}", &capabilities); - - // Construct the REv2 ExecuteRequest and related data for this execution request. - let EntireExecuteRequest { - action, - command, - execute_request, - input_root_digest, - } = make_execute_request( - &request, - self.instance_name.clone(), - self.process_cache_namespace.clone(), - &self.store, - self - .append_only_caches_base_path - .as_ref() - .map(|s| s.as_ref()), - ) - .await?; - let build_id = context.build_id.clone(); - - debug!("Remote execution: {}", request.description); - debug!( - "built REv2 request (build_id={}): action={:?}; command={:?}; execute_request={:?}", - &build_id, action, command, execute_request - ); - - // Record the time that we started to process this request, then compute the ultimate - // deadline for execution of this request. - let deadline_duration = self.overall_deadline + request.timeout.unwrap_or_default(); - - // Ensure the action and command are stored locally. - let (command_digest, action_digest) = - ensure_action_stored_locally(&self.store, &command, &action).await?; - - // Upload the action (and related data, i.e. the embedded command and input files). - ensure_action_uploaded( - &self.store, - command_digest, - action_digest, - Some(input_root_digest), - ) - .await?; + /// Run the given Process via the Remote Execution API. + async fn run( + &self, + context: Context, + _workunit: &mut RunningWorkunit, + request: Process, + ) -> Result { + // Retrieve capabilities for this server. + let capabilities = self.get_capabilities().await?; + trace!("RE capabilities: {:?}", &capabilities); + + // Construct the REv2 ExecuteRequest and related data for this execution request. + let EntireExecuteRequest { + action, + command, + execute_request, + input_root_digest, + } = make_execute_request( + &request, + self.instance_name.clone(), + self.process_cache_namespace.clone(), + &self.store, + self.append_only_caches_base_path + .as_ref() + .map(|s| s.as_ref()), + ) + .await?; + let build_id = context.build_id.clone(); - // Submit the execution request to the RE server for execution. - let context2 = context.clone(); - in_workunit!( - "run_execute_request", - // NB: The process has not actually started running until the server has notified us that it - // has: see `wait_on_operation_stream`. - Level::Debug, - user_metadata = vec![( - "action_digest".to_owned(), - UserMetadataItem::String(format!("{action_digest:?}")), - )], - |workunit| async move { - workunit.increment_counter(Metric::RemoteExecutionRequests, 1); - let result_fut = self.run_execute_request(execute_request, request, &context2, workunit); - - // Detect whether the operation ran or hit the deadline timeout. - match tokio::time::timeout(deadline_duration, result_fut).await { - Ok(Ok(result)) => { - workunit.increment_counter(Metric::RemoteExecutionSuccess, 1); - Ok(result) - } - Ok(Err(err)) => { - workunit.increment_counter(Metric::RemoteExecutionErrors, 1); - Err(err.enrich(&format!("For action {action_digest:?}"))) - } - Err(tokio::time::error::Elapsed { .. }) => { - // The Err in this match arm originates from the timeout future. 
- debug!( - "remote execution for build_id={} timed out after {:?}", - &build_id, deadline_duration - ); - workunit.update_metadata(|initial| { - let initial = initial.map(|(m, _)| m).unwrap_or_default(); - Some(( - WorkunitMetadata { - desc: Some(format!( - "remote execution timed out after {deadline_duration:?}" - )), - ..initial - }, - Level::Error, - )) - }); - workunit.increment_counter(Metric::RemoteExecutionTimeouts, 1); - Err(format!("remote execution timed out after {deadline_duration:?}").into()) - } - } - }, - ) - .await - } + debug!("Remote execution: {}", request.description); + debug!( + "built REv2 request (build_id={}): action={:?}; command={:?}; execute_request={:?}", + &build_id, action, command, execute_request + ); + + // Record the time that we started to process this request, then compute the ultimate + // deadline for execution of this request. + let deadline_duration = self.overall_deadline + request.timeout.unwrap_or_default(); + + // Ensure the action and command are stored locally. + let (command_digest, action_digest) = + ensure_action_stored_locally(&self.store, &command, &action).await?; - async fn shutdown(&self) -> Result<(), String> { - Ok(()) - } + // Upload the action (and related data, i.e. the embedded command and input files). + ensure_action_uploaded( + &self.store, + command_digest, + action_digest, + Some(input_root_digest), + ) + .await?; + + // Submit the execution request to the RE server for execution. + let context2 = context.clone(); + in_workunit!( + "run_execute_request", + // NB: The process has not actually started running until the server has notified us that it + // has: see `wait_on_operation_stream`. + Level::Debug, + user_metadata = vec![( + "action_digest".to_owned(), + UserMetadataItem::String(format!("{action_digest:?}")), + )], + |workunit| async move { + workunit.increment_counter(Metric::RemoteExecutionRequests, 1); + let result_fut = + self.run_execute_request(execute_request, request, &context2, workunit); + + // Detect whether the operation ran or hit the deadline timeout. + match tokio::time::timeout(deadline_duration, result_fut).await { + Ok(Ok(result)) => { + workunit.increment_counter(Metric::RemoteExecutionSuccess, 1); + Ok(result) + } + Ok(Err(err)) => { + workunit.increment_counter(Metric::RemoteExecutionErrors, 1); + Err(err.enrich(&format!("For action {action_digest:?}"))) + } + Err(tokio::time::error::Elapsed { .. }) => { + // The Err in this match arm originates from the timeout future. 
+ debug!( + "remote execution for build_id={} timed out after {:?}", + &build_id, deadline_duration + ); + workunit.update_metadata(|initial| { + let initial = initial.map(|(m, _)| m).unwrap_or_default(); + Some(( + WorkunitMetadata { + desc: Some(format!( + "remote execution timed out after {deadline_duration:?}" + )), + ..initial + }, + Level::Error, + )) + }); + workunit.increment_counter(Metric::RemoteExecutionTimeouts, 1); + Err( + format!("remote execution timed out after {deadline_duration:?}") + .into(), + ) + } + } + }, + ) + .await + } + + async fn shutdown(&self) -> Result<(), String> { + Ok(()) + } } fn maybe_add_workunit( - result_cached: bool, - name: &'static str, - level: Level, - time_span: concrete_time::TimeSpan, - parent_id: Option, - workunit_store: &WorkunitStore, - metadata: WorkunitMetadata, + result_cached: bool, + name: &'static str, + level: Level, + time_span: concrete_time::TimeSpan, + parent_id: Option, + workunit_store: &WorkunitStore, + metadata: WorkunitMetadata, ) { - if !result_cached && workunit_store.max_level() >= level { - let start_time: SystemTime = SystemTime::UNIX_EPOCH + time_span.start.into(); - let end_time: SystemTime = start_time + time_span.duration.into(); - workunit_store.add_completed_workunit(name, level, start_time, end_time, parent_id, metadata); - } + if !result_cached && workunit_store.max_level() >= level { + let start_time: SystemTime = SystemTime::UNIX_EPOCH + time_span.start.into(); + let end_time: SystemTime = start_time + time_span.duration.into(); + workunit_store + .add_completed_workunit(name, level, start_time, end_time, parent_id, metadata); + } } async fn populate_fallible_execution_result_for_timeout( - store: &Store, - context: &Context, - description: &str, - timeout: Option, - elapsed: Duration, - environment: ProcessExecutionEnvironment, + store: &Store, + context: &Context, + description: &str, + timeout: Option, + elapsed: Duration, + environment: ProcessExecutionEnvironment, ) -> Result { - let timeout_msg = if let Some(timeout) = timeout { - format!("user timeout of {timeout:?} after {elapsed:?}") - } else { - format!("server timeout after {elapsed:?}") - }; - let stdout = Bytes::from(format!("Exceeded {timeout_msg} for {description}")); - let stdout_digest = store.store_file_bytes(stdout, true).await?; - - Ok(FallibleProcessResultWithPlatform { - stdout_digest, - stderr_digest: hashing::EMPTY_DIGEST, - exit_code: -libc::SIGTERM, - output_directory: EMPTY_DIRECTORY_DIGEST.clone(), - metadata: ProcessResultMetadata::new( - Some(elapsed.into()), - ProcessResultSource::Ran, - environment, - context.run_id, - ), - }) + let timeout_msg = if let Some(timeout) = timeout { + format!("user timeout of {timeout:?} after {elapsed:?}") + } else { + format!("server timeout after {elapsed:?}") + }; + let stdout = Bytes::from(format!("Exceeded {timeout_msg} for {description}")); + let stdout_digest = store.store_file_bytes(stdout, true).await?; + + Ok(FallibleProcessResultWithPlatform { + stdout_digest, + stderr_digest: hashing::EMPTY_DIGEST, + exit_code: -libc::SIGTERM, + output_directory: EMPTY_DIRECTORY_DIGEST.clone(), + metadata: ProcessResultMetadata::new( + Some(elapsed.into()), + ProcessResultSource::Ran, + environment, + context.run_id, + ), + }) } pub async fn store_proto_locally( - store: &Store, - proto: &P, + store: &Store, + proto: &P, ) -> Result { - store - .store_file_bytes(proto.to_bytes(), true) - .await - .map_err(|e| format!("Error saving proto to local store: {e:?}")) + store + 
.store_file_bytes(proto.to_bytes(), true) + .await + .map_err(|e| format!("Error saving proto to local store: {e:?}")) } pub async fn ensure_action_stored_locally( - store: &Store, - command: &Command, - action: &Action, + store: &Store, + command: &Command, + action: &Action, ) -> Result<(Digest, Digest), String> { - let (command_digest, action_digest) = future::try_join( - store_proto_locally(store, command), - store_proto_locally(store, action), - ) - .await?; + let (command_digest, action_digest) = future::try_join( + store_proto_locally(store, command), + store_proto_locally(store, action), + ) + .await?; - Ok((command_digest, action_digest)) + Ok((command_digest, action_digest)) } /// @@ -1074,36 +1094,36 @@ pub async fn ensure_action_stored_locally( /// whether we are in a remote execution context, or a pure cache-usage context) are uploaded. /// pub async fn ensure_action_uploaded( - store: &Store, - command_digest: Digest, - action_digest: Digest, - input_files: Option, + store: &Store, + command_digest: Digest, + action_digest: Digest, + input_files: Option, ) -> Result<(), StoreError> { - in_workunit!( - "ensure_action_uploaded", - Level::Trace, - desc = Some(format!("ensure action uploaded for {action_digest:?}")), - |_workunit| async move { - let mut digests = vec![command_digest, action_digest]; - if let Some(input_files) = input_files { - // TODO: Port ensure_remote_has_recursive. See #13112. - store - .ensure_directory_digest_persisted(input_files.clone()) - .await?; - digests.push(input_files.todo_as_digest()); - } - let _ = store.ensure_remote_has_recursive(digests).await?; - Ok(()) - }, - ) - .await + in_workunit!( + "ensure_action_uploaded", + Level::Trace, + desc = Some(format!("ensure action uploaded for {action_digest:?}")), + |_workunit| async move { + let mut digests = vec![command_digest, action_digest]; + if let Some(input_files) = input_files { + // TODO: Port ensure_remote_has_recursive. See #13112. 
+ store + .ensure_directory_digest_persisted(input_files.clone()) + .await?; + digests.push(input_files.todo_as_digest()); + } + let _ = store.ensure_remote_has_recursive(digests).await?; + Ok(()) + }, + ) + .await } pub fn format_error(error: &StatusProto) -> String { - let error_code_enum = Code::from_i32(error.code); - let error_code = match error_code_enum { - Code::Unknown => format!("{:?}", error.code), - x => format!("{x:?}"), - }; - format!("{}: {}", error_code, error.message) + let error_code_enum = Code::from_i32(error.code); + let error_code = match error_code_enum { + Code::Unknown => format!("{:?}", error.code), + x => format!("{x:?}"), + }; + format!("{}: {}", error_code, error.message) } diff --git a/src/rust/engine/process_execution/remote/src/remote_cache.rs b/src/rust/engine/process_execution/remote/src/remote_cache.rs index 605d030a956..a5910cec239 100644 --- a/src/rust/engine/process_execution/remote/src/remote_cache.rs +++ b/src/rust/engine/process_execution/remote/src/remote_cache.rs @@ -17,13 +17,13 @@ use remexec::{ActionResult, Command, Tree}; use remote_provider::{choose_action_cache_provider, ActionCacheProvider}; use store::{Store, StoreError}; use workunit_store::{ - in_workunit, Level, Metric, ObservationMetric, RunningWorkunit, WorkunitMetadata, + in_workunit, Level, Metric, ObservationMetric, RunningWorkunit, WorkunitMetadata, }; use process_execution::{ - check_cache_content, populate_fallible_execution_result, CacheContentBehavior, Context, - FallibleProcessResultWithPlatform, Process, ProcessCacheScope, ProcessError, - ProcessExecutionEnvironment, ProcessResultSource, + check_cache_content, populate_fallible_execution_result, CacheContentBehavior, Context, + FallibleProcessResultWithPlatform, Process, ProcessCacheScope, ProcessError, + ProcessExecutionEnvironment, ProcessResultSource, }; use process_execution::{make_execute_request, EntireExecuteRequest}; @@ -34,22 +34,22 @@ pub use remote_provider::RemoteCacheProviderOptions; #[derive(Clone, Copy, Debug, strum_macros::EnumString)] #[strum(serialize_all = "snake_case")] pub enum RemoteCacheWarningsBehavior { - Ignore, - FirstOnly, - Backoff, + Ignore, + FirstOnly, + Backoff, } pub struct RemoteCacheRunnerOptions { - pub inner: Arc, - pub instance_name: Option, - pub process_cache_namespace: Option, - pub executor: task_executor::Executor, - pub store: Store, - pub cache_read: bool, - pub cache_write: bool, - pub warnings_behavior: RemoteCacheWarningsBehavior, - pub cache_content_behavior: CacheContentBehavior, - pub append_only_caches_base_path: Option, + pub inner: Arc, + pub instance_name: Option, + pub process_cache_namespace: Option, + pub executor: task_executor::Executor, + pub store: Store, + pub cache_read: bool, + pub cache_write: bool, + pub warnings_behavior: RemoteCacheWarningsBehavior, + pub cache_content_behavior: CacheContentBehavior, + pub append_only_caches_base_path: Option, } /// This `CommandRunner` implementation caches results remotely using the Action Cache service @@ -62,259 +62,259 @@ pub struct RemoteCacheRunnerOptions { /// then locally. 
#[derive(Clone)] pub struct CommandRunner { - inner: Arc, - instance_name: Option, - process_cache_namespace: Option, - append_only_caches_base_path: Option, - executor: task_executor::Executor, - store: Store, - provider: Arc, - cache_read: bool, - cache_write: bool, - cache_content_behavior: CacheContentBehavior, - warnings_behavior: RemoteCacheWarningsBehavior, - read_errors_counter: Arc>>, - write_errors_counter: Arc>>, + inner: Arc, + instance_name: Option, + process_cache_namespace: Option, + append_only_caches_base_path: Option, + executor: task_executor::Executor, + store: Store, + provider: Arc, + cache_read: bool, + cache_write: bool, + cache_content_behavior: CacheContentBehavior, + warnings_behavior: RemoteCacheWarningsBehavior, + read_errors_counter: Arc>>, + write_errors_counter: Arc>>, } impl CommandRunner { - pub fn new( - RemoteCacheRunnerOptions { - inner, - instance_name, - process_cache_namespace, - executor, - store, - cache_read, - cache_write, - warnings_behavior, - cache_content_behavior, - append_only_caches_base_path, - }: RemoteCacheRunnerOptions, - provider: Arc, - ) -> Self { - CommandRunner { - inner, - instance_name, - process_cache_namespace, - append_only_caches_base_path, - executor, - store, - provider, - cache_read, - cache_write, - cache_content_behavior, - warnings_behavior, - read_errors_counter: Arc::new(Mutex::new(BTreeMap::new())), - write_errors_counter: Arc::new(Mutex::new(BTreeMap::new())), + pub fn new( + RemoteCacheRunnerOptions { + inner, + instance_name, + process_cache_namespace, + executor, + store, + cache_read, + cache_write, + warnings_behavior, + cache_content_behavior, + append_only_caches_base_path, + }: RemoteCacheRunnerOptions, + provider: Arc, + ) -> Self { + CommandRunner { + inner, + instance_name, + process_cache_namespace, + append_only_caches_base_path, + executor, + store, + provider, + cache_read, + cache_write, + cache_content_behavior, + warnings_behavior, + read_errors_counter: Arc::new(Mutex::new(BTreeMap::new())), + write_errors_counter: Arc::new(Mutex::new(BTreeMap::new())), + } + } + + pub async fn from_provider_options( + runner_options: RemoteCacheRunnerOptions, + provider_options: RemoteCacheProviderOptions, + ) -> Result { + let provider = choose_action_cache_provider(provider_options).await?; + Ok(Self::new(runner_options, provider)) } - } - - pub async fn from_provider_options( - runner_options: RemoteCacheRunnerOptions, - provider_options: RemoteCacheProviderOptions, - ) -> Result { - let provider = choose_action_cache_provider(provider_options).await?; - Ok(Self::new(runner_options, provider)) - } - - /// Create a REAPI `Tree` protobuf for an output directory by traversing down from a Pants - /// merged final output directory to find the specific path to extract. (REAPI requires - /// output directories to be stored as `Tree` protos that contain all of the `Directory` - /// protos that constitute the directory tree.) - /// - /// Note that the Tree does not include the directory_path as a prefix, per REAPI. This path - /// gets stored on the OutputDirectory proto. - /// - /// Returns the created Tree and any File Digests referenced within it. If the output directory - /// does not exist, then returns Ok(None). - pub(crate) fn make_tree_for_output_directory( - root_trie: &DigestTrie, - directory_path: RelativePath, - ) -> Result)>, String> { - let sub_trie = match root_trie.entry(&directory_path)? 
{ - None => return Ok(None), - Some(directory::Entry::Directory(d)) => d.tree(), - Some(directory::Entry::Symlink(_)) => { - return Err(format!( - "Declared output directory path {directory_path:?} in output \ + + /// Create a REAPI `Tree` protobuf for an output directory by traversing down from a Pants + /// merged final output directory to find the specific path to extract. (REAPI requires + /// output directories to be stored as `Tree` protos that contain all of the `Directory` + /// protos that constitute the directory tree.) + /// + /// Note that the Tree does not include the directory_path as a prefix, per REAPI. This path + /// gets stored on the OutputDirectory proto. + /// + /// Returns the created Tree and any File Digests referenced within it. If the output directory + /// does not exist, then returns Ok(None). + pub(crate) fn make_tree_for_output_directory( + root_trie: &DigestTrie, + directory_path: RelativePath, + ) -> Result)>, String> { + let sub_trie = match root_trie.entry(&directory_path)? { + None => return Ok(None), + Some(directory::Entry::Directory(d)) => d.tree(), + Some(directory::Entry::Symlink(_)) => { + return Err(format!( + "Declared output directory path {directory_path:?} in output \ digest {trie_digest:?} contained a symlink instead.", - trie_digest = root_trie.compute_root_digest(), - )) - } - Some(directory::Entry::File(_)) => { - return Err(format!( - "Declared output directory path {directory_path:?} in output \ + trie_digest = root_trie.compute_root_digest(), + )) + } + Some(directory::Entry::File(_)) => { + return Err(format!( + "Declared output directory path {directory_path:?} in output \ digest {trie_digest:?} contained a file instead.", - trie_digest = root_trie.compute_root_digest(), - )) - } - }; - - let tree = sub_trie.into(); - let mut file_digests = Vec::new(); - sub_trie.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { - directory::Entry::File(f) => file_digests.push(f.digest()), - directory::Entry::Symlink(_) => (), - directory::Entry::Directory(_) => {} - }); - - Ok(Some((tree, file_digests))) - } - - pub(crate) fn extract_output_file( - root_trie: &DigestTrie, - file_path: &str, - ) -> Result, String> { - match root_trie.entry(&RelativePath::new(file_path)?)? { - None => Ok(None), - Some(directory::Entry::File(f)) => { - let output_file = remexec::OutputFile { - digest: Some(f.digest().into()), - path: file_path.to_owned(), - is_executable: f.is_executable(), - ..remexec::OutputFile::default() + trie_digest = root_trie.compute_root_digest(), + )) + } }; - Ok(Some(output_file)) - } - Some(directory::Entry::Symlink(_)) => Err(format!( - "Declared output file path {file_path:?} in output \ + + let tree = sub_trie.into(); + let mut file_digests = Vec::new(); + sub_trie.walk(SymlinkBehavior::Aware, &mut |_, entry| match entry { + directory::Entry::File(f) => file_digests.push(f.digest()), + directory::Entry::Symlink(_) => (), + directory::Entry::Directory(_) => {} + }); + + Ok(Some((tree, file_digests))) + } + + pub(crate) fn extract_output_file( + root_trie: &DigestTrie, + file_path: &str, + ) -> Result, String> { + match root_trie.entry(&RelativePath::new(file_path)?)? 
{ + None => Ok(None), + Some(directory::Entry::File(f)) => { + let output_file = remexec::OutputFile { + digest: Some(f.digest().into()), + path: file_path.to_owned(), + is_executable: f.is_executable(), + ..remexec::OutputFile::default() + }; + Ok(Some(output_file)) + } + Some(directory::Entry::Symlink(_)) => Err(format!( + "Declared output file path {file_path:?} in output \ digest {trie_digest:?} contained a symlink instead.", - trie_digest = root_trie.compute_root_digest(), - )), - Some(directory::Entry::Directory(_)) => Err(format!( - "Declared output file path {file_path:?} in output \ + trie_digest = root_trie.compute_root_digest(), + )), + Some(directory::Entry::Directory(_)) => Err(format!( + "Declared output file path {file_path:?} in output \ digest {trie_digest:?} contained a directory instead.", - trie_digest = root_trie.compute_root_digest(), - )), - } - } - - /// Converts a REAPI `Command` and a `FallibleProcessResultWithPlatform` produced from executing - /// that Command into a REAPI `ActionResult` suitable for upload to the REAPI Action Cache. - /// - /// This function also returns a vector of all `Digest`s referenced directly and indirectly by - /// the `ActionResult` suitable for passing to `Store::ensure_remote_has_recursive`. (The - /// digests may include both File and Tree digests.) - pub(crate) async fn make_action_result( - &self, - command: &Command, - result: &FallibleProcessResultWithPlatform, - store: &Store, - ) -> Result<(ActionResult, Vec), StoreError> { - let output_trie = store - .load_digest_trie(result.output_directory.clone()) - .await?; - - // Keep track of digests that need to be uploaded. - let mut digests = HashSet::new(); - - let mut action_result = ActionResult { - exit_code: result.exit_code, - stdout_digest: Some(result.stdout_digest.into()), - stderr_digest: Some(result.stderr_digest.into()), - execution_metadata: Some(result.metadata.clone().into()), - ..ActionResult::default() - }; - - digests.insert(result.stdout_digest); - digests.insert(result.stderr_digest); - - for output_directory in &command.output_directories { - let (tree, file_digests) = match Self::make_tree_for_output_directory( - &output_trie, - RelativePath::new(output_directory).unwrap(), - )? { - Some(res) => res, - None => continue, - }; - - let tree_digest = crate::remote::store_proto_locally(&self.store, &tree).await?; - digests.insert(tree_digest); - digests.extend(file_digests); - - action_result - .output_directories - .push(remexec::OutputDirectory { - path: output_directory.to_owned(), - tree_digest: Some(tree_digest.into()), - is_topologically_sorted: false, - }); + trie_digest = root_trie.compute_root_digest(), + )), + } } - for output_file_path in &command.output_files { - let output_file = match Self::extract_output_file(&output_trie, output_file_path)? { - Some(output_file) => output_file, - None => continue, - }; + /// Converts a REAPI `Command` and a `FallibleProcessResultWithPlatform` produced from executing + /// that Command into a REAPI `ActionResult` suitable for upload to the REAPI Action Cache. + /// + /// This function also returns a vector of all `Digest`s referenced directly and indirectly by + /// the `ActionResult` suitable for passing to `Store::ensure_remote_has_recursive`. (The + /// digests may include both File and Tree digests.) 
+ pub(crate) async fn make_action_result( + &self, + command: &Command, + result: &FallibleProcessResultWithPlatform, + store: &Store, + ) -> Result<(ActionResult, Vec), StoreError> { + let output_trie = store + .load_digest_trie(result.output_directory.clone()) + .await?; + + // Keep track of digests that need to be uploaded. + let mut digests = HashSet::new(); + + let mut action_result = ActionResult { + exit_code: result.exit_code, + stdout_digest: Some(result.stdout_digest.into()), + stderr_digest: Some(result.stderr_digest.into()), + execution_metadata: Some(result.metadata.clone().into()), + ..ActionResult::default() + }; - digests.insert(require_digest(output_file.digest.as_ref())?); - action_result.output_files.push(output_file); - } + digests.insert(result.stdout_digest); + digests.insert(result.stderr_digest); + + for output_directory in &command.output_directories { + let (tree, file_digests) = match Self::make_tree_for_output_directory( + &output_trie, + RelativePath::new(output_directory).unwrap(), + )? { + Some(res) => res, + None => continue, + }; + + let tree_digest = crate::remote::store_proto_locally(&self.store, &tree).await?; + digests.insert(tree_digest); + digests.extend(file_digests); + + action_result + .output_directories + .push(remexec::OutputDirectory { + path: output_directory.to_owned(), + tree_digest: Some(tree_digest.into()), + is_topologically_sorted: false, + }); + } - Ok((action_result, digests.into_iter().collect::>())) - } - - /// - /// Races the given local execution future against an attempt to look up the result in the cache. - /// - /// Returns a result that indicates whether we used the cache so that we can skip cache writes if - /// so. - /// - async fn speculate_read_action_cache( - &self, - context: Context, - cache_lookup_start: Instant, - action_digest: Digest, - failures_cached: bool, - request: &Process, - mut local_execution_future: BoxFuture< - '_, - Result, - >, - ) -> Result<(FallibleProcessResultWithPlatform, bool), ProcessError> { - // A future to read from the cache and log the results accordingly. - let mut cache_read_future = async { - let response = check_action_cache( - action_digest, - &request.description, - request.execution_environment.clone(), - &context, - self.provider.clone(), - self.store.clone(), - self.cache_content_behavior, - ) - .await; - match response { - Ok(cached_response_opt) => match &cached_response_opt { - Some(cached_response) if cached_response.exit_code == 0 || failures_cached => { - log::debug!( - "remote cache hit for: {:?} digest={:?} response={:?}", - request.description, - action_digest, - cached_response - ); - cached_response_opt - } - _ => { - log::debug!( - "remote cache miss for: {:?} digest={:?}", - request.description, - action_digest - ); - None - } - }, - Err(err) => { - self.log_cache_error(err.to_string(), CacheErrorType::ReadError); - None + for output_file_path in &command.output_files { + let output_file = match Self::extract_output_file(&output_trie, output_file_path)? { + Some(output_file) => output_file, + None => continue, + }; + + digests.insert(require_digest(output_file.digest.as_ref())?); + action_result.output_files.push(output_file); } - } + + Ok((action_result, digests.into_iter().collect::>())) } - .boxed(); - // We speculate between reading from the remote cache vs. running locally. - in_workunit!( + /// + /// Races the given local execution future against an attempt to look up the result in the cache. 
+ /// + /// Returns a result that indicates whether we used the cache so that we can skip cache writes if + /// so. + /// + async fn speculate_read_action_cache( + &self, + context: Context, + cache_lookup_start: Instant, + action_digest: Digest, + failures_cached: bool, + request: &Process, + mut local_execution_future: BoxFuture< + '_, + Result, + >, + ) -> Result<(FallibleProcessResultWithPlatform, bool), ProcessError> { + // A future to read from the cache and log the results accordingly. + let mut cache_read_future = async { + let response = check_action_cache( + action_digest, + &request.description, + request.execution_environment.clone(), + &context, + self.provider.clone(), + self.store.clone(), + self.cache_content_behavior, + ) + .await; + match response { + Ok(cached_response_opt) => match &cached_response_opt { + Some(cached_response) if cached_response.exit_code == 0 || failures_cached => { + log::debug!( + "remote cache hit for: {:?} digest={:?} response={:?}", + request.description, + action_digest, + cached_response + ); + cached_response_opt + } + _ => { + log::debug!( + "remote cache miss for: {:?} digest={:?}", + request.description, + action_digest + ); + None + } + }, + Err(err) => { + self.log_cache_error(err.to_string(), CacheErrorType::ReadError); + None + } + } + } + .boxed(); + + // We speculate between reading from the remote cache vs. running locally. + in_workunit!( "remote_cache_read_speculation", Level::Trace, |workunit| async move { @@ -336,204 +336,207 @@ impl CommandRunner { } } ).await - } - - async fn handle_cache_read_completed( - &self, - workunit: &mut RunningWorkunit, - cache_lookup_start: Instant, - cache_result: Option, - local_execution_future: BoxFuture<'_, Result>, - ) -> Result<(FallibleProcessResultWithPlatform, bool), ProcessError> { - if let Some(mut cached_response) = cache_result { - cached_response - .metadata - .update_cache_hit_elapsed(cache_lookup_start.elapsed()); - workunit.increment_counter(Metric::RemoteCacheSpeculationRemoteCompletedFirst, 1); - if let Some(time_saved) = cached_response.metadata.saved_by_cache { - let time_saved = std::time::Duration::from(time_saved).as_millis() as u64; - workunit.increment_counter(Metric::RemoteCacheTotalTimeSavedMs, time_saved); - workunit.record_observation(ObservationMetric::RemoteCacheTimeSavedMs, time_saved); - } - // When we successfully use the cache, we change the description and increase the level - // (but not so much that it will be logged by default). - workunit.update_metadata(|initial| { - initial.map(|(initial, _)| { - ( - WorkunitMetadata { - desc: initial.desc.as_ref().map(|desc| format!("Hit: {desc}")), - ..initial - }, - Level::Debug, - ) - }) - }); - Ok((cached_response, true)) - } else { - // Note that we don't increment a counter here, as there is nothing of note in this - // scenario: the remote cache did not save unnecessary local work, nor was the remote - // trip unusually slow such that local execution was faster. - local_execution_future.await.map(|res| (res, false)) } - } - /// Stores an execution result into the remote Action Cache. - async fn update_action_cache( - &self, - result: &FallibleProcessResultWithPlatform, - command: &Command, - action_digest: Digest, - command_digest: Digest, - ) -> Result<(), StoreError> { - // Upload the Action and Command, but not the input files. See #12432. - // Assumption: The Action and Command have already been stored locally. 
- crate::remote::ensure_action_uploaded(&self.store, command_digest, action_digest, None).await?; - - // Create an ActionResult from the process result. - let (action_result, digests_for_action_result) = self - .make_action_result(command, result, &self.store) - .await?; - - // Ensure that all digests referenced by directly and indirectly by the ActionResult - // have been uploaded to the remote cache. - self - .store - .ensure_remote_has_recursive(digests_for_action_result) - .await?; - - self - .provider - .update_action_result(action_digest, action_result) - .await?; - Ok(()) - } - - fn log_cache_error(&self, err: String, err_type: CacheErrorType) { - let err_count = { - let mut errors_counter = match err_type { - CacheErrorType::ReadError => self.read_errors_counter.lock(), - CacheErrorType::WriteError => self.write_errors_counter.lock(), - }; - let count = errors_counter.entry(err.clone()).or_insert(0); - *count += 1; - *count - }; - let failure_desc = match err_type { - CacheErrorType::ReadError => "read from", - CacheErrorType::WriteError => "write to", - }; - let log_msg = - format!("Failed to {failure_desc} remote cache ({err_count} occurrences so far): {err}"); - let log_at_warn = match self.warnings_behavior { - RemoteCacheWarningsBehavior::Ignore => false, - RemoteCacheWarningsBehavior::FirstOnly => err_count == 1, - RemoteCacheWarningsBehavior::Backoff => err_count.is_power_of_two(), - }; - if log_at_warn { - log::warn!("{}", log_msg); - } else { - log::debug!("{}", log_msg); + async fn handle_cache_read_completed( + &self, + workunit: &mut RunningWorkunit, + cache_lookup_start: Instant, + cache_result: Option, + local_execution_future: BoxFuture< + '_, + Result, + >, + ) -> Result<(FallibleProcessResultWithPlatform, bool), ProcessError> { + if let Some(mut cached_response) = cache_result { + cached_response + .metadata + .update_cache_hit_elapsed(cache_lookup_start.elapsed()); + workunit.increment_counter(Metric::RemoteCacheSpeculationRemoteCompletedFirst, 1); + if let Some(time_saved) = cached_response.metadata.saved_by_cache { + let time_saved = std::time::Duration::from(time_saved).as_millis() as u64; + workunit.increment_counter(Metric::RemoteCacheTotalTimeSavedMs, time_saved); + workunit.record_observation(ObservationMetric::RemoteCacheTimeSavedMs, time_saved); + } + // When we successfully use the cache, we change the description and increase the level + // (but not so much that it will be logged by default). + workunit.update_metadata(|initial| { + initial.map(|(initial, _)| { + ( + WorkunitMetadata { + desc: initial.desc.as_ref().map(|desc| format!("Hit: {desc}")), + ..initial + }, + Level::Debug, + ) + }) + }); + Ok((cached_response, true)) + } else { + // Note that we don't increment a counter here, as there is nothing of note in this + // scenario: the remote cache did not save unnecessary local work, nor was the remote + // trip unusually slow such that local execution was faster. + local_execution_future.await.map(|res| (res, false)) + } + } + + /// Stores an execution result into the remote Action Cache. + async fn update_action_cache( + &self, + result: &FallibleProcessResultWithPlatform, + command: &Command, + action_digest: Digest, + command_digest: Digest, + ) -> Result<(), StoreError> { + // Upload the Action and Command, but not the input files. See #12432. + // Assumption: The Action and Command have already been stored locally. 
+ crate::remote::ensure_action_uploaded(&self.store, command_digest, action_digest, None) + .await?; + + // Create an ActionResult from the process result. + let (action_result, digests_for_action_result) = self + .make_action_result(command, result, &self.store) + .await?; + + // Ensure that all digests referenced by directly and indirectly by the ActionResult + // have been uploaded to the remote cache. + self.store + .ensure_remote_has_recursive(digests_for_action_result) + .await?; + + self.provider + .update_action_result(action_digest, action_result) + .await?; + Ok(()) + } + + fn log_cache_error(&self, err: String, err_type: CacheErrorType) { + let err_count = { + let mut errors_counter = match err_type { + CacheErrorType::ReadError => self.read_errors_counter.lock(), + CacheErrorType::WriteError => self.write_errors_counter.lock(), + }; + let count = errors_counter.entry(err.clone()).or_insert(0); + *count += 1; + *count + }; + let failure_desc = match err_type { + CacheErrorType::ReadError => "read from", + CacheErrorType::WriteError => "write to", + }; + let log_msg = format!( + "Failed to {failure_desc} remote cache ({err_count} occurrences so far): {err}" + ); + let log_at_warn = match self.warnings_behavior { + RemoteCacheWarningsBehavior::Ignore => false, + RemoteCacheWarningsBehavior::FirstOnly => err_count == 1, + RemoteCacheWarningsBehavior::Backoff => err_count.is_power_of_two(), + }; + if log_at_warn { + log::warn!("{}", log_msg); + } else { + log::debug!("{}", log_msg); + } } - } } impl Debug for CommandRunner { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("remote_cache::CommandRunner") - .field("inner", &self.inner) - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("remote_cache::CommandRunner") + .field("inner", &self.inner) + .finish_non_exhaustive() + } } enum CacheErrorType { - ReadError, - WriteError, + ReadError, + WriteError, } #[async_trait] impl process_execution::CommandRunner for CommandRunner { - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - request: Process, - ) -> Result { - let cache_lookup_start = Instant::now(); - // Construct the REv2 ExecuteRequest and related data for this execution request. - let EntireExecuteRequest { - action, command, .. - } = make_execute_request( - &request, - self.instance_name.clone(), - self.process_cache_namespace.clone(), - &self.store, - self - .append_only_caches_base_path - .as_ref() - .map(|s| s.as_ref()), - ) - .await?; - let failures_cached = request.cache_scope == ProcessCacheScope::Always; - - // Ensure the action and command are stored locally. - let (command_digest, action_digest) = - crate::remote::ensure_action_stored_locally(&self.store, &command, &action).await?; - - let use_remote_cache = request.cache_scope == ProcessCacheScope::Always - || request.cache_scope == ProcessCacheScope::Successful; - - let (result, hit_cache) = if self.cache_read && use_remote_cache { - self - .speculate_read_action_cache( - context.clone(), - cache_lookup_start, - action_digest, - failures_cached, - &request.clone(), - self.inner.run(context.clone(), workunit, request), + async fn run( + &self, + context: Context, + workunit: &mut RunningWorkunit, + request: Process, + ) -> Result { + let cache_lookup_start = Instant::now(); + // Construct the REv2 ExecuteRequest and related data for this execution request. + let EntireExecuteRequest { + action, command, .. 
+ } = make_execute_request( + &request, + self.instance_name.clone(), + self.process_cache_namespace.clone(), + &self.store, + self.append_only_caches_base_path + .as_ref() + .map(|s| s.as_ref()), ) - .await? - } else { - ( - self.inner.run(context.clone(), workunit, request).await?, - false, - ) - }; - - if !hit_cache - && (result.exit_code == 0 || failures_cached) - && self.cache_write - && use_remote_cache - { - let command_runner = self.clone(); - let result = result.clone(); - let write_fut = in_workunit!("remote_cache_write", Level::Trace, |workunit| async move { - workunit.increment_counter(Metric::RemoteCacheWriteAttempts, 1); - let write_result = command_runner - .update_action_cache(&result, &command, action_digest, command_digest) - .await; - match write_result { - Ok(_) => workunit.increment_counter(Metric::RemoteCacheWriteSuccesses, 1), - Err(err) => { - command_runner.log_cache_error(err.to_string(), CacheErrorType::WriteError); - workunit.increment_counter(Metric::RemoteCacheWriteErrors, 1); - } + .await?; + let failures_cached = request.cache_scope == ProcessCacheScope::Always; + + // Ensure the action and command are stored locally. + let (command_digest, action_digest) = + crate::remote::ensure_action_stored_locally(&self.store, &command, &action).await?; + + let use_remote_cache = request.cache_scope == ProcessCacheScope::Always + || request.cache_scope == ProcessCacheScope::Successful; + + let (result, hit_cache) = if self.cache_read && use_remote_cache { + self.speculate_read_action_cache( + context.clone(), + cache_lookup_start, + action_digest, + failures_cached, + &request.clone(), + self.inner.run(context.clone(), workunit, request), + ) + .await? + } else { + ( + self.inner.run(context.clone(), workunit, request).await?, + false, + ) }; - } - // NB: We must box the future to avoid a stack overflow. - .boxed()); - let task_name = format!("remote cache write {action_digest:?}"); - context - .tail_tasks - .spawn_on(&task_name, self.executor.handle(), write_fut.boxed()); - } - Ok(result) - } + if !hit_cache + && (result.exit_code == 0 || failures_cached) + && self.cache_write + && use_remote_cache + { + let command_runner = self.clone(); + let result = result.clone(); + let write_fut = + in_workunit!("remote_cache_write", Level::Trace, |workunit| async move { + workunit.increment_counter(Metric::RemoteCacheWriteAttempts, 1); + let write_result = command_runner + .update_action_cache(&result, &command, action_digest, command_digest) + .await; + match write_result { + Ok(_) => workunit.increment_counter(Metric::RemoteCacheWriteSuccesses, 1), + Err(err) => { + command_runner + .log_cache_error(err.to_string(), CacheErrorType::WriteError); + workunit.increment_counter(Metric::RemoteCacheWriteErrors, 1); + } + }; + } + // NB: We must box the future to avoid a stack overflow. + .boxed()); + let task_name = format!("remote cache write {action_digest:?}"); + context + .tail_tasks + .spawn_on(&task_name, self.executor.handle(), write_fut.boxed()); + } + + Ok(result) + } - async fn shutdown(&self) -> Result<(), String> { - self.inner.shutdown().await - } + async fn shutdown(&self) -> Result<(), String> { + self.inner.shutdown().await + } } /// Check the remote Action Cache for a cached result of running the given `command` and the Action @@ -544,67 +547,68 @@ impl process_execution::CommandRunner for CommandRunner { /// explicitly in order to avoid duplicating already-cached work. This behavior matches /// the Bazel RE client. 
async fn check_action_cache( - action_digest: Digest, - command_description: &str, - environment: ProcessExecutionEnvironment, - context: &Context, - provider: Arc, - store: Store, - cache_content_behavior: CacheContentBehavior, + action_digest: Digest, + command_description: &str, + environment: ProcessExecutionEnvironment, + context: &Context, + provider: Arc, + store: Store, + cache_content_behavior: CacheContentBehavior, ) -> Result, ProcessError> { - in_workunit!( - "check_action_cache", - Level::Debug, - desc = Some(format!("Remote cache lookup for: {command_description}")), - |workunit| async move { - workunit.increment_counter(Metric::RemoteCacheRequests, 1); - - let start = Instant::now(); - let response = provider - .get_action_result(action_digest, &context.build_id) - .and_then(|action_result| async move { - let Some(action_result) = action_result else { - return Ok(None); - }; - - let response = populate_fallible_execution_result( - store.clone(), - context.run_id, - &action_result, - false, - ProcessResultSource::HitRemotely, - environment, - ) - .await - .map_err(|e| format!("Output roots could not be loaded: {e}"))?; - - let cache_content_valid = check_cache_content(&response, &store, cache_content_behavior) - .await - .map_err(|e| format!("Output content could not be validated: {e}"))?; - - if cache_content_valid { - Ok(Some(response)) - } else { - Ok(None) - } - }) - .await; - - workunit.record_observation( - ObservationMetric::RemoteCacheGetActionResultTimeMicros, - start.elapsed().as_micros() as u64, - ); - - let counter = match response { - Ok(Some(_)) => Metric::RemoteCacheRequestsCached, - Ok(None) => Metric::RemoteCacheRequestsUncached, - // TODO: Ensure that we're catching missing digests. - Err(_) => Metric::RemoteCacheReadErrors, - }; - workunit.increment_counter(counter, 1); - - response.map_err(ProcessError::from) - } - ) - .await + in_workunit!( + "check_action_cache", + Level::Debug, + desc = Some(format!("Remote cache lookup for: {command_description}")), + |workunit| async move { + workunit.increment_counter(Metric::RemoteCacheRequests, 1); + + let start = Instant::now(); + let response = provider + .get_action_result(action_digest, &context.build_id) + .and_then(|action_result| async move { + let Some(action_result) = action_result else { + return Ok(None); + }; + + let response = populate_fallible_execution_result( + store.clone(), + context.run_id, + &action_result, + false, + ProcessResultSource::HitRemotely, + environment, + ) + .await + .map_err(|e| format!("Output roots could not be loaded: {e}"))?; + + let cache_content_valid = + check_cache_content(&response, &store, cache_content_behavior) + .await + .map_err(|e| format!("Output content could not be validated: {e}"))?; + + if cache_content_valid { + Ok(Some(response)) + } else { + Ok(None) + } + }) + .await; + + workunit.record_observation( + ObservationMetric::RemoteCacheGetActionResultTimeMicros, + start.elapsed().as_micros() as u64, + ); + + let counter = match response { + Ok(Some(_)) => Metric::RemoteCacheRequestsCached, + Ok(None) => Metric::RemoteCacheRequestsUncached, + // TODO: Ensure that we're catching missing digests. 
+ Err(_) => Metric::RemoteCacheReadErrors, + }; + workunit.increment_counter(counter, 1); + + response.map_err(ProcessError::from) + } + ) + .await } diff --git a/src/rust/engine/process_execution/remote/src/remote_cache_tests.rs b/src/rust/engine/process_execution/remote/src/remote_cache_tests.rs index ec81b1176db..39d6d5d0aad 100644 --- a/src/rust/engine/process_execution/remote/src/remote_cache_tests.rs +++ b/src/rust/engine/process_execution/remote/src/remote_cache_tests.rs @@ -24,10 +24,10 @@ use workunit_store::{RunId, RunningWorkunit, WorkunitStore}; use crate::remote::ensure_action_stored_locally; use crate::remote_cache::{RemoteCacheRunnerOptions, RemoteCacheWarningsBehavior}; use process_execution::{ - make_execute_request, CacheContentBehavior, CommandRunner as CommandRunnerTrait, Context, - EntireExecuteRequest, FallibleProcessResultWithPlatform, Platform, Process, ProcessCacheScope, - ProcessError, ProcessExecutionEnvironment, ProcessExecutionStrategy, ProcessResultMetadata, - ProcessResultSource, + make_execute_request, CacheContentBehavior, CommandRunner as CommandRunnerTrait, Context, + EntireExecuteRequest, FallibleProcessResultWithPlatform, Platform, Process, ProcessCacheScope, + ProcessError, ProcessExecutionEnvironment, ProcessExecutionStrategy, ProcessResultMetadata, + ProcessResultSource, }; const CACHE_READ_TIMEOUT: Duration = Duration::from_secs(5); @@ -35,807 +35,807 @@ const CACHE_READ_TIMEOUT: Duration = Duration::from_secs(5); /// A mock of the local runner used for better hermeticity of the tests. #[derive(Debug, Clone)] struct MockLocalCommandRunner { - result: Result, - call_counter: Arc, - delay: Duration, + result: Result, + call_counter: Arc, + delay: Duration, } impl MockLocalCommandRunner { - pub fn new( - exit_code: i32, - call_counter: Arc, - delay_ms: u64, - ) -> MockLocalCommandRunner { - MockLocalCommandRunner { - result: Ok(FallibleProcessResultWithPlatform { - stdout_digest: EMPTY_DIGEST, - stderr_digest: EMPTY_DIGEST, - exit_code, - output_directory: EMPTY_DIRECTORY_DIGEST.clone(), - metadata: ProcessResultMetadata::new( - None, - ProcessResultSource::Ran, - ProcessExecutionEnvironment { - name: None, - platform: Platform::current().unwrap(), - strategy: ProcessExecutionStrategy::Local, - }, - RunId(0), - ), - }), - call_counter, - delay: Duration::from_millis(delay_ms), + pub fn new( + exit_code: i32, + call_counter: Arc, + delay_ms: u64, + ) -> MockLocalCommandRunner { + MockLocalCommandRunner { + result: Ok(FallibleProcessResultWithPlatform { + stdout_digest: EMPTY_DIGEST, + stderr_digest: EMPTY_DIGEST, + exit_code, + output_directory: EMPTY_DIRECTORY_DIGEST.clone(), + metadata: ProcessResultMetadata::new( + None, + ProcessResultSource::Ran, + ProcessExecutionEnvironment { + name: None, + platform: Platform::current().unwrap(), + strategy: ProcessExecutionStrategy::Local, + }, + RunId(0), + ), + }), + call_counter, + delay: Duration::from_millis(delay_ms), + } } - } } #[async_trait] impl CommandRunnerTrait for MockLocalCommandRunner { - async fn run( - &self, - _context: Context, - _workunit: &mut RunningWorkunit, - _req: Process, - ) -> Result { - sleep(self.delay).await; - self.call_counter.fetch_add(1, Ordering::SeqCst); - self.result.clone() - } - - async fn shutdown(&self) -> Result<(), String> { - Ok(()) - } + async fn run( + &self, + _context: Context, + _workunit: &mut RunningWorkunit, + _req: Process, + ) -> Result { + sleep(self.delay).await; + self.call_counter.fetch_add(1, Ordering::SeqCst); + self.result.clone() + } + + async fn 
shutdown(&self) -> Result<(), String> { + Ok(()) + } } // NB: We bundle these into a struct to ensure they share the same lifetime. struct StoreSetup { - pub store: Store, - pub _store_temp_dir: TempDir, - pub cas: StubCAS, - pub executor: task_executor::Executor, + pub store: Store, + pub _store_temp_dir: TempDir, + pub cas: StubCAS, + pub executor: task_executor::Executor, } impl StoreSetup { - pub async fn new() -> Self { - Self::new_with_stub_cas(StubCAS::builder().build()).await - } + pub async fn new() -> Self { + Self::new_with_stub_cas(StubCAS::builder().build()).await + } - pub async fn new_with_stub_cas(cas: StubCAS) -> Self { - let executor = task_executor::Executor::new(); - let store_temp_dir = TempDir::new().unwrap(); - let store_dir = store_temp_dir.path().join("store_dir"); - let store = Store::local_only(executor.clone(), store_dir) - .unwrap() - .into_with_remote(RemoteOptions { - cas_address: cas.address(), - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes: 10 * 1024 * 1024, - rpc_timeout: Duration::from_secs(1), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit: 4 * 1024 * 1024, - }) - .await - .unwrap(); - Self { - store, - _store_temp_dir: store_temp_dir, - cas, - executor, + pub async fn new_with_stub_cas(cas: StubCAS) -> Self { + let executor = task_executor::Executor::new(); + let store_temp_dir = TempDir::new().unwrap(); + let store_dir = store_temp_dir.path().join("store_dir"); + let store = Store::local_only(executor.clone(), store_dir) + .unwrap() + .into_with_remote(RemoteOptions { + cas_address: cas.address(), + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes: 10 * 1024 * 1024, + rpc_timeout: Duration::from_secs(1), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit: 4 * 1024 * 1024, + }) + .await + .unwrap(); + Self { + store, + _store_temp_dir: store_temp_dir, + cas, + executor, + } } - } } fn create_local_runner( - exit_code: i32, - delay_ms: u64, + exit_code: i32, + delay_ms: u64, ) -> (Box, Arc) { - let call_counter = Arc::new(AtomicUsize::new(0)); - let local_runner = Box::new(MockLocalCommandRunner::new( - exit_code, - call_counter.clone(), - delay_ms, - )); - (local_runner, call_counter) + let call_counter = Arc::new(AtomicUsize::new(0)); + let local_runner = Box::new(MockLocalCommandRunner::new( + exit_code, + call_counter.clone(), + delay_ms, + )); + (local_runner, call_counter) } async fn create_cached_runner( - local: Box, - store_setup: &StoreSetup, - cache_content_behavior: CacheContentBehavior, + local: Box, + store_setup: &StoreSetup, + cache_content_behavior: CacheContentBehavior, ) -> Box { - Box::new( - crate::remote_cache::CommandRunner::from_provider_options( - RemoteCacheRunnerOptions { - inner: local.into(), - instance_name: None, - process_cache_namespace: None, - executor: store_setup.executor.clone(), - store: store_setup.store.clone(), - cache_read: true, - cache_write: true, - warnings_behavior: RemoteCacheWarningsBehavior::FirstOnly, - cache_content_behavior, - append_only_caches_base_path: None, - }, - RemoteCacheProviderOptions { - instance_name: None, - action_cache_address: store_setup.cas.address(), - tls_config: tls::Config::default(), - headers: BTreeMap::default(), - concurrency_limit: 256, - rpc_timeout: CACHE_READ_TIMEOUT, - }, + Box::new( + crate::remote_cache::CommandRunner::from_provider_options( + 
RemoteCacheRunnerOptions { + inner: local.into(), + instance_name: None, + process_cache_namespace: None, + executor: store_setup.executor.clone(), + store: store_setup.store.clone(), + cache_read: true, + cache_write: true, + warnings_behavior: RemoteCacheWarningsBehavior::FirstOnly, + cache_content_behavior, + append_only_caches_base_path: None, + }, + RemoteCacheProviderOptions { + instance_name: None, + action_cache_address: store_setup.cas.address(), + tls_config: tls::Config::default(), + headers: BTreeMap::default(), + concurrency_limit: 256, + rpc_timeout: CACHE_READ_TIMEOUT, + }, + ) + .await + .expect("caching command runner"), ) - .await - .expect("caching command runner"), - ) } // TODO: Unfortunately, this code cannot be moved to the `testutil::mock` crate, because that // introduces a cycle between this crate and that one. async fn create_process(store_setup: &StoreSetup) -> (Process, Digest) { - let process = Process::new(vec![ - "this process will not execute: see MockLocalCommandRunner".to_string(), - ]); - let EntireExecuteRequest { - action, command, .. - } = make_execute_request(&process, None, None, &store_setup.store, None) - .await - .unwrap(); - let (_command_digest, action_digest) = - ensure_action_stored_locally(&store_setup.store, &command, &action) - .await - .unwrap(); - (process, action_digest) + let process = Process::new(vec![ + "this process will not execute: see MockLocalCommandRunner".to_string(), + ]); + let EntireExecuteRequest { + action, command, .. + } = make_execute_request(&process, None, None, &store_setup.store, None) + .await + .unwrap(); + let (_command_digest, action_digest) = + ensure_action_stored_locally(&store_setup.store, &command, &action) + .await + .unwrap(); + (process, action_digest) } #[tokio::test] async fn cache_read_success() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let store_setup = StoreSetup::new().await; - let (local_runner, local_runner_call_counter) = create_local_runner(1, 1000); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; - - let (process, action_digest) = create_process(&store_setup).await; - store_setup - .cas - .action_cache - .insert(action_digest, 0, EMPTY_DIGEST, EMPTY_DIGEST); - - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - let remote_result = cache_runner - .run(Context::default(), &mut workunit, process) - .await - .unwrap(); - assert_eq!(remote_result.exit_code, 0); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let store_setup = StoreSetup::new().await; + let (local_runner, local_runner_call_counter) = create_local_runner(1, 1000); + let cache_runner = + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; + + let (process, action_digest) = create_process(&store_setup).await; + store_setup + .cas + .action_cache + .insert(action_digest, 0, EMPTY_DIGEST, EMPTY_DIGEST); + + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + let remote_result = cache_runner + .run(Context::default(), &mut workunit, process) + .await + .unwrap(); + assert_eq!(remote_result.exit_code, 0); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); } /// If the cache has any issues during reads from the action cache, we should gracefully fallback /// to the local runner. 
#[tokio::test] async fn cache_read_skipped_on_action_cache_errors() { - let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); - let store_setup = StoreSetup::new().await; - let (local_runner, local_runner_call_counter) = create_local_runner(1, 500); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; - - let (process, action_digest) = create_process(&store_setup).await; - store_setup - .cas - .action_cache - .insert(action_digest, 0, EMPTY_DIGEST, EMPTY_DIGEST); - store_setup - .cas - .action_cache - .always_errors - .store(true, Ordering::SeqCst); - - assert_eq!( - workunit_store.get_metrics().get("remote_cache_read_errors"), - None - ); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - let remote_result = cache_runner - .run(Context::default(), &mut workunit, process) - .await - .unwrap(); - assert_eq!(remote_result.exit_code, 1); - assert_eq!( - workunit_store.get_metrics().get("remote_cache_read_errors"), - Some(&1) - ); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); + let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); + let store_setup = StoreSetup::new().await; + let (local_runner, local_runner_call_counter) = create_local_runner(1, 500); + let cache_runner = + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; + + let (process, action_digest) = create_process(&store_setup).await; + store_setup + .cas + .action_cache + .insert(action_digest, 0, EMPTY_DIGEST, EMPTY_DIGEST); + store_setup + .cas + .action_cache + .always_errors + .store(true, Ordering::SeqCst); + + assert_eq!( + workunit_store.get_metrics().get("remote_cache_read_errors"), + None + ); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + let remote_result = cache_runner + .run(Context::default(), &mut workunit, process) + .await + .unwrap(); + assert_eq!(remote_result.exit_code, 1); + assert_eq!( + workunit_store.get_metrics().get("remote_cache_read_errors"), + Some(&1) + ); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); } /// If the cache cannot find a digest during a read from the store during fetch, we should gracefully /// fallback to the local runner. #[tokio::test] async fn cache_read_skipped_on_missing_digest() { - let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); - let store_setup = StoreSetup::new().await; - let (local_runner, local_runner_call_counter) = create_local_runner(1, 500); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Fetch).await; - - // Claim that the process has a non-empty and not-persisted stdout digest. - let (process, action_digest) = create_process(&store_setup).await; - store_setup.cas.action_cache.insert( - action_digest, - 0, - Digest::of_bytes("pigs flying".as_bytes()), - EMPTY_DIGEST, - ); - - assert_eq!( - workunit_store - .get_metrics() - .get("remote_cache_requests_uncached"), - None - ); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - let remote_result = cache_runner - .run(Context::default(), &mut workunit, process) - .await - .unwrap(); - assert_eq!(remote_result.exit_code, 1); - assert_eq!( - workunit_store - .get_metrics() - .get("remote_cache_requests_uncached"), - Some(&1), - ); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); -} - -/// With eager_fetch enabled, we should skip the remote cache if any of the process result's -/// digests are invalid. 
This will force rerunning the process locally. Otherwise, we should use -/// the cached result with its non-existent digests. -#[tokio::test] -async fn cache_read_eager_fetch() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - async fn run_process( - cache_content_behavior: CacheContentBehavior, - workunit: &mut RunningWorkunit, - ) -> (i32, usize) { + let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); let store_setup = StoreSetup::new().await; - let (local_runner, local_runner_call_counter) = create_local_runner(1, 1000); + let (local_runner, local_runner_call_counter) = create_local_runner(1, 500); let cache_runner = - create_cached_runner(local_runner, &store_setup, cache_content_behavior).await; + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Fetch).await; + // Claim that the process has a non-empty and not-persisted stdout digest. let (process, action_digest) = create_process(&store_setup).await; store_setup.cas.action_cache.insert( - action_digest, - 0, - TestData::roland().digest(), - TestData::roland().digest(), + action_digest, + 0, + Digest::of_bytes("pigs flying".as_bytes()), + EMPTY_DIGEST, ); + assert_eq!( + workunit_store + .get_metrics() + .get("remote_cache_requests_uncached"), + None + ); assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); let remote_result = cache_runner - .run(Context::default(), workunit, process) - .await - .unwrap(); - - let final_local_count = local_runner_call_counter.load(Ordering::SeqCst); - (remote_result.exit_code, final_local_count) - } - - let (lazy_exit_code, lazy_local_call_count) = - run_process(CacheContentBehavior::Defer, &mut workunit).await; - assert_eq!(lazy_exit_code, 0); - assert_eq!(lazy_local_call_count, 0); - - let (eager_exit_code, eager_local_call_count) = - run_process(CacheContentBehavior::Fetch, &mut workunit).await; - assert_eq!(eager_exit_code, 1); - assert_eq!(eager_local_call_count, 1); + .run(Context::default(), &mut workunit, process) + .await + .unwrap(); + assert_eq!(remote_result.exit_code, 1); + assert_eq!( + workunit_store + .get_metrics() + .get("remote_cache_requests_uncached"), + Some(&1), + ); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); } +/// With eager_fetch enabled, we should skip the remote cache if any of the process result's +/// digests are invalid. This will force rerunning the process locally. Otherwise, we should use +/// the cached result with its non-existent digests. 
#[tokio::test] -async fn cache_read_speculation() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - async fn run_process( - local_delay_ms: u64, - remote_delay_ms: u64, - remote_cache_speculation_delay_ms: u64, - cache_hit: bool, - cached_exit_code: i32, - cache_scope: ProcessCacheScope, - workunit: &mut RunningWorkunit, - ) -> (i32, usize) { - let store_setup = StoreSetup::new_with_stub_cas( - StubCAS::builder() - .ac_read_delay(Duration::from_millis(remote_delay_ms)) - .build(), - ) - .await; - let (local_runner, local_runner_call_counter) = create_local_runner(1, local_delay_ms); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; +async fn cache_read_eager_fetch() { + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + async fn run_process( + cache_content_behavior: CacheContentBehavior, + workunit: &mut RunningWorkunit, + ) -> (i32, usize) { + let store_setup = StoreSetup::new().await; + let (local_runner, local_runner_call_counter) = create_local_runner(1, 1000); + let cache_runner = + create_cached_runner(local_runner, &store_setup, cache_content_behavior).await; + + let (process, action_digest) = create_process(&store_setup).await; + store_setup.cas.action_cache.insert( + action_digest, + 0, + TestData::roland().digest(), + TestData::roland().digest(), + ); + + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + let remote_result = cache_runner + .run(Context::default(), workunit, process) + .await + .unwrap(); + + let final_local_count = local_runner_call_counter.load(Ordering::SeqCst); + (remote_result.exit_code, final_local_count) + } - let (process, action_digest) = create_process(&store_setup).await; - let process = process.cache_scope(cache_scope); - let process = process.remote_cache_speculation_delay(std::time::Duration::from_millis( - remote_cache_speculation_delay_ms, - )); - if cache_hit { - store_setup.cas.action_cache.insert( - action_digest, - cached_exit_code, - EMPTY_DIGEST, - EMPTY_DIGEST, - ); + let (lazy_exit_code, lazy_local_call_count) = + run_process(CacheContentBehavior::Defer, &mut workunit).await; + assert_eq!(lazy_exit_code, 0); + assert_eq!(lazy_local_call_count, 0); + + let (eager_exit_code, eager_local_call_count) = + run_process(CacheContentBehavior::Fetch, &mut workunit).await; + assert_eq!(eager_exit_code, 1); + assert_eq!(eager_local_call_count, 1); +} + +#[tokio::test] +async fn cache_read_speculation() { + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + async fn run_process( + local_delay_ms: u64, + remote_delay_ms: u64, + remote_cache_speculation_delay_ms: u64, + cache_hit: bool, + cached_exit_code: i32, + cache_scope: ProcessCacheScope, + workunit: &mut RunningWorkunit, + ) -> (i32, usize) { + let store_setup = StoreSetup::new_with_stub_cas( + StubCAS::builder() + .ac_read_delay(Duration::from_millis(remote_delay_ms)) + .build(), + ) + .await; + let (local_runner, local_runner_call_counter) = create_local_runner(1, local_delay_ms); + let cache_runner = + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; + + let (process, action_digest) = create_process(&store_setup).await; + let process = process.cache_scope(cache_scope); + let process = process.remote_cache_speculation_delay(std::time::Duration::from_millis( + remote_cache_speculation_delay_ms, + )); + if cache_hit { + store_setup.cas.action_cache.insert( + action_digest, + cached_exit_code, + EMPTY_DIGEST, + EMPTY_DIGEST, + ); + } + + 
assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + let result = cache_runner + .run(Context::default(), workunit, process) + .await + .unwrap(); + + let final_local_count = local_runner_call_counter.load(Ordering::SeqCst); + (result.exit_code, final_local_count) } - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - let result = cache_runner - .run(Context::default(), workunit, process) - .await - .unwrap(); - - let final_local_count = local_runner_call_counter.load(Ordering::SeqCst); - (result.exit_code, final_local_count) - } - - // Case 1: remote is faster than local. - let (exit_code, local_call_count) = run_process( - 200, - 0, - 0, - true, - 0, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 0); - assert_eq!(local_call_count, 0); - - // Case 2: local is faster than remote. - let (exit_code, local_call_count) = run_process( - 0, - 200, - 0, - true, - 0, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 1); - assert_eq!(local_call_count, 1); - - // Case 3: the remote lookup wins, but there is no cache entry so we fallback to local execution. - let (exit_code, local_call_count) = run_process( - 200, - 0, - 0, - false, - 0, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 1); - assert_eq!(local_call_count, 1); - - // Case 4: the remote lookup wins, but it was a failed process with cache scope Successful. - let (exit_code, local_call_count) = run_process( - 0, - 0, - 200, - true, - 5, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 1); - assert_eq!(local_call_count, 1); - - // Case 5: the remote lookup wins, and even though it was a failed process, cache scope was Always. - let (exit_code, local_call_count) = - run_process(0, 0, 200, true, 5, ProcessCacheScope::Always, &mut workunit).await; - assert_eq!(exit_code, 5); - assert_eq!(local_call_count, 0); - - // Case 6: remote is faster than speculation read delay. - let (exit_code, local_call_count) = run_process( - 0, - 0, - 200, - true, - 0, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 0); - assert_eq!(local_call_count, 0); - - // Case 7: remote is faster than speculation read delay, but there is no cache entry so we fallback to local execution. - let (exit_code, local_call_count) = run_process( - 0, - 0, - 200, - false, - 0, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 1); - assert_eq!(local_call_count, 1); - - // Case 8: local with speculation read delay is faster than remote. - let (exit_code, local_call_count) = run_process( - 0, - 200, - 0, - true, - 0, - ProcessCacheScope::Successful, - &mut workunit, - ) - .await; - assert_eq!(exit_code, 1); - assert_eq!(local_call_count, 1); + // Case 1: remote is faster than local. + let (exit_code, local_call_count) = run_process( + 200, + 0, + 0, + true, + 0, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 0); + assert_eq!(local_call_count, 0); + + // Case 2: local is faster than remote. + let (exit_code, local_call_count) = run_process( + 0, + 200, + 0, + true, + 0, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 1); + assert_eq!(local_call_count, 1); + + // Case 3: the remote lookup wins, but there is no cache entry so we fallback to local execution. 
+ let (exit_code, local_call_count) = run_process( + 200, + 0, + 0, + false, + 0, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 1); + assert_eq!(local_call_count, 1); + + // Case 4: the remote lookup wins, but it was a failed process with cache scope Successful. + let (exit_code, local_call_count) = run_process( + 0, + 0, + 200, + true, + 5, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 1); + assert_eq!(local_call_count, 1); + + // Case 5: the remote lookup wins, and even though it was a failed process, cache scope was Always. + let (exit_code, local_call_count) = + run_process(0, 0, 200, true, 5, ProcessCacheScope::Always, &mut workunit).await; + assert_eq!(exit_code, 5); + assert_eq!(local_call_count, 0); + + // Case 6: remote is faster than speculation read delay. + let (exit_code, local_call_count) = run_process( + 0, + 0, + 200, + true, + 0, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 0); + assert_eq!(local_call_count, 0); + + // Case 7: remote is faster than speculation read delay, but there is no cache entry so we fallback to local execution. + let (exit_code, local_call_count) = run_process( + 0, + 0, + 200, + false, + 0, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 1); + assert_eq!(local_call_count, 1); + + // Case 8: local with speculation read delay is faster than remote. + let (exit_code, local_call_count) = run_process( + 0, + 200, + 0, + true, + 0, + ProcessCacheScope::Successful, + &mut workunit, + ) + .await; + assert_eq!(exit_code, 1); + assert_eq!(local_call_count, 1); } #[tokio::test] async fn cache_write_success() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let store_setup = StoreSetup::new().await; - let (local_runner, local_runner_call_counter) = create_local_runner(0, 100); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; - let (process, action_digest) = create_process(&store_setup).await; - - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); - - let context = Context::default(); - let local_result = cache_runner - .run(context.clone(), &mut workunit, process.clone()) - .await - .unwrap(); - context.tail_tasks.wait(Duration::from_secs(2)).await; - assert_eq!(local_result.exit_code, 0); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); - - // Wait for the cache write block to finish. 
- sleep(Duration::from_secs(1)).await; - assert_eq!(store_setup.cas.action_cache.len(), 1); - assert_eq!( - store_setup - .cas - .action_cache - .get(action_digest) - .unwrap() - .exit_code, - 0 - ); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let store_setup = StoreSetup::new().await; + let (local_runner, local_runner_call_counter) = create_local_runner(0, 100); + let cache_runner = + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; + let (process, action_digest) = create_process(&store_setup).await; + + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); + + let context = Context::default(); + let local_result = cache_runner + .run(context.clone(), &mut workunit, process.clone()) + .await + .unwrap(); + context.tail_tasks.wait(Duration::from_secs(2)).await; + assert_eq!(local_result.exit_code, 0); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); + + // Wait for the cache write block to finish. + sleep(Duration::from_secs(1)).await; + assert_eq!(store_setup.cas.action_cache.len(), 1); + assert_eq!( + store_setup + .cas + .action_cache + .get(action_digest) + .unwrap() + .exit_code, + 0 + ); } #[tokio::test] async fn cache_write_not_for_failures() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let store_setup = StoreSetup::new().await; - let (local_runner, local_runner_call_counter) = create_local_runner(1, 100); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; - let (process, _action_digest) = create_process(&store_setup).await; - - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); - - let local_result = cache_runner - .run(Context::default(), &mut workunit, process.clone()) - .await - .unwrap(); - assert_eq!(local_result.exit_code, 1); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let store_setup = StoreSetup::new().await; + let (local_runner, local_runner_call_counter) = create_local_runner(1, 100); + let cache_runner = + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; + let (process, _action_digest) = create_process(&store_setup).await; - // Wait for the cache write block to finish. - sleep(Duration::from_millis(100)).await; - assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); + + let local_result = cache_runner + .run(Context::default(), &mut workunit, process.clone()) + .await + .unwrap(); + assert_eq!(local_result.exit_code, 1); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); + + // Wait for the cache write block to finish. + sleep(Duration::from_millis(100)).await; + assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); } /// Cache writes should be async and not block the CommandRunner from returning. 
#[tokio::test] async fn cache_write_does_not_block() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let store_setup = StoreSetup::new_with_stub_cas( - StubCAS::builder() - .ac_write_delay(Duration::from_millis(100)) - .build(), - ) - .await; - let (local_runner, local_runner_call_counter) = create_local_runner(0, 100); - let cache_runner = - create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; - let (process, action_digest) = create_process(&store_setup).await; - - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); - assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); - - let context = Context::default(); - let local_result = cache_runner - .run(context.clone(), &mut workunit, process.clone()) - .await - .unwrap(); - assert_eq!(local_result.exit_code, 0); - assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); - - // We expect the cache write to have not finished yet, even though we already finished - // CommandRunner::run(). - assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let store_setup = StoreSetup::new_with_stub_cas( + StubCAS::builder() + .ac_write_delay(Duration::from_millis(100)) + .build(), + ) + .await; + let (local_runner, local_runner_call_counter) = create_local_runner(0, 100); + let cache_runner = + create_cached_runner(local_runner, &store_setup, CacheContentBehavior::Defer).await; + let (process, action_digest) = create_process(&store_setup).await; - context.tail_tasks.wait(Duration::from_secs(2)).await; - assert_eq!(store_setup.cas.action_cache.len(), 1); - assert_eq!( - store_setup - .cas - .action_cache - .get(action_digest) - .unwrap() - .exit_code, - 0 - ); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 0); + assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); + + let context = Context::default(); + let local_result = cache_runner + .run(context.clone(), &mut workunit, process.clone()) + .await + .unwrap(); + assert_eq!(local_result.exit_code, 0); + assert_eq!(local_runner_call_counter.load(Ordering::SeqCst), 1); + + // We expect the cache write to have not finished yet, even though we already finished + // CommandRunner::run(). + assert!(store_setup.cas.action_cache.action_map.lock().is_empty()); + + context.tail_tasks.wait(Duration::from_secs(2)).await; + assert_eq!(store_setup.cas.action_cache.len(), 1); + assert_eq!( + store_setup + .cas + .action_cache + .get(action_digest) + .unwrap() + .exit_code, + 0 + ); } #[tokio::test] async fn make_tree_from_directory() { - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - - // Prepare the store to contain /pets/cats/roland.ext. We will then extract various pieces of it - // into Tree protos. - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - let input_tree = TestTree::double_nested(); - - let (tree, file_digests) = crate::remote_cache::CommandRunner::make_tree_for_output_directory( - &input_tree.digest_trie(), - RelativePath::new("pets").unwrap(), - ) - .unwrap() - .unwrap(); - - // Note that we do not store the `pets/` prefix in the Tree, per the REAPI docs on - // `OutputDirectory`. 
- let root_dir = tree.root.unwrap(); - assert_eq!(root_dir.files.len(), 0); - assert_eq!(root_dir.directories.len(), 1); - let dir_node = &root_dir.directories[0]; - assert_eq!(dir_node.name, "cats"); - let dir_digest: Digest = dir_node.digest.as_ref().unwrap().try_into().unwrap(); - assert_eq!(dir_digest, TestDirectory::containing_roland().digest()); - let children = tree.children; - assert_eq!(children.len(), 1); - let child_dir = &children[0]; - assert_eq!(child_dir.files.len(), 1); - assert_eq!(child_dir.directories.len(), 0); - let file_node = &child_dir.files[0]; - assert_eq!(file_node.name, "roland.ext"); - let file_digest: Digest = file_node.digest.as_ref().unwrap().try_into().unwrap(); - assert_eq!(file_digest, TestData::roland().digest()); - assert_eq!(file_digests, vec![TestData::roland().digest()]); - - // Test that extracting non-existent output directories fails gracefully. - assert!( - crate::remote_cache::CommandRunner::make_tree_for_output_directory( - &input_tree.digest_trie(), - RelativePath::new("animals").unwrap(), - ) - .unwrap() - .is_none() - ); - assert!( - crate::remote_cache::CommandRunner::make_tree_for_output_directory( - &input_tree.digest_trie(), - RelativePath::new("pets/xyzzy").unwrap(), + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + // Prepare the store to contain /pets/cats/roland.ext. We will then extract various pieces of it + // into Tree protos. + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + let input_tree = TestTree::double_nested(); + + let (tree, file_digests) = crate::remote_cache::CommandRunner::make_tree_for_output_directory( + &input_tree.digest_trie(), + RelativePath::new("pets").unwrap(), ) .unwrap() - .is_none() - ); + .unwrap(); + + // Note that we do not store the `pets/` prefix in the Tree, per the REAPI docs on + // `OutputDirectory`. + let root_dir = tree.root.unwrap(); + assert_eq!(root_dir.files.len(), 0); + assert_eq!(root_dir.directories.len(), 1); + let dir_node = &root_dir.directories[0]; + assert_eq!(dir_node.name, "cats"); + let dir_digest: Digest = dir_node.digest.as_ref().unwrap().try_into().unwrap(); + assert_eq!(dir_digest, TestDirectory::containing_roland().digest()); + let children = tree.children; + assert_eq!(children.len(), 1); + let child_dir = &children[0]; + assert_eq!(child_dir.files.len(), 1); + assert_eq!(child_dir.directories.len(), 0); + let file_node = &child_dir.files[0]; + assert_eq!(file_node.name, "roland.ext"); + let file_digest: Digest = file_node.digest.as_ref().unwrap().try_into().unwrap(); + assert_eq!(file_digest, TestData::roland().digest()); + assert_eq!(file_digests, vec![TestData::roland().digest()]); + + // Test that extracting non-existent output directories fails gracefully. 
+ assert!( + crate::remote_cache::CommandRunner::make_tree_for_output_directory( + &input_tree.digest_trie(), + RelativePath::new("animals").unwrap(), + ) + .unwrap() + .is_none() + ); + assert!( + crate::remote_cache::CommandRunner::make_tree_for_output_directory( + &input_tree.digest_trie(), + RelativePath::new("pets/xyzzy").unwrap(), + ) + .unwrap() + .is_none() + ); } #[tokio::test] async fn extract_output_file() { - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - let input_tree = TestTree::nested(); - - let output_file = crate::remote_cache::CommandRunner::extract_output_file( - &input_tree.digest_trie(), - "cats/roland.ext", - ) - .unwrap() - .unwrap(); - - assert_eq!(output_file.path, "cats/roland.ext"); - let file_digest: Digest = output_file.digest.unwrap().try_into().unwrap(); - assert_eq!(file_digest, TestData::roland().digest()); - - // Extract non-existent files to make sure that Ok(None) is returned. - assert!(crate::remote_cache::CommandRunner::extract_output_file( - &input_tree.digest_trie(), - "animals.ext", - ) - .unwrap() - .is_none()); - assert!(crate::remote_cache::CommandRunner::extract_output_file( - &input_tree.digest_trie(), - "cats/xyzzy", - ) - .unwrap() - .is_none()); - - // Error if a path has been declared as a file but isn't. - assert_eq!( - crate::remote_cache::CommandRunner::extract_output_file(&input_tree.digest_trie(), "cats",), - Err(format!( + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + let input_tree = TestTree::nested(); + + let output_file = crate::remote_cache::CommandRunner::extract_output_file( + &input_tree.digest_trie(), + "cats/roland.ext", + ) + .unwrap() + .unwrap(); + + assert_eq!(output_file.path, "cats/roland.ext"); + let file_digest: Digest = output_file.digest.unwrap().try_into().unwrap(); + assert_eq!(file_digest, TestData::roland().digest()); + + // Extract non-existent files to make sure that Ok(None) is returned. + assert!(crate::remote_cache::CommandRunner::extract_output_file( + &input_tree.digest_trie(), + "animals.ext", + ) + .unwrap() + .is_none()); + assert!(crate::remote_cache::CommandRunner::extract_output_file( + &input_tree.digest_trie(), + "cats/xyzzy", + ) + .unwrap() + .is_none()); + + // Error if a path has been declared as a file but isn't. 
+ assert_eq!( + crate::remote_cache::CommandRunner::extract_output_file(&input_tree.digest_trie(), "cats",), + Err(format!( "Declared output file path \"cats\" in output digest {:?} contained a directory instead.", TestDirectory::nested().digest() )) - ); + ); } #[tokio::test] async fn make_action_result_basic() { - #[derive(Debug)] - struct MockCommandRunner; - - #[async_trait] - impl CommandRunnerTrait for MockCommandRunner { - async fn run( - &self, - _context: Context, - _workunit: &mut RunningWorkunit, - _req: Process, - ) -> Result { - unimplemented!() - } - - async fn shutdown(&self) -> Result<(), String> { - Ok(()) + #[derive(Debug)] + struct MockCommandRunner; + + #[async_trait] + impl CommandRunnerTrait for MockCommandRunner { + async fn run( + &self, + _context: Context, + _workunit: &mut RunningWorkunit, + _req: Process, + ) -> Result { + unimplemented!() + } + + async fn shutdown(&self) -> Result<(), String> { + Ok(()) + } } - } - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .store_file_bytes(TestData::robin().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) - .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::nested().directory(), true) - .await - .expect("Error saving directory"); - let directory_digest = store - .record_directory(&TestDirectory::double_nested().directory(), true) - .await - .expect("Error saving directory"); - - let mock_command_runner = Arc::new(MockCommandRunner); - let cas = StubCAS::builder().build(); - let runner = crate::remote_cache::CommandRunner::from_provider_options( - RemoteCacheRunnerOptions { - inner: mock_command_runner.clone(), - instance_name: None, - process_cache_namespace: None, - executor: executor.clone(), - store: store.clone(), - cache_read: true, - cache_write: true, - warnings_behavior: RemoteCacheWarningsBehavior::FirstOnly, - cache_content_behavior: CacheContentBehavior::Defer, - append_only_caches_base_path: None, - }, - RemoteCacheProviderOptions { - instance_name: None, - action_cache_address: cas.address(), - tls_config: tls::Config::default(), - headers: BTreeMap::default(), - concurrency_limit: 256, - rpc_timeout: CACHE_READ_TIMEOUT, - }, - ) - .await - .expect("caching command runner"); - - let command = remexec::Command { - arguments: vec!["this is a test".into()], - output_files: vec!["pets/cats/roland.ext".into()], - output_directories: vec!["pets/cats".into()], - ..Default::default() - }; - - let process_result = FallibleProcessResultWithPlatform { - stdout_digest: TestData::roland().digest(), - stderr_digest: TestData::robin().digest(), - output_directory: DirectoryDigest::from_persisted_digest(directory_digest), - exit_code: 102, - metadata: ProcessResultMetadata::new( - None, - ProcessResultSource::Ran, - ProcessExecutionEnvironment { - name: None, - platform: Platform::Linux_x86_64, - strategy: ProcessExecutionStrategy::Local, - }, - RunId(0), - ), - }; - - let (action_result, digests) = runner - .make_action_result(&command, &process_result, &store) + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + store + 
.store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .store_file_bytes(TestData::robin().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::nested().directory(), true) + .await + .expect("Error saving directory"); + let directory_digest = store + .record_directory(&TestDirectory::double_nested().directory(), true) + .await + .expect("Error saving directory"); + + let mock_command_runner = Arc::new(MockCommandRunner); + let cas = StubCAS::builder().build(); + let runner = crate::remote_cache::CommandRunner::from_provider_options( + RemoteCacheRunnerOptions { + inner: mock_command_runner.clone(), + instance_name: None, + process_cache_namespace: None, + executor: executor.clone(), + store: store.clone(), + cache_read: true, + cache_write: true, + warnings_behavior: RemoteCacheWarningsBehavior::FirstOnly, + cache_content_behavior: CacheContentBehavior::Defer, + append_only_caches_base_path: None, + }, + RemoteCacheProviderOptions { + instance_name: None, + action_cache_address: cas.address(), + tls_config: tls::Config::default(), + headers: BTreeMap::default(), + concurrency_limit: 256, + rpc_timeout: CACHE_READ_TIMEOUT, + }, + ) .await - .unwrap(); - - assert_eq!(action_result.exit_code, process_result.exit_code); - - let stdout_digest: Digest = action_result.stdout_digest.unwrap().try_into().unwrap(); - assert_eq!(stdout_digest, process_result.stdout_digest); + .expect("caching command runner"); + + let command = remexec::Command { + arguments: vec!["this is a test".into()], + output_files: vec!["pets/cats/roland.ext".into()], + output_directories: vec!["pets/cats".into()], + ..Default::default() + }; + + let process_result = FallibleProcessResultWithPlatform { + stdout_digest: TestData::roland().digest(), + stderr_digest: TestData::robin().digest(), + output_directory: DirectoryDigest::from_persisted_digest(directory_digest), + exit_code: 102, + metadata: ProcessResultMetadata::new( + None, + ProcessResultSource::Ran, + ProcessExecutionEnvironment { + name: None, + platform: Platform::Linux_x86_64, + strategy: ProcessExecutionStrategy::Local, + }, + RunId(0), + ), + }; + + let (action_result, digests) = runner + .make_action_result(&command, &process_result, &store) + .await + .unwrap(); + + assert_eq!(action_result.exit_code, process_result.exit_code); + + let stdout_digest: Digest = action_result.stdout_digest.unwrap().try_into().unwrap(); + assert_eq!(stdout_digest, process_result.stdout_digest); + + let stderr_digest: Digest = action_result.stderr_digest.unwrap().try_into().unwrap(); + assert_eq!(stderr_digest, process_result.stderr_digest); + + assert_eq!(action_result.output_files.len(), 1); + assert_eq!( + action_result.output_files[0], + remexec::OutputFile { + digest: Some(TestData::roland().digest().into()), + path: "pets/cats/roland.ext".to_owned(), + is_executable: false, + ..remexec::OutputFile::default() + } + ); - let stderr_digest: Digest = action_result.stderr_digest.unwrap().try_into().unwrap(); - assert_eq!(stderr_digest, process_result.stderr_digest); + assert_eq!(action_result.output_directories.len(), 1); + assert_eq!( + action_result.output_directories[0], + remexec::OutputDirectory { + path: "pets/cats".to_owned(), + tree_digest: Some(TestTree::roland_at_root().digest().into()), + is_topologically_sorted: false, + } + ); - 
assert_eq!(action_result.output_files.len(), 1); - assert_eq!( - action_result.output_files[0], - remexec::OutputFile { - digest: Some(TestData::roland().digest().into()), - path: "pets/cats/roland.ext".to_owned(), - is_executable: false, - ..remexec::OutputFile::default() - } - ); - - assert_eq!(action_result.output_directories.len(), 1); - assert_eq!( - action_result.output_directories[0], - remexec::OutputDirectory { - path: "pets/cats".to_owned(), - tree_digest: Some(TestTree::roland_at_root().digest().into()), - is_topologically_sorted: false, - } - ); - - let actual_digests_set = digests.into_iter().collect::>(); - let expected_digests_set = hashset! { - TestData::roland().digest(), // stdout - TestData::robin().digest(), // stderr - TestTree::roland_at_root().digest(), // tree directory - }; - assert_eq!(expected_digests_set, actual_digests_set); + let actual_digests_set = digests.into_iter().collect::>(); + let expected_digests_set = hashset! { + TestData::roland().digest(), // stdout + TestData::robin().digest(), // stderr + TestTree::roland_at_root().digest(), // tree directory + }; + assert_eq!(expected_digests_set, actual_digests_set); } diff --git a/src/rust/engine/process_execution/remote/src/remote_tests.rs b/src/rust/engine/process_execution/remote/src/remote_tests.rs index ae4fbbda384..e4c7a41cfe4 100644 --- a/src/rust/engine/process_execution/remote/src/remote_tests.rs +++ b/src/rust/engine/process_execution/remote/src/remote_tests.rs @@ -24,9 +24,9 @@ use workunit_store::{Level, RunId, RunningWorkunit, WorkunitStore}; use crate::remote::{CommandRunner, ExecutionError, OperationOrStatus}; use fs::{DirectoryDigest, RelativePath, SymlinkBehavior, EMPTY_DIRECTORY_DIGEST}; use process_execution::{ - CacheName, CommandRunner as CommandRunnerTrait, Context, EntireExecuteRequest, - FallibleProcessResultWithPlatform, InputDigests, Platform, Process, ProcessCacheScope, - ProcessError, ProcessExecutionEnvironment, ProcessExecutionStrategy, + CacheName, CommandRunner as CommandRunnerTrait, Context, EntireExecuteRequest, + FallibleProcessResultWithPlatform, InputDigests, Platform, Process, ProcessCacheScope, + ProcessError, ProcessExecutionEnvironment, ProcessExecutionStrategy, }; use std::any::type_name; use std::io::Cursor; @@ -40,1511 +40,1522 @@ const EXEC_CONCURRENCY_LIMIT: usize = 256; #[derive(Debug, PartialEq)] struct RemoteTestResult { - original: FallibleProcessResultWithPlatform, - stdout_bytes: Vec, - stderr_bytes: Vec, + original: FallibleProcessResultWithPlatform, + stdout_bytes: Vec, + stderr_bytes: Vec, } impl RemoteTestResult { - pub fn stdout(&self) -> &str { - std::str::from_utf8(&self.stdout_bytes).unwrap() - } + pub fn stdout(&self) -> &str { + std::str::from_utf8(&self.stdout_bytes).unwrap() + } - #[allow(dead_code)] - pub fn stderr(&self) -> &str { - std::str::from_utf8(&self.stderr_bytes).unwrap() - } + #[allow(dead_code)] + pub fn stderr(&self) -> &str { + std::str::from_utf8(&self.stderr_bytes).unwrap() + } } #[derive(Debug, PartialEq)] enum StdoutType { - Raw(String), - Digest(Digest), + Raw(String), + Digest(Digest), } #[derive(Debug, PartialEq)] enum StderrType { - Raw(String), - Digest(Digest), + Raw(String), + Digest(Digest), } fn make_environment(platform: Platform) -> ProcessExecutionEnvironment { - ProcessExecutionEnvironment { - name: None, - platform, - strategy: ProcessExecutionStrategy::RemoteExecution(vec![]), - } + ProcessExecutionEnvironment { + name: None, + platform, + strategy: ProcessExecutionStrategy::RemoteExecution(vec![]), + } } 
#[tokio::test] async fn make_execute_request() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let input_directory = TestDirectory::containing_roland(); - let req = Process { - argv: owned_string_vec(&["/bin/echo", "yo"]), - env: vec![("SOME".to_owned(), "value".to_owned())] - .into_iter() - .collect(), - working_directory: None, - input_digests: InputDigests::with_input_files(input_directory.directory_digest()), - // Intentionally poorly sorted: - output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), - output_directories: relative_paths(&["directory/name"]).collect(), - timeout: None, - description: "some description".to_owned(), - level: log::Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Always, - execution_environment: make_environment(Platform::Linux_x86_64), - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, - }; - - let want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: "SOME".to_owned(), - value: "value".to_owned(), - }, - ], - output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], - output_directories: vec!["directory/name".to_owned()], - platform: Some(remexec::Platform::default()), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "d7b7538a7a57a2b04da51ffffff758036f43ebb92d37b66bd1bb8c6af0030e57", - ) - .unwrap(), - 187, - )) - .into(), - ), - input_root_digest: Some((&input_directory.digest()).into()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "16bf057effe6d18553979a069228f0da81df307c964ea0f162bb60e31070bb27", - ) - .unwrap(), - 141, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request(&req, None, None, &store, None).await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: input_directory.directory_digest(), - }) - ); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let input_directory = TestDirectory::containing_roland(); + let req = Process { + argv: owned_string_vec(&["/bin/echo", "yo"]), + env: vec![("SOME".to_owned(), "value".to_owned())] + .into_iter() + .collect(), + working_directory: None, + input_digests: InputDigests::with_input_files(input_directory.directory_digest()), + // Intentionally poorly sorted: + output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), + output_directories: relative_paths(&["directory/name"]).collect(), + timeout: None, + description: "some 
description".to_owned(), + level: log::Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Always, + execution_environment: make_environment(Platform::Linux_x86_64), + remote_cache_speculation_delay: std::time::Duration::from_millis(0), + attempt: 0, + }; + + let want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: "SOME".to_owned(), + value: "value".to_owned(), + }, + ], + output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], + output_directories: vec!["directory/name".to_owned()], + platform: Some(remexec::Platform::default()), + ..Default::default() + }; + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "d7b7538a7a57a2b04da51ffffff758036f43ebb92d37b66bd1bb8c6af0030e57", + ) + .unwrap(), + 187, + )) + .into(), + ), + input_root_digest: Some((&input_directory.digest()).into()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "16bf057effe6d18553979a069228f0da81df307c964ea0f162bb60e31070bb27", + ) + .unwrap(), + 141, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request(&req, None, None, &store, None).await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: input_directory.directory_digest(), + }) + ); } #[tokio::test] async fn make_execute_request_with_instance_name() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let input_directory = TestDirectory::containing_roland(); - let req = Process { - argv: owned_string_vec(&["/bin/echo", "yo"]), - env: vec![("SOME".to_owned(), "value".to_owned())] - .into_iter() - .collect(), - working_directory: None, - input_digests: InputDigests::with_input_files(input_directory.directory_digest()), - // Intentionally poorly sorted: - output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), - output_directories: relative_paths(&["directory/name"]).collect(), - timeout: None, - description: "some description".to_owned(), - level: log::Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Always, - execution_environment: ProcessExecutionEnvironment { - name: None, - platform: Platform::Linux_x86_64, - strategy: ProcessExecutionStrategy::RemoteExecution(vec![( - "target_platform".to_owned(), - "apple-2e".to_owned(), - )]), - }, - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, - }; - - let want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - 
remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: "SOME".to_owned(), - value: "value".to_owned(), - }, - ], - output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], - output_directories: vec!["directory/name".to_owned()], - platform: Some(remexec::Platform { - properties: vec![remexec::platform::Property { - name: "target_platform".to_owned(), - value: "apple-2e".to_owned(), - }], - }), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "9f8a65e780495003c341923b62a06ae6796dcad47e396dc89704b10bc26e1729", - ) - .unwrap(), - 216, - )) - .into(), - ), - input_root_digest: Some((&input_directory.digest()).into()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - instance_name: "dark-tower".to_owned(), - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "5b017857389d245cd0663105f3b8ee47bb7412940e4859098c8af46bdd21c8b6", + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let input_directory = TestDirectory::containing_roland(); + let req = Process { + argv: owned_string_vec(&["/bin/echo", "yo"]), + env: vec![("SOME".to_owned(), "value".to_owned())] + .into_iter() + .collect(), + working_directory: None, + input_digests: InputDigests::with_input_files(input_directory.directory_digest()), + // Intentionally poorly sorted: + output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), + output_directories: relative_paths(&["directory/name"]).collect(), + timeout: None, + description: "some description".to_owned(), + level: log::Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Always, + execution_environment: ProcessExecutionEnvironment { + name: None, + platform: Platform::Linux_x86_64, + strategy: ProcessExecutionStrategy::RemoteExecution(vec![( + "target_platform".to_owned(), + "apple-2e".to_owned(), + )]), + }, + remote_cache_speculation_delay: std::time::Duration::from_millis(0), + attempt: 0, + }; + + let want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: "SOME".to_owned(), + value: "value".to_owned(), + }, + ], + output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], + output_directories: vec!["directory/name".to_owned()], + platform: Some(remexec::Platform { + properties: vec![remexec::platform::Property { + name: "target_platform".to_owned(), + value: "apple-2e".to_owned(), + }], + }), + ..Default::default() + }; + + let want_action = 
remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "9f8a65e780495003c341923b62a06ae6796dcad47e396dc89704b10bc26e1729", + ) + .unwrap(), + 216, + )) + .into(), + ), + input_root_digest: Some((&input_directory.digest()).into()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + instance_name: "dark-tower".to_owned(), + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "5b017857389d245cd0663105f3b8ee47bb7412940e4859098c8af46bdd21c8b6", + ) + .unwrap(), + 141, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request( + &req, + Some("dark-tower".to_owned()), + None, + &store, + None ) - .unwrap(), - 141, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request( - &req, - Some("dark-tower".to_owned()), - None, - &store, - None - ) - .await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: input_directory.directory_digest(), - }) - ); + .await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: input_directory.directory_digest(), + }) + ); } #[tokio::test] async fn make_execute_request_with_cache_key_gen_version() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let input_directory = TestDirectory::containing_roland(); - let req = Process { - argv: owned_string_vec(&["/bin/echo", "yo"]), - env: vec![("SOME".to_owned(), "value".to_owned())] - .into_iter() - .collect(), - working_directory: None, - input_digests: InputDigests::with_input_files(input_directory.directory_digest()), - // Intentionally poorly sorted: - output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), - output_directories: relative_paths(&["directory/name"]).collect(), - timeout: None, - description: "some description".to_owned(), - level: log::Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Always, - execution_environment: make_environment(Platform::Linux_x86_64), - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, - }; - - let mut want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_GEN_VERSION_ENV_VAR_NAME.to_owned(), - value: "meep".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: "SOME".to_owned(), - value: "value".to_owned(), - }, - ], - output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], - output_directories: vec!["directory/name".to_owned()], - platform: Some(remexec::Platform::default()), - ..Default::default() - }; - want_command - .environment_variables - .sort_by(|x, y| 
x.name.cmp(&y.name)); - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "04ed10b1ddac69249ad1ca463fd4284c4f9c0115a2f2aaf1fd8a9ce6571ee29c", - ) - .unwrap(), - 224, - )) - .into(), - ), - input_root_digest: Some((&input_directory.digest()).into()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "e55329e2c0413a6def422752f9e964204e7e40ec81e2867a6222a43727ba29d1", - ) - .unwrap(), - 141, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request(&req, None, Some("meep".to_owned()), &store, None) - .await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: input_directory.directory_digest(), - }) - ); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let input_directory = TestDirectory::containing_roland(); + let req = Process { + argv: owned_string_vec(&["/bin/echo", "yo"]), + env: vec![("SOME".to_owned(), "value".to_owned())] + .into_iter() + .collect(), + working_directory: None, + input_digests: InputDigests::with_input_files(input_directory.directory_digest()), + // Intentionally poorly sorted: + output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), + output_directories: relative_paths(&["directory/name"]).collect(), + timeout: None, + description: "some description".to_owned(), + level: log::Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Always, + execution_environment: make_environment(Platform::Linux_x86_64), + remote_cache_speculation_delay: std::time::Duration::from_millis(0), + attempt: 0, + }; + + let mut want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_GEN_VERSION_ENV_VAR_NAME.to_owned(), + value: "meep".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: "SOME".to_owned(), + value: "value".to_owned(), + }, + ], + output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], + output_directories: vec!["directory/name".to_owned()], + platform: Some(remexec::Platform::default()), + ..Default::default() + }; + want_command + .environment_variables + .sort_by(|x, y| x.name.cmp(&y.name)); + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "04ed10b1ddac69249ad1ca463fd4284c4f9c0115a2f2aaf1fd8a9ce6571ee29c", + ) + .unwrap(), + 224, + )) + .into(), + ), + input_root_digest: Some((&input_directory.digest()).into()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + 
"e55329e2c0413a6def422752f9e964204e7e40ec81e2867a6222a43727ba29d1", + ) + .unwrap(), + 141, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request(&req, None, Some("meep".to_owned()), &store, None) + .await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: input_directory.directory_digest(), + }) + ); } #[tokio::test] async fn make_execute_request_with_jdk() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let input_directory = TestDirectory::containing_roland(); - let mut req = Process::new(owned_string_vec(&["/bin/echo", "yo"])); - req.execution_environment.platform = Platform::Linux_x86_64; - req.jdk_home = Some(PathBuf::from("/tmp")); - req.description = "some description".to_owned(); - req.input_digests = InputDigests::with_input_files(input_directory.directory_digest()); - - let want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::Local.cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - ], - platform: Some(remexec::Platform { - properties: vec![remexec::platform::Property { - name: "JDK_SYMLINK".to_owned(), - value: ".jdk".to_owned(), - }], - }), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "45e72f32f1d935e02732e26a8aaec041877811a9b7fe66816ace7b570173953e", - ) - .unwrap(), - 142, - )) - .into(), - ), - input_root_digest: Some((&input_directory.digest()).into()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "2868a54befe3ad9d8fd2ac30c2a170ac890715ec0b196ab8259e8b6beabf7d1c", - ) - .unwrap(), - 141, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request(&req, None, None, &store, None).await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: input_directory.directory_digest(), - }) - ); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let input_directory = TestDirectory::containing_roland(); + let mut req = Process::new(owned_string_vec(&["/bin/echo", "yo"])); + req.execution_environment.platform = Platform::Linux_x86_64; + req.jdk_home = Some(PathBuf::from("/tmp")); + req.description = "some description".to_owned(); + req.input_digests = InputDigests::with_input_files(input_directory.directory_digest()); + + let want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::Local.cache_value(), + }, + remexec::command::EnvironmentVariable { + name: 
process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + ], + platform: Some(remexec::Platform { + properties: vec![remexec::platform::Property { + name: "JDK_SYMLINK".to_owned(), + value: ".jdk".to_owned(), + }], + }), + ..Default::default() + }; + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "45e72f32f1d935e02732e26a8aaec041877811a9b7fe66816ace7b570173953e", + ) + .unwrap(), + 142, + )) + .into(), + ), + input_root_digest: Some((&input_directory.digest()).into()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "2868a54befe3ad9d8fd2ac30c2a170ac890715ec0b196ab8259e8b6beabf7d1c", + ) + .unwrap(), + 141, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request(&req, None, None, &store, None).await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: input_directory.directory_digest(), + }) + ); } #[tokio::test] async fn make_execute_request_with_jdk_and_extra_platform_properties() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let input_directory = TestDirectory::containing_roland(); - let mut req = Process::new(owned_string_vec(&["/bin/echo", "yo"])).remote_execution(vec![ - ("FIRST".to_owned(), "foo".to_owned()), - ("Multi".to_owned(), "uno".to_owned()), - ("last".to_owned(), "bar".to_owned()), - ("Multi".to_owned(), "dos".to_owned()), - ]); - req.execution_environment.platform = Platform::Linux_x86_64; - req.input_digests = InputDigests::with_input_files(input_directory.directory_digest()); - req.description = "some description".to_owned(); - req.jdk_home = Some(PathBuf::from("/tmp")); - - let want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - ], - platform: Some(remexec::Platform { - properties: vec![ - remexec::platform::Property { - name: "FIRST".to_owned(), - value: "foo".to_owned(), - }, - remexec::platform::Property { - name: "JDK_SYMLINK".to_owned(), - value: ".jdk".to_owned(), - }, - remexec::platform::Property { - name: "Multi".to_owned(), - value: "dos".to_owned(), - }, - remexec::platform::Property { - name: "Multi".to_owned(), - value: "uno".to_owned(), - }, - remexec::platform::Property { - name: "last".to_owned(), - value: "bar".to_owned(), - }, - ], - }), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "03b368b6f449438938636f57fbaf6b6e2a1eb776583b5197c1320b646ee8d64a", - ) - .unwrap(), - 198, - )) - .into(), - ), - input_root_digest: Some((&input_directory.digest()).into()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - 
"0291bda0da047d715b6da33c1e4c2a74679ab06c95a32424ea754f70be5242ed", - ) - .unwrap(), - 141, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request(&req, None, None, &store, None).await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: input_directory.directory_digest(), - }) - ); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let input_directory = TestDirectory::containing_roland(); + let mut req = Process::new(owned_string_vec(&["/bin/echo", "yo"])).remote_execution(vec![ + ("FIRST".to_owned(), "foo".to_owned()), + ("Multi".to_owned(), "uno".to_owned()), + ("last".to_owned(), "bar".to_owned()), + ("Multi".to_owned(), "dos".to_owned()), + ]); + req.execution_environment.platform = Platform::Linux_x86_64; + req.input_digests = InputDigests::with_input_files(input_directory.directory_digest()); + req.description = "some description".to_owned(); + req.jdk_home = Some(PathBuf::from("/tmp")); + + let want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + ], + platform: Some(remexec::Platform { + properties: vec![ + remexec::platform::Property { + name: "FIRST".to_owned(), + value: "foo".to_owned(), + }, + remexec::platform::Property { + name: "JDK_SYMLINK".to_owned(), + value: ".jdk".to_owned(), + }, + remexec::platform::Property { + name: "Multi".to_owned(), + value: "dos".to_owned(), + }, + remexec::platform::Property { + name: "Multi".to_owned(), + value: "uno".to_owned(), + }, + remexec::platform::Property { + name: "last".to_owned(), + value: "bar".to_owned(), + }, + ], + }), + ..Default::default() + }; + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "03b368b6f449438938636f57fbaf6b6e2a1eb776583b5197c1320b646ee8d64a", + ) + .unwrap(), + 198, + )) + .into(), + ), + input_root_digest: Some((&input_directory.digest()).into()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "0291bda0da047d715b6da33c1e4c2a74679ab06c95a32424ea754f70be5242ed", + ) + .unwrap(), + 141, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request(&req, None, None, &store, None).await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: input_directory.directory_digest(), + }) + ); } #[tokio::test] async fn make_execute_request_with_timeout() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let input_directory = TestDirectory::containing_roland(); - let req = Process { - argv: owned_string_vec(&["/bin/echo", "yo"]), - env: vec![("SOME".to_owned(), "value".to_owned())] - .into_iter() - 
.collect(), - working_directory: None, - input_digests: InputDigests::with_input_files(input_directory.directory_digest()), - // Intentionally poorly sorted: - output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), - output_directories: relative_paths(&["directory/name"]).collect(), - timeout: one_second(), - description: "some description".to_owned(), - level: log::Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Always, - execution_environment: make_environment(Platform::Linux_x86_64), - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, - }; - - let want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: "SOME".to_owned(), - value: "value".to_owned(), - }, - ], - output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], - output_directories: vec!["directory/name".to_owned()], - platform: Some(remexec::Platform::default()), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "d7b7538a7a57a2b04da51ffffff758036f43ebb92d37b66bd1bb8c6af0030e57", - ) - .unwrap(), - 187, - )) - .into(), - ), - input_root_digest: Some((&input_directory.digest()).into()), - timeout: Some(prost_types::Duration::try_from(Duration::from_secs(1)).unwrap()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "6e3666265a4ef89ddf26a406516484429b2d8e744fbae6b36a66c6853407626a", - ) - .unwrap(), - 145, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request(&req, None, None, &store, None).await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: input_directory.directory_digest(), - }) - ); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let input_directory = TestDirectory::containing_roland(); + let req = Process { + argv: owned_string_vec(&["/bin/echo", "yo"]), + env: vec![("SOME".to_owned(), "value".to_owned())] + .into_iter() + .collect(), + working_directory: None, + input_digests: InputDigests::with_input_files(input_directory.directory_digest()), + // Intentionally poorly sorted: + output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), + output_directories: relative_paths(&["directory/name"]).collect(), + timeout: one_second(), + description: "some description".to_owned(), + level: log::Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Always, + execution_environment: make_environment(Platform::Linux_x86_64), + remote_cache_speculation_delay: 
std::time::Duration::from_millis(0), + attempt: 0, + }; + + let want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: "SOME".to_owned(), + value: "value".to_owned(), + }, + ], + output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], + output_directories: vec!["directory/name".to_owned()], + platform: Some(remexec::Platform::default()), + ..Default::default() + }; + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "d7b7538a7a57a2b04da51ffffff758036f43ebb92d37b66bd1bb8c6af0030e57", + ) + .unwrap(), + 187, + )) + .into(), + ), + input_root_digest: Some((&input_directory.digest()).into()), + timeout: Some(prost_types::Duration::try_from(Duration::from_secs(1)).unwrap()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "6e3666265a4ef89ddf26a406516484429b2d8e744fbae6b36a66c6853407626a", + ) + .unwrap(), + 145, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request(&req, None, None, &store, None).await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: input_directory.directory_digest(), + }) + ); } #[tokio::test] async fn make_execute_request_with_append_only_caches() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); - let input_directory = TestDirectory::containing_roland(); - store - .record_directory(&input_directory.directory(), false) - .await - .unwrap(); + let input_directory = TestDirectory::containing_roland(); + store + .record_directory(&input_directory.directory(), false) + .await + .unwrap(); + + let req = Process { + argv: owned_string_vec(&["/bin/cat", "../.cache/xyzzy/foo.txt"]), + env: vec![("SOME".to_owned(), "value".to_owned())] + .into_iter() + .collect(), + working_directory: Some(RelativePath::new(Path::new("animals")).unwrap()), + input_digests: InputDigests::with_input_files(input_directory.directory_digest()), + output_files: BTreeSet::new(), + output_directories: BTreeSet::new(), + timeout: one_second(), + description: "some description".to_owned(), + level: log::Level::Info, + append_only_caches: btreemap! 
{ + CacheName::new(String::from("xyzzy")).unwrap() => RelativePath::new(Path::new(".cache/xyzzy")).unwrap(), + }, + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Always, + execution_environment: make_environment(Platform::Linux_x86_64), + remote_cache_speculation_delay: std::time::Duration::from_millis(0), + attempt: 0, + }; + + let want_command = remexec::Command { + arguments: vec![ + "./__pants_wrapper__".to_owned(), + "/bin/cat".to_owned(), + "../.cache/xyzzy/foo.txt".to_owned(), + ], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: "SOME".to_owned(), + value: "value".to_owned(), + }, + ], + platform: Some(remexec::Platform::default()), + ..Default::default() + }; + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "1deb19eddcefd5074263064a7df2a19caeb4e6d86a849bc07e23a5d856f886ec", + ) + .unwrap(), + 178, + )) + .into(), + ), + input_root_digest: Some( + (Digest::new( + Fingerprint::from_hex_string( + "92f5d2ff07cb6cdf4a70f2d6392781b482cd587b9dd69d6729ac73eb54110a69", + ) + .unwrap(), + 178, + )) + .into(), + ), + timeout: Some(prost_types::Duration::try_from(Duration::from_secs(1)).unwrap()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "e4196db365556cbeed4941845f448cfafc1fabb76b3c476c3f378f358235d3c4", + ) + .unwrap(), + 146, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; - let req = Process { - argv: owned_string_vec(&["/bin/cat", "../.cache/xyzzy/foo.txt"]), - env: vec![("SOME".to_owned(), "value".to_owned())] - .into_iter() - .collect(), - working_directory: Some(RelativePath::new(Path::new("animals")).unwrap()), - input_digests: InputDigests::with_input_files(input_directory.directory_digest()), - output_files: BTreeSet::new(), - output_directories: BTreeSet::new(), - timeout: one_second(), - description: "some description".to_owned(), - level: log::Level::Info, - append_only_caches: btreemap! 
{ - CacheName::new(String::from("xyzzy")).unwrap() => RelativePath::new(Path::new(".cache/xyzzy")).unwrap(), - }, - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Always, - execution_environment: make_environment(Platform::Linux_x86_64), - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, - }; - - let want_command = remexec::Command { - arguments: vec![ - "./__pants_wrapper__".to_owned(), - "/bin/cat".to_owned(), - "../.cache/xyzzy/foo.txt".to_owned(), - ], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: "SOME".to_owned(), - value: "value".to_owned(), - }, - ], - platform: Some(remexec::Platform::default()), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "1deb19eddcefd5074263064a7df2a19caeb4e6d86a849bc07e23a5d856f886ec", - ) - .unwrap(), - 178, - )) - .into(), - ), - input_root_digest: Some( - (Digest::new( + let want_input_root_digest = DirectoryDigest::from_persisted_digest(Digest::new( Fingerprint::from_hex_string( - "92f5d2ff07cb6cdf4a70f2d6392781b482cd587b9dd69d6729ac73eb54110a69", + "92f5d2ff07cb6cdf4a70f2d6392781b482cd587b9dd69d6729ac73eb54110a69", ) .unwrap(), 178, - )) - .into(), - ), - timeout: Some(prost_types::Duration::try_from(Duration::from_secs(1)).unwrap()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "e4196db365556cbeed4941845f448cfafc1fabb76b3c476c3f378f358235d3c4", - ) - .unwrap(), - 146, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - let want_input_root_digest = DirectoryDigest::from_persisted_digest(Digest::new( - Fingerprint::from_hex_string( - "92f5d2ff07cb6cdf4a70f2d6392781b482cd587b9dd69d6729ac73eb54110a69", - ) - .unwrap(), - 178, - )); - - let got_execute_request = - process_execution::make_execute_request(&req, None, None, &store, Some("/append-only-caches")) - .await - .unwrap(); - assert_eq!( - got_execute_request, - EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: want_input_root_digest, - } - ); + )); - // Ensure that the wrapper script was added to the input root. - let mut files = store - .load_digest_trie(got_execute_request.input_root_digest) + let got_execute_request = process_execution::make_execute_request( + &req, + None, + None, + &store, + Some("/append-only-caches"), + ) .await - .unwrap() - .files(SymlinkBehavior::Oblivious); - files.sort(); - assert_eq!( - files, - vec![ - Path::new("__pants_wrapper__").to_path_buf(), - Path::new("roland.ext").to_path_buf() - ] - ) + .unwrap(); + assert_eq!( + got_execute_request, + EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: want_input_root_digest, + } + ); + + // Ensure that the wrapper script was added to the input root. 
+ let mut files = store + .load_digest_trie(got_execute_request.input_root_digest) + .await + .unwrap() + .files(SymlinkBehavior::Oblivious); + files.sort(); + assert_eq!( + files, + vec![ + Path::new("__pants_wrapper__").to_path_buf(), + Path::new("roland.ext").to_path_buf() + ] + ) } #[tokio::test] async fn make_execute_request_using_immutable_inputs() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let prefix = RelativePath::new("cats").unwrap(); - let input_directory = TestDirectory::containing_roland(); - store - .record_directory(&input_directory.directory(), false) - .await - .expect("Saving directory bytes to store"); - let input_digests = InputDigests::new( - &store, - EMPTY_DIRECTORY_DIGEST.clone(), - { - let mut map = BTreeMap::new(); - map.insert(prefix.clone(), input_directory.directory_digest()); - map - }, - BTreeSet::new(), - ) - .await - .unwrap(); - - // The computed input root digest will be prefixed with the mount point. - let expected_digest = store - .add_prefix(input_directory.directory_digest(), &prefix) + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let prefix = RelativePath::new("cats").unwrap(); + let input_directory = TestDirectory::containing_roland(); + store + .record_directory(&input_directory.directory(), false) + .await + .expect("Saving directory bytes to store"); + let input_digests = InputDigests::new( + &store, + EMPTY_DIRECTORY_DIGEST.clone(), + { + let mut map = BTreeMap::new(); + map.insert(prefix.clone(), input_directory.directory_digest()); + map + }, + BTreeSet::new(), + ) .await .unwrap(); - let req = Process { - argv: owned_string_vec(&["/bin/echo", "yo"]), - env: vec![("SOME".to_owned(), "value".to_owned())] - .into_iter() - .collect(), - working_directory: None, - input_digests, - output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), - output_directories: relative_paths(&["directory/name"]).collect(), - timeout: None, - description: "some description".to_owned(), - level: log::Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Always, - execution_environment: make_environment(Platform::Linux_x86_64), - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, - }; - - let want_command = remexec::Command { - arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), - value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), - }, - remexec::command::EnvironmentVariable { - name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), - value: "linux_x86_64".to_owned(), - }, - remexec::command::EnvironmentVariable { - name: "SOME".to_owned(), - value: "value".to_owned(), - }, - ], - output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], - output_directories: vec!["directory/name".to_owned()], - platform: Some(remexec::Platform::default()), - ..Default::default() - }; - - let want_action = remexec::Action { - command_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "d7b7538a7a57a2b04da51ffffff758036f43ebb92d37b66bd1bb8c6af0030e57", - ) - .unwrap(), - 187, - 
)) - .into(), - ), - input_root_digest: Some((&expected_digest.as_digest()).into()), - ..Default::default() - }; - - let want_execute_request = remexec::ExecuteRequest { - action_digest: Some( - (&Digest::new( - Fingerprint::from_hex_string( - "2c1eae75a54d2464ac63ba51587deb3986f15c3966c61f77fb9b06b195f4127a", - ) - .unwrap(), - 141, - )) - .into(), - ), - skip_cache_lookup: true, - ..Default::default() - }; - - assert_eq!( - process_execution::make_execute_request(&req, None, None, &store, None).await, - Ok(EntireExecuteRequest { - action: want_action, - command: want_command, - execute_request: want_execute_request, - input_root_digest: expected_digest, - }) - ); + // The computed input root digest will be prefixed with the mount point. + let expected_digest = store + .add_prefix(input_directory.directory_digest(), &prefix) + .await + .unwrap(); + + let req = Process { + argv: owned_string_vec(&["/bin/echo", "yo"]), + env: vec![("SOME".to_owned(), "value".to_owned())] + .into_iter() + .collect(), + working_directory: None, + input_digests, + output_files: relative_paths(&["path/to/file.ext", "other/file.ext"]).collect(), + output_directories: relative_paths(&["directory/name"]).collect(), + timeout: None, + description: "some description".to_owned(), + level: log::Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Always, + execution_environment: make_environment(Platform::Linux_x86_64), + remote_cache_speculation_delay: std::time::Duration::from_millis(0), + attempt: 0, + }; + + let want_command = remexec::Command { + arguments: vec!["/bin/echo".to_owned(), "yo".to_owned()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_EXECUTION_STRATEGY.to_owned(), + value: ProcessExecutionStrategy::RemoteExecution(vec![]).cache_value(), + }, + remexec::command::EnvironmentVariable { + name: process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_owned(), + value: "linux_x86_64".to_owned(), + }, + remexec::command::EnvironmentVariable { + name: "SOME".to_owned(), + value: "value".to_owned(), + }, + ], + output_files: vec!["other/file.ext".to_owned(), "path/to/file.ext".to_owned()], + output_directories: vec!["directory/name".to_owned()], + platform: Some(remexec::Platform::default()), + ..Default::default() + }; + + let want_action = remexec::Action { + command_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "d7b7538a7a57a2b04da51ffffff758036f43ebb92d37b66bd1bb8c6af0030e57", + ) + .unwrap(), + 187, + )) + .into(), + ), + input_root_digest: Some((&expected_digest.as_digest()).into()), + ..Default::default() + }; + + let want_execute_request = remexec::ExecuteRequest { + action_digest: Some( + (&Digest::new( + Fingerprint::from_hex_string( + "2c1eae75a54d2464ac63ba51587deb3986f15c3966c61f77fb9b06b195f4127a", + ) + .unwrap(), + 141, + )) + .into(), + ), + skip_cache_lookup: true, + ..Default::default() + }; + + assert_eq!( + process_execution::make_execute_request(&req, None, None, &store, None).await, + Ok(EntireExecuteRequest { + action: want_action, + command: want_command, + execute_request: want_execute_request, + input_root_digest: expected_digest, + }) + ); } #[tokio::test] async fn successful_with_only_call_to_execute() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, 
store_dir).unwrap(); - - let execute_request = echo_foo_request(); - let op_name = "gimme-foo".to_string(); - - let mock_server = { - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name), - make_successful_operation( - &op_name, - StdoutType::Raw("foo".to_owned()), - StderrType::Raw("".to_owned()), - 0, - ), - ]), - }]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); - let result = run_command_remote(mock_server.address(), execute_request) - .await - .unwrap(); + let execute_request = echo_foo_request(); + let op_name = "gimme-foo".to_string(); + + let mock_server = { + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name), + make_successful_operation( + &op_name, + StdoutType::Raw("foo".to_owned()), + StderrType::Raw("".to_owned()), + 0, + ), + ]), + }]), + None, + ) + }; + + let result = run_command_remote(mock_server.address(), execute_request) + .await + .unwrap(); - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); - assert_cancellation_requests(&mock_server, vec![]); + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + assert_cancellation_requests(&mock_server, vec![]); } #[tokio::test] async fn successful_after_reconnect_with_wait_execution() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - let op_name = "gimme-foo".to_string(); - - let mock_server = { - let EntireExecuteRequest { - execute_request, .. 
- } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ - ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ExpectedAPICall::WaitExecution { - operation_name: op_name.clone(), - stream_responses: Ok(vec![make_successful_operation( - &op_name, - StdoutType::Raw("foo".to_owned()), - StderrType::Raw("".to_owned()), - 0, - )]), - }, - ]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); - let result = run_command_remote(mock_server.address(), execute_request) - .await - .unwrap(); + let execute_request = echo_foo_request(); + let op_name = "gimme-foo".to_string(); + + let mock_server = { + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ + ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ExpectedAPICall::WaitExecution { + operation_name: op_name.clone(), + stream_responses: Ok(vec![make_successful_operation( + &op_name, + StdoutType::Raw("foo".to_owned()), + StderrType::Raw("".to_owned()), + 0, + )]), + }, + ]), + None, + ) + }; - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); - assert_cancellation_requests(&mock_server, vec![]); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + assert_cancellation_requests(&mock_server, vec![]); } #[tokio::test] async fn successful_after_reconnect_from_retryable_error() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - let op_name_1 = "gimme-foo".to_string(); - let op_name_2 = "gimme-bar".to_string(); - - let mock_server = { - let EntireExecuteRequest { - execute_request, .. 
- } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - let execute_request_2 = execute_request.clone(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ - ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name_1), - make_retryable_operation_failure(), - ]), - }, - ExpectedAPICall::Execute { - execute_request: execute_request_2, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name_2), - make_successful_operation( - &op_name_2, - StdoutType::Raw("foo".to_owned()), - StderrType::Raw("".to_owned()), - 0, - ), - ]), - }, - ]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + let op_name_1 = "gimme-foo".to_string(); + let op_name_2 = "gimme-bar".to_string(); + + let mock_server = { + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + let execute_request_2 = execute_request.clone(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ + ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name_1), + make_retryable_operation_failure(), + ]), + }, + ExpectedAPICall::Execute { + execute_request: execute_request_2, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name_2), + make_successful_operation( + &op_name_2, + StdoutType::Raw("foo".to_owned()), + StderrType::Raw("".to_owned()), + 0, + ), + ]), + }, + ]), + None, + ) + }; - let result = run_command_remote(mock_server.address(), execute_request) - .await - .unwrap(); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .unwrap(); - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); - assert_cancellation_requests(&mock_server, vec![]); + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + assert_cancellation_requests(&mock_server, vec![]); } #[tokio::test] async fn creates_executing_workunit() { - let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - let op_name = "gimme-foo".to_string(); - - let queue_time = Duration::from_millis(100); - let executing_time = Duration::from_millis(100); - - let mock_server = { - let EntireExecuteRequest { - execute_request, .. 
- } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![ - make_delayed_incomplete_operation_with_stage( - &op_name, - queue_time, - ExecutionStageValue::Queued, - ), - make_delayed_incomplete_operation_with_stage( - &op_name, - Duration::from_millis(0), - ExecutionStageValue::Executing, - ), - make_delayed_incomplete_operation_with_stage( - &op_name, - executing_time, - ExecutionStageValue::Completed, - ), - make_successful_operation( - &op_name, - StdoutType::Raw("foo".to_owned()), - StderrType::Raw("".to_owned()), - 0, - ), - ]), - }]), - None, - ) - }; - - let result = - run_command_remote_in_workunit(mock_server.address(), execute_request, &mut workunit) - .await - .unwrap(); - - assert_eq!(result.original.exit_code, 0); - - // Confirm that a workunit was created, and that it took: - // 1. at least the queue_time less than its parent - // 2. more than the executing_time - let (_, completed_workunits) = workunit_store.latest_workunits(Level::Trace); - let parent_duration: Duration = completed_workunits - .iter() - .find(|wu| wu.name == "run_execute_request") - .unwrap() - .time_span() - .unwrap() - .duration - .into(); - let child_duration: Duration = completed_workunits - .iter() - .find(|wu| wu.name == "run_remote_process") - .unwrap() - .time_span() - .unwrap() - .duration - .into(); - - assert!(parent_duration - queue_time >= child_duration); - assert!(child_duration >= executing_time); + let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + let op_name = "gimme-foo".to_string(); + + let queue_time = Duration::from_millis(100); + let executing_time = Duration::from_millis(100); + + let mock_server = { + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![ + make_delayed_incomplete_operation_with_stage( + &op_name, + queue_time, + ExecutionStageValue::Queued, + ), + make_delayed_incomplete_operation_with_stage( + &op_name, + Duration::from_millis(0), + ExecutionStageValue::Executing, + ), + make_delayed_incomplete_operation_with_stage( + &op_name, + executing_time, + ExecutionStageValue::Completed, + ), + make_successful_operation( + &op_name, + StdoutType::Raw("foo".to_owned()), + StderrType::Raw("".to_owned()), + 0, + ), + ]), + }]), + None, + ) + }; + + let result = + run_command_remote_in_workunit(mock_server.address(), execute_request, &mut workunit) + .await + .unwrap(); + + assert_eq!(result.original.exit_code, 0); + + // Confirm that a workunit was created, and that it took: + // 1. at least the queue_time less than its parent + // 2. 
more than the executing_time + let (_, completed_workunits) = workunit_store.latest_workunits(Level::Trace); + let parent_duration: Duration = completed_workunits + .iter() + .find(|wu| wu.name == "run_execute_request") + .unwrap() + .time_span() + .unwrap() + .duration + .into(); + let child_duration: Duration = completed_workunits + .iter() + .find(|wu| wu.name == "run_remote_process") + .unwrap() + .time_span() + .unwrap() + .duration + .into(); + + assert!(parent_duration - queue_time >= child_duration); + assert!(child_duration >= executing_time); } #[tokio::test] async fn dropped_request_cancels() { - let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); + let (workunit_store, mut workunit) = WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); - let client_timeout = Duration::new(5, 0); - let delayed_operation_time = Duration::new(15, 0); + let client_timeout = Duration::new(5, 0); + let delayed_operation_time = Duration::new(15, 0); - let request = Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])); + let request = Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])); - let op_name = "gimme-foo".to_string(); + let op_name = "gimme-foo".to_string(); - let mock_server = { - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request: process_execution::make_execute_request( - &request, None, None, &store, None, + let mock_server = { + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request: process_execution::make_execute_request( + &request, None, None, &store, None, + ) + .await + .unwrap() + .execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name), + make_delayed_incomplete_operation(&op_name, delayed_operation_time), + ]), + }]), + None, ) - .await - .unwrap() - .execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name), - make_delayed_incomplete_operation(&op_name, delayed_operation_time), - ]), - }]), - None, + }; + + let cas = mock::StubCAS::builder() + .file(&TestData::roland()) + .directory(&TestDirectory::containing_roland()) + .build(); + let (command_runner, _store) = create_command_runner(mock_server.address(), &cas).await; + + let context = Context { + workunit_store, + build_id: String::from("marmosets"), + run_id: RunId(0), + ..Context::default() + }; + + // Timeout the run, which should cause the remote operation to be cancelled. + if let Ok(res) = timeout( + client_timeout, + command_runner.run(context, &mut workunit, request), ) - }; - - let cas = mock::StubCAS::builder() - .file(&TestData::roland()) - .directory(&TestDirectory::containing_roland()) - .build(); - let (command_runner, _store) = create_command_runner(mock_server.address(), &cas).await; - - let context = Context { - workunit_store, - build_id: String::from("marmosets"), - run_id: RunId(0), - ..Context::default() - }; - - // Timeout the run, which should cause the remote operation to be cancelled. - if let Ok(res) = timeout( - client_timeout, - command_runner.run(context, &mut workunit, request), - ) - .await - { - panic!("Did not expect the client to return successfully. 
Got: {res:?}"); - } - - // Wait for the cancellation to have been spawned and sent. - sleep(Duration::from_secs(2)).await; - - // Confirm that the cancellation was sent. - assert_cancellation_requests(&mock_server, vec![op_name.to_owned()]); + .await + { + panic!("Did not expect the client to return successfully. Got: {res:?}"); + } + + // Wait for the cancellation to have been spawned and sent. + sleep(Duration::from_secs(2)).await; + + // Confirm that the cancellation was sent. + assert_cancellation_requests(&mock_server, vec![op_name.to_owned()]); } #[tokio::test] async fn server_rejecting_execute_request_gives_error() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request: process_execution::make_execute_request( - &Process::new(owned_string_vec(&["/bin/echo", "-n", "bar"])), - None, - None, - &store, + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request: process_execution::make_execute_request( + &Process::new(owned_string_vec(&["/bin/echo", "-n", "bar"])), + None, + None, + &store, + None, + ) + .await + .unwrap() + .execute_request, + stream_responses: Err(Status::invalid_argument("".to_owned())), + }]), None, - ) - .await - .unwrap() - .execute_request, - stream_responses: Err(Status::invalid_argument("".to_owned())), - }]), - None, - ); - - let error = run_command_remote(mock_server.address(), execute_request) - .await - .expect_err("Want Err"); - assert!(&error.to_string().contains("InvalidArgument")); - assert!(&error.to_string().contains("Did not expect this request")); + ); + + let error = run_command_remote(mock_server.address(), execute_request) + .await + .expect_err("Want Err"); + assert!(&error.to_string().contains("InvalidArgument")); + assert!(&error.to_string().contains("Did not expect this request")); } #[tokio::test] async fn server_sending_triggering_timeout_with_deadline_exceeded() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = { - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Err(Status::deadline_exceeded("")), - }]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = { + let EntireExecuteRequest { + execute_request, .. 
+ } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Err(Status::deadline_exceeded("")), + }]), + None, + ) + }; - let result = run_command_remote(mock_server.address(), execute_request) - .await - .expect("Should succeed, but with a failed process."); - assert!(result.stdout().contains("user timeout")); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .expect("Should succeed, but with a failed process."); + assert!(result.stdout().contains("user timeout")); } fn remote_options_for_cas(cas: &mock::StubCAS) -> RemoteOptions { - RemoteOptions { - cas_address: cas.address(), - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes: 10 * 1024 * 1024, - rpc_timeout: Duration::from_secs(1), - rpc_retries: 1, - rpc_concurrency_limit: STORE_CONCURRENCY_LIMIT, - capabilities_cell_opt: None, - batch_api_size_limit: STORE_BATCH_API_SIZE_LIMIT, - } + RemoteOptions { + cas_address: cas.address(), + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes: 10 * 1024 * 1024, + rpc_timeout: Duration::from_secs(1), + rpc_retries: 1, + rpc_concurrency_limit: STORE_CONCURRENCY_LIMIT, + capabilities_cell_opt: None, + batch_api_size_limit: STORE_BATCH_API_SIZE_LIMIT, + } } #[tokio::test] async fn sends_headers() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let cas = mock::StubCAS::empty(); - let runtime = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(runtime.clone(), store_dir) - .unwrap() - .into_with_remote(remote_options_for_cas(&cas)) - .await - .unwrap(); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let execute_request = echo_foo_request(); - let op_name = "gimme-foo".to_string(); + let cas = mock::StubCAS::empty(); + let runtime = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(runtime.clone(), store_dir) + .unwrap() + .into_with_remote(remote_options_for_cas(&cas)) + .await + .unwrap(); - let mock_server = { - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); + let execute_request = echo_foo_request(); + let op_name = "gimme-foo".to_string(); - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name), - make_successful_operation( - &op_name, - StdoutType::Raw("foo".to_owned()), - StderrType::Raw("".to_owned()), - 0, - ), - ]), - }]), - None, + let mock_server = { + let EntireExecuteRequest { + execute_request, .. 
+ } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name), + make_successful_operation( + &op_name, + StdoutType::Raw("foo".to_owned()), + StderrType::Raw("".to_owned()), + 0, + ), + ]), + }]), + None, + ) + }; + + let command_runner = CommandRunner::new( + &mock_server.address(), + None, + None, + None, + Default::default(), + btreemap! { + String::from("cat") => String::from("roland"), + String::from("authorization") => String::from("Bearer catnip-will-get-you-anywhere"), + }, + store, + task_executor::Executor::new(), + OVERALL_DEADLINE_SECS, + RETRY_INTERVAL, + EXEC_CONCURRENCY_LIMIT, + None, ) - }; - - let command_runner = CommandRunner::new( - &mock_server.address(), - None, - None, - None, - Default::default(), - btreemap! { - String::from("cat") => String::from("roland"), - String::from("authorization") => String::from("Bearer catnip-will-get-you-anywhere"), - }, - store, - task_executor::Executor::new(), - OVERALL_DEADLINE_SECS, - RETRY_INTERVAL, - EXEC_CONCURRENCY_LIMIT, - None, - ) - .await - .unwrap(); - let context = Context { - workunit_store: WorkunitStore::new(false, log::Level::Debug), - build_id: String::from("marmosets"), - run_id: RunId(0), - ..Context::default() - }; - command_runner - .run(context, &mut workunit, execute_request) .await - .expect("Execution failed"); - - let received_messages = mock_server.mock_responder.received_messages.lock(); - let message_headers: Vec<_> = received_messages - .iter() - .map(|received_message| received_message.headers.clone()) - .collect(); - assert_eq!(message_headers.len(), 1); - for headers in message_headers { - { - let want_key = "google.devtools.remoteexecution.v1test.requestmetadata-bin"; - assert!(headers.contains_key(want_key)); - - let bytes = headers.get_bin(want_key).unwrap().to_bytes().unwrap(); - let proto = remexec::RequestMetadata::decode(Cursor::new(bytes)) - .expect("Failed to parse metadata proto"); - - assert_eq!(proto.tool_details.map(|x| x.tool_name).unwrap(), "pants"); - assert_eq!(proto.tool_invocation_id, "marmosets"); + .unwrap(); + let context = Context { + workunit_store: WorkunitStore::new(false, log::Level::Debug), + build_id: String::from("marmosets"), + run_id: RunId(0), + ..Context::default() + }; + command_runner + .run(context, &mut workunit, execute_request) + .await + .expect("Execution failed"); + + let received_messages = mock_server.mock_responder.received_messages.lock(); + let message_headers: Vec<_> = received_messages + .iter() + .map(|received_message| received_message.headers.clone()) + .collect(); + assert_eq!(message_headers.len(), 1); + for headers in message_headers { + { + let want_key = "google.devtools.remoteexecution.v1test.requestmetadata-bin"; + assert!(headers.contains_key(want_key)); + + let bytes = headers.get_bin(want_key).unwrap().to_bytes().unwrap(); + let proto = remexec::RequestMetadata::decode(Cursor::new(bytes)) + .expect("Failed to parse metadata proto"); + + assert_eq!(proto.tool_details.map(|x| x.tool_name).unwrap(), "pants"); + assert_eq!(proto.tool_invocation_id, "marmosets"); + } + + assert_eq!(headers.get("cat").unwrap().to_str().unwrap(), "roland"); + + assert_eq!( + headers.get("authorization").unwrap().to_str().unwrap(), + "Bearer catnip-will-get-you-anywhere" + ); } - - 
assert_eq!(headers.get("cat").unwrap().to_str().unwrap(), "roland"); - - assert_eq!( - headers.get("authorization").unwrap().to_str().unwrap(), - "Bearer catnip-will-get-you-anywhere" - ); - } } #[tokio::test] async fn extract_response_with_digest_stdout() { - WorkunitStore::setup_for_tests(); - - let op_name = "gimme-foo".to_string(); - let testdata = TestData::roland(); - let testdata_empty = TestData::empty(); - let result = extract_execute_response( - make_successful_operation( - &op_name, - StdoutType::Digest(testdata.digest()), - StderrType::Raw(testdata_empty.string()), - 0, + WorkunitStore::setup_for_tests(); + + let op_name = "gimme-foo".to_string(); + let testdata = TestData::roland(); + let testdata_empty = TestData::empty(); + let result = extract_execute_response( + make_successful_operation( + &op_name, + StdoutType::Digest(testdata.digest()), + StderrType::Raw(testdata_empty.string()), + 0, + ) + .op + .unwrap() + .unwrap(), + Platform::Linux_x86_64, ) - .op - .unwrap() - .unwrap(), - Platform::Linux_x86_64, - ) - .await - .unwrap(); + .await + .unwrap(); - assert_eq!(result.stdout_bytes, testdata.bytes()); - assert_eq!(result.stderr_bytes, testdata_empty.bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + assert_eq!(result.stdout_bytes, testdata.bytes()); + assert_eq!(result.stderr_bytes, testdata_empty.bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] async fn extract_response_with_digest_stderr() { - let _ = WorkunitStore::setup_for_tests(); - let op_name = "gimme-foo".to_string(); - let testdata = TestData::roland(); - let testdata_empty = TestData::empty(); - let result = extract_execute_response( - make_successful_operation( - &op_name, - StdoutType::Raw(testdata_empty.string()), - StderrType::Digest(testdata.digest()), - 0, + let _ = WorkunitStore::setup_for_tests(); + let op_name = "gimme-foo".to_string(); + let testdata = TestData::roland(); + let testdata_empty = TestData::empty(); + let result = extract_execute_response( + make_successful_operation( + &op_name, + StdoutType::Raw(testdata_empty.string()), + StderrType::Digest(testdata.digest()), + 0, + ) + .op + .unwrap() + .unwrap(), + Platform::Linux_x86_64, ) - .op - .unwrap() - .unwrap(), - Platform::Linux_x86_64, - ) - .await - .unwrap(); + .await + .unwrap(); - assert_eq!(result.stdout_bytes, testdata_empty.bytes()); - assert_eq!(result.stderr_bytes, testdata.bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + assert_eq!(result.stdout_bytes, testdata_empty.bytes()); + assert_eq!(result.stderr_bytes, testdata.bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] async fn ensure_inline_stdio_is_stored() { - WorkunitStore::setup_for_tests(); + WorkunitStore::setup_for_tests(); - let runtime = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store_dir_path = store_dir.path(); + let runtime = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store_dir_path = store_dir.path(); - let cas = mock::StubCAS::empty(); - let store = Store::local_only(runtime.clone(), store_dir_path) - .unwrap() - .into_with_remote(remote_options_for_cas(&cas)) - .await - .unwrap(); + let cas = mock::StubCAS::empty(); + let store = 
Store::local_only(runtime.clone(), store_dir_path) + .unwrap() + .into_with_remote(remote_options_for_cas(&cas)) + .await + .unwrap(); - let test_stdout = TestData::roland(); - let test_stderr = TestData::catnip(); + let test_stdout = TestData::roland(); + let test_stderr = TestData::catnip(); - let mock_server = { - let op_name = "cat".to_owned(); + let mock_server = { + let op_name = "cat".to_owned(); - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&echo_roland_request(), None, None, &store, None) - .await - .unwrap(); + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request( + &echo_roland_request(), + None, + None, + &store, + None, + ) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name), + make_successful_operation( + &op_name.clone(), + StdoutType::Raw(test_stdout.string()), + StderrType::Raw(test_stderr.string()), + 0, + ), + ]), + }]), + None, + ) + }; - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name), - make_successful_operation( - &op_name.clone(), - StdoutType::Raw(test_stdout.string()), - StderrType::Raw(test_stderr.string()), - 0, - ), - ]), - }]), - None, + let cmd_runner = CommandRunner::new( + &mock_server.address(), + None, + None, + None, + Default::default(), + BTreeMap::new(), + store.clone(), + task_executor::Executor::new(), + OVERALL_DEADLINE_SECS, + RETRY_INTERVAL, + EXEC_CONCURRENCY_LIMIT, + None, ) - }; - - let cmd_runner = CommandRunner::new( - &mock_server.address(), - None, - None, - None, - Default::default(), - BTreeMap::new(), - store.clone(), - task_executor::Executor::new(), - OVERALL_DEADLINE_SECS, - RETRY_INTERVAL, - EXEC_CONCURRENCY_LIMIT, - None, - ) - .await - .unwrap(); - - let result = run_cmd_runner(echo_roland_request(), cmd_runner, store) .await .unwrap(); - assert_eq!(result.stdout_bytes, test_stdout.bytes()); - assert_eq!(result.stderr_bytes, test_stderr.bytes()); - assert_eq!(result.original.exit_code, 0); - - let local_store = - Store::local_only(runtime.clone(), store_dir_path).expect("Error creating local store"); - { - assert_eq!( - local_store - .load_file_bytes_with(test_stdout.digest(), Bytes::copy_from_slice) - .await - .unwrap(), - test_stdout.bytes() - ); - assert_eq!( - local_store - .load_file_bytes_with(test_stderr.digest(), Bytes::copy_from_slice) + let result = run_cmd_runner(echo_roland_request(), cmd_runner, store) .await - .unwrap(), - test_stderr.bytes() - ); - } + .unwrap(); + + assert_eq!(result.stdout_bytes, test_stdout.bytes()); + assert_eq!(result.stderr_bytes, test_stderr.bytes()); + assert_eq!(result.original.exit_code, 0); + + let local_store = + Store::local_only(runtime.clone(), store_dir_path).expect("Error creating local store"); + { + assert_eq!( + local_store + .load_file_bytes_with(test_stdout.digest(), Bytes::copy_from_slice) + .await + .unwrap(), + test_stdout.bytes() + ); + assert_eq!( + local_store + .load_file_bytes_with(test_stderr.digest(), Bytes::copy_from_slice) + .await + .unwrap(), + test_stderr.bytes() + ); + } } #[tokio::test] async fn bad_result_bytes() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); 
- let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = { - let op_name = "gimme-foo".to_string(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request: process_execution::make_execute_request( - &execute_request, - None, - None, - &store, - None, - ) - .await - .unwrap() - .execute_request, - stream_responses: Ok(vec![ + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = { + let op_name = "gimme-foo".to_string(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request: process_execution::make_execute_request( + &execute_request, + None, + None, + &store, + None, + ) + .await + .unwrap() + .execute_request, + stream_responses: Ok(vec![ make_incomplete_operation(&op_name), MockOperation::new(Operation { name: op_name.clone(), @@ -1559,1243 +1570,1254 @@ async fn bad_result_bytes() { ..Default::default() }), ]), - }]), - None, - ) - }; + }]), + None, + ) + }; - run_command_remote(mock_server.address(), execute_request) - .await - .expect_err("Want Err"); + run_command_remote(mock_server.address(), execute_request) + .await + .expect_err("Want Err"); } #[tokio::test] async fn initial_response_error() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = { - let op_name = "gimme-foo".to_string(); - - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![MockOperation::new({ - Operation { - name: op_name.to_string(), - done: true, - result: Some(protos::gen::google::longrunning::operation::Result::Error( - protos::gen::google::rpc::Status { - code: Code::Internal as i32, - message: "Something went wrong".to_string(), - ..Default::default() - }, - )), - ..Default::default() - } - })]), - }]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = { + let op_name = "gimme-foo".to_string(); + + let EntireExecuteRequest { + execute_request, .. 
+ } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![MockOperation::new({ + Operation { + name: op_name.to_string(), + done: true, + result: Some(protos::gen::google::longrunning::operation::Result::Error( + protos::gen::google::rpc::Status { + code: Code::Internal as i32, + message: "Something went wrong".to_string(), + ..Default::default() + }, + )), + ..Default::default() + } + })]), + }]), + None, + ) + }; - let result = run_command_remote(mock_server.address(), execute_request) - .await - .expect_err("Want Err"); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .expect_err("Want Err"); - assert!(result.to_string().ends_with( - "Error from remote execution: \ + assert!(result.to_string().ends_with( + "Error from remote execution: \ InvalidArgument: \"Execute endpoint called. Did not expect this call.\"" - )); + )); } #[tokio::test] async fn initial_response_missing_response_and_error() { - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = { - let op_name = "gimme-foo".to_string(); - - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![MockOperation::new({ - Operation { - name: op_name.to_string(), - done: true, - ..Default::default() - } - })]), - }]), - None, - ) - }; + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = { + let op_name = "gimme-foo".to_string(); + + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![MockOperation::new({ + Operation { + name: op_name.to_string(), + done: true, + ..Default::default() + } + })]), + }]), + None, + ) + }; - let result = run_command_remote(mock_server.address(), execute_request) - .await - .expect_err("Want Err"); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .expect_err("Want Err"); - assert!(result - .to_string() - .ends_with("Operation finished but no response supplied")); + assert!(result + .to_string() + .ends_with("Operation finished but no response supplied")); } #[tokio::test] async fn fails_after_retry_limit_exceeded() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = { - let EntireExecuteRequest { - execute_request, .. 
- } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ - ExpectedAPICall::Execute { - execute_request: execute_request.clone(), - stream_responses: Ok(vec![make_retryable_operation_failure()]), - }, - ExpectedAPICall::Execute { - execute_request: execute_request.clone(), - stream_responses: Ok(vec![make_retryable_operation_failure()]), - }, - ExpectedAPICall::Execute { - execute_request: execute_request.clone(), - stream_responses: Ok(vec![make_retryable_operation_failure()]), - }, - ExpectedAPICall::Execute { - execute_request: execute_request.clone(), - stream_responses: Ok(vec![make_retryable_operation_failure()]), - }, - ExpectedAPICall::Execute { - execute_request: execute_request.clone(), - stream_responses: Ok(vec![make_retryable_operation_failure()]), - }, - ExpectedAPICall::Execute { - execute_request: execute_request, - stream_responses: Ok(vec![make_retryable_operation_failure()]), - }, - ]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = { + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ + ExpectedAPICall::Execute { + execute_request: execute_request.clone(), + stream_responses: Ok(vec![make_retryable_operation_failure()]), + }, + ExpectedAPICall::Execute { + execute_request: execute_request.clone(), + stream_responses: Ok(vec![make_retryable_operation_failure()]), + }, + ExpectedAPICall::Execute { + execute_request: execute_request.clone(), + stream_responses: Ok(vec![make_retryable_operation_failure()]), + }, + ExpectedAPICall::Execute { + execute_request: execute_request.clone(), + stream_responses: Ok(vec![make_retryable_operation_failure()]), + }, + ExpectedAPICall::Execute { + execute_request: execute_request.clone(), + stream_responses: Ok(vec![make_retryable_operation_failure()]), + }, + ExpectedAPICall::Execute { + execute_request: execute_request, + stream_responses: Ok(vec![make_retryable_operation_failure()]), + }, + ]), + None, + ) + }; - let result = run_command_remote(mock_server.address(), execute_request) - .await - .expect_err("Expected error"); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .expect_err("Expected error"); - assert!(result.to_string().ends_with( - "Too many failures from server. \ + assert!(result.to_string().ends_with( + "Too many failures from server. \ The last error was: the bot running the task appears to be lost" - )); + )); } #[tokio::test] async fn fails_after_retry_limit_exceeded_with_stream_close() { - WorkunitStore::setup_for_tests(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = Store::local_only(executor, store_dir).unwrap(); - - let execute_request = echo_foo_request(); - - let mock_server = { - let op_name = "foo-bar".to_owned(); - let EntireExecuteRequest { - execute_request, .. 
- } = process_execution::make_execute_request(&execute_request, None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ - ExpectedAPICall::Execute { - execute_request: execute_request, - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ExpectedAPICall::WaitExecution { - operation_name: op_name.clone(), - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ExpectedAPICall::WaitExecution { - operation_name: op_name.clone(), - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ExpectedAPICall::WaitExecution { - operation_name: op_name.clone(), - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ExpectedAPICall::WaitExecution { - operation_name: op_name.clone(), - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ExpectedAPICall::WaitExecution { - operation_name: op_name.clone(), - stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), - }, - ]), - None, - ) - }; + WorkunitStore::setup_for_tests(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = Store::local_only(executor, store_dir).unwrap(); + + let execute_request = echo_foo_request(); + + let mock_server = { + let op_name = "foo-bar".to_owned(); + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request(&execute_request, None, None, &store, None) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ + ExpectedAPICall::Execute { + execute_request: execute_request, + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ExpectedAPICall::WaitExecution { + operation_name: op_name.clone(), + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ExpectedAPICall::WaitExecution { + operation_name: op_name.clone(), + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ExpectedAPICall::WaitExecution { + operation_name: op_name.clone(), + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ExpectedAPICall::WaitExecution { + operation_name: op_name.clone(), + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ExpectedAPICall::WaitExecution { + operation_name: op_name.clone(), + stream_responses: Ok(vec![make_incomplete_operation(&op_name)]), + }, + ]), + None, + ) + }; - let result = run_command_remote(mock_server.address(), execute_request) - .await - .expect_err("Expected error"); + let result = run_command_remote(mock_server.address(), execute_request) + .await + .expect_err("Expected error"); - assert!(result.to_string().ends_with( - "Too many failures from server. \ + assert!(result.to_string().ends_with( + "Too many failures from server. \ The last event was the server disconnecting with no error given." 
- )); + )); } #[tokio::test] async fn execute_missing_file_uploads_if_known() { - WorkunitStore::setup_for_tests(); - - let runtime = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let cas = mock::StubCAS::builder() - .directory(&TestDirectory::containing_roland()) - .build(); - let store = Store::local_only(runtime.clone(), store_dir) - .unwrap() - .into_with_remote(remote_options_for_cas(&cas)) - .await - .unwrap(); + WorkunitStore::setup_for_tests(); + + let runtime = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let cas = mock::StubCAS::builder() + .directory(&TestDirectory::containing_roland()) + .build(); + let store = Store::local_only(runtime.clone(), store_dir) + .unwrap() + .into_with_remote(remote_options_for_cas(&cas)) + .await + .unwrap(); - let roland = TestData::roland(); - - let mock_server = { - let op_name = "cat".to_owned(); - - let EntireExecuteRequest { - execute_request, .. - } = process_execution::make_execute_request(&cat_roland_request(), None, None, &store, None) - .await - .unwrap(); - - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![ - ExpectedAPICall::Execute { - execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name), - make_precondition_failure_operation(vec![missing_preconditionfailure_violation( - &roland.digest(), - )]), - ]), - }, - ExpectedAPICall::Execute { - execute_request: process_execution::make_execute_request( + let roland = TestData::roland(); + + let mock_server = { + let op_name = "cat".to_owned(); + + let EntireExecuteRequest { + execute_request, .. + } = process_execution::make_execute_request( &cat_roland_request(), None, None, &store, None, - ) - .await - .unwrap() - .execute_request, - stream_responses: Ok(vec![ - make_incomplete_operation(&op_name), - make_successful_operation( - "cat2", - StdoutType::Raw(roland.string()), - StderrType::Raw("".to_owned()), - 0, - ), - ]), - }, - ]), - None, - ) - }; + ) + .await + .unwrap(); + + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![ + ExpectedAPICall::Execute { + execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name), + make_precondition_failure_operation(vec![ + missing_preconditionfailure_violation(&roland.digest()), + ]), + ]), + }, + ExpectedAPICall::Execute { + execute_request: process_execution::make_execute_request( + &cat_roland_request(), + None, + None, + &store, + None, + ) + .await + .unwrap() + .execute_request, + stream_responses: Ok(vec![ + make_incomplete_operation(&op_name), + make_successful_operation( + "cat2", + StdoutType::Raw(roland.string()), + StderrType::Raw("".to_owned()), + 0, + ), + ]), + }, + ]), + None, + ) + }; - store - .store_file_bytes(roland.bytes(), false) - .await - .expect("Saving file bytes to store"); - store - .record_directory(&TestDirectory::containing_roland().directory(), false) - .await - .expect("Saving directory bytes to store"); - let command_runner = CommandRunner::new( - &mock_server.address(), - None, - None, - None, - Default::default(), - BTreeMap::new(), - store.clone(), - task_executor::Executor::new(), - OVERALL_DEADLINE_SECS, - RETRY_INTERVAL, - EXEC_CONCURRENCY_LIMIT, - None, - ) - .await - .unwrap(); - - let result = run_cmd_runner(cat_roland_request(), command_runner, store) + store + .store_file_bytes(roland.bytes(), false) + .await + .expect("Saving file bytes to store"); + store + 
.record_directory(&TestDirectory::containing_roland().directory(), false) + .await + .expect("Saving directory bytes to store"); + let command_runner = CommandRunner::new( + &mock_server.address(), + None, + None, + None, + Default::default(), + BTreeMap::new(), + store.clone(), + task_executor::Executor::new(), + OVERALL_DEADLINE_SECS, + RETRY_INTERVAL, + EXEC_CONCURRENCY_LIMIT, + None, + ) .await .unwrap(); - assert_eq!(result.stdout_bytes, roland.bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); + let result = run_cmd_runner(cat_roland_request(), command_runner, store) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, roland.bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); - { - let blobs = cas.blobs.lock(); - assert_eq!(blobs.get(&roland.fingerprint()), Some(&roland.bytes())); - } + { + let blobs = cas.blobs.lock(); + assert_eq!(blobs.get(&roland.fingerprint()), Some(&roland.bytes())); + } } #[tokio::test] async fn execute_missing_file_errors_if_unknown() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let missing_digest = TestDirectory::containing_roland().digest(); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let missing_digest = TestDirectory::containing_roland().digest(); + + let mock_server = { + mock::execution_server::TestServer::new( + mock::execution_server::MockExecution::new(vec![]), + None, + ) + }; - let mock_server = { - mock::execution_server::TestServer::new( - mock::execution_server::MockExecution::new(vec![]), - None, + let store_dir = TempDir::new().unwrap(); + let cas = mock::StubCAS::builder().file(&TestData::roland()).build(); + let runtime = task_executor::Executor::new(); + let store = Store::local_only(runtime.clone(), store_dir) + .unwrap() + .into_with_remote(remote_options_for_cas(&cas)) + .await + .unwrap(); + + let runner = CommandRunner::new( + &mock_server.address(), + None, + None, + None, + Default::default(), + BTreeMap::new(), + store, + task_executor::Executor::new(), + OVERALL_DEADLINE_SECS, + RETRY_INTERVAL, + EXEC_CONCURRENCY_LIMIT, + None, ) - }; - - let store_dir = TempDir::new().unwrap(); - let cas = mock::StubCAS::builder().file(&TestData::roland()).build(); - let runtime = task_executor::Executor::new(); - let store = Store::local_only(runtime.clone(), store_dir) - .unwrap() - .into_with_remote(remote_options_for_cas(&cas)) .await .unwrap(); - let runner = CommandRunner::new( - &mock_server.address(), - None, - None, - None, - Default::default(), - BTreeMap::new(), - store, - task_executor::Executor::new(), - OVERALL_DEADLINE_SECS, - RETRY_INTERVAL, - EXEC_CONCURRENCY_LIMIT, - None, - ) - .await - .unwrap(); - - let error = runner - .run(Context::default(), &mut workunit, cat_roland_request()) - .await - .expect_err("Want error"); - assert_contains(&error.to_string(), &format!("{}", missing_digest.hash)); + let error = runner + .run(Context::default(), &mut workunit, cat_roland_request()) + .await + .expect_err("Want error"); + assert_contains(&error.to_string(), &format!("{}", missing_digest.hash)); } #[tokio::test] async fn extract_execute_response_success() { - let wanted_exit_code = 17; - let wanted_stdout = Bytes::from_static(b"roland"); - let wanted_stderr = Bytes::from_static(b"simba"); - - let operation = Operation { - name: "cat".to_owned(), - done: true, - result: Some( - protos::gen::google::longrunning::operation::Result::Response(make_any_proto( - &remexec::ExecuteResponse { - 
result: Some(remexec::ActionResult { - exit_code: wanted_exit_code, - stdout_raw: wanted_stdout.clone(), - stderr_raw: wanted_stderr.clone(), - output_files: vec![remexec::OutputFile { - path: "cats/roland.ext".into(), - digest: Some((&TestData::roland().digest()).into()), - is_executable: false, - ..Default::default() - }], - ..Default::default() - }), - ..Default::default() - }, - "protos::gen::", - )), - ), - ..Default::default() - }; + let wanted_exit_code = 17; + let wanted_stdout = Bytes::from_static(b"roland"); + let wanted_stderr = Bytes::from_static(b"simba"); + + let operation = Operation { + name: "cat".to_owned(), + done: true, + result: Some( + protos::gen::google::longrunning::operation::Result::Response(make_any_proto( + &remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: wanted_exit_code, + stdout_raw: wanted_stdout.clone(), + stderr_raw: wanted_stderr.clone(), + output_files: vec![remexec::OutputFile { + path: "cats/roland.ext".into(), + digest: Some((&TestData::roland().digest()).into()), + is_executable: false, + ..Default::default() + }], + ..Default::default() + }), + ..Default::default() + }, + "protos::gen::", + )), + ), + ..Default::default() + }; - let result = extract_execute_response(operation, Platform::Linux_x86_64) - .await - .unwrap(); + let result = extract_execute_response(operation, Platform::Linux_x86_64) + .await + .unwrap(); - assert_eq!(result.stdout_bytes, wanted_stdout); - assert_eq!(result.stderr_bytes, wanted_stderr); - assert_eq!(result.original.exit_code, wanted_exit_code); - assert_eq!( - result.original.output_directory, - TestDirectory::nested().directory_digest() - ); + assert_eq!(result.stdout_bytes, wanted_stdout); + assert_eq!(result.stderr_bytes, wanted_stderr); + assert_eq!(result.original.exit_code, wanted_exit_code); + assert_eq!( + result.original.output_directory, + TestDirectory::nested().directory_digest() + ); } #[tokio::test] async fn extract_execute_response_timeout() { - let operation = Operation { - name: "cat".to_owned(), - done: true, - result: Some( - protos::gen::google::longrunning::operation::Result::Response(make_any_proto( - &remexec::ExecuteResponse { - status: Some(protos::gen::google::rpc::Status { - code: Code::DeadlineExceeded as i32, - ..Default::default() - }), - ..Default::default() - }, - "protos::gen::", - )), - ), - ..Default::default() - }; + let operation = Operation { + name: "cat".to_owned(), + done: true, + result: Some( + protos::gen::google::longrunning::operation::Result::Response(make_any_proto( + &remexec::ExecuteResponse { + status: Some(protos::gen::google::rpc::Status { + code: Code::DeadlineExceeded as i32, + ..Default::default() + }), + ..Default::default() + }, + "protos::gen::", + )), + ), + ..Default::default() + }; - match extract_execute_response(operation, Platform::Linux_x86_64).await { - Err(ExecutionError::Timeout) => (), - other => assert!(false, "Want timeout error, got {other:?}"), - }; + match extract_execute_response(operation, Platform::Linux_x86_64).await { + Err(ExecutionError::Timeout) => (), + other => assert!(false, "Want timeout error, got {other:?}"), + }; } #[tokio::test] async fn extract_execute_response_missing_digests() { - let missing_files = vec![ - TestData::roland().digest(), - TestDirectory::containing_roland().digest(), - ]; - - let missing = missing_files - .iter() - .map(missing_preconditionfailure_violation) - .collect(); - - let operation = make_precondition_failure_operation(missing) - .op - .unwrap() - .unwrap(); + let 
missing_files = vec![ + TestData::roland().digest(), + TestDirectory::containing_roland().digest(), + ]; + + let missing = missing_files + .iter() + .map(missing_preconditionfailure_violation) + .collect(); + + let operation = make_precondition_failure_operation(missing) + .op + .unwrap() + .unwrap(); - assert_eq!( - extract_execute_response(operation, Platform::Linux_x86_64).await, - Err(ExecutionError::MissingRemoteDigests(missing_files)) - ); + assert_eq!( + extract_execute_response(operation, Platform::Linux_x86_64).await, + Err(ExecutionError::MissingRemoteDigests(missing_files)) + ); } #[tokio::test] async fn extract_execute_response_missing_other_things() { - let missing = vec![ - missing_preconditionfailure_violation(&TestData::roland().digest()), - protos::gen::google::rpc::precondition_failure::Violation { - r#type: "MISSING".to_owned(), - subject: "monkeys".to_owned(), - ..Default::default() - }, - ]; - - let operation = make_precondition_failure_operation(missing) - .op - .unwrap() - .unwrap(); + let missing = vec![ + missing_preconditionfailure_violation(&TestData::roland().digest()), + protos::gen::google::rpc::precondition_failure::Violation { + r#type: "MISSING".to_owned(), + subject: "monkeys".to_owned(), + ..Default::default() + }, + ]; + + let operation = make_precondition_failure_operation(missing) + .op + .unwrap() + .unwrap(); - match extract_execute_response(operation, Platform::Linux_x86_64).await { - Err(ExecutionError::Fatal(err)) => assert_contains(&err.to_string(), "monkeys"), - other => assert!(false, "Want fatal error, got {other:?}"), - }; + match extract_execute_response(operation, Platform::Linux_x86_64).await { + Err(ExecutionError::Fatal(err)) => assert_contains(&err.to_string(), "monkeys"), + other => assert!(false, "Want fatal error, got {other:?}"), + }; } #[tokio::test] async fn extract_execute_response_other_failed_precondition() { - let missing = vec![protos::gen::google::rpc::precondition_failure::Violation { - r#type: "OUT_OF_CAPACITY".to_owned(), - ..Default::default() - }]; - - let operation = make_precondition_failure_operation(missing) - .op - .unwrap() - .unwrap(); + let missing = vec![protos::gen::google::rpc::precondition_failure::Violation { + r#type: "OUT_OF_CAPACITY".to_owned(), + ..Default::default() + }]; + + let operation = make_precondition_failure_operation(missing) + .op + .unwrap() + .unwrap(); - match extract_execute_response(operation, Platform::Linux_x86_64).await { - Err(ExecutionError::Fatal(err)) => assert_contains(&err.to_string(), "OUT_OF_CAPACITY"), - other => assert!(false, "Want fatal error, got {other:?}"), - }; + match extract_execute_response(operation, Platform::Linux_x86_64).await { + Err(ExecutionError::Fatal(err)) => assert_contains(&err.to_string(), "OUT_OF_CAPACITY"), + other => assert!(false, "Want fatal error, got {other:?}"), + }; } #[tokio::test] async fn extract_execute_response_missing_without_list() { - let missing = vec![]; + let missing = vec![]; - let operation = make_precondition_failure_operation(missing) - .op - .unwrap() - .unwrap(); + let operation = make_precondition_failure_operation(missing) + .op + .unwrap() + .unwrap(); - match extract_execute_response(operation, Platform::Linux_x86_64).await { - Err(ExecutionError::Fatal(err)) => { - assert_contains(&err.to_string().to_lowercase(), "precondition") - } - other => assert!(false, "Want fatal error, got {other:?}"), - }; + match extract_execute_response(operation, Platform::Linux_x86_64).await { + Err(ExecutionError::Fatal(err)) => { + 
assert_contains(&err.to_string().to_lowercase(), "precondition") + } + other => assert!(false, "Want fatal error, got {other:?}"), + }; } #[tokio::test] async fn extract_execute_response_other_status() { - let operation = Operation { - name: "cat".to_owned(), - done: true, - result: Some( - protos::gen::google::longrunning::operation::Result::Response(make_any_proto( - &remexec::ExecuteResponse { - status: Some(protos::gen::google::rpc::Status { - code: Code::PermissionDenied as i32, - ..Default::default() - }), - ..Default::default() - }, - "protos::gen::", - )), - ), - ..Default::default() - }; + let operation = Operation { + name: "cat".to_owned(), + done: true, + result: Some( + protos::gen::google::longrunning::operation::Result::Response(make_any_proto( + &remexec::ExecuteResponse { + status: Some(protos::gen::google::rpc::Status { + code: Code::PermissionDenied as i32, + ..Default::default() + }), + ..Default::default() + }, + "protos::gen::", + )), + ), + ..Default::default() + }; - match extract_execute_response(operation, Platform::Linux_x86_64).await { - Err(ExecutionError::Fatal(err)) => assert_contains(&err.to_string(), "PermissionDenied"), - other => assert!(false, "Want fatal error, got {other:?}"), - }; + match extract_execute_response(operation, Platform::Linux_x86_64).await { + Err(ExecutionError::Fatal(err)) => assert_contains(&err.to_string(), "PermissionDenied"), + other => assert!(false, "Want fatal error, got {other:?}"), + }; } #[tokio::test] async fn remote_workunits_are_stored() { - let (workunit_store, _) = WorkunitStore::setup_for_tests(); - let op_name = "gimme-foo".to_string(); - let testdata = TestData::roland(); - let testdata_empty = TestData::empty(); - let operation = make_successful_operation_with_metadata( - &op_name, - StdoutType::Digest(testdata.digest()), - StderrType::Raw(testdata_empty.string()), - 0, - ); - let cas = mock::StubCAS::builder() - .file(&TestData::roland()) - .directory(&TestDirectory::containing_roland()) - .build(); - // TODO: This CommandRunner is only used for parsing, add so intentionally passes a CAS/AC - // address rather than an Execution address. - let (command_runner, _store) = create_command_runner(cas.address(), &cas).await; - - command_runner - .extract_execute_response( - RunId(0), - make_environment(Platform::Linux_x86_64), - OperationOrStatus::Operation(operation), - ) - .await - .unwrap(); + let (workunit_store, _) = WorkunitStore::setup_for_tests(); + let op_name = "gimme-foo".to_string(); + let testdata = TestData::roland(); + let testdata_empty = TestData::empty(); + let operation = make_successful_operation_with_metadata( + &op_name, + StdoutType::Digest(testdata.digest()), + StderrType::Raw(testdata_empty.string()), + 0, + ); + let cas = mock::StubCAS::builder() + .file(&TestData::roland()) + .directory(&TestDirectory::containing_roland()) + .build(); + // TODO: This CommandRunner is only used for parsing, add so intentionally passes a CAS/AC + // address rather than an Execution address. 
+ let (command_runner, _store) = create_command_runner(cas.address(), &cas).await; + + command_runner + .extract_execute_response( + RunId(0), + make_environment(Platform::Linux_x86_64), + OperationOrStatus::Operation(operation), + ) + .await + .unwrap(); - let got_workunit_items: HashSet<&'static str> = workunit_store - .latest_workunits(log::Level::Trace) - .1 - .into_iter() - .map(|workunit| workunit.name) - .collect(); + let got_workunit_items: HashSet<&'static str> = workunit_store + .latest_workunits(log::Level::Trace) + .1 + .into_iter() + .map(|workunit| workunit.name) + .collect(); - let wanted_workunit_items = hashset! { - "remote execution action scheduling", - "remote execution worker input fetching", - "remote execution worker command executing", - "remote execution worker output uploading", - }; + let wanted_workunit_items = hashset! { + "remote execution action scheduling", + "remote execution worker input fetching", + "remote execution worker command executing", + "remote execution worker output uploading", + }; - assert!(got_workunit_items.is_superset(&wanted_workunit_items)); + assert!(got_workunit_items.is_superset(&wanted_workunit_items)); } #[tokio::test] async fn format_error_complete() { - let error = protos::gen::google::rpc::Status { - code: Code::Cancelled as i32, - message: "Oops, oh well!".to_string(), - ..Default::default() - }; + let error = protos::gen::google::rpc::Status { + code: Code::Cancelled as i32, + message: "Oops, oh well!".to_string(), + ..Default::default() + }; - assert_eq!( - crate::remote::format_error(&error), - "Cancelled: Oops, oh well!".to_string() - ); + assert_eq!( + crate::remote::format_error(&error), + "Cancelled: Oops, oh well!".to_string() + ); } #[tokio::test] async fn extract_execute_response_unknown_code() { - let error = protos::gen::google::rpc::Status { - code: 555, - message: "Oops, oh well!".to_string(), - ..Default::default() - }; + let error = protos::gen::google::rpc::Status { + code: 555, + message: "Oops, oh well!".to_string(), + ..Default::default() + }; - assert_eq!( - crate::remote::format_error(&error), - "555: Oops, oh well!".to_string() - ); + assert_eq!( + crate::remote::format_error(&error), + "555: Oops, oh well!".to_string() + ); } #[tokio::test] async fn digest_command() { - let command = remexec::Command { - arguments: vec!["/bin/echo".to_string(), "foo".to_string()], - environment_variables: vec![ - remexec::command::EnvironmentVariable { - name: "A".to_string(), - value: "a".to_string(), - }, - remexec::command::EnvironmentVariable { - name: "B".to_string(), - value: "b".to_string(), - }, - ], - ..Default::default() - }; - - let digest = process_execution::digest(&command).unwrap(); - - assert_eq!( - &digest.hash.to_hex(), - "a32cd427e5df6a998199266681692989f56c19cabd1cc637bdd56ae2e62619b4" - ); - assert_eq!(digest.size_bytes, 32) + let command = remexec::Command { + arguments: vec!["/bin/echo".to_string(), "foo".to_string()], + environment_variables: vec![ + remexec::command::EnvironmentVariable { + name: "A".to_string(), + value: "a".to_string(), + }, + remexec::command::EnvironmentVariable { + name: "B".to_string(), + value: "b".to_string(), + }, + ], + ..Default::default() + }; + + let digest = process_execution::digest(&command).unwrap(); + + assert_eq!( + &digest.hash.to_hex(), + "a32cd427e5df6a998199266681692989f56c19cabd1cc637bdd56ae2e62619b4" + ); + assert_eq!(digest.size_bytes, 32) } #[tokio::test] async fn extract_output_files_from_response_one_file() { - let execute_response = 
remexec::ExecuteResponse { - result: Some(remexec::ActionResult { - exit_code: 0, - output_files: vec![remexec::OutputFile { - path: "roland.ext".into(), - digest: Some((&TestData::roland().digest()).into()), - is_executable: false, + let execute_response = remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: 0, + output_files: vec![remexec::OutputFile { + path: "roland.ext".into(), + digest: Some((&TestData::roland().digest()).into()), + is_executable: false, + ..Default::default() + }], + ..Default::default() + }), ..Default::default() - }], - ..Default::default() - }), - ..Default::default() - }; + }; - assert_eq!( - extract_output_files_from_response(&execute_response).await, - Ok(TestDirectory::containing_roland().digest()) - ) + assert_eq!( + extract_output_files_from_response(&execute_response).await, + Ok(TestDirectory::containing_roland().digest()) + ) } #[tokio::test] async fn extract_output_files_from_response_two_files_not_nested() { - let execute_response = remexec::ExecuteResponse { - result: Some(remexec::ActionResult { - exit_code: 0, - output_files: vec![ - remexec::OutputFile { - path: "roland.ext".into(), - digest: Some((&TestData::roland().digest()).into()), - is_executable: false, - ..Default::default() - }, - remexec::OutputFile { - path: "treats.ext".into(), - digest: Some((&TestData::catnip().digest()).into()), - is_executable: false, - ..Default::default() - }, - ], - ..Default::default() - }), - ..Default::default() - }; + let execute_response = remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: 0, + output_files: vec![ + remexec::OutputFile { + path: "roland.ext".into(), + digest: Some((&TestData::roland().digest()).into()), + is_executable: false, + ..Default::default() + }, + remexec::OutputFile { + path: "treats.ext".into(), + digest: Some((&TestData::catnip().digest()).into()), + is_executable: false, + ..Default::default() + }, + ], + ..Default::default() + }), + ..Default::default() + }; - assert_eq!( - extract_output_files_from_response(&execute_response).await, - Ok(TestDirectory::containing_roland_and_treats().digest()) - ) + assert_eq!( + extract_output_files_from_response(&execute_response).await, + Ok(TestDirectory::containing_roland_and_treats().digest()) + ) } #[tokio::test] async fn extract_output_files_from_response_two_files_nested() { - let execute_response = remexec::ExecuteResponse { - result: Some(remexec::ActionResult { - exit_code: 0, - output_files: vec![ - remexec::OutputFile { - path: "cats/roland.ext".into(), - digest: Some((&TestData::roland().digest()).into()), - is_executable: false, - ..Default::default() - }, - remexec::OutputFile { - path: "treats.ext".into(), - digest: Some((&TestData::catnip().digest()).into()), - is_executable: false, - ..Default::default() - }, - ], - ..Default::default() - }), - ..Default::default() - }; + let execute_response = remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: 0, + output_files: vec![ + remexec::OutputFile { + path: "cats/roland.ext".into(), + digest: Some((&TestData::roland().digest()).into()), + is_executable: false, + ..Default::default() + }, + remexec::OutputFile { + path: "treats.ext".into(), + digest: Some((&TestData::catnip().digest()).into()), + is_executable: false, + ..Default::default() + }, + ], + ..Default::default() + }), + ..Default::default() + }; - assert_eq!( - extract_output_files_from_response(&execute_response).await, - Ok(TestDirectory::recursive().digest()) - ) + assert_eq!( + 
extract_output_files_from_response(&execute_response).await, + Ok(TestDirectory::recursive().digest()) + ) } #[tokio::test] async fn extract_output_files_from_response_just_directory() { - let _ = WorkunitStore::setup_for_tests(); - let test_tree: TestTree = TestDirectory::containing_roland().into(); - - let execute_response = remexec::ExecuteResponse { - result: Some(remexec::ActionResult { - exit_code: 0, - output_directories: vec![remexec::OutputDirectory { - path: "cats".into(), - tree_digest: Some(test_tree.digest().into()), - is_topologically_sorted: false, - }], - ..Default::default() - }), - ..Default::default() - }; - - assert_eq!( - extract_output_files_from_response(&execute_response).await, - Ok(TestDirectory::nested().digest()) - ) + let _ = WorkunitStore::setup_for_tests(); + let test_tree: TestTree = TestDirectory::containing_roland().into(); + + let execute_response = remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: 0, + output_directories: vec![remexec::OutputDirectory { + path: "cats".into(), + tree_digest: Some(test_tree.digest().into()), + is_topologically_sorted: false, + }], + ..Default::default() + }), + ..Default::default() + }; + + assert_eq!( + extract_output_files_from_response(&execute_response).await, + Ok(TestDirectory::nested().digest()) + ) } #[tokio::test] async fn extract_output_files_from_response_directories_and_files() { - // /treats.ext - // /pets/cats/roland.ext - // /pets/dogs/robin.ext - - let _ = WorkunitStore::setup_for_tests(); - let execute_response = remexec::ExecuteResponse { - result: Some(remexec::ActionResult { - exit_code: 0, - output_files: vec![remexec::OutputFile { - path: "treats.ext".into(), - digest: Some((&TestData::catnip().digest()).into()), + // /treats.ext + // /pets/cats/roland.ext + // /pets/dogs/robin.ext + + let _ = WorkunitStore::setup_for_tests(); + let execute_response = remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: 0, + output_files: vec![remexec::OutputFile { + path: "treats.ext".into(), + digest: Some((&TestData::catnip().digest()).into()), + ..Default::default() + }], + output_directories: vec![ + remexec::OutputDirectory { + path: "pets/cats".into(), + tree_digest: Some((&TestTree::roland_at_root().digest()).into()), + is_topologically_sorted: false, + }, + remexec::OutputDirectory { + path: "pets/dogs".into(), + tree_digest: Some((&TestTree::robin_at_root().digest()).into()), + is_topologically_sorted: false, + }, + ], + ..Default::default() + }), ..Default::default() - }], - output_directories: vec![ - remexec::OutputDirectory { - path: "pets/cats".into(), - tree_digest: Some((&TestTree::roland_at_root().digest()).into()), - is_topologically_sorted: false, - }, - remexec::OutputDirectory { - path: "pets/dogs".into(), - tree_digest: Some((&TestTree::robin_at_root().digest()).into()), - is_topologically_sorted: false, - }, - ], - ..Default::default() - }), - ..Default::default() - }; - - assert_eq!( - extract_output_files_from_response(&execute_response).await, - Ok(Digest::new( - Fingerprint::from_hex_string( - "b5e7010d8c5ef77b383fc60ea00bdfb4743dd6fa3983033b50f218ea90124d0d" - ) - .unwrap(), - 163 - )) - ) + }; + + assert_eq!( + extract_output_files_from_response(&execute_response).await, + Ok(Digest::new( + Fingerprint::from_hex_string( + "b5e7010d8c5ef77b383fc60ea00bdfb4743dd6fa3983033b50f218ea90124d0d" + ) + .unwrap(), + 163 + )) + ) } #[tokio::test] async fn extract_output_files_from_response_no_prefix() { - let _ = 
WorkunitStore::setup_for_tests(); - let execute_response = remexec::ExecuteResponse { - result: Some(remexec::ActionResult { - exit_code: 0, - output_directories: vec![remexec::OutputDirectory { - path: String::new(), - tree_digest: Some((&TestTree::roland_at_root().digest()).into()), - is_topologically_sorted: false, - }], - ..Default::default() - }), - ..Default::default() - }; - - assert_eq!( - extract_output_files_from_response(&execute_response).await, - Ok(TestDirectory::containing_roland().digest()) - ) + let _ = WorkunitStore::setup_for_tests(); + let execute_response = remexec::ExecuteResponse { + result: Some(remexec::ActionResult { + exit_code: 0, + output_directories: vec![remexec::OutputDirectory { + path: String::new(), + tree_digest: Some((&TestTree::roland_at_root().digest()).into()), + is_topologically_sorted: false, + }], + ..Default::default() + }), + ..Default::default() + }; + + assert_eq!( + extract_output_files_from_response(&execute_response).await, + Ok(TestDirectory::containing_roland().digest()) + ) } pub fn echo_foo_request() -> Process { - let mut req = Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])); - req.timeout = Some(Duration::from_millis(5000)); - req.description = "echo a foo".to_string(); - req + let mut req = Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"])); + req.timeout = Some(Duration::from_millis(5000)); + req.description = "echo a foo".to_string(); + req } fn make_incomplete_operation(operation_name: &str) -> MockOperation { - MockOperation::new(Operation { - name: operation_name.to_string(), - done: false, - ..Default::default() - }) + MockOperation::new(Operation { + name: operation_name.to_string(), + done: false, + ..Default::default() + }) } fn make_delayed_incomplete_operation(operation_name: &str, delay: Duration) -> MockOperation { - let mut op = make_incomplete_operation(operation_name); - op.duration = Some(delay); - op + let mut op = make_incomplete_operation(operation_name); + op.duration = Some(delay); + op } fn make_delayed_incomplete_operation_with_stage( - operation_name: &str, - delay: Duration, - stage: ExecutionStageValue, + operation_name: &str, + delay: Duration, + stage: ExecutionStageValue, ) -> MockOperation { - let mut op = make_delayed_incomplete_operation(operation_name, delay); - match &mut op.op { - Ok(Some(op)) => { - op.metadata = Some(make_any_proto( - &remexec::ExecuteOperationMetadata { - stage: stage as i32, - ..Default::default() - }, - "protos::gen::", - )); + let mut op = make_delayed_incomplete_operation(operation_name, delay); + match &mut op.op { + Ok(Some(op)) => { + op.metadata = Some(make_any_proto( + &remexec::ExecuteOperationMetadata { + stage: stage as i32, + ..Default::default() + }, + "protos::gen::", + )); + } + x => panic!("Unexpected MockOperation content: {x:?}"), } - x => panic!("Unexpected MockOperation content: {x:?}"), - } - op + op } fn make_retryable_operation_failure() -> MockOperation { - let status = protos::gen::google::rpc::Status { - code: Code::Aborted as i32, - message: String::from("the bot running the task appears to be lost"), - ..Default::default() - }; - - let operation = Operation { - done: true, - result: Some( - protos::gen::google::longrunning::operation::Result::Response(make_any_proto( - &remexec::ExecuteResponse { - status: Some(status), - ..Default::default() - }, - "protos::gen::", - )), - ), - ..Default::default() - }; + let status = protos::gen::google::rpc::Status { + code: Code::Aborted as i32, + message: String::from("the bot running the 
task appears to be lost"), + ..Default::default() + }; + + let operation = Operation { + done: true, + result: Some( + protos::gen::google::longrunning::operation::Result::Response(make_any_proto( + &remexec::ExecuteResponse { + status: Some(status), + ..Default::default() + }, + "protos::gen::", + )), + ), + ..Default::default() + }; - MockOperation { - op: Ok(Some(operation)), - duration: None, - } + MockOperation { + op: Ok(Some(operation)), + duration: None, + } } fn make_action_result( - stdout: StdoutType, - stderr: StderrType, - exit_code: i32, - metadata: Option, + stdout: StdoutType, + stderr: StderrType, + exit_code: i32, + metadata: Option, ) -> remexec::ActionResult { - let mut action_result = remexec::ActionResult::default(); - match stdout { - StdoutType::Raw(stdout_raw) => { - action_result.stdout_raw = stdout_raw.into_bytes().into(); + let mut action_result = remexec::ActionResult::default(); + match stdout { + StdoutType::Raw(stdout_raw) => { + action_result.stdout_raw = stdout_raw.into_bytes().into(); + } + StdoutType::Digest(stdout_digest) => { + action_result.stdout_digest = Some((&stdout_digest).into()); + } } - StdoutType::Digest(stdout_digest) => { - action_result.stdout_digest = Some((&stdout_digest).into()); + match stderr { + StderrType::Raw(stderr_raw) => { + action_result.stderr_raw = stderr_raw.into_bytes().into(); + } + StderrType::Digest(stderr_digest) => { + action_result.stderr_digest = Some((&stderr_digest).into()); + } } - } - match stderr { - StderrType::Raw(stderr_raw) => { - action_result.stderr_raw = stderr_raw.into_bytes().into(); - } - StderrType::Digest(stderr_digest) => { - action_result.stderr_digest = Some((&stderr_digest).into()); - } - } - action_result.exit_code = exit_code; - if let Some(metadata) = metadata { - action_result.execution_metadata = Some(metadata); - }; - action_result + action_result.exit_code = exit_code; + if let Some(metadata) = metadata { + action_result.execution_metadata = Some(metadata); + }; + action_result } fn make_successful_operation_with_maybe_metadata( - operation_name: &str, - stdout: StdoutType, - stderr: StderrType, - exit_code: i32, - metadata: Option, + operation_name: &str, + stdout: StdoutType, + stderr: StderrType, + exit_code: i32, + metadata: Option, ) -> Operation { - Operation { - name: operation_name.to_string(), - done: true, - result: Some( - protos::gen::google::longrunning::operation::Result::Response(make_any_proto( - &remexec::ExecuteResponse { - status: Some(protos::gen::google::rpc::Status { - code: Code::Ok as i32, - ..Default::default() - }), - result: Some(make_action_result(stdout, stderr, exit_code, metadata)), - ..Default::default() - }, - "protos::gen::", - )), - ), - ..Default::default() - } + Operation { + name: operation_name.to_string(), + done: true, + result: Some( + protos::gen::google::longrunning::operation::Result::Response(make_any_proto( + &remexec::ExecuteResponse { + status: Some(protos::gen::google::rpc::Status { + code: Code::Ok as i32, + ..Default::default() + }), + result: Some(make_action_result(stdout, stderr, exit_code, metadata)), + ..Default::default() + }, + "protos::gen::", + )), + ), + ..Default::default() + } } fn make_successful_operation( - operation_name: &str, - stdout: StdoutType, - stderr: StderrType, - exit_code: i32, + operation_name: &str, + stdout: StdoutType, + stderr: StderrType, + exit_code: i32, ) -> MockOperation { - let op = - make_successful_operation_with_maybe_metadata(operation_name, stdout, stderr, exit_code, None); - 
MockOperation::new(op) + let op = make_successful_operation_with_maybe_metadata( + operation_name, + stdout, + stderr, + exit_code, + None, + ); + MockOperation::new(op) } fn make_successful_operation_with_metadata( - operation_name: &str, - stdout: StdoutType, - stderr: StderrType, - exit_code: i32, + operation_name: &str, + stdout: StdoutType, + stderr: StderrType, + exit_code: i32, ) -> Operation { - let metadata = remexec::ExecutedActionMetadata { - queued_timestamp: Some(timestamp_only_secs(0)), - worker_start_timestamp: Some(timestamp_only_secs(1)), - input_fetch_start_timestamp: Some(timestamp_only_secs(2)), - input_fetch_completed_timestamp: Some(timestamp_only_secs(3)), - execution_start_timestamp: Some(timestamp_only_secs(4)), - execution_completed_timestamp: Some(timestamp_only_secs(5)), - output_upload_start_timestamp: Some(timestamp_only_secs(6)), - output_upload_completed_timestamp: Some(timestamp_only_secs(7)), - worker_completed_timestamp: Some(timestamp_only_secs(8)), - ..Default::default() - }; - - make_successful_operation_with_maybe_metadata( - operation_name, - stdout, - stderr, - exit_code, - Some(metadata), - ) + let metadata = remexec::ExecutedActionMetadata { + queued_timestamp: Some(timestamp_only_secs(0)), + worker_start_timestamp: Some(timestamp_only_secs(1)), + input_fetch_start_timestamp: Some(timestamp_only_secs(2)), + input_fetch_completed_timestamp: Some(timestamp_only_secs(3)), + execution_start_timestamp: Some(timestamp_only_secs(4)), + execution_completed_timestamp: Some(timestamp_only_secs(5)), + output_upload_start_timestamp: Some(timestamp_only_secs(6)), + output_upload_completed_timestamp: Some(timestamp_only_secs(7)), + worker_completed_timestamp: Some(timestamp_only_secs(8)), + ..Default::default() + }; + + make_successful_operation_with_maybe_metadata( + operation_name, + stdout, + stderr, + exit_code, + Some(metadata), + ) } fn timestamp_only_secs(v: i64) -> prost_types::Timestamp { - prost_types::Timestamp { - seconds: v, - nanos: 0, - } + prost_types::Timestamp { + seconds: v, + nanos: 0, + } } fn make_precondition_failure_operation( - violations: Vec, + violations: Vec, ) -> MockOperation { - let operation = Operation { - name: "cat".to_owned(), - done: true, - result: Some( - protos::gen::google::longrunning::operation::Result::Response(make_any_proto( - &remexec::ExecuteResponse { - status: Some(make_precondition_failure_status(violations)), - ..Default::default() - }, - "protos::gen::", - )), - ), - ..Default::default() - }; - MockOperation::new(operation) + let operation = Operation { + name: "cat".to_owned(), + done: true, + result: Some( + protos::gen::google::longrunning::operation::Result::Response(make_any_proto( + &remexec::ExecuteResponse { + status: Some(make_precondition_failure_status(violations)), + ..Default::default() + }, + "protos::gen::", + )), + ), + ..Default::default() + }; + MockOperation::new(operation) } fn make_precondition_failure_status( - violations: Vec, + violations: Vec, ) -> protos::gen::google::rpc::Status { - protos::gen::google::rpc::Status { - code: Code::FailedPrecondition as i32, - details: vec![make_any_proto( - &protos::gen::google::rpc::PreconditionFailure { violations }, - "protos::gen::", - )], - ..Default::default() - } + protos::gen::google::rpc::Status { + code: Code::FailedPrecondition as i32, + details: vec![make_any_proto( + &protos::gen::google::rpc::PreconditionFailure { violations }, + "protos::gen::", + )], + ..Default::default() + } } async fn run_cmd_runner( - request: Process, - 
command_runner: R, - store: Store, + request: Process, + command_runner: R, + store: Store, ) -> Result { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let original = command_runner - .run(Context::default(), &mut workunit, request) - .await?; - let stdout_bytes = store - .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) - .await?; - let stderr_bytes = store - .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) - .await?; - Ok(RemoteTestResult { - original, - stdout_bytes, - stderr_bytes, - }) + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let original = command_runner + .run(Context::default(), &mut workunit, request) + .await?; + let stdout_bytes = store + .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) + .await?; + let stderr_bytes = store + .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) + .await?; + Ok(RemoteTestResult { + original, + stdout_bytes, + stderr_bytes, + }) } async fn create_command_runner( - execution_address: String, - cas: &mock::StubCAS, + execution_address: String, + cas: &mock::StubCAS, ) -> (CommandRunner, Store) { - let runtime = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = make_store(store_dir.path(), cas, runtime).await; - let command_runner = CommandRunner::new( - &execution_address, - None, - None, - None, - Default::default(), - BTreeMap::new(), - store.clone(), - task_executor::Executor::new(), - OVERALL_DEADLINE_SECS, - RETRY_INTERVAL, - EXEC_CONCURRENCY_LIMIT, - None, - ) - .await - .expect("Failed to make command runner"); - (command_runner, store) + let runtime = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = make_store(store_dir.path(), cas, runtime).await; + let command_runner = CommandRunner::new( + &execution_address, + None, + None, + None, + Default::default(), + BTreeMap::new(), + store.clone(), + task_executor::Executor::new(), + OVERALL_DEADLINE_SECS, + RETRY_INTERVAL, + EXEC_CONCURRENCY_LIMIT, + None, + ) + .await + .expect("Failed to make command runner"); + (command_runner, store) } async fn run_command_remote( - execution_address: String, - request: Process, + execution_address: String, + request: Process, ) -> Result { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - run_command_remote_in_workunit(execution_address, request, &mut workunit).await + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + run_command_remote_in_workunit(execution_address, request, &mut workunit).await } async fn run_command_remote_in_workunit( - execution_address: String, - request: Process, - workunit: &mut RunningWorkunit, + execution_address: String, + request: Process, + workunit: &mut RunningWorkunit, ) -> Result { - let cas = mock::StubCAS::builder() - .file(&TestData::roland()) - .directory(&TestDirectory::containing_roland()) - .tree(&TestTree::roland_at_root()) - .build(); - let (command_runner, store) = create_command_runner(execution_address, &cas).await; - let original = command_runner - .run(Context::default(), workunit, request) - .await?; - - let stdout_bytes = store - .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) - .await?; - let stderr_bytes = store - .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) - .await?; - Ok(RemoteTestResult { - original, - stdout_bytes, - stderr_bytes, - }) + let cas = mock::StubCAS::builder() + .file(&TestData::roland()) + 
.directory(&TestDirectory::containing_roland()) + .tree(&TestTree::roland_at_root()) + .build(); + let (command_runner, store) = create_command_runner(execution_address, &cas).await; + let original = command_runner + .run(Context::default(), workunit, request) + .await?; + + let stdout_bytes = store + .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) + .await?; + let stderr_bytes = store + .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) + .await?; + Ok(RemoteTestResult { + original, + stdout_bytes, + stderr_bytes, + }) } async fn make_store( - store_dir: &Path, - cas: &mock::StubCAS, - executor: task_executor::Executor, + store_dir: &Path, + cas: &mock::StubCAS, + executor: task_executor::Executor, ) -> Store { - Store::local_only(executor, store_dir) - .unwrap() - .into_with_remote(remote_options_for_cas(&cas)) - .await - .unwrap() + Store::local_only(executor, store_dir) + .unwrap() + .into_with_remote(remote_options_for_cas(&cas)) + .await + .unwrap() } async fn extract_execute_response( - operation: Operation, - platform: Platform, + operation: Operation, + platform: Platform, ) -> Result { - let cas = mock::StubCAS::builder() - .file(&TestData::roland()) - .directory(&TestDirectory::containing_roland()) - .build(); - // TODO: This CommandRunner is only used for parsing, add so intentionally passes a CAS/AC - // address rather than an Execution address. - let (command_runner, store) = create_command_runner(cas.address(), &cas).await; - - let original = command_runner - .extract_execute_response( - RunId(0), - make_environment(platform), - OperationOrStatus::Operation(operation), - ) - .await?; + let cas = mock::StubCAS::builder() + .file(&TestData::roland()) + .directory(&TestDirectory::containing_roland()) + .build(); + // TODO: This CommandRunner is only used for parsing, add so intentionally passes a CAS/AC + // address rather than an Execution address. 
+ let (command_runner, store) = create_command_runner(cas.address(), &cas).await; + + let original = command_runner + .extract_execute_response( + RunId(0), + make_environment(platform), + OperationOrStatus::Operation(operation), + ) + .await?; - let stdout_bytes: Vec = store - .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) - .await - .unwrap(); + let stdout_bytes: Vec = store + .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) + .await + .unwrap(); - let stderr_bytes: Vec = store - .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) - .await - .unwrap(); + let stderr_bytes: Vec = store + .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) + .await + .unwrap(); - Ok(RemoteTestResult { - original, - stdout_bytes, - stderr_bytes, - }) + Ok(RemoteTestResult { + original, + stdout_bytes, + stderr_bytes, + }) } async fn extract_output_files_from_response( - execute_response: &remexec::ExecuteResponse, + execute_response: &remexec::ExecuteResponse, ) -> Result { - let cas = mock::StubCAS::builder() - .file(&TestData::roland()) - .directory(&TestDirectory::containing_roland()) - .tree(&TestTree::roland_at_root()) - .tree(&TestTree::robin_at_root()) - .build(); - let executor = task_executor::Executor::new(); - let store_dir = TempDir::new().unwrap(); - let store = make_store(store_dir.path(), &cas, executor.clone()).await; - let action_result = execute_response - .result - .as_ref() - .ok_or_else(|| "No ActionResult found".to_string())?; - let directory_digest = - process_execution::extract_output_files(store, action_result, false).await?; - Ok(directory_digest.as_digest()) + let cas = mock::StubCAS::builder() + .file(&TestData::roland()) + .directory(&TestDirectory::containing_roland()) + .tree(&TestTree::roland_at_root()) + .tree(&TestTree::robin_at_root()) + .build(); + let executor = task_executor::Executor::new(); + let store_dir = TempDir::new().unwrap(); + let store = make_store(store_dir.path(), &cas, executor.clone()).await; + let action_result = execute_response + .result + .as_ref() + .ok_or_else(|| "No ActionResult found".to_string())?; + let directory_digest = + process_execution::extract_output_files(store, action_result, false).await?; + Ok(directory_digest.as_digest()) } fn make_any_proto(message: &T, prefix: &str) -> prost_types::Any { - let rust_type_name = type_name::(); - let proto_type_name = rust_type_name - .strip_prefix(prefix) - .unwrap() - .replace("::", "."); + let rust_type_name = type_name::(); + let proto_type_name = rust_type_name + .strip_prefix(prefix) + .unwrap() + .replace("::", "."); - prost_types::Any { - type_url: format!("type.googleapis.com/{proto_type_name}"), - value: message.to_bytes().to_vec(), - } + prost_types::Any { + type_url: format!("type.googleapis.com/{proto_type_name}"), + value: message.to_bytes().to_vec(), + } } fn missing_preconditionfailure_violation( - digest: &Digest, + digest: &Digest, ) -> protos::gen::google::rpc::precondition_failure::Violation { - { - protos::gen::google::rpc::precondition_failure::Violation { - r#type: "MISSING".to_owned(), - subject: format!("blobs/{}/{}", digest.hash, digest.size_bytes), - ..Default::default() + { + protos::gen::google::rpc::precondition_failure::Violation { + r#type: "MISSING".to_owned(), + subject: format!("blobs/{}/{}", digest.hash, digest.size_bytes), + ..Default::default() + } } - } } #[track_caller] fn assert_contains(haystack: &str, needle: &str) { - assert!( - haystack.contains(needle), - "{haystack:?} should 
contain {needle:?}" - ) + assert!( + haystack.contains(needle), + "{haystack:?} should contain {needle:?}" + ) } fn cat_roland_request() -> Process { - let argv = owned_string_vec(&["/bin/cat", "roland.ext"]); - let mut process = Process::new(argv); - process.execution_environment.platform = Platform::Linux_x86_64; - process.input_digests = - InputDigests::with_input_files(TestDirectory::containing_roland().directory_digest()); - process.timeout = one_second(); - process.description = "cat a roland".to_string(); - process + let argv = owned_string_vec(&["/bin/cat", "roland.ext"]); + let mut process = Process::new(argv); + process.execution_environment.platform = Platform::Linux_x86_64; + process.input_digests = + InputDigests::with_input_files(TestDirectory::containing_roland().directory_digest()); + process.timeout = one_second(); + process.description = "cat a roland".to_string(); + process } fn echo_roland_request() -> Process { - let mut req = Process::new(owned_string_vec(&["/bin/echo", "meoooow"])); - req.execution_environment.platform = Platform::Linux_x86_64; - req.timeout = one_second(); - req.description = "unleash a roaring meow".to_string(); - req + let mut req = Process::new(owned_string_vec(&["/bin/echo", "meoooow"])); + req.execution_environment.platform = Platform::Linux_x86_64; + req.timeout = one_second(); + req.description = "unleash a roaring meow".to_string(); + req } fn assert_cancellation_requests( - mock_server: &mock::execution_server::TestServer, - expected: Vec, + mock_server: &mock::execution_server::TestServer, + expected: Vec, ) { - let cancels = mock_server - .mock_responder - .cancelation_requests - .lock() - .iter() - .map(|req| req.name.clone()) - .collect::>(); - assert_eq!(expected, cancels); + let cancels = mock_server + .mock_responder + .cancelation_requests + .lock() + .iter() + .map(|req| req.name.clone()) + .collect::>(); + assert_eq!(expected, cancels); } fn one_second() -> Option { - Some(Duration::from_millis(1000)) + Some(Duration::from_millis(1000)) } diff --git a/src/rust/engine/process_execution/src/bounded.rs b/src/rust/engine/process_execution/src/bounded.rs index 1ec0420e8c4..e13f606082d 100644 --- a/src/rust/engine/process_execution/src/bounded.rs +++ b/src/rust/engine/process_execution/src/bounded.rs @@ -34,330 +34,330 @@ lazy_static! { /// #[derive(Clone)] pub struct CommandRunner { - inner: Arc, - sema: AsyncSemaphore, + inner: Arc, + sema: AsyncSemaphore, } impl CommandRunner { - pub fn new( - executor: &Executor, - inner: Box, - bound: usize, - ) -> CommandRunner { - CommandRunner { - inner: inner.into(), - sema: AsyncSemaphore::new( - executor, - bound, - // TODO: Make configurable. - Duration::from_millis(200), - ), + pub fn new( + executor: &Executor, + inner: Box, + bound: usize, + ) -> CommandRunner { + CommandRunner { + inner: inner.into(), + sema: AsyncSemaphore::new( + executor, + bound, + // TODO: Make configurable. 
+ Duration::from_millis(200), + ), + } } - } } impl Debug for CommandRunner { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("bounded::CommandRunner") - .field("inner", &self.inner) - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("bounded::CommandRunner") + .field("inner", &self.inner) + .finish_non_exhaustive() + } } #[async_trait] impl crate::CommandRunner for CommandRunner { - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - process: Process, - ) -> Result { - let semaphore_acquisition = self.sema.acquire(process.concurrency_available); - let permit = in_workunit!( - "acquire_command_runner_slot", - // TODO: The UI uses the presence of a blocked workunit below a parent as an indication that - // the parent is blocked. If this workunit is filtered out, parents nodes which are waiting - // for the semaphore will render, even though they are effectively idle. - // - // https://github.com/pantsbuild/pants/issues/14680 will likely allow for a more principled - // solution to this problem, such as removing the mutable `blocking` flag, and then never - // filtering blocked workunits at creation time, regardless of level. - Level::Debug, - |workunit| async move { - let _blocking_token = workunit.blocking(); - semaphore_acquisition.await - } - ) - .await; - - loop { - let mut process = process.clone(); - let concurrency_available = permit.concurrency(); - log::debug!( - "Running {} under semaphore with concurrency id: {}, and concurrency: {}", - process.description, - permit.concurrency_slot(), - concurrency_available, - ); - - // TODO: Both of these templating cases should be implemented at the lowest possible level: - // they might currently be applied above a cache. - if let Some(ref execution_slot_env_var) = process.execution_slot_variable { - process.env.insert( - execution_slot_env_var.clone(), - format!("{}", permit.concurrency_slot()), - ); - } - if process.concurrency_available > 0 { - let concurrency = format!("{}", permit.concurrency()); - let mut matched = false; - process.argv = std::mem::take(&mut process.argv) - .into_iter() - .map( - |arg| match CONCURRENCY_TEMPLATE_RE.replace_all(&arg, &concurrency) { - Cow::Owned(altered) => { - matched = true; - altered - } - Cow::Borrowed(_original) => arg, - }, - ) - .collect(); - if !matched { - return Err( - format!( - "Process {} set `concurrency_available={}`, but did not include \ + async fn run( + &self, + context: Context, + workunit: &mut RunningWorkunit, + process: Process, + ) -> Result { + let semaphore_acquisition = self.sema.acquire(process.concurrency_available); + let permit = in_workunit!( + "acquire_command_runner_slot", + // TODO: The UI uses the presence of a blocked workunit below a parent as an indication that + // the parent is blocked. If this workunit is filtered out, parents nodes which are waiting + // for the semaphore will render, even though they are effectively idle. + // + // https://github.com/pantsbuild/pants/issues/14680 will likely allow for a more principled + // solution to this problem, such as removing the mutable `blocking` flag, and then never + // filtering blocked workunits at creation time, regardless of level. 
+ Level::Debug, + |workunit| async move { + let _blocking_token = workunit.blocking(); + semaphore_acquisition.await + } + ) + .await; + + loop { + let mut process = process.clone(); + let concurrency_available = permit.concurrency(); + log::debug!( + "Running {} under semaphore with concurrency id: {}, and concurrency: {}", + process.description, + permit.concurrency_slot(), + concurrency_available, + ); + + // TODO: Both of these templating cases should be implemented at the lowest possible level: + // they might currently be applied above a cache. + if let Some(ref execution_slot_env_var) = process.execution_slot_variable { + process.env.insert( + execution_slot_env_var.clone(), + format!("{}", permit.concurrency_slot()), + ); + } + if process.concurrency_available > 0 { + let concurrency = format!("{}", permit.concurrency()); + let mut matched = false; + process.argv = std::mem::take(&mut process.argv) + .into_iter() + .map( + |arg| match CONCURRENCY_TEMPLATE_RE.replace_all(&arg, &concurrency) { + Cow::Owned(altered) => { + matched = true; + altered + } + Cow::Borrowed(_original) => arg, + }, + ) + .collect(); + if !matched { + return Err(format!( + "Process {} set `concurrency_available={}`, but did not include \ the `{}` template variable in its arguments.", - process.description, process.concurrency_available, *CONCURRENCY_TEMPLATE_RE - ) - .into(), - ); - } - } - - let running_process = self.inner.run(context.clone(), workunit, process.clone()); - tokio::select! { - _ = permit.notified_concurrency_changed() => { - log::debug!( - "Process {} was preempted, and went from concurrency {} to concurrency {}", - process.description, - concurrency_available, - permit.concurrency(), - ); - continue; - }, - res = running_process => { - // The process completed. - return res; + process.description, + process.concurrency_available, + *CONCURRENCY_TEMPLATE_RE + ) + .into()); + } + } + + let running_process = self.inner.run(context.clone(), workunit, process.clone()); + tokio::select! { + _ = permit.notified_concurrency_changed() => { + log::debug!( + "Process {} was preempted, and went from concurrency {} to concurrency {}", + process.description, + concurrency_available, + permit.concurrency(), + ); + continue; + }, + res = running_process => { + // The process completed. + return res; + } + } } - } } - } - async fn shutdown(&self) -> Result<(), String> { - self.inner.shutdown().await - } + async fn shutdown(&self) -> Result<(), String> { + self.inner.shutdown().await + } } /// A wrapped Semaphore which adds concurrency metadata which supports overcommit. 
#[derive(Clone)] pub(crate) struct AsyncSemaphore { - sema: Arc, - state: Arc>, - preemptible_duration: Duration, + sema: Arc, + state: Arc>, + preemptible_duration: Duration, } pub(crate) struct State { - total_concurrency: usize, - available_ids: VecDeque, - tasks: Vec>, + total_concurrency: usize, + available_ids: VecDeque, + tasks: Vec>, } impl State { - #[cfg(test)] - pub(crate) fn new_for_tests(total_concurrency: usize, tasks: Vec>) -> Self { - Self { - total_concurrency, - available_ids: VecDeque::new(), - tasks, + #[cfg(test)] + pub(crate) fn new_for_tests(total_concurrency: usize, tasks: Vec>) -> Self { + Self { + total_concurrency, + available_ids: VecDeque::new(), + tasks, + } } - } } impl AsyncSemaphore { - pub fn new( - executor: &Executor, - permits: usize, - preemptible_duration: Duration, - ) -> AsyncSemaphore { - let mut available_ids = VecDeque::new(); - for id in 1..=permits { - available_ids.push_back(id); - } - - let state = Arc::new(Mutex::new(State { - total_concurrency: permits, - available_ids, - tasks: Vec::new(), - })); + pub fn new( + executor: &Executor, + permits: usize, + preemptible_duration: Duration, + ) -> AsyncSemaphore { + let mut available_ids = VecDeque::new(); + for id in 1..=permits { + available_ids.push_back(id); + } - // Spawn a task which will periodically balance Tasks. - let _balancer_task = { - let state = Arc::downgrade(&state); - executor.native_spawn(async move { - loop { - sleep(preemptible_duration / 4).await; - if let Some(state) = state.upgrade() { - // Balance tasks. - let mut state = state.lock(); - balance(Instant::now(), &mut state); - } else { - // The AsyncSemaphore was torn down. - break; - } + let state = Arc::new(Mutex::new(State { + total_concurrency: permits, + available_ids, + tasks: Vec::new(), + })); + + // Spawn a task which will periodically balance Tasks. + let _balancer_task = { + let state = Arc::downgrade(&state); + executor.native_spawn(async move { + loop { + sleep(preemptible_duration / 4).await; + if let Some(state) = state.upgrade() { + // Balance tasks. + let mut state = state.lock(); + balance(Instant::now(), &mut state); + } else { + // The AsyncSemaphore was torn down. + break; + } + } + }) + }; + + AsyncSemaphore { + sema: Arc::new(Semaphore::new(permits)), + state, + preemptible_duration, } - }) - }; + } + + #[cfg(test)] + pub(crate) fn available_permits(&self) -> usize { + self.sema.available_permits() + } - AsyncSemaphore { - sema: Arc::new(Semaphore::new(permits)), - state, - preemptible_duration, + /// + /// Runs the given Future-creating function (and the Future it returns) under the semaphore. + /// + /// NB: This method does not support preemption, or controlling concurrency. + /// + // TODO: https://github.com/rust-lang/rust/issues/46379 + #[allow(dead_code)] + pub(crate) async fn with_acquired(self, f: F) -> O + where + F: FnOnce(usize) -> B, + B: Future, + { + let permit = self.acquire(1).await; + let res = f(permit.task.id).await; + drop(permit); + res } - } - - #[cfg(test)] - pub(crate) fn available_permits(&self) -> usize { - self.sema.available_permits() - } - - /// - /// Runs the given Future-creating function (and the Future it returns) under the semaphore. - /// - /// NB: This method does not support preemption, or controlling concurrency. 
- /// - // TODO: https://github.com/rust-lang/rust/issues/46379 - #[allow(dead_code)] - pub(crate) async fn with_acquired(self, f: F) -> O - where - F: FnOnce(usize) -> B, - B: Future, - { - let permit = self.acquire(1).await; - let res = f(permit.task.id).await; - drop(permit); - res - } - - /// - /// Acquire a slot on the semaphore when it becomes available. Additionally, attempt to acquire - /// the given amount of concurrency. The amount actually acquired will be reported on the - /// returned Permit. - /// - pub async fn acquire(&self, concurrency_desired: usize) -> Permit<'_> { - let permit = self.sema.acquire().await.expect("semaphore closed"); - let task = { - let mut state = self.state.lock(); - let id = state - .available_ids - .pop_front() - .expect("More permits were distributed than ids exist."); - - // A Task is initially given its fair share of the available concurrency: i.e., the first - // arriving task gets all of the slots, and the second arriving gets half, even though that - // means that we overcommit. Balancing will adjust concurrency later, to the extent that it - // can given preemption timeouts. - // - // This is because we cannot anticipate the number of inbound processes, and we never want to - // delay a process from starting. - let concurrency_desired = max(concurrency_desired, 1); - let concurrency_actual = min( - concurrency_desired, - state.total_concurrency / (state.tasks.len() + 1), - ); - let task = Arc::new(Task::new( - id, - concurrency_desired, - concurrency_actual, - Instant::now() + self.preemptible_duration, - )); - state.tasks.push(task.clone()); - task - }; - Permit { - state: self.state.clone(), - _permit: permit, - task, + + /// + /// Acquire a slot on the semaphore when it becomes available. Additionally, attempt to acquire + /// the given amount of concurrency. The amount actually acquired will be reported on the + /// returned Permit. + /// + pub async fn acquire(&self, concurrency_desired: usize) -> Permit<'_> { + let permit = self.sema.acquire().await.expect("semaphore closed"); + let task = { + let mut state = self.state.lock(); + let id = state + .available_ids + .pop_front() + .expect("More permits were distributed than ids exist."); + + // A Task is initially given its fair share of the available concurrency: i.e., the first + // arriving task gets all of the slots, and the second arriving gets half, even though that + // means that we overcommit. Balancing will adjust concurrency later, to the extent that it + // can given preemption timeouts. + // + // This is because we cannot anticipate the number of inbound processes, and we never want to + // delay a process from starting. + let concurrency_desired = max(concurrency_desired, 1); + let concurrency_actual = min( + concurrency_desired, + state.total_concurrency / (state.tasks.len() + 1), + ); + let task = Arc::new(Task::new( + id, + concurrency_desired, + concurrency_actual, + Instant::now() + self.preemptible_duration, + )); + state.tasks.push(task.clone()); + task + }; + Permit { + state: self.state.clone(), + _permit: permit, + task, + } } - } } pub struct Permit<'a> { - state: Arc>, - // NB: Kept for its `Drop` impl. - _permit: SemaphorePermit<'a>, - task: Arc, + state: Arc>, + // NB: Kept for its `Drop` impl. 
+ _permit: SemaphorePermit<'a>, + task: Arc, } impl Permit<'_> { - pub fn concurrency_slot(&self) -> usize { - self.task.id - } + pub fn concurrency_slot(&self) -> usize { + self.task.id + } - pub fn concurrency(&self) -> usize { - self.task.concurrency() - } + pub fn concurrency(&self) -> usize { + self.task.concurrency() + } - pub async fn notified_concurrency_changed(&self) { - self.task.notify_concurrency_changed.notified().await - } + pub async fn notified_concurrency_changed(&self) { + self.task.notify_concurrency_changed.notified().await + } } impl<'a> Drop for Permit<'a> { - fn drop(&mut self) { - let mut state = self.state.lock(); - state.available_ids.push_back(self.task.id); - let tasks_position = state - .tasks - .iter() - .position(|t| t.id == self.task.id) - .unwrap(); - state.tasks.swap_remove(tasks_position); - } + fn drop(&mut self) { + let mut state = self.state.lock(); + state.available_ids.push_back(self.task.id); + let tasks_position = state + .tasks + .iter() + .position(|t| t.id == self.task.id) + .unwrap(); + state.tasks.swap_remove(tasks_position); + } } pub(crate) struct Task { - id: usize, - concurrency_desired: usize, - pub(crate) concurrency_actual: atomic::AtomicUsize, - notify_concurrency_changed: Notify, - preemptible_until: Instant, -} - -impl Task { - pub(crate) fn new( id: usize, concurrency_desired: usize, - concurrency_actual: usize, + pub(crate) concurrency_actual: atomic::AtomicUsize, + notify_concurrency_changed: Notify, preemptible_until: Instant, - ) -> Self { - assert!(concurrency_actual <= concurrency_desired); - Self { - id, - concurrency_desired, - concurrency_actual: atomic::AtomicUsize::new(concurrency_actual), - notify_concurrency_changed: Notify::new(), - preemptible_until, +} + +impl Task { + pub(crate) fn new( + id: usize, + concurrency_desired: usize, + concurrency_actual: usize, + preemptible_until: Instant, + ) -> Self { + assert!(concurrency_actual <= concurrency_desired); + Self { + id, + concurrency_desired, + concurrency_actual: atomic::AtomicUsize::new(concurrency_actual), + notify_concurrency_changed: Notify::new(), + preemptible_until, + } } - } - pub(crate) fn concurrency(&self) -> usize { - self.concurrency_actual.load(atomic::Ordering::SeqCst) - } + pub(crate) fn concurrency(&self) -> usize { + self.concurrency_actual.load(atomic::Ordering::SeqCst) + } - fn preemptible(&self, now: Instant) -> bool { - self.preemptible_until > now - } + fn preemptible(&self, now: Instant) -> bool { + self.preemptible_until > now + } } /// Given a set of Tasks with their desired and actual concurrency, balance the concurrency levels @@ -368,80 +368,78 @@ impl Task { /// them), but takes State as mutable in order to guarantee that it gets an atomic view of the /// tasks. pub(crate) fn balance(now: Instant, state: &mut State) -> usize { - let concurrency_used: usize = state.tasks.iter().map(|t| t.concurrency()).sum(); - let mut desired_change_in_commitment = - state.total_concurrency as isize - concurrency_used as isize; - let mut prempted = 0; - - // To reduce the number of tasks that we preempty, we preempt them in order by the amount of - // concurrency that they desire or can relinquish. - match desired_change_in_commitment.cmp(&0) { - Ordering::Equal => { - // Nothing to do! Although some tasks might not have their desired concurrency levels, it's - // probably not worth preempting any tasks to fix that. - } - Ordering::Less => { - // We're overcommitted: order by the amount that they can relinquish. 
- let mut preemptible_tasks = state - .tasks - .iter() - .filter_map(|t| { - // A task may never have less than one slot. - let relinquishable = t.concurrency() - 1; - if relinquishable > 0 && t.preemptible(now) { - Some((relinquishable, t)) - } else { - None - } - }) - .collect::>(); - preemptible_tasks.sort_by_key(|(relinquishable, _)| Reverse(*relinquishable)); - - for (relinquishable, task) in preemptible_tasks { - if desired_change_in_commitment == 0 { - break; + let concurrency_used: usize = state.tasks.iter().map(|t| t.concurrency()).sum(); + let mut desired_change_in_commitment = + state.total_concurrency as isize - concurrency_used as isize; + let mut prempted = 0; + + // To reduce the number of tasks that we preempty, we preempt them in order by the amount of + // concurrency that they desire or can relinquish. + match desired_change_in_commitment.cmp(&0) { + Ordering::Equal => { + // Nothing to do! Although some tasks might not have their desired concurrency levels, it's + // probably not worth preempting any tasks to fix that. } - - let relinquish = min(relinquishable, (-desired_change_in_commitment) as usize); - desired_change_in_commitment += relinquish as isize; - task - .concurrency_actual - .fetch_sub(relinquish, atomic::Ordering::SeqCst); - task.notify_concurrency_changed.notify_one(); - prempted += 1; - } - } - Ordering::Greater => { - // We're undercommitted: order by the amount that they are owed. - let mut preemptible_tasks = state - .tasks - .iter() - .filter_map(|t| { - let desired = t.concurrency_desired - t.concurrency(); - if desired > 0 && t.preemptible(now) { - Some((desired, t)) - } else { - None - } - }) - .collect::>(); - preemptible_tasks.sort_by_key(|(desired, _)| Reverse(*desired)); - - for (desired, task) in preemptible_tasks { - if desired_change_in_commitment == 0 { - break; + Ordering::Less => { + // We're overcommitted: order by the amount that they can relinquish. + let mut preemptible_tasks = state + .tasks + .iter() + .filter_map(|t| { + // A task may never have less than one slot. + let relinquishable = t.concurrency() - 1; + if relinquishable > 0 && t.preemptible(now) { + Some((relinquishable, t)) + } else { + None + } + }) + .collect::>(); + preemptible_tasks.sort_by_key(|(relinquishable, _)| Reverse(*relinquishable)); + + for (relinquishable, task) in preemptible_tasks { + if desired_change_in_commitment == 0 { + break; + } + + let relinquish = min(relinquishable, (-desired_change_in_commitment) as usize); + desired_change_in_commitment += relinquish as isize; + task.concurrency_actual + .fetch_sub(relinquish, atomic::Ordering::SeqCst); + task.notify_concurrency_changed.notify_one(); + prempted += 1; + } + } + Ordering::Greater => { + // We're undercommitted: order by the amount that they are owed. 
+ let mut preemptible_tasks = state + .tasks + .iter() + .filter_map(|t| { + let desired = t.concurrency_desired - t.concurrency(); + if desired > 0 && t.preemptible(now) { + Some((desired, t)) + } else { + None + } + }) + .collect::>(); + preemptible_tasks.sort_by_key(|(desired, _)| Reverse(*desired)); + + for (desired, task) in preemptible_tasks { + if desired_change_in_commitment == 0 { + break; + } + + let acquire = min(desired, desired_change_in_commitment as usize); + desired_change_in_commitment -= acquire as isize; + task.concurrency_actual + .fetch_add(acquire, atomic::Ordering::SeqCst); + task.notify_concurrency_changed.notify_one(); + prempted += 1; + } } - - let acquire = min(desired, desired_change_in_commitment as usize); - desired_change_in_commitment -= acquire as isize; - task - .concurrency_actual - .fetch_add(acquire, atomic::Ordering::SeqCst); - task.notify_concurrency_changed.notify_one(); - prempted += 1; - } } - } - prempted + prempted } diff --git a/src/rust/engine/process_execution/src/bounded_tests.rs b/src/rust/engine/process_execution/src/bounded_tests.rs index 653f39980bb..aeea73dad42 100644 --- a/src/rust/engine/process_execution/src/bounded_tests.rs +++ b/src/rust/engine/process_execution/src/bounded_tests.rs @@ -10,402 +10,402 @@ use tokio::time::{sleep, timeout}; use crate::bounded::{balance, AsyncSemaphore, State, Task}; fn mk_semaphore(permits: usize) -> AsyncSemaphore { - mk_semaphore_with_preemptible_duration(permits, Duration::from_millis(200)) + mk_semaphore_with_preemptible_duration(permits, Duration::from_millis(200)) } fn mk_semaphore_with_preemptible_duration( - permits: usize, - preemptible_duration: Duration, + permits: usize, + preemptible_duration: Duration, ) -> AsyncSemaphore { - let executor = task_executor::Executor::new(); - AsyncSemaphore::new(&executor, permits, preemptible_duration) + let executor = task_executor::Executor::new(); + AsyncSemaphore::new(&executor, permits, preemptible_duration) } #[tokio::test] async fn acquire_and_release() { - let sema = mk_semaphore(1); + let sema = mk_semaphore(1); - sema.with_acquired(|_id| future::ready(())).await; + sema.with_acquired(|_id| future::ready(())).await; } #[tokio::test] async fn correct_semaphore_slot_ids() { - let sema = mk_semaphore(2); - let (tx1, rx1) = oneshot::channel(); - let (tx2, rx2) = oneshot::channel(); - let (tx3, rx3) = oneshot::channel(); - let (tx4, rx4) = oneshot::channel(); - - let scale = Duration::from_millis(100); - - //Process 1 - tokio::spawn(sema.clone().with_acquired(move |id| async move { - tx1.send(id).unwrap(); - sleep(2 * scale).await; - future::ready(()) - })); - //Process 2 - tokio::spawn(sema.clone().with_acquired(move |id| async move { - sleep(scale).await; - tx2.send(id).unwrap(); - future::ready(()) - })); - //Process 3 - tokio::spawn(sema.clone().with_acquired(move |id| async move { - sleep(scale).await; - tx3.send(id).unwrap(); - future::ready(()) - })); - - sleep(5 * scale).await; - - //Process 4 - tokio::spawn(sema.clone().with_acquired(move |id| async move { - sleep(scale).await; - tx4.send(id).unwrap(); - future::ready(()) - })); - - let id1 = rx1.await.unwrap(); - let id2 = rx2.await.unwrap(); - let id3 = rx3.await.unwrap(); - let id4 = rx4.await.unwrap(); - - // Process 1 should get ID 1, then process 2 should run with id 2 and complete, then process 3 - // should run in the same "slot" as process 2 and get the same id (2). Process 4 is scheduled - // later and gets put into "slot" 1. 
- assert_eq!(id1, 1); - assert_eq!(id2, 2); - assert_eq!(id3, 2); - assert_eq!(id4, 1); + let sema = mk_semaphore(2); + let (tx1, rx1) = oneshot::channel(); + let (tx2, rx2) = oneshot::channel(); + let (tx3, rx3) = oneshot::channel(); + let (tx4, rx4) = oneshot::channel(); + + let scale = Duration::from_millis(100); + + //Process 1 + tokio::spawn(sema.clone().with_acquired(move |id| async move { + tx1.send(id).unwrap(); + sleep(2 * scale).await; + future::ready(()) + })); + //Process 2 + tokio::spawn(sema.clone().with_acquired(move |id| async move { + sleep(scale).await; + tx2.send(id).unwrap(); + future::ready(()) + })); + //Process 3 + tokio::spawn(sema.clone().with_acquired(move |id| async move { + sleep(scale).await; + tx3.send(id).unwrap(); + future::ready(()) + })); + + sleep(5 * scale).await; + + //Process 4 + tokio::spawn(sema.clone().with_acquired(move |id| async move { + sleep(scale).await; + tx4.send(id).unwrap(); + future::ready(()) + })); + + let id1 = rx1.await.unwrap(); + let id2 = rx2.await.unwrap(); + let id3 = rx3.await.unwrap(); + let id4 = rx4.await.unwrap(); + + // Process 1 should get ID 1, then process 2 should run with id 2 and complete, then process 3 + // should run in the same "slot" as process 2 and get the same id (2). Process 4 is scheduled + // later and gets put into "slot" 1. + assert_eq!(id1, 1); + assert_eq!(id2, 2); + assert_eq!(id3, 2); + assert_eq!(id4, 1); } #[tokio::test] async fn correct_semaphore_slot_ids_2() { - let sema = mk_semaphore(4); - let (tx1, rx1) = oneshot::channel(); - let (tx2, rx2) = oneshot::channel(); - let (tx3, rx3) = oneshot::channel(); - let (tx4, rx4) = oneshot::channel(); - let (tx5, rx5) = oneshot::channel(); - let (tx6, rx6) = oneshot::channel(); - let (tx7, rx7) = oneshot::channel(); - - println!("Spawning process 1"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 1"); - tx1.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - println!("Spawning process 2"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 2"); - tx2.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - println!("Spawning process 3"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 3"); - tx3.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - println!("Spawning process 4"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 4"); - tx4.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - println!("Spawning process 5"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 5"); - tx5.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - println!("Spawning process 6"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 6"); - tx6.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - println!("Spawning process 7"); - tokio::spawn(sema.clone().with_acquired(move |id| async move { - println!("Exec process 7"); - tx7.send(id).unwrap(); - sleep(Duration::from_millis(20)).await; - future::ready(()) - })); - - let id1 = rx1.await.unwrap(); - let id2 = rx2.await.unwrap(); - let id3 = rx3.await.unwrap(); - let id4 = rx4.await.unwrap(); - let id5 = rx5.await.unwrap(); - let id6 = rx6.await.unwrap(); - let id7 = 
rx7.await.unwrap(); - - assert_eq!(id1, 1); - assert_eq!(id2, 2); - assert_eq!(id3, 3); - assert_eq!(id4, 4); - assert_eq!(id5, 1); - assert_eq!(id6, 2); - assert_eq!(id7, 3); + let sema = mk_semaphore(4); + let (tx1, rx1) = oneshot::channel(); + let (tx2, rx2) = oneshot::channel(); + let (tx3, rx3) = oneshot::channel(); + let (tx4, rx4) = oneshot::channel(); + let (tx5, rx5) = oneshot::channel(); + let (tx6, rx6) = oneshot::channel(); + let (tx7, rx7) = oneshot::channel(); + + println!("Spawning process 1"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 1"); + tx1.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + println!("Spawning process 2"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 2"); + tx2.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + println!("Spawning process 3"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 3"); + tx3.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + println!("Spawning process 4"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 4"); + tx4.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + println!("Spawning process 5"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 5"); + tx5.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + println!("Spawning process 6"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 6"); + tx6.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + println!("Spawning process 7"); + tokio::spawn(sema.clone().with_acquired(move |id| async move { + println!("Exec process 7"); + tx7.send(id).unwrap(); + sleep(Duration::from_millis(20)).await; + future::ready(()) + })); + + let id1 = rx1.await.unwrap(); + let id2 = rx2.await.unwrap(); + let id3 = rx3.await.unwrap(); + let id4 = rx4.await.unwrap(); + let id5 = rx5.await.unwrap(); + let id6 = rx6.await.unwrap(); + let id7 = rx7.await.unwrap(); + + assert_eq!(id1, 1); + assert_eq!(id2, 2); + assert_eq!(id3, 3); + assert_eq!(id4, 4); + assert_eq!(id5, 1); + assert_eq!(id6, 2); + assert_eq!(id7, 3); } #[tokio::test] async fn at_most_n_acquisitions() { - let sema = mk_semaphore(1); - let handle1 = sema.clone(); - let handle2 = sema.clone(); - - let (tx_thread1, acquired_thread1) = oneshot::channel(); - let (unblock_thread1, rx_thread1) = oneshot::channel(); - let (tx_thread2, acquired_thread2) = oneshot::channel(); - - tokio::spawn(handle1.with_acquired(move |_id| { - async { - // Indicate that we've acquired, and then wait to be signaled to exit. - tx_thread1.send(()).unwrap(); - rx_thread1.await.unwrap(); - future::ready(()) + let sema = mk_semaphore(1); + let handle1 = sema.clone(); + let handle2 = sema.clone(); + + let (tx_thread1, acquired_thread1) = oneshot::channel(); + let (unblock_thread1, rx_thread1) = oneshot::channel(); + let (tx_thread2, acquired_thread2) = oneshot::channel(); + + tokio::spawn(handle1.with_acquired(move |_id| { + async { + // Indicate that we've acquired, and then wait to be signaled to exit. + tx_thread1.send(()).unwrap(); + rx_thread1.await.unwrap(); + future::ready(()) + } + })); + + // Wait for thread1 to acquire, and then launch thread2. 
+ if let Err(_) = timeout(Duration::from_secs(5), acquired_thread1).await { + panic!("thread1 didn't acquire."); + } + + tokio::spawn(handle2.with_acquired(move |_id| { + tx_thread2.send(()).unwrap(); + future::ready(()) + })); + + // thread2 should not signal until we unblock thread1. + let acquired_thread2 = + match future::select(sleep(Duration::from_millis(100)).boxed(), acquired_thread2).await { + future::Either::Left((_, acquired_thread2)) => acquired_thread2, + future::Either::Right(_) => { + panic!("thread2 should not have acquired while thread1 was holding.") + } + }; + + // Unblock thread1 and confirm that thread2 acquires. + unblock_thread1.send(()).unwrap(); + if let Err(_) = timeout(Duration::from_secs(5), acquired_thread2).await { + panic!("thread2 didn't acquire."); } - })); - - // Wait for thread1 to acquire, and then launch thread2. - if let Err(_) = timeout(Duration::from_secs(5), acquired_thread1).await { - panic!("thread1 didn't acquire."); - } - - tokio::spawn(handle2.with_acquired(move |_id| { - tx_thread2.send(()).unwrap(); - future::ready(()) - })); - - // thread2 should not signal until we unblock thread1. - let acquired_thread2 = - match future::select(sleep(Duration::from_millis(100)).boxed(), acquired_thread2).await { - future::Either::Left((_, acquired_thread2)) => acquired_thread2, - future::Either::Right(_) => { - panic!("thread2 should not have acquired while thread1 was holding.") - } - }; - - // Unblock thread1 and confirm that thread2 acquires. - unblock_thread1.send(()).unwrap(); - if let Err(_) = timeout(Duration::from_secs(5), acquired_thread2).await { - panic!("thread2 didn't acquire."); - } } #[tokio::test] async fn drop_while_waiting() { - // This tests that a task in the waiters queue of the semaphore is removed - // from the queue when the future that is was polling gets dropped. - // - // First we acquire the semaphore with a "process" which hangs until we send - // it a signal via the unblock_thread1 channel. This means that any futures that - // try to acquire the semaphore will be queued up until we unblock thread . - // - // Next we spawn a future on a second thread that tries to acquire the semaphore, - // and get added to the waiters queue, we drop that future after a Delay timer - // completes. The drop should cause the task to be removed from the waiters queue. - // - // Then we spawn a 3rd future that tries to acquire the semaphore but cannot - // because thread1 still has the only permit. After this future is added to the waiters - // we unblock thread1 and wait for a signal from the thread3 that it acquires. - // - // If the SECOND future was not removed from the waiters queue we would not get a signal - // that thread3 acquired the lock because the 2nd task would be blocking the queue trying to - // poll a non existent future. - let sema = mk_semaphore(1); - let handle1 = sema.clone(); - let handle2 = sema.clone(); - let handle3 = sema.clone(); - - let (tx_thread1, acquired_thread1) = oneshot::channel(); - let (unblock_thread1, rx_thread1) = oneshot::channel(); - let (tx_thread3, acquired_thread3) = oneshot::channel(); - let (unblock_thread3, rx_thread3) = oneshot::channel(); - let (tx_thread2_attempt_1, did_not_acquire_thread2_attempt_1) = oneshot::channel(); - - tokio::spawn(handle1.with_acquired(move |_id| { - async { - // Indicate that we've acquired, and then wait to be signaled to exit. 
- tx_thread1.send(()).unwrap(); - rx_thread1.await.unwrap(); - future::ready(()) + // This tests that a task in the waiters queue of the semaphore is removed + // from the queue when the future that is was polling gets dropped. + // + // First we acquire the semaphore with a "process" which hangs until we send + // it a signal via the unblock_thread1 channel. This means that any futures that + // try to acquire the semaphore will be queued up until we unblock thread . + // + // Next we spawn a future on a second thread that tries to acquire the semaphore, + // and get added to the waiters queue, we drop that future after a Delay timer + // completes. The drop should cause the task to be removed from the waiters queue. + // + // Then we spawn a 3rd future that tries to acquire the semaphore but cannot + // because thread1 still has the only permit. After this future is added to the waiters + // we unblock thread1 and wait for a signal from the thread3 that it acquires. + // + // If the SECOND future was not removed from the waiters queue we would not get a signal + // that thread3 acquired the lock because the 2nd task would be blocking the queue trying to + // poll a non existent future. + let sema = mk_semaphore(1); + let handle1 = sema.clone(); + let handle2 = sema.clone(); + let handle3 = sema.clone(); + + let (tx_thread1, acquired_thread1) = oneshot::channel(); + let (unblock_thread1, rx_thread1) = oneshot::channel(); + let (tx_thread3, acquired_thread3) = oneshot::channel(); + let (unblock_thread3, rx_thread3) = oneshot::channel(); + let (tx_thread2_attempt_1, did_not_acquire_thread2_attempt_1) = oneshot::channel(); + + tokio::spawn(handle1.with_acquired(move |_id| { + async { + // Indicate that we've acquired, and then wait to be signaled to exit. + tx_thread1.send(()).unwrap(); + rx_thread1.await.unwrap(); + future::ready(()) + } + })); + + // Wait for thread1 to acquire, and then launch thread2. + if let Err(_) = timeout(Duration::from_secs(5), acquired_thread1).await { + panic!("thread1 didn't acquire."); } - })); - - // Wait for thread1 to acquire, and then launch thread2. - if let Err(_) = timeout(Duration::from_secs(5), acquired_thread1).await { - panic!("thread1 didn't acquire."); - } - - // thread2 will wait for a little while, but then drop its PermitFuture to give up on waiting. - tokio::spawn(async move { - let permit_future = handle2.acquire(1).boxed(); - let delay_future = sleep(Duration::from_millis(100)).boxed(); - let raced_result = future::select(delay_future, permit_future).await; - // We expect to have timed out, because the other Future will not resolve until asked. - match raced_result { - future::Either::Left(_) => {} - future::Either::Right(_) => panic!("Expected to time out."), - }; - tx_thread2_attempt_1.send(()).unwrap(); - }); - - tokio::spawn(handle3.with_acquired(move |_id| { - async { - // Indicate that we've acquired, and then wait to be signaled to exit. - tx_thread3.send(()).unwrap(); - rx_thread3.await.unwrap(); - future::ready(()) + + // thread2 will wait for a little while, but then drop its PermitFuture to give up on waiting. + tokio::spawn(async move { + let permit_future = handle2.acquire(1).boxed(); + let delay_future = sleep(Duration::from_millis(100)).boxed(); + let raced_result = future::select(delay_future, permit_future).await; + // We expect to have timed out, because the other Future will not resolve until asked. 
+ match raced_result { + future::Either::Left(_) => {} + future::Either::Right(_) => panic!("Expected to time out."), + }; + tx_thread2_attempt_1.send(()).unwrap(); + }); + + tokio::spawn(handle3.with_acquired(move |_id| { + async { + // Indicate that we've acquired, and then wait to be signaled to exit. + tx_thread3.send(()).unwrap(); + rx_thread3.await.unwrap(); + future::ready(()) + } + })); + + // thread2 should signal that it did not successfully acquire for the first attempt. + if let Err(_) = timeout(Duration::from_secs(5), did_not_acquire_thread2_attempt_1).await { + panic!("thread2 should have failed to acquire by now."); } - })); - - // thread2 should signal that it did not successfully acquire for the first attempt. - if let Err(_) = timeout(Duration::from_secs(5), did_not_acquire_thread2_attempt_1).await { - panic!("thread2 should have failed to acquire by now."); - } - - // Unblock thread1 and confirm that thread3 acquires. - unblock_thread1.send(()).unwrap(); - if let Err(_) = timeout(Duration::from_secs(5), acquired_thread3).await { - panic!("thread3 didn't acquire."); - } - unblock_thread3.send(()).unwrap(); + + // Unblock thread1 and confirm that thread3 acquires. + unblock_thread1.send(()).unwrap(); + if let Err(_) = timeout(Duration::from_secs(5), acquired_thread3).await { + panic!("thread3 didn't acquire."); + } + unblock_thread3.send(()).unwrap(); } #[tokio::test] async fn dropped_future_is_removed_from_queue() { - let sema = mk_semaphore(1); - let handle1 = sema.clone(); - let handle2 = sema.clone(); - - let (tx_thread1, acquired_thread1) = oneshot::channel(); - let (unblock_thread1, rx_thread1) = oneshot::channel::<()>(); - let (tx_thread2, gave_up_thread2) = oneshot::channel(); - let (unblock_thread2, rx_thread2) = oneshot::channel(); - - let join_handle1 = tokio::spawn(handle1.with_acquired(move |_id| { - async { - // Indicate that we've acquired, and then wait to be signaled to exit. - tx_thread1.send(()).unwrap(); - rx_thread1.await.unwrap(); - future::ready(()) + let sema = mk_semaphore(1); + let handle1 = sema.clone(); + let handle2 = sema.clone(); + + let (tx_thread1, acquired_thread1) = oneshot::channel(); + let (unblock_thread1, rx_thread1) = oneshot::channel::<()>(); + let (tx_thread2, gave_up_thread2) = oneshot::channel(); + let (unblock_thread2, rx_thread2) = oneshot::channel(); + + let join_handle1 = tokio::spawn(handle1.with_acquired(move |_id| { + async { + // Indicate that we've acquired, and then wait to be signaled to exit. + tx_thread1.send(()).unwrap(); + rx_thread1.await.unwrap(); + future::ready(()) + } + })); + + // Wait for the first handle to acquire, and then launch thread2. + if let Err(_) = timeout(Duration::from_secs(5), acquired_thread1).await { + panic!("thread1 didn't acquire."); } - })); - - // Wait for the first handle to acquire, and then launch thread2. 
- if let Err(_) = timeout(Duration::from_secs(5), acquired_thread1).await { - panic!("thread1 didn't acquire."); - } - let waiter = handle2.with_acquired(|_id| future::ready(())); - let join_handle2 = tokio::spawn(async move { - match future::select(sleep(Duration::from_millis(100)).boxed(), waiter.boxed()).await { - future::Either::Left(((), waiter_future)) => { - tx_thread2.send(()).unwrap(); - rx_thread2.await.unwrap(); - drop(waiter_future); - } - future::Either::Right(_) => { - panic!("The sleep result should always be ready first!"); - } + let waiter = handle2.with_acquired(|_id| future::ready(())); + let join_handle2 = tokio::spawn(async move { + match future::select(sleep(Duration::from_millis(100)).boxed(), waiter.boxed()).await { + future::Either::Left(((), waiter_future)) => { + tx_thread2.send(()).unwrap(); + rx_thread2.await.unwrap(); + drop(waiter_future); + } + future::Either::Right(_) => { + panic!("The sleep result should always be ready first!"); + } + } + }); + + // Wait for thread2 to give up on acquiring. + if let Err(_) = timeout(Duration::from_secs(5), gave_up_thread2).await { + panic!("thread2 didn't give up on acquiring."); + } + assert_eq!(0, sema.available_permits()); + + // Then cause it to drop its attempt. + unblock_thread2.send(()).unwrap(); + if let Err(_) = timeout(Duration::from_secs(5), join_handle2).await { + panic!("thread2 didn't exit."); } - }); - - // Wait for thread2 to give up on acquiring. - if let Err(_) = timeout(Duration::from_secs(5), gave_up_thread2).await { - panic!("thread2 didn't give up on acquiring."); - } - assert_eq!(0, sema.available_permits()); - - // Then cause it to drop its attempt. - unblock_thread2.send(()).unwrap(); - if let Err(_) = timeout(Duration::from_secs(5), join_handle2).await { - panic!("thread2 didn't exit."); - } - assert_eq!(0, sema.available_permits()); - - // Finally, release in thread1. - unblock_thread1.send(()).unwrap(); - if let Err(_) = timeout(Duration::from_secs(5), join_handle1).await { - panic!("thread1 didn't exit."); - } - assert_eq!(1, sema.available_permits()); + assert_eq!(0, sema.available_permits()); + + // Finally, release in thread1. + unblock_thread1.send(()).unwrap(); + if let Err(_) = timeout(Duration::from_secs(5), join_handle1).await { + panic!("thread1 didn't exit."); + } + assert_eq!(1, sema.available_permits()); } #[tokio::test] async fn preemption() { - let ten_secs = Duration::from_secs(10); - let sema = mk_semaphore_with_preemptible_duration(2, ten_secs); - - // Acquire a permit which will take all concurrency, and confirm that it doesn't get preempted. - let permit1 = sema.acquire(2).await; - assert_eq!(2, permit1.concurrency()); - if let Ok(_) = timeout(ten_secs / 100, permit1.notified_concurrency_changed()).await { - panic!("permit1 should not have been preempted."); - } - - // Acquire another permit, and confirm that it doesn't get preempted. - let permit2 = sema.acquire(2).await; - if let Ok(_) = timeout(ten_secs / 100, permit2.notified_concurrency_changed()).await { - panic!("permit2 should not have been preempted."); - } - - // But that permit1 does get preempted. 
- if let Err(_) = timeout(ten_secs, permit1.notified_concurrency_changed()).await { - panic!("permit1 should have been preempted."); - } - - assert_eq!(1, permit1.concurrency()); - assert_eq!(1, permit2.concurrency()); + let ten_secs = Duration::from_secs(10); + let sema = mk_semaphore_with_preemptible_duration(2, ten_secs); + + // Acquire a permit which will take all concurrency, and confirm that it doesn't get preempted. + let permit1 = sema.acquire(2).await; + assert_eq!(2, permit1.concurrency()); + if let Ok(_) = timeout(ten_secs / 100, permit1.notified_concurrency_changed()).await { + panic!("permit1 should not have been preempted."); + } + + // Acquire another permit, and confirm that it doesn't get preempted. + let permit2 = sema.acquire(2).await; + if let Ok(_) = timeout(ten_secs / 100, permit2.notified_concurrency_changed()).await { + panic!("permit2 should not have been preempted."); + } + + // But that permit1 does get preempted. + if let Err(_) = timeout(ten_secs, permit1.notified_concurrency_changed()).await { + panic!("permit1 should have been preempted."); + } + + assert_eq!(1, permit1.concurrency()); + assert_eq!(1, permit2.concurrency()); } /// Given Tasks as triples of desired, actual, and expected concurrency (all of which are /// assumed to be preemptible), assert that the expected concurrency is applied. fn test_balance( - total_concurrency: usize, - expected_preempted: usize, - task_defs: Vec<(usize, usize, usize)>, + total_concurrency: usize, + expected_preempted: usize, + task_defs: Vec<(usize, usize, usize)>, ) { - let ten_minutes_from_now = Instant::now() + Duration::from_secs(10 * 60); - let tasks = task_defs - .iter() - .enumerate() - .map(|(id, (desired, actual, _))| { - Arc::new(Task::new(id, *desired, *actual, ten_minutes_from_now)) - }) - .collect::>(); - - let mut state = State::new_for_tests(total_concurrency, tasks.clone()); - - assert_eq!(expected_preempted, balance(Instant::now(), &mut state)); - for (task, (_, _, expected)) in tasks.iter().zip(task_defs.into_iter()) { - assert_eq!(expected, task.concurrency()); - } + let ten_minutes_from_now = Instant::now() + Duration::from_secs(10 * 60); + let tasks = task_defs + .iter() + .enumerate() + .map(|(id, (desired, actual, _))| { + Arc::new(Task::new(id, *desired, *actual, ten_minutes_from_now)) + }) + .collect::>(); + + let mut state = State::new_for_tests(total_concurrency, tasks.clone()); + + assert_eq!(expected_preempted, balance(Instant::now(), &mut state)); + for (task, (_, _, expected)) in tasks.iter().zip(task_defs.into_iter()) { + assert_eq!(expected, task.concurrency()); + } } #[tokio::test] async fn balance_noop() { - test_balance(2, 0, vec![(1, 1, 1), (1, 1, 1)]); + test_balance(2, 0, vec![(1, 1, 1), (1, 1, 1)]); } #[tokio::test] async fn balance_overcommitted() { - // Preempt the first Task and give it one slot, without adjusting the second task. - test_balance(2, 1, vec![(2, 2, 1), (1, 1, 1)]); + // Preempt the first Task and give it one slot, without adjusting the second task. + test_balance(2, 1, vec![(2, 2, 1), (1, 1, 1)]); } #[tokio::test] async fn balance_undercommitted() { - // Should preempt both Tasks to give them more concurrency. - test_balance(4, 2, vec![(2, 1, 2), (2, 1, 2)]); + // Should preempt both Tasks to give them more concurrency. 
+ test_balance(4, 2, vec![(2, 1, 2), (2, 1, 2)]); } diff --git a/src/rust/engine/process_execution/src/cache.rs b/src/rust/engine/process_execution/src/cache.rs index 34dd63771a1..fb4c6934e41 100644 --- a/src/rust/engine/process_execution/src/cache.rs +++ b/src/rust/engine/process_execution/src/cache.rs @@ -14,274 +14,280 @@ use protos::gen::pants::cache::{CacheKey, CacheKeyType}; use serde::{Deserialize, Serialize}; use store::{Store, StoreError}; use workunit_store::{ - in_workunit, Level, Metric, ObservationMetric, RunningWorkunit, WorkunitMetadata, + in_workunit, Level, Metric, ObservationMetric, RunningWorkunit, WorkunitMetadata, }; use crate::{ - check_cache_content, CacheContentBehavior, Context, FallibleProcessResultWithPlatform, Platform, - Process, ProcessCacheScope, ProcessError, ProcessExecutionEnvironment, ProcessResultSource, + check_cache_content, CacheContentBehavior, Context, FallibleProcessResultWithPlatform, + Platform, Process, ProcessCacheScope, ProcessError, ProcessExecutionEnvironment, + ProcessResultSource, }; // TODO: Consider moving into protobuf as a CacheValue type. #[derive(Serialize, Deserialize)] struct PlatformAndResponseBytes { - platform: Platform, - response_bytes: Vec, + platform: Platform, + response_bytes: Vec, } #[derive(Clone)] pub struct CommandRunner { - inner: Arc, - cache: PersistentCache, - file_store: Store, - cache_read: bool, - cache_content_behavior: CacheContentBehavior, - process_cache_namespace: Option, -} - -impl CommandRunner { - pub fn new( inner: Arc, cache: PersistentCache, file_store: Store, cache_read: bool, cache_content_behavior: CacheContentBehavior, process_cache_namespace: Option, - ) -> CommandRunner { - CommandRunner { - inner, - cache, - file_store, - cache_read, - cache_content_behavior, - process_cache_namespace, +} + +impl CommandRunner { + pub fn new( + inner: Arc, + cache: PersistentCache, + file_store: Store, + cache_read: bool, + cache_content_behavior: CacheContentBehavior, + process_cache_namespace: Option, + ) -> CommandRunner { + CommandRunner { + inner, + cache, + file_store, + cache_read, + cache_content_behavior, + process_cache_namespace, + } } - } } impl Debug for CommandRunner { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("cache::CommandRunner") - .field("inner", &self.inner) - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("cache::CommandRunner") + .field("inner", &self.inner) + .finish_non_exhaustive() + } } #[async_trait] impl crate::CommandRunner for CommandRunner { - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - let write_failures_to_cache = req.cache_scope == ProcessCacheScope::Always; - let key = CacheKey { - digest: Some( - crate::get_digest( - &req, - None, - self.process_cache_namespace.clone(), - &self.file_store, - None, - ) - .await - .into(), - ), - key_type: CacheKeyType::Process.into(), - }; + async fn run( + &self, + context: Context, + workunit: &mut RunningWorkunit, + req: Process, + ) -> Result { + let write_failures_to_cache = req.cache_scope == ProcessCacheScope::Always; + let key = CacheKey { + digest: Some( + crate::get_digest( + &req, + None, + self.process_cache_namespace.clone(), + &self.file_store, + None, + ) + .await + .into(), + ), + key_type: CacheKeyType::Process.into(), + }; - if self.cache_read { - let context2 = context.clone(); - let key2 = key.clone(); - let environment = req.execution_environment.clone(); - let 
cache_read_result = in_workunit!( - "local_cache_read", - Level::Trace, - desc = Some(format!("Local cache lookup: {}", req.description)), - |workunit| async move { - workunit.increment_counter(Metric::LocalCacheRequests, 1); + if self.cache_read { + let context2 = context.clone(); + let key2 = key.clone(); + let environment = req.execution_environment.clone(); + let cache_read_result = in_workunit!( + "local_cache_read", + Level::Trace, + desc = Some(format!("Local cache lookup: {}", req.description)), + |workunit| async move { + workunit.increment_counter(Metric::LocalCacheRequests, 1); - match self.lookup(&context2, &key2, environment).await { - Ok(Some(result)) if result.exit_code == 0 || write_failures_to_cache => { - workunit.increment_counter(Metric::LocalCacheRequestsCached, 1); - if let Some(time_saved) = result.metadata.saved_by_cache { - let time_saved = std::time::Duration::from(time_saved).as_millis() as u64; - workunit.increment_counter(Metric::LocalCacheTotalTimeSavedMs, time_saved); - context2 - .workunit_store - .record_observation(ObservationMetric::LocalCacheTimeSavedMs, time_saved); - } - // When we successfully use the cache, we change the description and increase the - // level (but not so much that it will be logged by default). - workunit.update_metadata(|initial| { - initial.map(|(initial, _)| { - ( - WorkunitMetadata { - desc: initial.desc.as_ref().map(|desc| format!("Hit: {desc}")), - ..initial - }, - Level::Debug, - ) - }) - }); - Ok(result) - } - Err(err) => { - debug!( - "Error loading process execution result from local cache: {} \ + match self.lookup(&context2, &key2, environment).await { + Ok(Some(result)) if result.exit_code == 0 || write_failures_to_cache => { + workunit.increment_counter(Metric::LocalCacheRequestsCached, 1); + if let Some(time_saved) = result.metadata.saved_by_cache { + let time_saved = + std::time::Duration::from(time_saved).as_millis() as u64; + workunit.increment_counter( + Metric::LocalCacheTotalTimeSavedMs, + time_saved, + ); + context2.workunit_store.record_observation( + ObservationMetric::LocalCacheTimeSavedMs, + time_saved, + ); + } + // When we successfully use the cache, we change the description and increase the + // level (but not so much that it will be logged by default). + workunit.update_metadata(|initial| { + initial.map(|(initial, _)| { + ( + WorkunitMetadata { + desc: initial + .desc + .as_ref() + .map(|desc| format!("Hit: {desc}")), + ..initial + }, + Level::Debug, + ) + }) + }); + Ok(result) + } + Err(err) => { + debug!( + "Error loading process execution result from local cache: {} \ - continuing to execute", - err - ); - workunit.increment_counter(Metric::LocalCacheReadErrors, 1); - // Falling through to re-execute. - Err(()) - } - Ok(_) => { - // Either we missed, or we hit for a failing result. - workunit.increment_counter(Metric::LocalCacheRequestsUncached, 1); - // Falling through to execute. - Err(()) + err + ); + workunit.increment_counter(Metric::LocalCacheReadErrors, 1); + // Falling through to re-execute. + Err(()) + } + Ok(_) => { + // Either we missed, or we hit for a failing result. + workunit.increment_counter(Metric::LocalCacheRequestsUncached, 1); + // Falling through to execute. 
+ Err(()) + } + } + } + ) + .await; + + if let Ok(result) = cache_read_result { + return Ok(result); } - } } - ) - .await; - - if let Ok(result) = cache_read_result { - return Ok(result); - } - } - let result = self.inner.run(context.clone(), workunit, req).await?; - if result.exit_code == 0 || write_failures_to_cache { - let result = result.clone(); - in_workunit!("local_cache_write", Level::Trace, |workunit| async move { - if let Err(err) = self.store(&key, &result).await { - warn!( + let result = self.inner.run(context.clone(), workunit, req).await?; + if result.exit_code == 0 || write_failures_to_cache { + let result = result.clone(); + in_workunit!("local_cache_write", Level::Trace, |workunit| async move { + if let Err(err) = self.store(&key, &result).await { + warn!( "Error storing process execution result to local cache: {} - ignoring and continuing", err ); - workunit.increment_counter(Metric::LocalCacheWriteErrors, 1); + workunit.increment_counter(Metric::LocalCacheWriteErrors, 1); + } + }) + .await; } - }) - .await; + Ok(result) } - Ok(result) - } - async fn shutdown(&self) -> Result<(), String> { - self.inner.shutdown().await - } + async fn shutdown(&self) -> Result<(), String> { + self.inner.shutdown().await + } } impl CommandRunner { - async fn lookup( - &self, - context: &Context, - action_key: &CacheKey, - environment: ProcessExecutionEnvironment, - ) -> Result, StoreError> { - let cache_lookup_start = Instant::now(); - use remexec::ExecuteResponse; + async fn lookup( + &self, + context: &Context, + action_key: &CacheKey, + environment: ProcessExecutionEnvironment, + ) -> Result, StoreError> { + let cache_lookup_start = Instant::now(); + use remexec::ExecuteResponse; - // See whether there is a cache entry. - let maybe_cache_value = self.cache.load(action_key).await?; - let maybe_execute_response = if let Some(bytes) = maybe_cache_value { - let decoded: PlatformAndResponseBytes = bincode::deserialize(&bytes) - .map_err(|err| format!("Could not deserialize platform and response: {err}"))?; - let platform = decoded.platform; - let execute_response = ExecuteResponse::decode(&decoded.response_bytes[..]) - .map_err(|e| format!("Invalid ExecuteResponse: {e:?}"))?; - Some((execute_response, platform)) - } else { - return Ok(None); - }; + // See whether there is a cache entry. + let maybe_cache_value = self.cache.load(action_key).await?; + let maybe_execute_response = if let Some(bytes) = maybe_cache_value { + let decoded: PlatformAndResponseBytes = bincode::deserialize(&bytes) + .map_err(|err| format!("Could not deserialize platform and response: {err}"))?; + let platform = decoded.platform; + let execute_response = ExecuteResponse::decode(&decoded.response_bytes[..]) + .map_err(|e| format!("Invalid ExecuteResponse: {e:?}"))?; + Some((execute_response, platform)) + } else { + return Ok(None); + }; - // Deserialize the cache entry if it existed. - // TODO: The platform in the cache value is unused. See #18450. - let mut result = if let Some((execute_response, _platform)) = maybe_execute_response { - if let Some(ref action_result) = execute_response.result { - crate::populate_fallible_execution_result( - self.file_store.clone(), - context.run_id, - action_result, - true, - ProcessResultSource::HitLocally, - environment, - ) - .await? - } else { - return Err( - "action result missing from ExecuteResponse" - .to_owned() - .into(), - ); - } - } else { - return Ok(None); - }; + // Deserialize the cache entry if it existed. + // TODO: The platform in the cache value is unused. See #18450. 
+ let mut result = if let Some((execute_response, _platform)) = maybe_execute_response { + if let Some(ref action_result) = execute_response.result { + crate::populate_fallible_execution_result( + self.file_store.clone(), + context.run_id, + action_result, + true, + ProcessResultSource::HitLocally, + environment, + ) + .await? + } else { + return Err("action result missing from ExecuteResponse" + .to_owned() + .into()); + } + } else { + return Ok(None); + }; - if check_cache_content(&result, &self.file_store, self.cache_content_behavior).await? { - // NB: We set the cache hit elapsed time as late as possible (after having validated the cache content). - result - .metadata - .update_cache_hit_elapsed(cache_lookup_start.elapsed()); - Ok(Some(result)) - } else { - Ok(None) + if check_cache_content(&result, &self.file_store, self.cache_content_behavior).await? { + // NB: We set the cache hit elapsed time as late as possible (after having validated the cache content). + result + .metadata + .update_cache_hit_elapsed(cache_lookup_start.elapsed()); + Ok(Some(result)) + } else { + Ok(None) + } } - } - async fn store( - &self, - action_key: &CacheKey, - result: &FallibleProcessResultWithPlatform, - ) -> Result<(), StoreError> { - let stdout_digest = result.stdout_digest; - let stderr_digest = result.stderr_digest; + async fn store( + &self, + action_key: &CacheKey, + result: &FallibleProcessResultWithPlatform, + ) -> Result<(), StoreError> { + let stdout_digest = result.stdout_digest; + let stderr_digest = result.stderr_digest; - // Ensure that the process output is persisted. - self - .file_store - .ensure_directory_digest_persisted(result.output_directory.clone()) - .await?; + // Ensure that the process output is persisted. + self.file_store + .ensure_directory_digest_persisted(result.output_directory.clone()) + .await?; - let action_result = remexec::ActionResult { - exit_code: result.exit_code, - output_directories: vec![remexec::OutputDirectory { - path: String::new(), - tree_digest: Some((&result.output_directory.as_digest()).into()), - is_topologically_sorted: false, - }], - stdout_digest: Some((&stdout_digest).into()), - stderr_digest: Some((&stderr_digest).into()), - execution_metadata: Some(result.metadata.clone().into()), - ..remexec::ActionResult::default() - }; - let execute_response = remexec::ExecuteResponse { - cached_result: true, - result: Some(action_result), - ..remexec::ExecuteResponse::default() - }; + let action_result = remexec::ActionResult { + exit_code: result.exit_code, + output_directories: vec![remexec::OutputDirectory { + path: String::new(), + tree_digest: Some((&result.output_directory.as_digest()).into()), + is_topologically_sorted: false, + }], + stdout_digest: Some((&stdout_digest).into()), + stderr_digest: Some((&stderr_digest).into()), + execution_metadata: Some(result.metadata.clone().into()), + ..remexec::ActionResult::default() + }; + let execute_response = remexec::ExecuteResponse { + cached_result: true, + result: Some(action_result), + ..remexec::ExecuteResponse::default() + }; - // TODO: Should probably have a configurable lease time which is larger than default. - // (This isn't super urgent because we don't ever actually GC this store. So also...) - // TODO: GC the local process execution cache. + // TODO: Should probably have a configurable lease time which is larger than default. + // (This isn't super urgent because we don't ever actually GC this store. So also...) + // TODO: GC the local process execution cache. 
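// Editorial sketch: the store path immediately below pre-sizes the output buffer with
// `encoded_len` and then `encode`s the protobuf message into it. A hedged sketch of that
// prost idiom, using `prost_types::Timestamp` purely as a stand-in for
// `remexec::ExecuteResponse`:
use prost::Message;

fn encode_response_sketch(msg: &prost_types::Timestamp) -> Result<Vec<u8>, String> {
    // Reserve exactly the encoded size up front to avoid reallocation while encoding.
    let mut response_bytes = Vec::with_capacity(msg.encoded_len());
    msg.encode(&mut response_bytes)
        .map_err(|err| format!("Error serializing execute process result to cache: {err}"))?;
    Ok(response_bytes)
}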
- let mut response_bytes = Vec::with_capacity(execute_response.encoded_len()); - execute_response - .encode(&mut response_bytes) - .map_err(|err| format!("Error serializing execute process result to cache: {err}"))?; + let mut response_bytes = Vec::with_capacity(execute_response.encoded_len()); + execute_response + .encode(&mut response_bytes) + .map_err(|err| format!("Error serializing execute process result to cache: {err}"))?; - let bytes_to_store = bincode::serialize(&PlatformAndResponseBytes { - platform: result.metadata.environment.platform, - response_bytes, - }) - .map(Bytes::from) - .map_err(|err| format!("Error serializing platform and execute process result: {err}"))?; + let bytes_to_store = bincode::serialize(&PlatformAndResponseBytes { + platform: result.metadata.environment.platform, + response_bytes, + }) + .map(Bytes::from) + .map_err(|err| format!("Error serializing platform and execute process result: {err}"))?; - self.cache.store(action_key, bytes_to_store).await?; - Ok(()) - } + self.cache.store(action_key, bytes_to_store).await?; + Ok(()) + } } diff --git a/src/rust/engine/process_execution/src/cache_tests.rs b/src/rust/engine/process_execution/src/cache_tests.rs index 10341eb1b52..dd0cabbb44c 100644 --- a/src/rust/engine/process_execution/src/cache_tests.rs +++ b/src/rust/engine/process_execution/src/cache_tests.rs @@ -13,182 +13,182 @@ use testutil::relative_paths; use workunit_store::{RunningWorkunit, WorkunitStore}; use crate::{ - local::KeepSandboxes, CacheContentBehavior, CommandRunner as CommandRunnerTrait, Context, - FallibleProcessResultWithPlatform, NamedCaches, Process, ProcessError, + local::KeepSandboxes, CacheContentBehavior, CommandRunner as CommandRunnerTrait, Context, + FallibleProcessResultWithPlatform, NamedCaches, Process, ProcessError, }; struct RoundtripResults { - uncached: Result, - maybe_cached: Result, + uncached: Result, + maybe_cached: Result, } fn create_local_runner() -> (Box, Store, TempDir) { - let runtime = task_executor::Executor::new(); - let base_dir = TempDir::new().unwrap(); - let named_cache_dir = base_dir.path().join("named_cache_dir"); - let store_dir = base_dir.path().join("store_dir"); - let store = Store::local_only(runtime.clone(), store_dir).unwrap(); - let runner = Box::new(crate::local::CommandRunner::new( - store.clone(), - runtime, - base_dir.path().to_owned(), - NamedCaches::new_local(named_cache_dir), - ImmutableInputs::new(store.clone(), base_dir.path()).unwrap(), - KeepSandboxes::Never, - )); - (runner, store, base_dir) + let runtime = task_executor::Executor::new(); + let base_dir = TempDir::new().unwrap(); + let named_cache_dir = base_dir.path().join("named_cache_dir"); + let store_dir = base_dir.path().join("store_dir"); + let store = Store::local_only(runtime.clone(), store_dir).unwrap(); + let runner = Box::new(crate::local::CommandRunner::new( + store.clone(), + runtime, + base_dir.path().to_owned(), + NamedCaches::new_local(named_cache_dir), + ImmutableInputs::new(store.clone(), base_dir.path()).unwrap(), + KeepSandboxes::Never, + )); + (runner, store, base_dir) } fn create_cached_runner( - local: Box, - store: Store, + local: Box, + store: Store, ) -> (Box, TempDir) { - let runtime = task_executor::Executor::new(); - let cache_dir = TempDir::new().unwrap(); - let max_lmdb_size = 50 * 1024 * 1024; //50 MB - I didn't pick that number but it seems reasonable. 
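// Editorial sketch: the test helpers above compose runners as decorators: the caching
// CommandRunner wraps the local CommandRunner behind the same trait, so callers see a
// single `run` interface. A much-simplified, synchronous sketch of that shape; the trait
// and types here are hypothetical stand-ins, not the crate's API:
use std::cell::RefCell;
use std::collections::HashMap;

trait Runner {
    fn run(&self, request: &str) -> Result<String, String>;
}

struct LocalRunner;

impl Runner for LocalRunner {
    fn run(&self, request: &str) -> Result<String, String> {
        Ok(format!("ran: {request}"))
    }
}

struct CachingRunner {
    inner: Box<dyn Runner>,
    cache: RefCell<HashMap<String, String>>,
}

impl Runner for CachingRunner {
    fn run(&self, request: &str) -> Result<String, String> {
        if let Some(hit) = self.cache.borrow().get(request) {
            return Ok(hit.clone()); // cache hit: skip the inner runner entirely
        }
        let result = self.inner.run(request)?;
        self.cache
            .borrow_mut()
            .insert(request.to_string(), result.clone());
        Ok(result)
    }
}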
- - let cache = PersistentCache::new( - cache_dir.path(), - max_lmdb_size, - runtime, - DEFAULT_LEASE_TIME, - 1, - ) - .unwrap(); - - let runner = Box::new(crate::cache::CommandRunner::new( - local.into(), - cache, - store, - true, - CacheContentBehavior::Fetch, - None, - )); - - (runner, cache_dir) -} - -fn create_script(script_exit_code: i8) -> (Process, PathBuf, TempDir) { - let script_dir = TempDir::new().unwrap(); - let script_path = script_dir.path().join("script"); - std::fs::File::create(&script_path) - .and_then(|mut file| { - writeln!( - file, - "echo -n {} > roland && echo Hello && echo >&2 World; exit {}", - TestData::roland().string(), - script_exit_code - ) - }) + let runtime = task_executor::Executor::new(); + let cache_dir = TempDir::new().unwrap(); + let max_lmdb_size = 50 * 1024 * 1024; //50 MB - I didn't pick that number but it seems reasonable. + + let cache = PersistentCache::new( + cache_dir.path(), + max_lmdb_size, + runtime, + DEFAULT_LEASE_TIME, + 1, + ) .unwrap(); - let process = Process::new(vec![ - testutil::path::find_bash(), - format!("{}", script_path.display()), - ]) - .output_files(relative_paths(&["roland"]).collect()); + let runner = Box::new(crate::cache::CommandRunner::new( + local.into(), + cache, + store, + true, + CacheContentBehavior::Fetch, + None, + )); - (process, script_path, script_dir) + (runner, cache_dir) +} + +fn create_script(script_exit_code: i8) -> (Process, PathBuf, TempDir) { + let script_dir = TempDir::new().unwrap(); + let script_path = script_dir.path().join("script"); + std::fs::File::create(&script_path) + .and_then(|mut file| { + writeln!( + file, + "echo -n {} > roland && echo Hello && echo >&2 World; exit {}", + TestData::roland().string(), + script_exit_code + ) + }) + .unwrap(); + + let process = Process::new(vec![ + testutil::path::find_bash(), + format!("{}", script_path.display()), + ]) + .output_files(relative_paths(&["roland"]).collect()); + + (process, script_path, script_dir) } async fn run_roundtrip(script_exit_code: i8, workunit: &mut RunningWorkunit) -> RoundtripResults { - let (local, store, _local_runner_dir) = create_local_runner(); - let (process, script_path, _script_dir) = create_script(script_exit_code); + let (local, store, _local_runner_dir) = create_local_runner(); + let (process, script_path, _script_dir) = create_script(script_exit_code); - let local_result = local - .run(Context::default(), workunit, process.clone()) - .await; + let local_result = local + .run(Context::default(), workunit, process.clone()) + .await; - let (caching, _cache_dir) = create_cached_runner(local, store.clone()); + let (caching, _cache_dir) = create_cached_runner(local, store.clone()); - let uncached_result = caching - .run(Context::default(), workunit, process.clone()) - .await; + let uncached_result = caching + .run(Context::default(), workunit, process.clone()) + .await; - assert_eq!(local_result, uncached_result); + assert_eq!(local_result, uncached_result); - // Removing the file means that were the command to be run again without any caching, it would - // fail due to a FileNotFound error. So, If the second run succeeds, that implies that the - // cache was successfully used. - std::fs::remove_file(&script_path).unwrap(); - let maybe_cached_result = caching.run(Context::default(), workunit, process).await; + // Removing the file means that were the command to be run again without any caching, it would + // fail due to a FileNotFound error. 
So, If the second run succeeds, that implies that the + // cache was successfully used. + std::fs::remove_file(&script_path).unwrap(); + let maybe_cached_result = caching.run(Context::default(), workunit, process).await; - RoundtripResults { - uncached: uncached_result, - maybe_cached: maybe_cached_result, - } + RoundtripResults { + uncached: uncached_result, + maybe_cached: maybe_cached_result, + } } #[tokio::test] async fn cache_success() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let results = run_roundtrip(0, &mut workunit).await; - assert_eq!(results.uncached, results.maybe_cached); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let results = run_roundtrip(0, &mut workunit).await; + assert_eq!(results.uncached, results.maybe_cached); } #[tokio::test] async fn failures_not_cached() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let results = run_roundtrip(1, &mut workunit).await; - assert_ne!(results.uncached, results.maybe_cached); - assert_eq!(results.uncached.unwrap().exit_code, 1); - assert_eq!(results.maybe_cached.unwrap().exit_code, 127); // aka the return code for file not found + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let results = run_roundtrip(1, &mut workunit).await; + assert_ne!(results.uncached, results.maybe_cached); + assert_eq!(results.uncached.unwrap().exit_code, 1); + assert_eq!(results.maybe_cached.unwrap().exit_code, 127); // aka the return code for file not found } #[tokio::test] async fn recover_from_missing_store_contents() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let (local, store, _local_runner_dir) = create_local_runner(); - let (caching, _cache_dir) = create_cached_runner(local, store.clone()); - let (process, _script_path, _script_dir) = create_script(0); - - // Run once to cache the process. - let first_result = caching - .run(Context::default(), &mut workunit, process.clone()) - .await - .unwrap(); - - // Delete the first child of the output directory parent to confirm that we ensure that more - // than just the root of the output is present when hitting the cache. - { - let output_dir_digest = first_result.output_directory; - store - .ensure_directory_digest_persisted(output_dir_digest.clone()) - .await - .unwrap(); - let output_dir = store - .load_directory(output_dir_digest.as_digest()) - .await - .unwrap(); - let output_child_digest = output_dir - .files - .first() - .unwrap() - .digest - .as_ref() - .unwrap() - .try_into() - .unwrap(); - let removed = store.remove_file(output_child_digest).await.unwrap(); - assert!(removed); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let (local, store, _local_runner_dir) = create_local_runner(); + let (caching, _cache_dir) = create_cached_runner(local, store.clone()); + let (process, _script_path, _script_dir) = create_script(0); + + // Run once to cache the process. + let first_result = caching + .run(Context::default(), &mut workunit, process.clone()) + .await + .unwrap(); + + // Delete the first child of the output directory parent to confirm that we ensure that more + // than just the root of the output is present when hitting the cache. 
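// Editorial sketch: a standalone illustration of the convention `failures_not_cached`
// above relies on: when bash is asked to run a script file that no longer exists, it
// exits with 127, which is how the test distinguishes "script deleted, not cached" from
// a cache hit. The path below is a hypothetical missing file.
use std::process::Command;

fn main() {
    let status = Command::new("bash")
        .arg("/no/such/script/anywhere")
        .status()
        .expect("failed to spawn bash");
    assert_eq!(status.code(), Some(127)); // 127: "command (file) not found"
}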
+ { + let output_dir_digest = first_result.output_directory; + store + .ensure_directory_digest_persisted(output_dir_digest.clone()) + .await + .unwrap(); + let output_dir = store + .load_directory(output_dir_digest.as_digest()) + .await + .unwrap(); + let output_child_digest = output_dir + .files + .first() + .unwrap() + .digest + .as_ref() + .unwrap() + .try_into() + .unwrap(); + let removed = store.remove_file(output_child_digest).await.unwrap(); + assert!(removed); + assert!(store + .contents_for_directory(output_dir_digest) + .await + .err() + .is_some()) + } + + // Ensure that we don't fail if we re-run. + let second_result = caching + .run(Context::default(), &mut workunit, process.clone()) + .await + .unwrap(); + + // And that the entire output directory can be loaded. assert!(store - .contents_for_directory(output_dir_digest) - .await - .err() - .is_some()) - } - - // Ensure that we don't fail if we re-run. - let second_result = caching - .run(Context::default(), &mut workunit, process.clone()) - .await - .unwrap(); - - // And that the entire output directory can be loaded. - assert!(store - .contents_for_directory(second_result.output_directory) - .await - .ok() - .is_some()) + .contents_for_directory(second_result.output_directory) + .await + .ok() + .is_some()) } diff --git a/src/rust/engine/process_execution/src/children.rs b/src/rust/engine/process_execution/src/children.rs index e09e1e6936e..d3640066f55 100644 --- a/src/rust/engine/process_execution/src/children.rs +++ b/src/rust/engine/process_execution/src/children.rs @@ -19,166 +19,165 @@ const GRACEFUL_SHUTDOWN_POLL_TIME: time::Duration = time::Duration::from_millis( /// and to all local execution in general. It could also be adjusted for sending other posix /// signals in sequence for . pub struct ManagedChild { - child: Child, - graceful_shutdown_timeout: Option, - killed: bool, + child: Child, + graceful_shutdown_timeout: Option, + killed: bool, } impl ManagedChild { - pub fn spawn( - command: &mut Command, - graceful_shutdown_timeout: Option, - ) -> std::io::Result { - // Set `kill_on_drop` to encourage `tokio` to `wait` the process via its own "reaping" - // mechanism: - // see https://docs.rs/tokio/1.14.0/tokio/process/struct.Command.html#method.kill_on_drop - command.kill_on_drop(true); - - // Adjust the Command to create its own PGID as it starts, to make it safe to kill the PGID - // later. - unsafe { - command.pre_exec(|| { - nix::unistd::setsid().map(|_pgid| ()).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("Could not create new pgid: {e}"), - ) + pub fn spawn( + command: &mut Command, + graceful_shutdown_timeout: Option, + ) -> std::io::Result { + // Set `kill_on_drop` to encourage `tokio` to `wait` the process via its own "reaping" + // mechanism: + // see https://docs.rs/tokio/1.14.0/tokio/process/struct.Command.html#method.kill_on_drop + command.kill_on_drop(true); + + // Adjust the Command to create its own PGID as it starts, to make it safe to kill the PGID + // later. + unsafe { + command.pre_exec(|| { + nix::unistd::setsid().map(|_pgid| ()).map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("Could not create new pgid: {e}"), + ) + }) + }); + }; + + // Then spawn. + let child = command.spawn()?; + Ok(Self { + child, + graceful_shutdown_timeout, + killed: false, }) - }); - }; - - // Then spawn. 
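// Editorial sketch: a minimal version of the spawn pattern used by `ManagedChild::spawn`
// above: the child is moved into its own session (and therefore its own process group)
// via `setsid` in `pre_exec`, and `kill_on_drop` lets tokio reap it. Unix-only, and must
// be called from within a tokio runtime; `program` is a hypothetical binary path.
use tokio::process::{Child, Command};

fn spawn_in_own_process_group(program: &str) -> std::io::Result<Child> {
    let mut command = Command::new(program);
    command.kill_on_drop(true);
    unsafe {
        command.pre_exec(|| {
            // Create a new session/process group so the whole group can be signalled later.
            nix::unistd::setsid().map(|_sid| ()).map_err(|e| {
                std::io::Error::new(std::io::ErrorKind::Other, format!("setsid failed: {e}"))
            })
        });
    }
    command.spawn()
}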
- let child = command.spawn()?; - Ok(Self { - child, - graceful_shutdown_timeout, - killed: false, - }) - } - - fn get_pgid(&self) -> Result { - let pid = self.id().ok_or_else(|| "Process had no PID.".to_owned())?; - let pgid = getpgid(Some(Pid::from_raw(pid as i32))) - .map_err(|e| format!("Could not get process group id of child process: {e}"))?; - Ok(pgid) - } - - /// Send a signal to the child process group. - fn signal_pg>>(&mut self, signal: T) -> Result<(), String> { - let pgid = self.get_pgid()?; - // the negative PGID will signal the entire process group. - signal::kill(Pid::from_raw(-pgid.as_raw()), signal) - .map_err(|e| format!("Failed to interrupt child process group: {e}"))?; - Ok(()) - } - - /// Check if the child has exited. - /// - /// This returns true if the child has exited with any return code, or false - /// if the child has not yet exited. An error indicated a system error checking - /// the result of the child process, and does not necessarily indicate that - /// has exited or not. - fn check_child_has_exited(&mut self) -> Result { - self - .child - .try_wait() - .map(|o| o.is_some()) - .map_err(|e| e.to_string()) - } - - /// Synchronously wait for the child to exit. - /// - /// This method will repeatedly poll the child process until it exits, an error occurrs - /// or the timeout is reached. - /// - /// A return value of Ok(true) indicates that the child has terminated, Ok(false) indicates - /// that we reached the max_wait_duration while waiting for the child to terminate. - /// - /// This method *will* block the current thread but will do so for a bounded amount of time. - fn wait_for_child_exit_sync( - &mut self, - max_wait_duration: time::Duration, - ) -> Result { - let maybe_id = self.child.id(); - let deadline = time::Instant::now() + max_wait_duration; - while time::Instant::now() <= deadline { - if self.check_child_has_exited()? { - return Ok(true); - } - log::debug!("Waiting for {:?} to exit...", maybe_id); - thread::sleep(GRACEFUL_SHUTDOWN_POLL_TIME); } - // If we get here we have timed-out. - Ok(false) - } - - /// Attempt to shutdown the process (gracefully, if was configured that way at creation). - /// - /// Graceful shutdown will send a SIGINT to the process and give it a chance to exit. If the - /// process does not respond to the SIGINT within a fixed interval, a SIGKILL will be sent. - /// - /// NB: This method *will* block the current thread but it will do so for a bounded amount of time, - /// as long as the operating system responds to `SIGKILL` in a bounded amount of time. - /// - /// TODO: Async drop might eventually allow for making this blocking more explicit. - /// - pub fn attempt_shutdown_sync(&mut self) -> Result<(), String> { - if let Some(graceful_shutdown_timeout) = self.graceful_shutdown_timeout { - // If we fail to send SIGINT, then we will also fail to send SIGKILL, so we return eagerly - // on error here. - self.signal_pg(signal::Signal::SIGINT)?; - match self.wait_for_child_exit_sync(graceful_shutdown_timeout) { - Ok(true) => { - // Process was gracefully shutdown: return. - self.killed = true; - return Ok(()); + + fn get_pgid(&self) -> Result { + let pid = self.id().ok_or_else(|| "Process had no PID.".to_owned())?; + let pgid = getpgid(Some(Pid::from_raw(pid as i32))) + .map_err(|e| format!("Could not get process group id of child process: {e}"))?; + Ok(pgid) + } + + /// Send a signal to the child process group. 
+ fn signal_pg>>(&mut self, signal: T) -> Result<(), String> { + let pgid = self.get_pgid()?; + // the negative PGID will signal the entire process group. + signal::kill(Pid::from_raw(-pgid.as_raw()), signal) + .map_err(|e| format!("Failed to interrupt child process group: {e}"))?; + Ok(()) + } + + /// Check if the child has exited. + /// + /// This returns true if the child has exited with any return code, or false + /// if the child has not yet exited. An error indicated a system error checking + /// the result of the child process, and does not necessarily indicate that + /// has exited or not. + fn check_child_has_exited(&mut self) -> Result { + self.child + .try_wait() + .map(|o| o.is_some()) + .map_err(|e| e.to_string()) + } + + /// Synchronously wait for the child to exit. + /// + /// This method will repeatedly poll the child process until it exits, an error occurrs + /// or the timeout is reached. + /// + /// A return value of Ok(true) indicates that the child has terminated, Ok(false) indicates + /// that we reached the max_wait_duration while waiting for the child to terminate. + /// + /// This method *will* block the current thread but will do so for a bounded amount of time. + fn wait_for_child_exit_sync( + &mut self, + max_wait_duration: time::Duration, + ) -> Result { + let maybe_id = self.child.id(); + let deadline = time::Instant::now() + max_wait_duration; + while time::Instant::now() <= deadline { + if self.check_child_has_exited()? { + return Ok(true); + } + log::debug!("Waiting for {:?} to exit...", maybe_id); + thread::sleep(GRACEFUL_SHUTDOWN_POLL_TIME); } - Ok(false) => { - // We timed out waiting for the child to exit, so we need to kill it. - log::warn!( + // If we get here we have timed-out. + Ok(false) + } + + /// Attempt to shutdown the process (gracefully, if was configured that way at creation). + /// + /// Graceful shutdown will send a SIGINT to the process and give it a chance to exit. If the + /// process does not respond to the SIGINT within a fixed interval, a SIGKILL will be sent. + /// + /// NB: This method *will* block the current thread but it will do so for a bounded amount of time, + /// as long as the operating system responds to `SIGKILL` in a bounded amount of time. + /// + /// TODO: Async drop might eventually allow for making this blocking more explicit. + /// + pub fn attempt_shutdown_sync(&mut self) -> Result<(), String> { + if let Some(graceful_shutdown_timeout) = self.graceful_shutdown_timeout { + // If we fail to send SIGINT, then we will also fail to send SIGKILL, so we return eagerly + // on error here. + self.signal_pg(signal::Signal::SIGINT)?; + match self.wait_for_child_exit_sync(graceful_shutdown_timeout) { + Ok(true) => { + // Process was gracefully shutdown: return. + self.killed = true; + return Ok(()); + } + Ok(false) => { + // We timed out waiting for the child to exit, so we need to kill it. + log::warn!( "Timed out waiting for graceful shutdown of process group. Will try SIGKILL instead." ); + } + Err(e) => { + log::warn!("An error occurred while waiting for graceful shutdown of process group ({}). Will try SIGKILL instead.", e); + } + } } - Err(e) => { - log::warn!("An error occurred while waiting for graceful shutdown of process group ({}). Will try SIGKILL instead.", e); - } - } + + self.kill_pgid() } - self.kill_pgid() - } - - /// Kill the process's unique PGID or return an error if we don't have a PID or cannot kill. 
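// Editorial sketch: `signal_pg` above sends the signal to `-pgid`; with kill(2), a
// negative PID addresses the whole process group, so the child and anything it spawned
// are signalled together. A standalone sketch, assuming `pgid` was obtained via
// `getpgid` as in `get_pgid` above:
use nix::sys::signal::{self, Signal};
use nix::unistd::Pid;

fn interrupt_process_group(pgid: i32) -> Result<(), String> {
    signal::kill(Pid::from_raw(-pgid), Signal::SIGINT)
        .map_err(|e| format!("Failed to interrupt process group: {e}"))
}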
- fn kill_pgid(&mut self) -> Result<(), String> { - self.signal_pg(signal::Signal::SIGKILL)?; - // NB: Since the SIGKILL was successfully delivered above, the only things that could cause the - // child not to eventually exit would be if it had become a zombie (which shouldn't be possible, - // because we are its parent process, and we are still alive). - let _ = self.wait_for_child_exit_sync(time::Duration::from_secs(1800))?; - self.killed = true; - Ok(()) - } + /// Kill the process's unique PGID or return an error if we don't have a PID or cannot kill. + fn kill_pgid(&mut self) -> Result<(), String> { + self.signal_pg(signal::Signal::SIGKILL)?; + // NB: Since the SIGKILL was successfully delivered above, the only things that could cause the + // child not to eventually exit would be if it had become a zombie (which shouldn't be possible, + // because we are its parent process, and we are still alive). + let _ = self.wait_for_child_exit_sync(time::Duration::from_secs(1800))?; + self.killed = true; + Ok(()) + } } impl Deref for ManagedChild { - type Target = Child; + type Target = Child; - fn deref(&self) -> &Child { - &self.child - } + fn deref(&self) -> &Child { + &self.child + } } impl DerefMut for ManagedChild { - fn deref_mut(&mut self) -> &mut Child { - &mut self.child - } + fn deref_mut(&mut self) -> &mut Child { + &mut self.child + } } /// Implements drop by killing the process group. impl Drop for ManagedChild { - fn drop(&mut self) { - if !self.killed { - let _ = self.attempt_shutdown_sync(); + fn drop(&mut self) { + if !self.killed { + let _ = self.attempt_shutdown_sync(); + } } - } } diff --git a/src/rust/engine/process_execution/src/lib.rs b/src/rust/engine/process_execution/src/lib.rs index 967c7f93c6d..227c95905e9 100644 --- a/src/rust/engine/process_execution/src/lib.rs +++ b/src/rust/engine/process_execution/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -107,161 +107,161 @@ pub const CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME: &str = "PANTS_CACHE_KEY_TARGET #[derive(Clone, Debug, PartialEq, Eq)] pub enum ProcessError { - /// A Digest was not present in either of the local or remote Stores. - MissingDigest(String, Digest), - /// All other error types. - Unclassified(String), + /// A Digest was not present in either of the local or remote Stores. + MissingDigest(String, Digest), + /// All other error types. 
+ Unclassified(String), } impl ProcessError { - pub fn enrich(self, prefix: &str) -> Self { - match self { - Self::MissingDigest(s, d) => Self::MissingDigest(format!("{prefix}: {s}"), d), - Self::Unclassified(s) => Self::Unclassified(format!("{prefix}: {s}")), + pub fn enrich(self, prefix: &str) -> Self { + match self { + Self::MissingDigest(s, d) => Self::MissingDigest(format!("{prefix}: {s}"), d), + Self::Unclassified(s) => Self::Unclassified(format!("{prefix}: {s}")), + } } - } } impl Display for ProcessError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::MissingDigest(s, d) => { - write!(f, "{s}: {d:?}") - } - Self::Unclassified(s) => write!(f, "{s}"), + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::MissingDigest(s, d) => { + write!(f, "{s}: {d:?}") + } + Self::Unclassified(s) => write!(f, "{s}"), + } } - } } impl From for ProcessError { - fn from(err: StoreError) -> Self { - match err { - StoreError::MissingDigest(s, d) => Self::MissingDigest(s, d), - StoreError::Unclassified(s) => Self::Unclassified(s), + fn from(err: StoreError) -> Self { + match err { + StoreError::MissingDigest(s, d) => Self::MissingDigest(s, d), + StoreError::Unclassified(s) => Self::Unclassified(s), + } } - } } impl From for ProcessError { - fn from(err: String) -> Self { - Self::Unclassified(err) - } + fn from(err: String) -> Self { + Self::Unclassified(err) + } } #[derive( - PartialOrd, Ord, Clone, Copy, Debug, DeepSizeOf, Eq, PartialEq, Hash, Serialize, Deserialize, + PartialOrd, Ord, Clone, Copy, Debug, DeepSizeOf, Eq, PartialEq, Hash, Serialize, Deserialize, )] #[allow(non_camel_case_types)] pub enum Platform { - Macos_x86_64, - Macos_arm64, - Linux_x86_64, - Linux_arm64, + Macos_x86_64, + Macos_arm64, + Linux_x86_64, + Linux_arm64, } impl Platform { - pub fn current() -> Result { - let platform_info = - uname::uname().map_err(|_| "Failed to get local platform info!".to_string())?; - match platform_info { - uname::Info { - ref sysname, - ref machine, - .. - } if sysname.to_lowercase() == "linux" && machine.to_lowercase() == "x86_64" => { - Ok(Platform::Linux_x86_64) - } - uname::Info { - ref sysname, - ref machine, - .. - } if sysname.to_lowercase() == "linux" - && (machine.to_lowercase() == "arm64" || machine.to_lowercase() == "aarch64") => - { - Ok(Platform::Linux_arm64) - } - uname::Info { - ref sysname, - ref machine, - .. - } if sysname.to_lowercase() == "darwin" && machine.to_lowercase() == "arm64" => { - Ok(Platform::Macos_arm64) - } - uname::Info { - ref sysname, - ref machine, - .. - } if sysname.to_lowercase() == "darwin" && machine.to_lowercase() == "x86_64" => { - Ok(Platform::Macos_x86_64) - } - uname::Info { - ref sysname, - ref machine, - .. - } => Err(format!( - "Found unknown system/arch name pair {sysname} {machine}" - )), + pub fn current() -> Result { + let platform_info = + uname::uname().map_err(|_| "Failed to get local platform info!".to_string())?; + match platform_info { + uname::Info { + ref sysname, + ref machine, + .. + } if sysname.to_lowercase() == "linux" && machine.to_lowercase() == "x86_64" => { + Ok(Platform::Linux_x86_64) + } + uname::Info { + ref sysname, + ref machine, + .. + } if sysname.to_lowercase() == "linux" + && (machine.to_lowercase() == "arm64" || machine.to_lowercase() == "aarch64") => + { + Ok(Platform::Linux_arm64) + } + uname::Info { + ref sysname, + ref machine, + .. 
+ } if sysname.to_lowercase() == "darwin" && machine.to_lowercase() == "arm64" => { + Ok(Platform::Macos_arm64) + } + uname::Info { + ref sysname, + ref machine, + .. + } if sysname.to_lowercase() == "darwin" && machine.to_lowercase() == "x86_64" => { + Ok(Platform::Macos_x86_64) + } + uname::Info { + ref sysname, + ref machine, + .. + } => Err(format!( + "Found unknown system/arch name pair {sysname} {machine}" + )), + } } - } } impl From for String { - fn from(platform: Platform) -> String { - match platform { - Platform::Linux_x86_64 => "linux_x86_64".to_string(), - Platform::Linux_arm64 => "linux_arm64".to_string(), - Platform::Macos_arm64 => "macos_arm64".to_string(), - Platform::Macos_x86_64 => "macos_x86_64".to_string(), + fn from(platform: Platform) -> String { + match platform { + Platform::Linux_x86_64 => "linux_x86_64".to_string(), + Platform::Linux_arm64 => "linux_arm64".to_string(), + Platform::Macos_arm64 => "macos_arm64".to_string(), + Platform::Macos_x86_64 => "macos_x86_64".to_string(), + } } - } } impl TryFrom for Platform { - type Error = String; - fn try_from(variant_candidate: String) -> Result { - match variant_candidate.as_ref() { - "macos_arm64" => Ok(Platform::Macos_arm64), - "macos_x86_64" => Ok(Platform::Macos_x86_64), - "linux_x86_64" => Ok(Platform::Linux_x86_64), - "linux_arm64" => Ok(Platform::Linux_arm64), - other => Err(format!("Unknown platform {other:?} encountered in parsing")), + type Error = String; + fn try_from(variant_candidate: String) -> Result { + match variant_candidate.as_ref() { + "macos_arm64" => Ok(Platform::Macos_arm64), + "macos_x86_64" => Ok(Platform::Macos_x86_64), + "linux_x86_64" => Ok(Platform::Linux_x86_64), + "linux_arm64" => Ok(Platform::Linux_arm64), + other => Err(format!("Unknown platform {other:?} encountered in parsing")), + } } - } } #[derive(Clone, Copy, Debug, DeepSizeOf, Eq, PartialEq, Hash, Serialize)] pub enum ProcessCacheScope { - // Cached in all locations, regardless of success or failure. - Always, - // Cached in all locations, but only if the process exits successfully. - Successful, - // Cached only in memory (i.e. memoized in pantsd), but never persistently, regardless of - // success vs. failure. - PerRestartAlways, - // Cached only in memory (i.e. memoized in pantsd), but never persistently, and only if - // successful. - PerRestartSuccessful, - // Will run once per Session, i.e. once per run of Pants. This happens because the engine - // de-duplicates identical work; the process is neither memoized in memory nor cached to disk. - PerSession, + // Cached in all locations, regardless of success or failure. + Always, + // Cached in all locations, but only if the process exits successfully. + Successful, + // Cached only in memory (i.e. memoized in pantsd), but never persistently, regardless of + // success vs. failure. + PerRestartAlways, + // Cached only in memory (i.e. memoized in pantsd), but never persistently, and only if + // successful. + PerRestartSuccessful, + // Will run once per Session, i.e. once per run of Pants. This happens because the engine + // de-duplicates identical work; the process is neither memoized in memory nor cached to disk. 
+ PerSession, } impl TryFrom for ProcessCacheScope { - type Error = String; - fn try_from(variant_candidate: String) -> Result { - match variant_candidate.to_lowercase().as_ref() { - "always" => Ok(ProcessCacheScope::Always), - "successful" => Ok(ProcessCacheScope::Successful), - "per_restart_always" => Ok(ProcessCacheScope::PerRestartAlways), - "per_restart_successful" => Ok(ProcessCacheScope::PerRestartSuccessful), - "per_session" => Ok(ProcessCacheScope::PerSession), - other => Err(format!("Unknown Process cache scope: {other:?}")), + type Error = String; + fn try_from(variant_candidate: String) -> Result { + match variant_candidate.to_lowercase().as_ref() { + "always" => Ok(ProcessCacheScope::Always), + "successful" => Ok(ProcessCacheScope::Successful), + "per_restart_always" => Ok(ProcessCacheScope::PerRestartAlways), + "per_restart_successful" => Ok(ProcessCacheScope::PerRestartSuccessful), + "per_session" => Ok(ProcessCacheScope::PerSession), + other => Err(format!("Unknown Process cache scope: {other:?}")), + } } - } } fn serialize_level(level: &log::Level, s: S) -> Result { - s.serialize_str(&level.to_string()) + s.serialize_str(&level.to_string()) } /// Input Digests for a process execution. @@ -271,222 +271,222 @@ fn serialize_level(level: &log::Level, s: S) -> Result, - - /// If non-empty, use nailgun in supported runners, using the specified `immutable_inputs` keys - /// as server inputs. All other keys (and the input_files) will be client inputs. - pub use_nailgun: BTreeSet, + /// All of the input Digests, merged and relativized. Runners without the ability to consume the + /// Digests individually should directly consume this value. + pub complete: DirectoryDigest, + + /// The merged Digest of any `use_nailgun`-relevant Digests. + pub nailgun: DirectoryDigest, + + /// The input files for the process execution, which will be materialized as mutable inputs in a + /// sandbox for the process. + /// + pub inputs: DirectoryDigest, + + /// Immutable input digests to make available in the input root. + /// + /// These digests are intended for inputs that will be reused between multiple Process + /// invocations, without being mutated. This might be useful to provide the tools being executed, + /// but can also be used for tool inputs such as compilation artifacts. + /// + /// The digests will be mounted at the relative path represented by the `RelativePath` keys. + /// The executor may choose how to make the digests available, including by just merging + /// the digest normally into the input root, creating a symlink to a persistent cache, + /// or bind mounting the directory read-only into a persistent cache. Consequently, the mount + /// point of each input must not overlap the `input_files`, even for directory entries. + /// + /// Assumes the build action does not modify the Digest as made available. This may be + /// enforced by an executor, for example by bind mounting the directory read-only. + pub immutable_inputs: BTreeMap, + + /// If non-empty, use nailgun in supported runners, using the specified `immutable_inputs` keys + /// as server inputs. All other keys (and the input_files) will be client inputs. + pub use_nailgun: BTreeSet, } impl InputDigests { - pub async fn new( - store: &Store, - inputs: DirectoryDigest, - immutable_inputs: BTreeMap, - use_nailgun: BTreeSet, - ) -> Result { - // Collect all digests into `complete`. 
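// Editorial sketch: both `Platform` and `ProcessCacheScope` above convert to and from
// stable string names. A small usage sketch of those conversions, relying only on the
// impls shown in this file (`TryFrom` is in the 2021-edition prelude):
fn parse_examples() -> Result<(), String> {
    let platform = Platform::try_from("linux_x86_64".to_string())?;
    assert_eq!(String::from(platform), "linux_x86_64");

    // Cache-scope parsing lowercases its input first, so mixed case is accepted.
    let scope = ProcessCacheScope::try_from("PER_SESSION".to_string())?;
    assert!(matches!(scope, ProcessCacheScope::PerSession));
    Ok(())
}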
- let mut complete_digests = try_join_all( - immutable_inputs - .iter() - .map(|(path, digest)| store.add_prefix(digest.clone(), path)) - .collect::>(), - ) - .await?; - // And collect only the subset of the Digests which impact nailgun into `nailgun`. - let nailgun_digests = immutable_inputs - .keys() - .zip(complete_digests.iter()) - .filter_map(|(path, digest)| { - if use_nailgun.contains(path) { - Some(digest.clone()) - } else { - None - } - }) - .collect::>(); - complete_digests.push(inputs.clone()); - - let (complete, nailgun) = - try_join!(store.merge(complete_digests), store.merge(nailgun_digests),)?; - Ok(Self { - complete, - nailgun, - inputs, - immutable_inputs, - use_nailgun, - }) - } - - pub async fn new_from_merged(store: &Store, from: Vec) -> Result { - let mut merged_immutable_inputs = BTreeMap::new(); - for input_digests in from.iter() { - let size_before = merged_immutable_inputs.len(); - let immutable_inputs = &input_digests.immutable_inputs; - merged_immutable_inputs.append(&mut immutable_inputs.clone()); - if size_before + immutable_inputs.len() != merged_immutable_inputs.len() { - return Err( - format!( + pub async fn new( + store: &Store, + inputs: DirectoryDigest, + immutable_inputs: BTreeMap, + use_nailgun: BTreeSet, + ) -> Result { + // Collect all digests into `complete`. + let mut complete_digests = try_join_all( + immutable_inputs + .iter() + .map(|(path, digest)| store.add_prefix(digest.clone(), path)) + .collect::>(), + ) + .await?; + // And collect only the subset of the Digests which impact nailgun into `nailgun`. + let nailgun_digests = immutable_inputs + .keys() + .zip(complete_digests.iter()) + .filter_map(|(path, digest)| { + if use_nailgun.contains(path) { + Some(digest.clone()) + } else { + None + } + }) + .collect::>(); + complete_digests.push(inputs.clone()); + + let (complete, nailgun) = + try_join!(store.merge(complete_digests), store.merge(nailgun_digests),)?; + Ok(Self { + complete, + nailgun, + inputs, + immutable_inputs, + use_nailgun, + }) + } + + pub async fn new_from_merged( + store: &Store, + from: Vec, + ) -> Result { + let mut merged_immutable_inputs = BTreeMap::new(); + for input_digests in from.iter() { + let size_before = merged_immutable_inputs.len(); + let immutable_inputs = &input_digests.immutable_inputs; + merged_immutable_inputs.append(&mut immutable_inputs.clone()); + if size_before + immutable_inputs.len() != merged_immutable_inputs.len() { + return Err(format!( "Tried to merge two-or-more immutable inputs at the same path with different values! 
\ The collision involved one of the entries in: {immutable_inputs:?}" ) - .into(), - ); - } + .into()); + } + } + + let complete_digests = from + .iter() + .map(|input_digests| input_digests.complete.clone()) + .collect(); + let nailgun_digests = from + .iter() + .map(|input_digests| input_digests.nailgun.clone()) + .collect(); + let input_files_digests = from + .iter() + .map(|input_digests| input_digests.inputs.clone()) + .collect(); + let (complete, nailgun, inputs) = try_join!( + store.merge(complete_digests), + store.merge(nailgun_digests), + store.merge(input_files_digests), + )?; + Ok(Self { + complete, + nailgun, + inputs, + immutable_inputs: merged_immutable_inputs, + use_nailgun: Itertools::concat( + from.iter() + .map(|input_digests| input_digests.use_nailgun.clone()), + ) + .into_iter() + .collect(), + }) } - let complete_digests = from - .iter() - .map(|input_digests| input_digests.complete.clone()) - .collect(); - let nailgun_digests = from - .iter() - .map(|input_digests| input_digests.nailgun.clone()) - .collect(); - let input_files_digests = from - .iter() - .map(|input_digests| input_digests.inputs.clone()) - .collect(); - let (complete, nailgun, inputs) = try_join!( - store.merge(complete_digests), - store.merge(nailgun_digests), - store.merge(input_files_digests), - )?; - Ok(Self { - complete, - nailgun, - inputs, - immutable_inputs: merged_immutable_inputs, - use_nailgun: Itertools::concat( - from - .iter() - .map(|input_digests| input_digests.use_nailgun.clone()), - ) - .into_iter() - .collect(), - }) - } - - pub fn with_input_files(inputs: DirectoryDigest) -> Self { - Self { - complete: inputs.clone(), - nailgun: EMPTY_DIRECTORY_DIGEST.clone(), - inputs, - immutable_inputs: BTreeMap::new(), - use_nailgun: BTreeSet::new(), + pub fn with_input_files(inputs: DirectoryDigest) -> Self { + Self { + complete: inputs.clone(), + nailgun: EMPTY_DIRECTORY_DIGEST.clone(), + inputs, + immutable_inputs: BTreeMap::new(), + use_nailgun: BTreeSet::new(), + } + } + + /// Split the InputDigests into client and server subsets. + /// + /// TODO: The server subset will have an accurate `complete` Digest, but the client will not. + /// This is currently safe because the nailgun client code does not consume that field, but it + /// would be good to find a better factoring. + pub fn nailgun_client_and_server(&self) -> (InputDigests, InputDigests) { + let (server, client) = self + .immutable_inputs + .clone() + .into_iter() + .partition(|(path, _digest)| self.use_nailgun.contains(path)); + + ( + // Client. + InputDigests { + // TODO: See method doc. + complete: EMPTY_DIRECTORY_DIGEST.clone(), + nailgun: EMPTY_DIRECTORY_DIGEST.clone(), + inputs: self.inputs.clone(), + immutable_inputs: client, + use_nailgun: BTreeSet::new(), + }, + // Server. + InputDigests { + complete: self.nailgun.clone(), + nailgun: EMPTY_DIRECTORY_DIGEST.clone(), + inputs: EMPTY_DIRECTORY_DIGEST.clone(), + immutable_inputs: server, + use_nailgun: BTreeSet::new(), + }, + ) } - } - - /// Split the InputDigests into client and server subsets. - /// - /// TODO: The server subset will have an accurate `complete` Digest, but the client will not. - /// This is currently safe because the nailgun client code does not consume that field, but it - /// would be good to find a better factoring. - pub fn nailgun_client_and_server(&self) -> (InputDigests, InputDigests) { - let (server, client) = self - .immutable_inputs - .clone() - .into_iter() - .partition(|(path, _digest)| self.use_nailgun.contains(path)); - - ( - // Client. 
- InputDigests { - // TODO: See method doc. - complete: EMPTY_DIRECTORY_DIGEST.clone(), - nailgun: EMPTY_DIRECTORY_DIGEST.clone(), - inputs: self.inputs.clone(), - immutable_inputs: client, - use_nailgun: BTreeSet::new(), - }, - // Server. - InputDigests { - complete: self.nailgun.clone(), - nailgun: EMPTY_DIRECTORY_DIGEST.clone(), - inputs: EMPTY_DIRECTORY_DIGEST.clone(), - immutable_inputs: server, - use_nailgun: BTreeSet::new(), - }, - ) - } } impl Default for InputDigests { - fn default() -> Self { - Self { - complete: EMPTY_DIRECTORY_DIGEST.clone(), - nailgun: EMPTY_DIRECTORY_DIGEST.clone(), - inputs: EMPTY_DIRECTORY_DIGEST.clone(), - immutable_inputs: BTreeMap::new(), - use_nailgun: BTreeSet::new(), + fn default() -> Self { + Self { + complete: EMPTY_DIRECTORY_DIGEST.clone(), + nailgun: EMPTY_DIRECTORY_DIGEST.clone(), + inputs: EMPTY_DIRECTORY_DIGEST.clone(), + immutable_inputs: BTreeMap::new(), + use_nailgun: BTreeSet::new(), + } } - } } #[derive(DeepSizeOf, Debug, Clone, Hash, PartialEq, Eq, Serialize)] pub enum ProcessExecutionStrategy { - Local, - /// Stores the platform_properties. - RemoteExecution(Vec<(String, String)>), - /// Stores the image name. - Docker(String), + Local, + /// Stores the platform_properties. + RemoteExecution(Vec<(String, String)>), + /// Stores the image name. + Docker(String), } impl ProcessExecutionStrategy { - /// What to insert into the Command proto so that we don't incorrectly cache - /// Docker vs remote execution vs local execution. - pub fn cache_value(&self) -> String { - match self { - Self::Local => "local_execution".to_string(), - Self::RemoteExecution(_) => "remote_execution".to_string(), - // NB: this image will include the container ID, thanks to - // https://github.com/pantsbuild/pants/pull/17101. - Self::Docker(image) => format!("docker_execution: {image}"), + /// What to insert into the Command proto so that we don't incorrectly cache + /// Docker vs remote execution vs local execution. + pub fn cache_value(&self) -> String { + match self { + Self::Local => "local_execution".to_string(), + Self::RemoteExecution(_) => "remote_execution".to_string(), + // NB: this image will include the container ID, thanks to + // https://github.com/pantsbuild/pants/pull/17101. + Self::Docker(image) => format!("docker_execution: {image}"), + } } - } - pub fn strategy_type(&self) -> &'static str { - match self { - Self::Local => "local", - Self::RemoteExecution(_) => "remote", - Self::Docker(_) => "docker", + pub fn strategy_type(&self) -> &'static str { + match self { + Self::Local => "local", + Self::RemoteExecution(_) => "remote", + Self::Docker(_) => "docker", + } } - } } #[derive(DeepSizeOf, Debug, Clone, Hash, PartialEq, Eq, Serialize)] pub struct ProcessExecutionEnvironment { - /// The name of the environment the process is running in, or None if it is running in the - /// default (local) environment. - pub name: Option, - pub platform: Platform, - pub strategy: ProcessExecutionStrategy, + /// The name of the environment the process is running in, or None if it is running in the + /// default (local) environment. + pub name: Option, + pub platform: Platform, + pub strategy: ProcessExecutionStrategy, } /// @@ -498,215 +498,215 @@ pub struct ProcessExecutionEnvironment { #[derive(DeepSizeOf, Derivative, Clone, Debug, Eq, Serialize)] #[derivative(PartialEq, Hash)] pub struct Process { - /// - /// The arguments to execute. - /// - /// The first argument should be an absolute or relative path to the binary to execute. 
- /// - /// No PATH lookup will be performed unless a PATH environment variable is specified. - /// - /// No shell expansion will take place. - /// - pub argv: Vec, - /// - /// The environment variables to set for the execution. - /// - /// No other environment variables will be set (except possibly for an empty PATH variable). - /// - pub env: BTreeMap, - - /// - /// A relative path to a directory existing in the `input_files` digest to execute the process - /// from. Defaults to the `input_files` root. - /// - pub working_directory: Option, - - /// - /// All of the input digests for the process. - /// - pub input_digests: InputDigests, - - pub output_files: BTreeSet, - - pub output_directories: BTreeSet, - - pub timeout: Option, - - /// If not None, then a bounded::CommandRunner executing this Process will set an environment - /// variable with this name containing a unique execution slot number. - pub execution_slot_variable: Option, - - /// If non-zero, the amount of parallelism that this process is capable of given its inputs. This - /// value does not directly set the number of cores allocated to the process: that is computed - /// based on availability, and provided as a template value in the arguments of the process. - /// - /// When set, a `{pants_concurrency}` variable will be templated into the `argv` of the process. - /// - /// Processes which set this value may be preempted (i.e. canceled and restarted) for a short - /// period after starting if available resources have changed (because other processes have - /// started or finished). - pub concurrency_available: usize, - - #[derivative(PartialEq = "ignore", Hash = "ignore")] - pub description: String, - - // NB: We serialize with a function to avoid adding a serde dep to the logging crate. - #[serde(serialize_with = "serialize_level")] - pub level: log::Level, - - /// - /// Declares that this process uses the given named caches (which might have associated config - /// in the future) at the associated relative paths within its workspace. Cache names must - /// contain only lowercase ascii characters or underscores. - /// - /// Caches are exposed to processes within their workspaces at the relative paths represented - /// by the values of the dict. A process may optionally check for the existence of the relevant - /// directory, and disable use of that cache if it has not been created by the executor - /// (indicating a lack of support for this feature). - /// - /// These caches are globally shared and so must be concurrency safe: a consumer of the cache - /// must never assume that it has exclusive access to the provided directory. - /// - pub append_only_caches: BTreeMap, - - /// - /// If present, a symlink will be created at .jdk which points to this directory for local - /// execution, or a system-installed JDK (ignoring the value of the present Some) for remote - /// execution. - /// - /// This is some technical debt we should clean up; - /// see . - /// - pub jdk_home: Option, - - pub cache_scope: ProcessCacheScope, - - pub execution_environment: ProcessExecutionEnvironment, - - pub remote_cache_speculation_delay: std::time::Duration, - - /// - /// The attempt number, in the case this Process is being retried. - /// - /// This is included in hash/eq so it creates a unique node in the runtime graph. - /// - pub attempt: usize, + /// + /// The arguments to execute. + /// + /// The first argument should be an absolute or relative path to the binary to execute. 
+ /// + /// No PATH lookup will be performed unless a PATH environment variable is specified. + /// + /// No shell expansion will take place. + /// + pub argv: Vec, + /// + /// The environment variables to set for the execution. + /// + /// No other environment variables will be set (except possibly for an empty PATH variable). + /// + pub env: BTreeMap, + + /// + /// A relative path to a directory existing in the `input_files` digest to execute the process + /// from. Defaults to the `input_files` root. + /// + pub working_directory: Option, + + /// + /// All of the input digests for the process. + /// + pub input_digests: InputDigests, + + pub output_files: BTreeSet, + + pub output_directories: BTreeSet, + + pub timeout: Option, + + /// If not None, then a bounded::CommandRunner executing this Process will set an environment + /// variable with this name containing a unique execution slot number. + pub execution_slot_variable: Option, + + /// If non-zero, the amount of parallelism that this process is capable of given its inputs. This + /// value does not directly set the number of cores allocated to the process: that is computed + /// based on availability, and provided as a template value in the arguments of the process. + /// + /// When set, a `{pants_concurrency}` variable will be templated into the `argv` of the process. + /// + /// Processes which set this value may be preempted (i.e. canceled and restarted) for a short + /// period after starting if available resources have changed (because other processes have + /// started or finished). + pub concurrency_available: usize, + + #[derivative(PartialEq = "ignore", Hash = "ignore")] + pub description: String, + + // NB: We serialize with a function to avoid adding a serde dep to the logging crate. + #[serde(serialize_with = "serialize_level")] + pub level: log::Level, + + /// + /// Declares that this process uses the given named caches (which might have associated config + /// in the future) at the associated relative paths within its workspace. Cache names must + /// contain only lowercase ascii characters or underscores. + /// + /// Caches are exposed to processes within their workspaces at the relative paths represented + /// by the values of the dict. A process may optionally check for the existence of the relevant + /// directory, and disable use of that cache if it has not been created by the executor + /// (indicating a lack of support for this feature). + /// + /// These caches are globally shared and so must be concurrency safe: a consumer of the cache + /// must never assume that it has exclusive access to the provided directory. + /// + pub append_only_caches: BTreeMap, + + /// + /// If present, a symlink will be created at .jdk which points to this directory for local + /// execution, or a system-installed JDK (ignoring the value of the present Some) for remote + /// execution. + /// + /// This is some technical debt we should clean up; + /// see . + /// + pub jdk_home: Option, + + pub cache_scope: ProcessCacheScope, + + pub execution_environment: ProcessExecutionEnvironment, + + pub remote_cache_speculation_delay: std::time::Duration, + + /// + /// The attempt number, in the case this Process is being retried. + /// + /// This is included in hash/eq so it creates a unique node in the runtime graph. + /// + pub attempt: usize, } impl Process { - /// - /// Constructs a Process with default values for most fields, after which the builder pattern can - /// be used to set values. 
- /// - /// We use the more ergonomic (but possibly slightly slower) "move self for each builder method" - /// pattern, so this method should only be used in tests: production usage should construct the - /// Process struct wholesale. We can reconsider this if we end up with more production callsites - /// that require partial options. - /// - /// NB: Some of the default values used in this constructor only make sense in tests. - /// - pub fn new(argv: Vec) -> Process { - Process { - argv, - env: BTreeMap::new(), - working_directory: None, - input_digests: InputDigests::default(), - output_files: BTreeSet::new(), - output_directories: BTreeSet::new(), - timeout: None, - description: "".to_string(), - level: log::Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: None, - execution_slot_variable: None, - concurrency_available: 0, - cache_scope: ProcessCacheScope::Successful, - execution_environment: ProcessExecutionEnvironment { - name: None, - platform: Platform::current().unwrap(), - strategy: ProcessExecutionStrategy::Local, - }, - remote_cache_speculation_delay: std::time::Duration::from_millis(0), - attempt: 0, + /// + /// Constructs a Process with default values for most fields, after which the builder pattern can + /// be used to set values. + /// + /// We use the more ergonomic (but possibly slightly slower) "move self for each builder method" + /// pattern, so this method should only be used in tests: production usage should construct the + /// Process struct wholesale. We can reconsider this if we end up with more production callsites + /// that require partial options. + /// + /// NB: Some of the default values used in this constructor only make sense in tests. + /// + pub fn new(argv: Vec) -> Process { + Process { + argv, + env: BTreeMap::new(), + working_directory: None, + input_digests: InputDigests::default(), + output_files: BTreeSet::new(), + output_directories: BTreeSet::new(), + timeout: None, + description: "".to_string(), + level: log::Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: None, + execution_slot_variable: None, + concurrency_available: 0, + cache_scope: ProcessCacheScope::Successful, + execution_environment: ProcessExecutionEnvironment { + name: None, + platform: Platform::current().unwrap(), + strategy: ProcessExecutionStrategy::Local, + }, + remote_cache_speculation_delay: std::time::Duration::from_millis(0), + attempt: 0, + } + } + + /// + /// Replaces the environment for this process. + /// + pub fn env(mut self, env: BTreeMap) -> Process { + self.env = env; + self + } + + /// + /// Replaces the working_directory for this process. + /// + pub fn working_directory(mut self, working_directory: Option) -> Process { + self.working_directory = working_directory; + self + } + + /// + /// Replaces the output files for this process. + /// + pub fn output_files(mut self, output_files: BTreeSet) -> Process { + self.output_files = output_files; + self + } + + /// + /// Replaces the output directories for this process. + /// + pub fn output_directories(mut self, output_directories: BTreeSet) -> Process { + self.output_directories = output_directories; + self + } + + /// + /// Replaces the append only caches for this process. + /// + pub fn append_only_caches( + mut self, + append_only_caches: BTreeMap, + ) -> Process { + self.append_only_caches = append_only_caches; + self + } + + /// + /// Set the execution environment to Docker, with the specified image. 
+ /// + pub fn docker(mut self, image: String) -> Process { + self.execution_environment = ProcessExecutionEnvironment { + name: None, + platform: Platform::current().unwrap(), + strategy: ProcessExecutionStrategy::Docker(image), + }; + self + } + + /// + /// Set the execution environment to remote execution with the specified platform properties. + /// + pub fn remote_execution(mut self, properties: Vec<(String, String)>) -> Process { + self.execution_environment = ProcessExecutionEnvironment { + name: None, + platform: Platform::current().unwrap(), + strategy: ProcessExecutionStrategy::RemoteExecution(properties), + }; + self + } + + pub fn remote_cache_speculation_delay(mut self, delay: std::time::Duration) -> Process { + self.remote_cache_speculation_delay = delay; + self + } + + pub fn cache_scope(mut self, cache_scope: ProcessCacheScope) -> Process { + self.cache_scope = cache_scope; + self } - } - - /// - /// Replaces the environment for this process. - /// - pub fn env(mut self, env: BTreeMap) -> Process { - self.env = env; - self - } - - /// - /// Replaces the working_directory for this process. - /// - pub fn working_directory(mut self, working_directory: Option) -> Process { - self.working_directory = working_directory; - self - } - - /// - /// Replaces the output files for this process. - /// - pub fn output_files(mut self, output_files: BTreeSet) -> Process { - self.output_files = output_files; - self - } - - /// - /// Replaces the output directories for this process. - /// - pub fn output_directories(mut self, output_directories: BTreeSet) -> Process { - self.output_directories = output_directories; - self - } - - /// - /// Replaces the append only caches for this process. - /// - pub fn append_only_caches( - mut self, - append_only_caches: BTreeMap, - ) -> Process { - self.append_only_caches = append_only_caches; - self - } - - /// - /// Set the execution environment to Docker, with the specified image. - /// - pub fn docker(mut self, image: String) -> Process { - self.execution_environment = ProcessExecutionEnvironment { - name: None, - platform: Platform::current().unwrap(), - strategy: ProcessExecutionStrategy::Docker(image), - }; - self - } - - /// - /// Set the execution environment to remote execution with the specified platform properties. 
- /// - pub fn remote_execution(mut self, properties: Vec<(String, String)>) -> Process { - self.execution_environment = ProcessExecutionEnvironment { - name: None, - platform: Platform::current().unwrap(), - strategy: ProcessExecutionStrategy::RemoteExecution(properties), - }; - self - } - - pub fn remote_cache_speculation_delay(mut self, delay: std::time::Duration) -> Process { - self.remote_cache_speculation_delay = delay; - self - } - - pub fn cache_scope(mut self, cache_scope: ProcessCacheScope) -> Process { - self.cache_scope = cache_scope; - self - } } /// @@ -717,139 +717,141 @@ impl Process { #[derive(DeepSizeOf, Derivative, Clone, Debug, Eq)] #[derivative(PartialEq, Hash)] pub struct FallibleProcessResultWithPlatform { - pub stdout_digest: Digest, - pub stderr_digest: Digest, - pub exit_code: i32, - pub output_directory: DirectoryDigest, - #[derivative(PartialEq = "ignore", Hash = "ignore")] - pub metadata: ProcessResultMetadata, + pub stdout_digest: Digest, + pub stderr_digest: Digest, + pub exit_code: i32, + pub output_directory: DirectoryDigest, + #[derivative(PartialEq = "ignore", Hash = "ignore")] + pub metadata: ProcessResultMetadata, } #[derive(Clone, Debug, DeepSizeOf, Eq, PartialEq)] pub struct ProcessResultMetadata { - /// The execution time of this process when it ran. - /// - /// Corresponds to `worker_start_timestamp` and `worker_completed_timestamp` from - /// `ExecutedActionMetadata`. - /// - /// NB: This is optional because the REAPI does not guarantee that it is returned. - pub total_elapsed: Option, - /// How much faster a cache hit was than running the process again. - /// - /// This includes the overhead of setting up and cleaning up the process for execution, and it - /// should include all overhead for the cache lookup. - /// - /// If the cache hit was slower than the original process, we return 0. Note that the cache hit - /// may still have been faster than rerunning the process a second time, e.g. if speculation - /// is used and the cache hit completed before the rerun; still, we cannot know how long the - /// second run would have taken, so the best we can do is report 0. - /// - /// If the original process's execution time was not recorded, this may be None. - pub saved_by_cache: Option, - /// The source of the result. - pub source: ProcessResultSource, - /// The environment that the process ran in. - pub environment: ProcessExecutionEnvironment, - /// The RunId of the Session in which the `ProcessResultSource` was accurate. In further runs - /// within the same process, the source of the process implicitly becomes memoization. - pub source_run_id: RunId, + /// The execution time of this process when it ran. + /// + /// Corresponds to `worker_start_timestamp` and `worker_completed_timestamp` from + /// `ExecutedActionMetadata`. + /// + /// NB: This is optional because the REAPI does not guarantee that it is returned. + pub total_elapsed: Option, + /// How much faster a cache hit was than running the process again. + /// + /// This includes the overhead of setting up and cleaning up the process for execution, and it + /// should include all overhead for the cache lookup. + /// + /// If the cache hit was slower than the original process, we return 0. Note that the cache hit + /// may still have been faster than rerunning the process a second time, e.g. if speculation + /// is used and the cache hit completed before the rerun; still, we cannot know how long the + /// second run would have taken, so the best we can do is report 0. 
+ /// + /// If the original process's execution time was not recorded, this may be None. + pub saved_by_cache: Option, + /// The source of the result. + pub source: ProcessResultSource, + /// The environment that the process ran in. + pub environment: ProcessExecutionEnvironment, + /// The RunId of the Session in which the `ProcessResultSource` was accurate. In further runs + /// within the same process, the source of the process implicitly becomes memoization. + pub source_run_id: RunId, } impl ProcessResultMetadata { - pub fn new( - total_elapsed: Option, - source: ProcessResultSource, - environment: ProcessExecutionEnvironment, - source_run_id: RunId, - ) -> Self { - Self { - total_elapsed, - saved_by_cache: None, - source, - environment, - source_run_id, + pub fn new( + total_elapsed: Option, + source: ProcessResultSource, + environment: ProcessExecutionEnvironment, + source_run_id: RunId, + ) -> Self { + Self { + total_elapsed, + saved_by_cache: None, + source, + environment, + source_run_id, + } } - } - pub fn new_from_metadata( - metadata: ExecutedActionMetadata, - source: ProcessResultSource, - environment: ProcessExecutionEnvironment, - source_run_id: RunId, - ) -> Self { - let total_elapsed = match ( - metadata.worker_start_timestamp, - metadata.worker_completed_timestamp, - ) { - (Some(started), Some(completed)) => TimeSpan::from_start_and_end(&started, &completed, "") - .map(|span| span.duration) - .ok(), - _ => None, - }; + pub fn new_from_metadata( + metadata: ExecutedActionMetadata, + source: ProcessResultSource, + environment: ProcessExecutionEnvironment, + source_run_id: RunId, + ) -> Self { + let total_elapsed = match ( + metadata.worker_start_timestamp, + metadata.worker_completed_timestamp, + ) { + (Some(started), Some(completed)) => { + TimeSpan::from_start_and_end(&started, &completed, "") + .map(|span| span.duration) + .ok() + } + _ => None, + }; - Self::new(total_elapsed, source, environment, source_run_id) - } - - pub fn update_cache_hit_elapsed(&mut self, cache_hit_elapsed: std::time::Duration) { - self.saved_by_cache = self.total_elapsed.map(|total_elapsed| { - let total_elapsed: std::time::Duration = total_elapsed.into(); - total_elapsed - .checked_sub(cache_hit_elapsed) - .unwrap_or_else(|| std::time::Duration::new(0, 0)) - .into() - }); - } + Self::new(total_elapsed, source, environment, source_run_id) + } + + pub fn update_cache_hit_elapsed(&mut self, cache_hit_elapsed: std::time::Duration) { + self.saved_by_cache = self.total_elapsed.map(|total_elapsed| { + let total_elapsed: std::time::Duration = total_elapsed.into(); + total_elapsed + .checked_sub(cache_hit_elapsed) + .unwrap_or_else(|| std::time::Duration::new(0, 0)) + .into() + }); + } } impl From for ExecutedActionMetadata { - fn from(metadata: ProcessResultMetadata) -> ExecutedActionMetadata { - let (total_start, total_end) = match metadata.total_elapsed { - Some(elapsed) => { - // Because we do not have the precise start time, we hardcode to starting at UNIX_EPOCH. We - // only care about accurately preserving the duration. - let start = prost_types::Timestamp { - seconds: 0, - nanos: 0, + fn from(metadata: ProcessResultMetadata) -> ExecutedActionMetadata { + let (total_start, total_end) = match metadata.total_elapsed { + Some(elapsed) => { + // Because we do not have the precise start time, we hardcode to starting at UNIX_EPOCH. We + // only care about accurately preserving the duration. 
+ let start = prost_types::Timestamp { + seconds: 0, + nanos: 0, + }; + let end = prost_types::Timestamp { + seconds: elapsed.secs as i64, + nanos: elapsed.nanos as i32, + }; + (Some(start), Some(end)) + } + None => (None, None), }; - let end = prost_types::Timestamp { - seconds: elapsed.secs as i64, - nanos: elapsed.nanos as i32, - }; - (Some(start), Some(end)) - } - None => (None, None), - }; - ExecutedActionMetadata { - worker_start_timestamp: total_start, - worker_completed_timestamp: total_end, - ..ExecutedActionMetadata::default() + ExecutedActionMetadata { + worker_start_timestamp: total_start, + worker_completed_timestamp: total_end, + ..ExecutedActionMetadata::default() + } } - } } #[derive(Clone, Copy, Debug, DeepSizeOf, Eq, PartialEq)] pub enum ProcessResultSource { - Ran, - HitLocally, - HitRemotely, + Ran, + HitLocally, + HitRemotely, } impl From for &'static str { - fn from(prs: ProcessResultSource) -> &'static str { - match prs { - ProcessResultSource::Ran => "ran", - ProcessResultSource::HitLocally => "hit_locally", - ProcessResultSource::HitRemotely => "hit_remotely", + fn from(prs: ProcessResultSource) -> &'static str { + match prs { + ProcessResultSource::Ran => "ran", + ProcessResultSource::HitLocally => "hit_locally", + ProcessResultSource::HitRemotely => "hit_remotely", + } } - } } #[derive(Clone, Copy, Debug, PartialEq, Eq, strum_macros::EnumString)] #[strum(serialize_all = "snake_case")] pub enum CacheContentBehavior { - Fetch, - Validate, - Defer, + Fetch, + Validate, + Defer, } /// @@ -859,448 +861,453 @@ pub enum CacheContentBehavior { /// execution will cause backtracking. /// pub async fn check_cache_content( - response: &FallibleProcessResultWithPlatform, - store: &Store, - cache_content_behavior: CacheContentBehavior, + response: &FallibleProcessResultWithPlatform, + store: &Store, + cache_content_behavior: CacheContentBehavior, ) -> Result { - match cache_content_behavior { - CacheContentBehavior::Fetch => { - let response = response.clone(); - let fetch_result = in_workunit!("eager_fetch_action_cache", Level::Trace, |_workunit| store - .ensure_downloaded( - HashSet::from([response.stdout_digest, response.stderr_digest]), - HashSet::from([response.output_directory]) - )) - .await; - match fetch_result { - Err(StoreError::MissingDigest { .. }) => Ok(false), - Ok(_) => Ok(true), - Err(e) => Err(e), - } - } - CacheContentBehavior::Validate => { - let directory_digests = vec![response.output_directory.clone()]; - let file_digests = vec![response.stdout_digest, response.stderr_digest]; - in_workunit!( - "eager_validate_action_cache", - Level::Trace, - |_workunit| async move { - store - .exists_recursive(directory_digests, file_digests) + match cache_content_behavior { + CacheContentBehavior::Fetch => { + let response = response.clone(); + let fetch_result = + in_workunit!("eager_fetch_action_cache", Level::Trace, |_workunit| store + .ensure_downloaded( + HashSet::from([response.stdout_digest, response.stderr_digest]), + HashSet::from([response.output_directory]) + )) + .await; + match fetch_result { + Err(StoreError::MissingDigest { .. 
}) => Ok(false), + Ok(_) => Ok(true), + Err(e) => Err(e), + } + } + CacheContentBehavior::Validate => { + let directory_digests = vec![response.output_directory.clone()]; + let file_digests = vec![response.stdout_digest, response.stderr_digest]; + in_workunit!( + "eager_validate_action_cache", + Level::Trace, + |_workunit| async move { + store + .exists_recursive(directory_digests, file_digests) + .await + } + ) .await } - ) - .await + CacheContentBehavior::Defer => Ok(true), } - CacheContentBehavior::Defer => Ok(true), - } } #[derive(Clone)] pub struct Context { - pub workunit_store: WorkunitStore, - pub build_id: String, - pub run_id: RunId, - pub tail_tasks: TailTasks, + pub workunit_store: WorkunitStore, + pub build_id: String, + pub run_id: RunId, + pub tail_tasks: TailTasks, } impl Default for Context { - fn default() -> Self { - Context { - workunit_store: WorkunitStore::new(false, log::Level::Debug), - build_id: String::default(), - run_id: RunId(0), - tail_tasks: TailTasks::new(), + fn default() -> Self { + Context { + workunit_store: WorkunitStore::new(false, log::Level::Debug), + build_id: String::default(), + run_id: RunId(0), + tail_tasks: TailTasks::new(), + } } - } } impl Context { - pub fn new( - workunit_store: WorkunitStore, - build_id: String, - run_id: RunId, - tail_tasks: TailTasks, - ) -> Context { - Context { - workunit_store, - build_id, - run_id, - tail_tasks, + pub fn new( + workunit_store: WorkunitStore, + build_id: String, + run_id: RunId, + tail_tasks: TailTasks, + ) -> Context { + Context { + workunit_store, + build_id, + run_id, + tail_tasks, + } } - } } #[async_trait] pub trait CommandRunner: Send + Sync + Debug { - /// - /// Submit a request for execution on the underlying runtime, and return - /// a future for it. - /// - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - req: Process, - ) -> Result; - - /// Shutdown this CommandRunner cleanly. - async fn shutdown(&self) -> Result<(), String>; + /// + /// Submit a request for execution on the underlying runtime, and return + /// a future for it. + /// + async fn run( + &self, + context: Context, + workunit: &mut RunningWorkunit, + req: Process, + ) -> Result; + + /// Shutdown this CommandRunner cleanly. + async fn shutdown(&self) -> Result<(), String>; } #[async_trait] impl CommandRunner for Box { - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - (**self).run(context, workunit, req).await - } - - async fn shutdown(&self) -> Result<(), String> { - (**self).shutdown().await - } + async fn run( + &self, + context: Context, + workunit: &mut RunningWorkunit, + req: Process, + ) -> Result { + (**self).run(context, workunit, req).await + } + + async fn shutdown(&self) -> Result<(), String> { + (**self).shutdown().await + } } #[async_trait] impl CommandRunner for Arc { - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - (**self).run(context, workunit, req).await - } - - async fn shutdown(&self) -> Result<(), String> { - (**self).shutdown().await - } + async fn run( + &self, + context: Context, + workunit: &mut RunningWorkunit, + req: Process, + ) -> Result { + (**self).run(context, workunit, req).await + } + + async fn shutdown(&self) -> Result<(), String> { + (**self).shutdown().await + } } // TODO(#8513) possibly move to the MEPR struct, or to the hashing crate? 
pub async fn get_digest( - process: &Process, - instance_name: Option, - process_cache_namespace: Option, - store: &Store, - append_only_caches_base_path: Option<&str>, + process: &Process, + instance_name: Option, + process_cache_namespace: Option, + store: &Store, + append_only_caches_base_path: Option<&str>, ) -> Digest { - let EntireExecuteRequest { - execute_request, .. - } = make_execute_request( - process, - instance_name, - process_cache_namespace, - store, - append_only_caches_base_path, - ) - .await - .unwrap(); - execute_request.action_digest.unwrap().try_into().unwrap() + let EntireExecuteRequest { + execute_request, .. + } = make_execute_request( + process, + instance_name, + process_cache_namespace, + store, + append_only_caches_base_path, + ) + .await + .unwrap(); + execute_request.action_digest.unwrap().try_into().unwrap() } pub fn digest(message: &T) -> Result { - Ok(Digest::of_bytes(&message.to_bytes())) + Ok(Digest::of_bytes(&message.to_bytes())) } #[derive(Clone, Debug, PartialEq)] pub struct EntireExecuteRequest { - pub action: Action, - pub command: Command, - pub execute_request: ExecuteRequest, - pub input_root_digest: DirectoryDigest, + pub action: Action, + pub command: Command, + pub execute_request: ExecuteRequest, + pub input_root_digest: DirectoryDigest, } fn make_wrapper_for_append_only_caches( - caches: &BTreeMap, - base_path: &str, - working_directory: Option<&str>, + caches: &BTreeMap, + base_path: &str, + working_directory: Option<&str>, ) -> Result { - let mut script = String::new(); - writeln!(&mut script, "#!/bin/sh").map_err(|err| format!("write! failed: {err:?}"))?; - - // Setup the append-only caches. - for (cache_name, path) in caches { - writeln!( - &mut script, - "/bin/mkdir -p '{}/{}'", - base_path, - cache_name.name() - ) - .map_err(|err| format!("write! failed: {err:?}"))?; - if let Some(parent) = path.parent() { - writeln!(&mut script, "/bin/mkdir -p '{}'", parent.to_string_lossy()) + let mut script = String::new(); + writeln!(&mut script, "#!/bin/sh").map_err(|err| format!("write! failed: {err:?}"))?; + + // Setup the append-only caches. + for (cache_name, path) in caches { + writeln!( + &mut script, + "/bin/mkdir -p '{}/{}'", + base_path, + cache_name.name() + ) + .map_err(|err| format!("write! failed: {err:?}"))?; + if let Some(parent) = path.parent() { + writeln!(&mut script, "/bin/mkdir -p '{}'", parent.to_string_lossy()) + .map_err(|err| format!("write! failed: {err}"))?; + } + writeln!( + &mut script, + "/bin/ln -s '{}/{}' '{}'", + base_path, + cache_name.name(), + path.as_path().to_string_lossy() + ) + .map_err(|err| format!("write! failed: {err}"))?; + } + + // Change into any working directory. + // + // Note: When this wrapper script is in effect, Pants will not set the `working_directory` + // field on the `ExecuteRequest` so that this wrapper script can operate in the input root + // first. + if let Some(path) = working_directory { + writeln!( + &mut script, + concat!( + "cd '{0}'\n", + "if [ \"$?\" != 0 ]; then\n", + " echo \"pants-wrapper: Failed to change working directory to: {0}\" 1>&2\n", + " exit 1\n", + "fi\n", + ), + path + ) .map_err(|err| format!("write! failed: {err}"))?; } - writeln!( - &mut script, - "/bin/ln -s '{}/{}' '{}'", - base_path, - cache_name.name(), - path.as_path().to_string_lossy() - ) - .map_err(|err| format!("write! failed: {err}"))?; - } - - // Change into any working directory. 
- // - // Note: When this wrapper script is in effect, Pants will not set the `working_directory` - // field on the `ExecuteRequest` so that this wrapper script can operate in the input root - // first. - if let Some(path) = working_directory { - writeln!( - &mut script, - concat!( - "cd '{0}'\n", - "if [ \"$?\" != 0 ]; then\n", - " echo \"pants-wrapper: Failed to change working directory to: {0}\" 1>&2\n", - " exit 1\n", - "fi\n", - ), - path - ) - .map_err(|err| format!("write! failed: {err}"))?; - } - // Finally, execute the process. - writeln!(&mut script, "exec \"$@\"").map_err(|err| format!("write! failed: {err:?}"))?; - Ok(script) + // Finally, execute the process. + writeln!(&mut script, "exec \"$@\"").map_err(|err| format!("write! failed: {err:?}"))?; + Ok(script) } pub async fn make_execute_request( - req: &Process, - instance_name: Option, - cache_key_gen_version: Option, - store: &Store, - append_only_caches_base_path: Option<&str>, + req: &Process, + instance_name: Option, + cache_key_gen_version: Option, + store: &Store, + append_only_caches_base_path: Option<&str>, ) -> Result { - const WRAPPER_SCRIPT: &str = "./__pants_wrapper__"; - - // Implement append-only caches by running a wrapper script before the actual program - // to be invoked in the remote environment. - let wrapper_script_digest_opt = match (append_only_caches_base_path, &req.append_only_caches) { - (Some(base_path), caches) if !caches.is_empty() => { - let script = make_wrapper_for_append_only_caches( - caches, - base_path, - req.working_directory.as_ref().and_then(|p| p.to_str()), - )?; - let digest = store - .store_file_bytes(Bytes::from(script), false) - .await - .map_err(|err| format!("Failed to store wrapper script for remote execution: {err}"))?; - let path = RelativePath::new(Path::new(WRAPPER_SCRIPT))?; - let snapshot = store.snapshot_of_one_file(path, digest, true).await?; - let directory_digest = DirectoryDigest::new(snapshot.digest, snapshot.tree); - Some(directory_digest) + const WRAPPER_SCRIPT: &str = "./__pants_wrapper__"; + + // Implement append-only caches by running a wrapper script before the actual program + // to be invoked in the remote environment. 
+ let wrapper_script_digest_opt = match (append_only_caches_base_path, &req.append_only_caches) { + (Some(base_path), caches) if !caches.is_empty() => { + let script = make_wrapper_for_append_only_caches( + caches, + base_path, + req.working_directory.as_ref().and_then(|p| p.to_str()), + )?; + let digest = store + .store_file_bytes(Bytes::from(script), false) + .await + .map_err(|err| { + format!("Failed to store wrapper script for remote execution: {err}") + })?; + let path = RelativePath::new(Path::new(WRAPPER_SCRIPT))?; + let snapshot = store.snapshot_of_one_file(path, digest, true).await?; + let directory_digest = DirectoryDigest::new(snapshot.digest, snapshot.tree); + Some(directory_digest) + } + _ => None, + }; + + let arguments = match &wrapper_script_digest_opt { + Some(_) => { + let mut args = Vec::with_capacity(req.argv.len() + 1); + args.push(WRAPPER_SCRIPT.to_string()); + args.extend(req.argv.iter().cloned()); + args + } + None => req.argv.clone(), + }; + + let mut command = remexec::Command { + arguments, + ..remexec::Command::default() + }; + + for (name, value) in &req.env { + if name == CACHE_KEY_GEN_VERSION_ENV_VAR_NAME + || name == CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME + || name == CACHE_KEY_SALT_ENV_VAR_NAME + { + return Err(format!( + "Cannot set env var with name {name} as that is reserved for internal use by pants" + )); + } + + command + .environment_variables + .push(remexec::command::EnvironmentVariable { + name: name.to_string(), + value: value.to_string(), + }); } - _ => None, - }; - - let arguments = match &wrapper_script_digest_opt { - Some(_) => { - let mut args = Vec::with_capacity(req.argv.len() + 1); - args.push(WRAPPER_SCRIPT.to_string()); - args.extend(req.argv.iter().cloned()); - args + + let mut platform_properties = match &req.execution_environment.strategy { + ProcessExecutionStrategy::RemoteExecution(properties) => properties.clone(), + _ => vec![], + }; + + if let Some(cache_key_gen_version) = cache_key_gen_version { + command + .environment_variables + .push(remexec::command::EnvironmentVariable { + name: CACHE_KEY_GEN_VERSION_ENV_VAR_NAME.to_string(), + value: cache_key_gen_version, + }); + } + + command + .environment_variables + .push(remexec::command::EnvironmentVariable { + name: CACHE_KEY_EXECUTION_STRATEGY.to_string(), + value: req.execution_environment.strategy.cache_value(), + }); + + if matches!( + req.cache_scope, + ProcessCacheScope::PerSession + | ProcessCacheScope::PerRestartAlways + | ProcessCacheScope::PerRestartSuccessful + ) { + command + .environment_variables + .push(remexec::command::EnvironmentVariable { + name: CACHE_KEY_SALT_ENV_VAR_NAME.to_string(), + value: Uuid::new_v4().to_string(), + }); } - None => req.argv.clone(), - }; - - let mut command = remexec::Command { - arguments, - ..remexec::Command::default() - }; - - for (name, value) in &req.env { - if name == CACHE_KEY_GEN_VERSION_ENV_VAR_NAME - || name == CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME - || name == CACHE_KEY_SALT_ENV_VAR_NAME + { - return Err(format!( - "Cannot set env var with name {name} as that is reserved for internal use by pants" - )); + command + .environment_variables + .push(remexec::command::EnvironmentVariable { + name: CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_string(), + value: req.execution_environment.platform.into(), + }); } - command - .environment_variables - .push(remexec::command::EnvironmentVariable { - name: name.to_string(), - value: value.to_string(), - }); - } - - let mut platform_properties = match &req.execution_environment.strategy 
{ - ProcessExecutionStrategy::RemoteExecution(properties) => properties.clone(), - _ => vec![], - }; - - if let Some(cache_key_gen_version) = cache_key_gen_version { - command - .environment_variables - .push(remexec::command::EnvironmentVariable { - name: CACHE_KEY_GEN_VERSION_ENV_VAR_NAME.to_string(), - value: cache_key_gen_version, - }); - } - - command - .environment_variables - .push(remexec::command::EnvironmentVariable { - name: CACHE_KEY_EXECUTION_STRATEGY.to_string(), - value: req.execution_environment.strategy.cache_value(), - }); - - if matches!( - req.cache_scope, - ProcessCacheScope::PerSession - | ProcessCacheScope::PerRestartAlways - | ProcessCacheScope::PerRestartSuccessful - ) { - command - .environment_variables - .push(remexec::command::EnvironmentVariable { - name: CACHE_KEY_SALT_ENV_VAR_NAME.to_string(), - value: Uuid::new_v4().to_string(), - }); - } - - { - command - .environment_variables - .push(remexec::command::EnvironmentVariable { - name: CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME.to_string(), - value: req.execution_environment.platform.into(), - }); - } - - let mut output_files = req - .output_files - .iter() - .map(|p| { - p.to_str() - .map(str::to_owned) - .ok_or_else(|| format!("Non-UTF8 output file path: {p:?}")) - }) - .collect::, String>>()?; - output_files.sort(); - command.output_files = output_files; - - let mut output_directories = req - .output_directories - .iter() - .map(|p| { - p.to_str() - .map(str::to_owned) - .ok_or_else(|| format!("Non-UTF8 output directory path: {p:?}")) - }) - .collect::, String>>()?; - output_directories.sort(); - command.output_directories = output_directories; - - if let Some(working_directory) = &req.working_directory { - // Do not set `working_directory` if a wrapper script is in use because the wrapper script - // will change to the working directory itself. - if wrapper_script_digest_opt.is_none() { - command.working_directory = working_directory - .to_str() - .map(str::to_owned) - .unwrap_or_else(|| panic!("Non-UTF8 working directory path: {working_directory:?}")); + let mut output_files = req + .output_files + .iter() + .map(|p| { + p.to_str() + .map(str::to_owned) + .ok_or_else(|| format!("Non-UTF8 output file path: {p:?}")) + }) + .collect::, String>>()?; + output_files.sort(); + command.output_files = output_files; + + let mut output_directories = req + .output_directories + .iter() + .map(|p| { + p.to_str() + .map(str::to_owned) + .ok_or_else(|| format!("Non-UTF8 output directory path: {p:?}")) + }) + .collect::, String>>()?; + output_directories.sort(); + command.output_directories = output_directories; + + if let Some(working_directory) = &req.working_directory { + // Do not set `working_directory` if a wrapper script is in use because the wrapper script + // will change to the working directory itself. + if wrapper_script_digest_opt.is_none() { + command.working_directory = working_directory + .to_str() + .map(str::to_owned) + .unwrap_or_else(|| { + panic!("Non-UTF8 working directory path: {working_directory:?}") + }); + } + } + + if req.jdk_home.is_some() { + // Ideally, the JDK would be brought along as part of the input directory, but we don't + // currently have support for that. 
Scoot supports this property, and will symlink .jdk to a + // system-installed JDK https://github.com/twitter/scoot/pull/391 - we should probably come to + // some kind of consensus across tools as to how this should work; RBE appears to work by + // allowing you to specify a jdk-version platform property, and it will put a JDK at a + // well-known path in the docker container you specify in which to run. + platform_properties.push(("JDK_SYMLINK".to_owned(), ".jdk".to_owned())); + } + + // Extract `Platform` proto from the `Command` to avoid a partial move of `Command`. + let mut command_platform = command.platform.take().unwrap_or_default(); + + // Add configured platform properties to the `Platform`. + for (name, value) in platform_properties { + command_platform + .properties + .push(remexec::platform::Property { + name: name.clone(), + value: value.clone(), + }); } - } - - if req.jdk_home.is_some() { - // Ideally, the JDK would be brought along as part of the input directory, but we don't - // currently have support for that. Scoot supports this property, and will symlink .jdk to a - // system-installed JDK https://github.com/twitter/scoot/pull/391 - we should probably come to - // some kind of consensus across tools as to how this should work; RBE appears to work by - // allowing you to specify a jdk-version platform property, and it will put a JDK at a - // well-known path in the docker container you specify in which to run. - platform_properties.push(("JDK_SYMLINK".to_owned(), ".jdk".to_owned())); - } - - // Extract `Platform` proto from the `Command` to avoid a partial move of `Command`. - let mut command_platform = command.platform.take().unwrap_or_default(); - - // Add configured platform properties to the `Platform`. - for (name, value) in platform_properties { + + // Sort the platform properties. + // + // From the remote execution spec: + // The properties that make up this platform. In order to ensure that + // equivalent `Platform`s always hash to the same value, the properties MUST + // be lexicographically sorted by name, and then by value. Sorting of strings + // is done by code point, equivalently, by the UTF-8 bytes. + // + // Note: BuildBarn enforces this requirement. command_platform - .properties - .push(remexec::platform::Property { - name: name.clone(), - value: value.clone(), - }); - } - - // Sort the platform properties. - // - // From the remote execution spec: - // The properties that make up this platform. In order to ensure that - // equivalent `Platform`s always hash to the same value, the properties MUST - // be lexicographically sorted by name, and then by value. Sorting of strings - // is done by code point, equivalently, by the UTF-8 bytes. - // - // Note: BuildBarn enforces this requirement. - command_platform - .properties - .sort_by(|x, y| match x.name.cmp(&y.name) { - Ordering::Equal => x.value.cmp(&y.value), - v => v, - }); - - // Store the separate copy back into the Command proto. - command.platform = Some(command_platform); - - // Sort the environment variables. REv2 spec requires sorting by name for same reasons that - // platform properties are sorted, i.e. consistent hashing. - command - .environment_variables - .sort_by(|x, y| x.name.cmp(&y.name)); - - let input_root_digest: DirectoryDigest = match &wrapper_script_digest_opt { - Some(wrapper_digest) => { - let digests = vec![ - req.input_digests.complete.clone(), - wrapper_digest.to_owned(), - ]; - store - .merge(digests) - .await - .map_err(|err| format!("store error: {err}"))? 
+ .properties + .sort_by(|x, y| match x.name.cmp(&y.name) { + Ordering::Equal => x.value.cmp(&y.value), + v => v, + }); + + // Store the separate copy back into the Command proto. + command.platform = Some(command_platform); + + // Sort the environment variables. REv2 spec requires sorting by name for same reasons that + // platform properties are sorted, i.e. consistent hashing. + command + .environment_variables + .sort_by(|x, y| x.name.cmp(&y.name)); + + let input_root_digest: DirectoryDigest = match &wrapper_script_digest_opt { + Some(wrapper_digest) => { + let digests = vec![ + req.input_digests.complete.clone(), + wrapper_digest.to_owned(), + ]; + store + .merge(digests) + .await + .map_err(|err| format!("store error: {err}"))? + } + None => req.input_digests.complete.clone(), + }; + + let mut action = remexec::Action { + command_digest: Some((&digest(&command)?).into()), + input_root_digest: Some(input_root_digest.as_digest().into()), + ..remexec::Action::default() + }; + + if let Some(timeout) = req.timeout { + action.timeout = Some(prost_types::Duration::try_from(timeout).unwrap()); } - None => req.input_digests.complete.clone(), - }; - - let mut action = remexec::Action { - command_digest: Some((&digest(&command)?).into()), - input_root_digest: Some(input_root_digest.as_digest().into()), - ..remexec::Action::default() - }; - - if let Some(timeout) = req.timeout { - action.timeout = Some(prost_types::Duration::try_from(timeout).unwrap()); - } - - let execute_request = remexec::ExecuteRequest { - action_digest: Some((&digest(&action)?).into()), - instance_name: instance_name.unwrap_or_else(|| "".to_owned()), - // We rely on the RemoteCache command runner for caching with remote execution. We always - // disable remote servers from doing caching themselves not only to avoid wasted work, but - // more importantly because they do not have our same caching semantics, e.g. - // `ProcessCacheScope.SUCCESSFUL` vs `ProcessCacheScope.ALWAYS`. - skip_cache_lookup: true, - ..remexec::ExecuteRequest::default() - }; - - Ok(EntireExecuteRequest { - action, - command, - execute_request, - input_root_digest, - }) + + let execute_request = remexec::ExecuteRequest { + action_digest: Some((&digest(&action)?).into()), + instance_name: instance_name.unwrap_or_else(|| "".to_owned()), + // We rely on the RemoteCache command runner for caching with remote execution. We always + // disable remote servers from doing caching themselves not only to avoid wasted work, but + // more importantly because they do not have our same caching semantics, e.g. + // `ProcessCacheScope.SUCCESSFUL` vs `ProcessCacheScope.ALWAYS`. + skip_cache_lookup: true, + ..remexec::ExecuteRequest::default() + }; + + Ok(EntireExecuteRequest { + action, + command, + execute_request, + input_root_digest, + }) } /// Convert an ActionResult into a FallibleProcessResultWithPlatform. @@ -1311,217 +1318,222 @@ pub async fn make_execute_request( /// `treat_tree_digest_as_final_directory_hack` is true, then that final merged directory /// will be extracted from the tree_digest of the single output directory. 
pub async fn populate_fallible_execution_result( - store: Store, - run_id: RunId, - action_result: &remexec::ActionResult, - treat_tree_digest_as_final_directory_hack: bool, - source: ProcessResultSource, - environment: ProcessExecutionEnvironment, + store: Store, + run_id: RunId, + action_result: &remexec::ActionResult, + treat_tree_digest_as_final_directory_hack: bool, + source: ProcessResultSource, + environment: ProcessExecutionEnvironment, ) -> Result { - let (stdout_digest, stderr_digest, output_directory) = future::try_join3( - extract_stdout(&store, action_result), - extract_stderr(&store, action_result), - extract_output_files( - store, - action_result, - treat_tree_digest_as_final_directory_hack, - ), - ) - .await?; - - let metadata = if let Some(metadata) = action_result.execution_metadata.clone() { - ProcessResultMetadata::new_from_metadata(metadata, source, environment, run_id) - } else { - ProcessResultMetadata::new(None, source, environment, run_id) - }; - - Ok(FallibleProcessResultWithPlatform { - stdout_digest, - stderr_digest, - exit_code: action_result.exit_code, - output_directory, - metadata, - }) + let (stdout_digest, stderr_digest, output_directory) = future::try_join3( + extract_stdout(&store, action_result), + extract_stderr(&store, action_result), + extract_output_files( + store, + action_result, + treat_tree_digest_as_final_directory_hack, + ), + ) + .await?; + + let metadata = if let Some(metadata) = action_result.execution_metadata.clone() { + ProcessResultMetadata::new_from_metadata(metadata, source, environment, run_id) + } else { + ProcessResultMetadata::new(None, source, environment, run_id) + }; + + Ok(FallibleProcessResultWithPlatform { + stdout_digest, + stderr_digest, + exit_code: action_result.exit_code, + output_directory, + metadata, + }) } fn extract_stdout<'a>( - store: &Store, - action_result: &'a remexec::ActionResult, + store: &Store, + action_result: &'a remexec::ActionResult, ) -> BoxFuture<'a, Result> { - let store = store.clone(); - async move { - if let Some(digest_proto) = &action_result.stdout_digest { - let stdout_digest_result: Result = digest_proto.try_into(); - let stdout_digest = - stdout_digest_result.map_err(|err| format!("Error extracting stdout: {err}"))?; - Ok(stdout_digest) - } else { - let stdout_raw = Bytes::copy_from_slice(&action_result.stdout_raw); - let digest = store - .store_file_bytes(stdout_raw, true) - .map_err(move |error| format!("Error storing raw stdout: {error:?}")) - .await?; - Ok(digest) + let store = store.clone(); + async move { + if let Some(digest_proto) = &action_result.stdout_digest { + let stdout_digest_result: Result = digest_proto.try_into(); + let stdout_digest = + stdout_digest_result.map_err(|err| format!("Error extracting stdout: {err}"))?; + Ok(stdout_digest) + } else { + let stdout_raw = Bytes::copy_from_slice(&action_result.stdout_raw); + let digest = store + .store_file_bytes(stdout_raw, true) + .map_err(move |error| format!("Error storing raw stdout: {error:?}")) + .await?; + Ok(digest) + } } - } - .boxed() + .boxed() } fn extract_stderr<'a>( - store: &Store, - action_result: &'a remexec::ActionResult, + store: &Store, + action_result: &'a remexec::ActionResult, ) -> BoxFuture<'a, Result> { - let store = store.clone(); - async move { - if let Some(digest_proto) = &action_result.stderr_digest { - let stderr_digest_result: Result = digest_proto.try_into(); - let stderr_digest = - stderr_digest_result.map_err(|err| format!("Error extracting stderr: {err}"))?; - Ok(stderr_digest) - } else { - let 
stderr_raw = Bytes::copy_from_slice(&action_result.stderr_raw); - let digest = store - .store_file_bytes(stderr_raw, true) - .map_err(move |error| format!("Error storing raw stderr: {error:?}")) - .await?; - Ok(digest) + let store = store.clone(); + async move { + if let Some(digest_proto) = &action_result.stderr_digest { + let stderr_digest_result: Result = digest_proto.try_into(); + let stderr_digest = + stderr_digest_result.map_err(|err| format!("Error extracting stderr: {err}"))?; + Ok(stderr_digest) + } else { + let stderr_raw = Bytes::copy_from_slice(&action_result.stderr_raw); + let digest = store + .store_file_bytes(stderr_raw, true) + .map_err(move |error| format!("Error storing raw stderr: {error:?}")) + .await?; + Ok(digest) + } } - } - .boxed() + .boxed() } pub fn extract_output_files( - store: Store, - action_result: &remexec::ActionResult, - treat_tree_digest_as_final_directory_hack: bool, + store: Store, + action_result: &remexec::ActionResult, + treat_tree_digest_as_final_directory_hack: bool, ) -> BoxFuture<'static, Result> { - // HACK: The caching CommandRunner stores the digest of the Directory that merges all output - // files and output directories in the `tree_digest` field of the `output_directories` field - // of the ActionResult/ExecuteResponse stored in the local cache. When - // `treat_tree_digest_as_final_directory_hack` is true, then this code will extract that - // directory from the tree_digest and skip the merging performed by the remainder of this - // method. - if treat_tree_digest_as_final_directory_hack { - match &action_result.output_directories[..] { - [directory] => { - match require_digest(directory.tree_digest.as_ref()) { - Ok(digest) => { - return future::ready::>(Ok( - DirectoryDigest::from_persisted_digest(digest), - )) - .boxed() - } - Err(err) => return futures::future::err(err.into()).boxed(), - }; - } - _ => { - return futures::future::err( - "illegal state: treat_tree_digest_as_final_directory_hack \ + // HACK: The caching CommandRunner stores the digest of the Directory that merges all output + // files and output directories in the `tree_digest` field of the `output_directories` field + // of the ActionResult/ExecuteResponse stored in the local cache. When + // `treat_tree_digest_as_final_directory_hack` is true, then this code will extract that + // directory from the tree_digest and skip the merging performed by the remainder of this + // method. + if treat_tree_digest_as_final_directory_hack { + match &action_result.output_directories[..] { + [directory] => { + match require_digest(directory.tree_digest.as_ref()) { + Ok(digest) => { + return future::ready::>(Ok( + DirectoryDigest::from_persisted_digest(digest), + )) + .boxed() + } + Err(err) => return futures::future::err(err.into()).boxed(), + }; + } + _ => { + return futures::future::err( + "illegal state: treat_tree_digest_as_final_directory_hack \ expected single output directory" - .to_owned() - .into(), - ) - .boxed(); - } + .to_owned() + .into(), + ) + .boxed(); + } + } } - } - - // Get Digests of output Directories. - // Then we'll make a Directory for the output files, and merge them. 
- let mut directory_digests = Vec::with_capacity(action_result.output_directories.len() + 1); - // TODO: Maybe take rather than clone - let output_directories = action_result.output_directories.clone(); - for dir in output_directories { - let store = store.clone(); - directory_digests.push( - (async move { - // The `OutputDirectory` contains the digest of a `Tree` proto which contains - // the `Directory` proto of the root directory of this `OutputDirectory` plus all - // of the `Directory` protos for child directories of that root. - - // Retrieve the Tree proto and hash its root `Directory` proto to obtain the digest - // of the output directory needed to construct the series of `Directory` protos needed - // for the final merge of the output directories. - let tree_digest: Digest = require_digest(dir.tree_digest.as_ref())?; - let directory_digest = store - .load_tree_from_remote(tree_digest) - .await? - .ok_or_else(|| format!("Tree with digest {tree_digest:?} was not in remote"))?; - store - .add_prefix(directory_digest, &RelativePath::new(dir.path)?) - .await - }) - .map_err(|err| format!("Error saving remote output directory to local cache: {err}")), - ); - } - - // Make a directory for the files - let mut path_map = HashMap::new(); - let path_stats_result: Result, String> = action_result - .output_files - .iter() - .map(|output_file| { - let output_file_path_buf = PathBuf::from(output_file.path.clone()); - let digest: Result = require_digest(output_file.digest.as_ref()); - path_map.insert(output_file_path_buf.clone(), digest?); - Ok(PathStat::file( - output_file_path_buf.clone(), - File { - path: output_file_path_buf, - is_executable: output_file.is_executable, - }, - )) - }) - .collect(); + // Get Digests of output Directories. + // Then we'll make a Directory for the output files, and merge them. + let mut directory_digests = Vec::with_capacity(action_result.output_directories.len() + 1); + // TODO: Maybe take rather than clone + let output_directories = action_result.output_directories.clone(); + for dir in output_directories { + let store = store.clone(); + directory_digests.push( + (async move { + // The `OutputDirectory` contains the digest of a `Tree` proto which contains + // the `Directory` proto of the root directory of this `OutputDirectory` plus all + // of the `Directory` protos for child directories of that root. + + // Retrieve the Tree proto and hash its root `Directory` proto to obtain the digest + // of the output directory needed to construct the series of `Directory` protos needed + // for the final merge of the output directories. + let tree_digest: Digest = require_digest(dir.tree_digest.as_ref())?; + let directory_digest = store + .load_tree_from_remote(tree_digest) + .await? + .ok_or_else(|| format!("Tree with digest {tree_digest:?} was not in remote"))?; + + store + .add_prefix(directory_digest, &RelativePath::new(dir.path)?) 
+ .await + }) + .map_err(|err| format!("Error saving remote output directory to local cache: {err}")), + ); + } - let path_stats = try_future!(path_stats_result); + // Make a directory for the files + let mut path_map = HashMap::new(); + let path_stats_result: Result, String> = action_result + .output_files + .iter() + .map(|output_file| { + let output_file_path_buf = PathBuf::from(output_file.path.clone()); + let digest: Result = require_digest(output_file.digest.as_ref()); + path_map.insert(output_file_path_buf.clone(), digest?); + Ok(PathStat::file( + output_file_path_buf.clone(), + File { + path: output_file_path_buf, + is_executable: output_file.is_executable, + }, + )) + }) + .collect(); - #[derive(Clone)] - struct StoreOneOffRemoteDigest { - map_of_paths_to_digests: HashMap, - } + let path_stats = try_future!(path_stats_result); - impl StoreOneOffRemoteDigest { - fn new(map: HashMap) -> StoreOneOffRemoteDigest { - StoreOneOffRemoteDigest { - map_of_paths_to_digests: map, - } + #[derive(Clone)] + struct StoreOneOffRemoteDigest { + map_of_paths_to_digests: HashMap, } - } - - impl StoreFileByDigest for StoreOneOffRemoteDigest { - fn store_by_digest(&self, file: File) -> future::BoxFuture<'static, Result> { - match self.map_of_paths_to_digests.get(&file.path) { - Some(digest) => future::ok(*digest), - None => future::err(format!( - "Didn't know digest for path in remote execution response: {:?}", - file.path - )), - } - .boxed() + + impl StoreOneOffRemoteDigest { + fn new(map: HashMap) -> StoreOneOffRemoteDigest { + StoreOneOffRemoteDigest { + map_of_paths_to_digests: map, + } + } + } + + impl StoreFileByDigest for StoreOneOffRemoteDigest { + fn store_by_digest( + &self, + file: File, + ) -> future::BoxFuture<'static, Result> { + match self.map_of_paths_to_digests.get(&file.path) { + Some(digest) => future::ok(*digest), + None => future::err(format!( + "Didn't know digest for path in remote execution response: {:?}", + file.path + )), + } + .boxed() + } + } + + async move { + let files_snapshot = + Snapshot::from_path_stats(StoreOneOffRemoteDigest::new(path_map), path_stats).map_err( + move |error| { + format!( + "Error when storing the output file directory info in the remote CAS: {error:?}" + ) + }, + ); + + let (files_snapshot, mut directory_digests) = + future::try_join(files_snapshot, future::try_join_all(directory_digests)).await?; + + directory_digests.push(files_snapshot.into()); + + store + .merge(directory_digests) + .map_err(|err| err.enrich("Error when merging output files and directories")) + .await } - } - - async move { - let files_snapshot = - Snapshot::from_path_stats(StoreOneOffRemoteDigest::new(path_map), path_stats).map_err( - move |error| { - format!("Error when storing the output file directory info in the remote CAS: {error:?}") - }, - ); - - let (files_snapshot, mut directory_digests) = - future::try_join(files_snapshot, future::try_join_all(directory_digests)).await?; - - directory_digests.push(files_snapshot.into()); - - store - .merge(directory_digests) - .map_err(|err| err.enrich("Error when merging output files and directories")) - .await - } - .boxed() + .boxed() } #[cfg(test)] diff --git a/src/rust/engine/process_execution/src/local.rs b/src/rust/engine/process_execution/src/local.rs index cf26608765b..92ab5533b6e 100644 --- a/src/rust/engine/process_execution/src/local.rs +++ b/src/rust/engine/process_execution/src/local.rs @@ -14,9 +14,9 @@ use std::time::Instant; use async_trait::async_trait; use bytes::{Bytes, BytesMut}; use fs::{ - self, DigestTrie, 
DirectoryDigest, GlobExpansionConjunction, GlobMatching, PathGlobs, - Permissions, RelativePath, StrictGlobMatching, SymlinkBehavior, TypedPath, - EMPTY_DIRECTORY_DIGEST, + self, DigestTrie, DirectoryDigest, GlobExpansionConjunction, GlobMatching, PathGlobs, + Permissions, RelativePath, StrictGlobMatching, SymlinkBehavior, TypedPath, + EMPTY_DIRECTORY_DIGEST, }; use futures::stream::{BoxStream, StreamExt, TryStreamExt}; use futures::{try_join, FutureExt, TryFutureExt}; @@ -24,8 +24,8 @@ use log::{debug, info}; use nails::execution::ExitCode; use shell_quote::bash; use store::{ - ImmutableInputs, OneOffStoreFileByDigest, Snapshot, SnapshotOps, Store, StoreError, - WorkdirSymlink, + ImmutableInputs, OneOffStoreFileByDigest, Snapshot, SnapshotOps, Store, StoreError, + WorkdirSymlink, }; use task_executor::Executor; use tempfile::TempDir; @@ -36,8 +36,8 @@ use tokio_util::codec::{BytesCodec, FramedRead}; use workunit_store::{in_workunit, Level, Metric, RunningWorkunit}; use crate::{ - Context, FallibleProcessResultWithPlatform, ManagedChild, NamedCaches, Process, ProcessError, - ProcessResultMetadata, ProcessResultSource, + Context, FallibleProcessResultWithPlatform, ManagedChild, NamedCaches, Process, ProcessError, + ProcessResultMetadata, ProcessResultSource, }; pub const USER_EXECUTABLE_MODE: u32 = 0o100755; @@ -45,105 +45,105 @@ pub const USER_EXECUTABLE_MODE: u32 = 0o100755; #[derive(Clone, Copy, Debug, PartialEq, Eq, strum_macros::EnumString)] #[strum(serialize_all = "snake_case")] pub enum KeepSandboxes { - Always, - Never, - OnFailure, + Always, + Never, + OnFailure, } pub struct CommandRunner { - pub store: Store, - executor: Executor, - work_dir_base: PathBuf, - named_caches: NamedCaches, - immutable_inputs: ImmutableInputs, - keep_sandboxes: KeepSandboxes, - spawn_lock: RwLock<()>, -} - -impl CommandRunner { - pub fn new( - store: Store, + pub store: Store, executor: Executor, work_dir_base: PathBuf, named_caches: NamedCaches, immutable_inputs: ImmutableInputs, keep_sandboxes: KeepSandboxes, - ) -> CommandRunner { - CommandRunner { - store, - executor, - work_dir_base, - named_caches, - immutable_inputs, - keep_sandboxes, - spawn_lock: RwLock::new(()), + spawn_lock: RwLock<()>, +} + +impl CommandRunner { + pub fn new( + store: Store, + executor: Executor, + work_dir_base: PathBuf, + named_caches: NamedCaches, + immutable_inputs: ImmutableInputs, + keep_sandboxes: KeepSandboxes, + ) -> CommandRunner { + CommandRunner { + store, + executor, + work_dir_base, + named_caches, + immutable_inputs, + keep_sandboxes, + spawn_lock: RwLock::new(()), + } + } + + async fn construct_output_snapshot( + store: Store, + posix_fs: Arc, + output_file_paths: BTreeSet, + output_dir_paths: BTreeSet, + ) -> Result { + let output_paths = output_dir_paths + .into_iter() + .flat_map(|p| { + let mut dir_glob = { + let mut dir = PathBuf::from(p).into_os_string(); + if dir.is_empty() { + dir.push(".") + } + dir + }; + let dir = dir_glob.clone(); + dir_glob.push("/**"); + vec![dir, dir_glob] + }) + .chain( + output_file_paths + .into_iter() + .map(|p| PathBuf::from(p).into_os_string()), + ) + .map(|s| { + s.into_string() + .map_err(|e| format!("Error stringifying output paths: {e:?}")) + }) + .collect::, _>>()?; + + // TODO: should we error when globs fail? 
+ let output_globs = PathGlobs::new( + output_paths, + StrictGlobMatching::Ignore, + GlobExpansionConjunction::AllMatch, + ) + .parse()?; + + let path_stats = posix_fs + .expand_globs(output_globs, SymlinkBehavior::Aware, None) + .map_err(|err| format!("Error expanding output globs: {err}")) + .await?; + Snapshot::from_path_stats( + OneOffStoreFileByDigest::new(store, posix_fs, true), + path_stats, + ) + .await } - } - - async fn construct_output_snapshot( - store: Store, - posix_fs: Arc, - output_file_paths: BTreeSet, - output_dir_paths: BTreeSet, - ) -> Result { - let output_paths = output_dir_paths - .into_iter() - .flat_map(|p| { - let mut dir_glob = { - let mut dir = PathBuf::from(p).into_os_string(); - if dir.is_empty() { - dir.push(".") - } - dir - }; - let dir = dir_glob.clone(); - dir_glob.push("/**"); - vec![dir, dir_glob] - }) - .chain( - output_file_paths - .into_iter() - .map(|p| PathBuf::from(p).into_os_string()), - ) - .map(|s| { - s.into_string() - .map_err(|e| format!("Error stringifying output paths: {e:?}")) - }) - .collect::, _>>()?; - - // TODO: should we error when globs fail? - let output_globs = PathGlobs::new( - output_paths, - StrictGlobMatching::Ignore, - GlobExpansionConjunction::AllMatch, - ) - .parse()?; - - let path_stats = posix_fs - .expand_globs(output_globs, SymlinkBehavior::Aware, None) - .map_err(|err| format!("Error expanding output globs: {err}")) - .await?; - Snapshot::from_path_stats( - OneOffStoreFileByDigest::new(store, posix_fs, true), - path_stats, - ) - .await - } - pub fn named_caches(&self) -> &NamedCaches { - &self.named_caches - } + pub fn named_caches(&self) -> &NamedCaches { + &self.named_caches + } - pub fn immutable_inputs(&self) -> &ImmutableInputs { - &self.immutable_inputs - } + pub fn immutable_inputs(&self) -> &ImmutableInputs { + &self.immutable_inputs + } } impl Debug for CommandRunner { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("local::CommandRunner") - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("local::CommandRunner") + .finish_non_exhaustive() + } } // TODO: A Stream that ends with `Exit` is error prone: we should consider creating a Child struct @@ -151,524 +151,526 @@ impl Debug for CommandRunner { // See https://github.com/stuhood/nails/issues/1 for more info. #[derive(Debug, PartialEq, Eq)] pub enum ChildOutput { - Stdout(Bytes), - Stderr(Bytes), - Exit(ExitCode), + Stdout(Bytes), + Stderr(Bytes), + Exit(ExitCode), } /// /// Collect the outputs of a child process. /// pub async fn collect_child_outputs<'a, 'b>( - stdout: &'a mut BytesMut, - stderr: &'a mut BytesMut, - mut stream: BoxStream<'b, Result>, + stdout: &'a mut BytesMut, + stderr: &'a mut BytesMut, + mut stream: BoxStream<'b, Result>, ) -> Result { - let mut exit_code = 1; + let mut exit_code = 1; - while let Some(child_output_res) = stream.next().await { - match child_output_res? { - ChildOutput::Stdout(bytes) => stdout.extend_from_slice(&bytes), - ChildOutput::Stderr(bytes) => stderr.extend_from_slice(&bytes), - ChildOutput::Exit(code) => exit_code = code.0, - }; - } + while let Some(child_output_res) = stream.next().await { + match child_output_res? 
{ + ChildOutput::Stdout(bytes) => stdout.extend_from_slice(&bytes), + ChildOutput::Stderr(bytes) => stderr.extend_from_slice(&bytes), + ChildOutput::Exit(code) => exit_code = code.0, + }; + } - Ok(exit_code) + Ok(exit_code) } #[async_trait] impl super::CommandRunner for CommandRunner { - /// - /// Runs a command on this machine in the passed working directory. - /// - async fn run( - &self, - context: Context, - _workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - let req_debug_repr = format!("{req:#?}"); - in_workunit!( - "run_local_process", - req.level, - // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit - // renders at the Process's level. - desc = Some(req.description.clone()), - |workunit| async move { - let mut workdir = create_sandbox( - self.executor.clone(), - &self.work_dir_base, - &req.description, - self.keep_sandboxes, - )?; - - // Start working on a mutable version of the process. - let mut req = req; - // Update env, replacing `{chroot}` placeholders with `workdir_path`. - apply_chroot(workdir.path().to_str().unwrap(), &mut req); - - // Prepare the workdir. - let exclusive_spawn = prepare_workdir( - workdir.path().to_owned(), - &self.work_dir_base, - &req, - req.input_digests.inputs.clone(), - &self.store, - &self.named_caches, - &self.immutable_inputs, - None, - None, + /// + /// Runs a command on this machine in the passed working directory. + /// + async fn run( + &self, + context: Context, + _workunit: &mut RunningWorkunit, + req: Process, + ) -> Result { + let req_debug_repr = format!("{req:#?}"); + in_workunit!( + "run_local_process", + req.level, + // NB: See engine::nodes::NodeKey::workunit_level for more information on why this workunit + // renders at the Process's level. + desc = Some(req.description.clone()), + |workunit| async move { + let mut workdir = create_sandbox( + self.executor.clone(), + &self.work_dir_base, + &req.description, + self.keep_sandboxes, + )?; + + // Start working on a mutable version of the process. + let mut req = req; + // Update env, replacing `{chroot}` placeholders with `workdir_path`. + apply_chroot(workdir.path().to_str().unwrap(), &mut req); + + // Prepare the workdir. + let exclusive_spawn = prepare_workdir( + workdir.path().to_owned(), + &self.work_dir_base, + &req, + req.input_digests.inputs.clone(), + &self.store, + &self.named_caches, + &self.immutable_inputs, + None, + None, + ) + .await?; + + workunit.increment_counter(Metric::LocalExecutionRequests, 1); + // NB: The constraint on `CapturedWorkdir` is that any child processes spawned here have + // exited (or been killed in their `Drop` handlers), so this function can rely on the usual + // Drop order of local variables to assume that the sandbox is cleaned up after the process + // is. + let res = self + .run_and_capture_workdir( + req.clone(), + context, + self.store.clone(), + self.executor.clone(), + workdir.path().to_owned(), + (), + exclusive_spawn, + ) + .map_err(|msg| { + // Processes that experience no infrastructure issues should result in an "Ok" return, + // potentially with an exit code that indicates that they failed (with more information + // on stderr). Actually failing at this level indicates a failure to start or otherwise + // interact with the process, which would generally be an infrastructure or implementation + // error (something missing from the sandbox, incorrect permissions, etc). + // + // Given that this is expected to be rare, we dump the entire process definition in the + // error. 
+ ProcessError::Unclassified(format!( + "Failed to execute: {req_debug_repr}\n\n{msg}" + )) + }) + .await; + + if self.keep_sandboxes == KeepSandboxes::Always + || self.keep_sandboxes == KeepSandboxes::OnFailure + && res.as_ref().map(|r| r.exit_code).unwrap_or(1) != 0 + { + workdir.keep(&req.description); + setup_run_sh_script( + workdir.path(), + &req.env, + &req.working_directory, + &req.argv, + workdir.path(), + )?; + } + + res + } ) - .await?; - - workunit.increment_counter(Metric::LocalExecutionRequests, 1); - // NB: The constraint on `CapturedWorkdir` is that any child processes spawned here have - // exited (or been killed in their `Drop` handlers), so this function can rely on the usual - // Drop order of local variables to assume that the sandbox is cleaned up after the process - // is. - let res = self - .run_and_capture_workdir( - req.clone(), - context, - self.store.clone(), - self.executor.clone(), - workdir.path().to_owned(), - (), - exclusive_spawn, - ) - .map_err(|msg| { - // Processes that experience no infrastructure issues should result in an "Ok" return, - // potentially with an exit code that indicates that they failed (with more information - // on stderr). Actually failing at this level indicates a failure to start or otherwise - // interact with the process, which would generally be an infrastructure or implementation - // error (something missing from the sandbox, incorrect permissions, etc). - // - // Given that this is expected to be rare, we dump the entire process definition in the - // error. - ProcessError::Unclassified(format!("Failed to execute: {req_debug_repr}\n\n{msg}")) - }) - .await; - - if self.keep_sandboxes == KeepSandboxes::Always - || self.keep_sandboxes == KeepSandboxes::OnFailure - && res.as_ref().map(|r| r.exit_code).unwrap_or(1) != 0 - { - workdir.keep(&req.description); - setup_run_sh_script( - workdir.path(), - &req.env, - &req.working_directory, - &req.argv, - workdir.path(), - )?; - } - - res - } - ) - .await - } + .await + } - async fn shutdown(&self) -> Result<(), String> { - Ok(()) - } + async fn shutdown(&self) -> Result<(), String> { + Ok(()) + } } #[async_trait] impl CapturedWorkdir for CommandRunner { - type WorkdirToken = (); - - async fn run_in_workdir<'s, 'c, 'w, 'r>( - &'s self, - _context: &'c Context, - workdir_path: &'w Path, - _workdir_token: (), - req: Process, - exclusive_spawn: bool, - ) -> Result>, String> { - let cwd = if let Some(ref working_directory) = req.working_directory { - workdir_path.join(working_directory) - } else { - workdir_path.to_owned() - }; - let mut command = Command::new(&req.argv[0]); - command - .env_clear() - // It would be really nice not to have to manually set PATH but this is sadly the only way - // to stop automatic PATH searching. - .env("PATH", "") - .args(&req.argv[1..]) - .current_dir(cwd) - .envs(&req.env) - .stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - - // See the documentation of the `CapturedWorkdir::run_in_workdir` method, but `exclusive_spawn` - // indicates the binary we're spawning was written out by the current thread, and, as such, - // there may be open file handles against it. 
This will occur whenever a concurrent call of this - // method proceeds through its fork point - // (https://pubs.opengroup.org/onlinepubs/009695399/functions/fork.html) while the current - // thread is in the middle of writing the binary and thus captures a clone of the open file - // handle, but that concurrent call has not yet gotten to its exec point - // (https://pubs.opengroup.org/onlinepubs/009695399/functions/exec.html) where the operating - // system will close the cloned file handle (via O_CLOEXEC being set on all files opened by - // Rust). To prevent a race like this holding this thread's binary open leading to an ETXTBSY - // (https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html) error, we - // maintain RwLock that allows non-`exclusive_spawn` binaries to spawn concurrently but ensures - // all such concurrent spawns have completed (and thus closed any cloned file handles) before - // proceeding to spawn the `exclusive_spawn` binary this thread has written. - // - // See: https://github.com/golang/go/issues/22315 for an excellent description of this generic - // unix problem. - let mut fork_exec = move || ManagedChild::spawn(&mut command, None); - let mut child = { - if exclusive_spawn { - let _write_locked = self.spawn_lock.write().await; - - // Despite the mitigations taken against racing our own forks, forks can happen in our - // process but outside of our control (in libraries). As such, we back-stop by sleeping and - // trying again for a while if we do hit one of these fork races we do not control. - const MAX_ETXTBSY_WAIT: Duration = Duration::from_millis(100); - let mut retries: u32 = 0; - let mut sleep_millis = 1; - - let start_time = std::time::Instant::now(); - loop { - match fork_exec() { - Err(e) => { - if e.raw_os_error() == Some(libc::ETXTBSY) && start_time.elapsed() < MAX_ETXTBSY_WAIT - { - tokio::time::sleep(std::time::Duration::from_millis(sleep_millis)).await; - retries += 1; - sleep_millis *= 2; - continue; - } else if retries > 0 { - break Err(format!( + type WorkdirToken = (); + + async fn run_in_workdir<'s, 'c, 'w, 'r>( + &'s self, + _context: &'c Context, + workdir_path: &'w Path, + _workdir_token: (), + req: Process, + exclusive_spawn: bool, + ) -> Result>, String> { + let cwd = if let Some(ref working_directory) = req.working_directory { + workdir_path.join(working_directory) + } else { + workdir_path.to_owned() + }; + let mut command = Command::new(&req.argv[0]); + command + .env_clear() + // It would be really nice not to have to manually set PATH but this is sadly the only way + // to stop automatic PATH searching. + .env("PATH", "") + .args(&req.argv[1..]) + .current_dir(cwd) + .envs(&req.env) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + // See the documentation of the `CapturedWorkdir::run_in_workdir` method, but `exclusive_spawn` + // indicates the binary we're spawning was written out by the current thread, and, as such, + // there may be open file handles against it. 
This will occur whenever a concurrent call of this + // method proceeds through its fork point + // (https://pubs.opengroup.org/onlinepubs/009695399/functions/fork.html) while the current + // thread is in the middle of writing the binary and thus captures a clone of the open file + // handle, but that concurrent call has not yet gotten to its exec point + // (https://pubs.opengroup.org/onlinepubs/009695399/functions/exec.html) where the operating + // system will close the cloned file handle (via O_CLOEXEC being set on all files opened by + // Rust). To prevent a race like this holding this thread's binary open leading to an ETXTBSY + // (https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html) error, we + // maintain RwLock that allows non-`exclusive_spawn` binaries to spawn concurrently but ensures + // all such concurrent spawns have completed (and thus closed any cloned file handles) before + // proceeding to spawn the `exclusive_spawn` binary this thread has written. + // + // See: https://github.com/golang/go/issues/22315 for an excellent description of this generic + // unix problem. + let mut fork_exec = move || ManagedChild::spawn(&mut command, None); + let mut child = { + if exclusive_spawn { + let _write_locked = self.spawn_lock.write().await; + + // Despite the mitigations taken against racing our own forks, forks can happen in our + // process but outside of our control (in libraries). As such, we back-stop by sleeping and + // trying again for a while if we do hit one of these fork races we do not control. + const MAX_ETXTBSY_WAIT: Duration = Duration::from_millis(100); + let mut retries: u32 = 0; + let mut sleep_millis = 1; + + let start_time = std::time::Instant::now(); + loop { + match fork_exec() { + Err(e) => { + if e.raw_os_error() == Some(libc::ETXTBSY) + && start_time.elapsed() < MAX_ETXTBSY_WAIT + { + tokio::time::sleep(std::time::Duration::from_millis(sleep_millis)) + .await; + retries += 1; + sleep_millis *= 2; + continue; + } else if retries > 0 { + break Err(format!( "Error launching process after {} {} for ETXTBSY. 
Final error was: {:?}", retries, if retries == 1 { "retry" } else { "retries" }, e )); - } else { - break Err(format!("Error launching process: {e:?}")); - } + } else { + break Err(format!("Error launching process: {e:?}")); + } + } + Ok(child) => break Ok(child), + } + } + } else { + let _read_locked = self.spawn_lock.read().await; + fork_exec().map_err(|e| format!("Error launching process: {e:?}")) } - Ok(child) => break Ok(child), - } + }?; + + debug!("spawned local process as {:?} for {:?}", child.id(), req); + let stdout_stream = FramedRead::new(child.stdout.take().unwrap(), BytesCodec::new()) + .map_ok(|bytes| ChildOutput::Stdout(bytes.into())) + .fuse() + .boxed(); + let stderr_stream = FramedRead::new(child.stderr.take().unwrap(), BytesCodec::new()) + .map_ok(|bytes| ChildOutput::Stderr(bytes.into())) + .fuse() + .boxed(); + let exit_stream = async move { + child + .wait() + .map_ok(|exit_status| { + ChildOutput::Exit(ExitCode( + exit_status + .code() + .or_else(|| exit_status.signal().map(Neg::neg)) + .expect("Child process should exit via returned code or signal."), + )) + }) + .await } - } else { - let _read_locked = self.spawn_lock.read().await; - fork_exec().map_err(|e| format!("Error launching process: {e:?}")) - } - }?; - - debug!("spawned local process as {:?} for {:?}", child.id(), req); - let stdout_stream = FramedRead::new(child.stdout.take().unwrap(), BytesCodec::new()) - .map_ok(|bytes| ChildOutput::Stdout(bytes.into())) - .fuse() - .boxed(); - let stderr_stream = FramedRead::new(child.stderr.take().unwrap(), BytesCodec::new()) - .map_ok(|bytes| ChildOutput::Stderr(bytes.into())) - .fuse() - .boxed(); - let exit_stream = async move { - child - .wait() - .map_ok(|exit_status| { - ChildOutput::Exit(ExitCode( - exit_status - .code() - .or_else(|| exit_status.signal().map(Neg::neg)) - .expect("Child process should exit via returned code or signal."), - )) - }) - .await + .into_stream() + .boxed(); + let result_stream = + futures::stream::select_all(vec![stdout_stream, stderr_stream, exit_stream]); + + Ok(result_stream + .map_err(|e| format!("Failed to consume process outputs: {e:?}")) + .boxed()) } - .into_stream() - .boxed(); - let result_stream = - futures::stream::select_all(vec![stdout_stream, stderr_stream, exit_stream]); - - Ok( - result_stream - .map_err(|e| format!("Failed to consume process outputs: {e:?}")) - .boxed(), - ) - } } #[async_trait] pub trait CapturedWorkdir { - type WorkdirToken: Clone + Send; - - async fn run_and_capture_workdir( - &self, - req: Process, - context: Context, - store: Store, - executor: Executor, - workdir_path: PathBuf, - workdir_token: Self::WorkdirToken, - exclusive_spawn: bool, - ) -> Result { - let start_time = Instant::now(); - let mut stdout = BytesMut::with_capacity(8192); - let mut stderr = BytesMut::with_capacity(8192); - - // Spawn the process. - // NB: We fully buffer the `Stream` into the stdout/stderr buffers, but the idea going forward - // is that we eventually want to pass incremental results on down the line for streaming - // process results to console logs, etc. 
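// Illustrative sketch, not part of the original change: the ETXTBSY retry-with-backoff
// strategy described in the comment above, reduced to synchronous `std` + `libc` code.
// The real `run_in_workdir` wraps `ManagedChild::spawn` and sleeps on the tokio runtime;
// the helper name below is hypothetical.
use std::process::{Child, Command};
use std::time::{Duration, Instant};

fn spawn_with_etxtbsy_retry(command: &mut Command) -> Result<Child, String> {
    const MAX_ETXTBSY_WAIT: Duration = Duration::from_millis(100);
    let mut retries: u32 = 0;
    let mut sleep_millis = 1;
    let start_time = Instant::now();
    loop {
        match command.spawn() {
            // A concurrent fork elsewhere in this process may still hold a cloned handle
            // to the binary being spawned: back off briefly and retry while that is plausible.
            Err(e)
                if e.raw_os_error() == Some(libc::ETXTBSY)
                    && start_time.elapsed() < MAX_ETXTBSY_WAIT =>
            {
                std::thread::sleep(Duration::from_millis(sleep_millis));
                retries += 1;
                sleep_millis *= 2;
            }
            Err(e) if retries > 0 => {
                return Err(format!(
                    "Error launching process after {retries} retries for ETXTBSY: {e:?}"
                ));
            }
            Err(e) => return Err(format!("Error launching process: {e:?}")),
            Ok(child) => return Ok(child),
        }
    }
}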
- let exit_code_result = { - let workdir_token = workdir_token.clone(); - let exit_code_future = collect_child_outputs( - &mut stdout, - &mut stderr, - self - .run_in_workdir( - &context, - &workdir_path, - workdir_token, - req.clone(), - exclusive_spawn, - ) - .await?, - ); - if let Some(req_timeout) = req.timeout { - timeout(req_timeout, exit_code_future) - .await - .map_err(|e| e.to_string()) - .and_then(|r| r) - } else { - exit_code_future.await - } - }; + type WorkdirToken: Clone + Send; + + async fn run_and_capture_workdir( + &self, + req: Process, + context: Context, + store: Store, + executor: Executor, + workdir_path: PathBuf, + workdir_token: Self::WorkdirToken, + exclusive_spawn: bool, + ) -> Result { + let start_time = Instant::now(); + let mut stdout = BytesMut::with_capacity(8192); + let mut stderr = BytesMut::with_capacity(8192); + + // Spawn the process. + // NB: We fully buffer the `Stream` into the stdout/stderr buffers, but the idea going forward + // is that we eventually want to pass incremental results on down the line for streaming + // process results to console logs, etc. + let exit_code_result = { + let workdir_token = workdir_token.clone(); + let exit_code_future = collect_child_outputs( + &mut stdout, + &mut stderr, + self.run_in_workdir( + &context, + &workdir_path, + workdir_token, + req.clone(), + exclusive_spawn, + ) + .await?, + ); + if let Some(req_timeout) = req.timeout { + timeout(req_timeout, exit_code_future) + .await + .map_err(|e| e.to_string()) + .and_then(|r| r) + } else { + exit_code_future.await + } + }; - // Capture the process outputs. - self - .prepare_workdir_for_capture(&context, &workdir_path, workdir_token, &req) - .await?; - let output_snapshot = if req.output_files.is_empty() && req.output_directories.is_empty() { - store::Snapshot::empty() - } else { - let root = if let Some(ref working_directory) = req.working_directory { - workdir_path.join(working_directory) - } else { - workdir_path.clone() - }; - // Use no ignore patterns, because we are looking for explicitly listed paths. - let posix_fs = Arc::new( + // Capture the process outputs. + self.prepare_workdir_for_capture(&context, &workdir_path, workdir_token, &req) + .await?; + let output_snapshot = if req.output_files.is_empty() && req.output_directories.is_empty() { + store::Snapshot::empty() + } else { + let root = if let Some(ref working_directory) = req.working_directory { + workdir_path.join(working_directory) + } else { + workdir_path.clone() + }; + // Use no ignore patterns, because we are looking for explicitly listed paths. + let posix_fs = Arc::new( fs::PosixFS::new(root, fs::GitignoreStyleExcludes::empty(), executor.clone()).map_err( |err| { format!("Error making posix_fs to fetch local process execution output files: {err}") }, )?, ); - CommandRunner::construct_output_snapshot( - store.clone(), - posix_fs, - req.output_files, - req.output_directories, - ) - .await? - }; - - let elapsed = start_time.elapsed(); - let result_metadata = ProcessResultMetadata::new( - Some(elapsed.into()), - ProcessResultSource::Ran, - req.execution_environment, - context.run_id, - ); + CommandRunner::construct_output_snapshot( + store.clone(), + posix_fs, + req.output_files, + req.output_directories, + ) + .await? 
+ }; - match exit_code_result { - Ok(exit_code) => { - let (stdout_digest, stderr_digest) = try_join!( - store.store_file_bytes(stdout.into(), true), - store.store_file_bytes(stderr.into(), true), - )?; - Ok(FallibleProcessResultWithPlatform { - stdout_digest, - stderr_digest, - exit_code, - output_directory: output_snapshot.into(), - metadata: result_metadata, - }) - } - Err(msg) if msg == "deadline has elapsed" => { - stderr.extend_from_slice( - format!( - "\n\nExceeded timeout of {:.1} seconds when executing local process: {}", - req.timeout.map(|dur| dur.as_secs_f32()).unwrap_or(-1.0), - req.description - ) - .as_bytes(), + let elapsed = start_time.elapsed(); + let result_metadata = ProcessResultMetadata::new( + Some(elapsed.into()), + ProcessResultSource::Ran, + req.execution_environment, + context.run_id, ); - let (stdout_digest, stderr_digest) = try_join!( - store.store_file_bytes(stdout.into(), true), - store.store_file_bytes(stderr.into(), true), - )?; - - Ok(FallibleProcessResultWithPlatform { - stdout_digest, - stderr_digest, - exit_code: -libc::SIGTERM, - output_directory: EMPTY_DIRECTORY_DIGEST.clone(), - metadata: result_metadata, - }) - } - Err(msg) => Err(msg), + match exit_code_result { + Ok(exit_code) => { + let (stdout_digest, stderr_digest) = try_join!( + store.store_file_bytes(stdout.into(), true), + store.store_file_bytes(stderr.into(), true), + )?; + Ok(FallibleProcessResultWithPlatform { + stdout_digest, + stderr_digest, + exit_code, + output_directory: output_snapshot.into(), + metadata: result_metadata, + }) + } + Err(msg) if msg == "deadline has elapsed" => { + stderr.extend_from_slice( + format!( + "\n\nExceeded timeout of {:.1} seconds when executing local process: {}", + req.timeout.map(|dur| dur.as_secs_f32()).unwrap_or(-1.0), + req.description + ) + .as_bytes(), + ); + + let (stdout_digest, stderr_digest) = try_join!( + store.store_file_bytes(stdout.into(), true), + store.store_file_bytes(stderr.into(), true), + )?; + + Ok(FallibleProcessResultWithPlatform { + stdout_digest, + stderr_digest, + exit_code: -libc::SIGTERM, + output_directory: EMPTY_DIRECTORY_DIGEST.clone(), + metadata: result_metadata, + }) + } + Err(msg) => Err(msg), + } + } + + /// + /// Spawn the given process in a working directory prepared with its expected input digest. + /// + /// NB: The implementer of this method must guarantee that the spawned process has completely + /// exited when the returned BoxStream is Dropped. Otherwise it might be possible for the process + /// to observe the working directory that it is running in being torn down. In most cases, this + /// requires Drop handlers to synchronously wait for their child processes to exit. + /// + /// If the process to be executed has an `argv[0]` that points into its input digest then + /// `exclusive_spawn` will be `true` and the spawn implementation should account for the + /// possibility of concurrent fork+exec holding open the cloned `argv[0]` file descriptor, which, + /// if unhandled, will result in ETXTBSY errors spawning the process. + /// + /// See the documentation note in `CommandRunner` in this file for more details. + /// + /// TODO(John Sirois): + /// Centralize local spawning to one object - we currently spawn here (in + /// process_execution::local::CommandRunner) to launch user `Process`es and in + /// process_execution::nailgun::CommandRunner when a jvm nailgun server needs to be started. 
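// Illustrative sketch, not part of the original change: the `Err(msg) if msg == "deadline
// has elapsed"` branch in `run_and_capture_workdir` above works because the
// `tokio::time::timeout` error is stringified with `e.to_string()`. A minimal
// reproduction of that conversion (the function name is hypothetical):
use std::time::Duration;

async fn timeout_error_is_a_plain_string() -> Result<i32, String> {
    tokio::time::timeout(Duration::from_millis(1), async {
        tokio::time::sleep(Duration::from_secs(60)).await;
        Ok(0)
    })
    .await
    .map_err(|e| e.to_string()) // yields Err("deadline has elapsed".to_string()) when it fires
    .and_then(|r| r)
}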
The + /// proper handling of `exclusive_spawn` really requires a single point of control for all + /// fork+execs in the scheduler. For now we rely on the fact that the process_execution::nailgun + /// module is dead code in practice. + /// + async fn run_in_workdir<'s, 'c, 'w, 'r>( + &'s self, + context: &'c Context, + workdir_path: &'w Path, + workdir_token: Self::WorkdirToken, + req: Process, + exclusive_spawn: bool, + ) -> Result>, String>; + + /// + /// An optionally-implemented method which is called after the child process has completed, but + /// before capturing the sandbox. The default implementation does nothing. + /// + async fn prepare_workdir_for_capture( + &self, + _context: &Context, + _workdir_path: &Path, + _workdir_token: Self::WorkdirToken, + _req: &Process, + ) -> Result<(), String> { + Ok(()) } - } - - /// - /// Spawn the given process in a working directory prepared with its expected input digest. - /// - /// NB: The implementer of this method must guarantee that the spawned process has completely - /// exited when the returned BoxStream is Dropped. Otherwise it might be possible for the process - /// to observe the working directory that it is running in being torn down. In most cases, this - /// requires Drop handlers to synchronously wait for their child processes to exit. - /// - /// If the process to be executed has an `argv[0]` that points into its input digest then - /// `exclusive_spawn` will be `true` and the spawn implementation should account for the - /// possibility of concurrent fork+exec holding open the cloned `argv[0]` file descriptor, which, - /// if unhandled, will result in ETXTBSY errors spawning the process. - /// - /// See the documentation note in `CommandRunner` in this file for more details. - /// - /// TODO(John Sirois): - /// Centralize local spawning to one object - we currently spawn here (in - /// process_execution::local::CommandRunner) to launch user `Process`es and in - /// process_execution::nailgun::CommandRunner when a jvm nailgun server needs to be started. The - /// proper handling of `exclusive_spawn` really requires a single point of control for all - /// fork+execs in the scheduler. For now we rely on the fact that the process_execution::nailgun - /// module is dead code in practice. - /// - async fn run_in_workdir<'s, 'c, 'w, 'r>( - &'s self, - context: &'c Context, - workdir_path: &'w Path, - workdir_token: Self::WorkdirToken, - req: Process, - exclusive_spawn: bool, - ) -> Result>, String>; - - /// - /// An optionally-implemented method which is called after the child process has completed, but - /// before capturing the sandbox. The default implementation does nothing. - /// - async fn prepare_workdir_for_capture( - &self, - _context: &Context, - _workdir_path: &Path, - _workdir_token: Self::WorkdirToken, - _req: &Process, - ) -> Result<(), String> { - Ok(()) - } } /// /// Mutates a Process, replacing any `{chroot}` placeholders with `chroot_path`. 
/// pub fn apply_chroot(chroot_path: &str, req: &mut Process) { - for value in req.env.values_mut() { - if value.contains("{chroot}") { - *value = value.replace("{chroot}", chroot_path); + for value in req.env.values_mut() { + if value.contains("{chroot}") { + *value = value.replace("{chroot}", chroot_path); + } } - } - for value in &mut req.argv { - if value.contains("{chroot}") { - *value = value.replace("{chroot}", chroot_path); + for value in &mut req.argv { + if value.contains("{chroot}") { + *value = value.replace("{chroot}", chroot_path); + } } - } } /// Creates a Digest for the entire input sandbox contents of the given Process, including absolute /// symlinks to immutable inputs, named caches, and JDKs (if configured). pub async fn prepare_workdir_digest( - req: &Process, - input_digest: DirectoryDigest, - store: &Store, - named_caches: &NamedCaches, - immutable_inputs: Option<&ImmutableInputs>, - named_caches_prefix: Option<&Path>, - immutable_inputs_prefix: Option<&Path>, + req: &Process, + input_digest: DirectoryDigest, + store: &Store, + named_caches: &NamedCaches, + immutable_inputs: Option<&ImmutableInputs>, + named_caches_prefix: Option<&Path>, + immutable_inputs_prefix: Option<&Path>, ) -> Result { - let mut paths = Vec::new(); - - // Symlinks for immutable inputs and named caches. - let mut workdir_symlinks = Vec::new(); - { - if let Some(immutable_inputs) = immutable_inputs { - let symlinks = immutable_inputs - .local_paths(&req.input_digests.immutable_inputs) - .await?; + let mut paths = Vec::new(); + + // Symlinks for immutable inputs and named caches. + let mut workdir_symlinks = Vec::new(); + { + if let Some(immutable_inputs) = immutable_inputs { + let symlinks = immutable_inputs + .local_paths(&req.input_digests.immutable_inputs) + .await?; + + match immutable_inputs_prefix { + Some(prefix) => workdir_symlinks.extend(symlinks.into_iter().map(|symlink| { + WorkdirSymlink { + src: symlink.src, + dst: prefix.join( + symlink + .dst + .strip_prefix(immutable_inputs.workdir()) + .unwrap(), + ), + } + })), + None => workdir_symlinks.extend(symlinks), + } + } - match immutable_inputs_prefix { - Some(prefix) => workdir_symlinks.extend(symlinks.into_iter().map(|symlink| { - WorkdirSymlink { - src: symlink.src, - dst: prefix.join( - symlink - .dst - .strip_prefix(immutable_inputs.workdir()) - .unwrap(), - ), - } - })), - None => workdir_symlinks.extend(symlinks), - } + let symlinks = named_caches + .paths(&req.append_only_caches) + .await + .map_err(|err| { + StoreError::Unclassified(format!( + "Failed to make named cache(s) for local execution: {err:?}" + )) + })?; + match named_caches_prefix { + Some(prefix) => { + workdir_symlinks.extend(symlinks.into_iter().map(|symlink| WorkdirSymlink { + src: symlink.src, + dst: prefix.join(symlink.dst.strip_prefix(named_caches.base_path()).unwrap()), + })) + } + None => workdir_symlinks.extend(symlinks), + } } - - let symlinks = named_caches - .paths(&req.append_only_caches) - .await - .map_err(|err| { - StoreError::Unclassified(format!( - "Failed to make named cache(s) for local execution: {err:?}" - )) - })?; - match named_caches_prefix { - Some(prefix) => workdir_symlinks.extend(symlinks.into_iter().map(|symlink| WorkdirSymlink { - src: symlink.src, - dst: prefix.join(symlink.dst.strip_prefix(named_caches.base_path()).unwrap()), - })), - None => workdir_symlinks.extend(symlinks), + paths.extend(workdir_symlinks.iter().map(|symlink| TypedPath::Link { + path: &symlink.src, + target: &symlink.dst, + })); + + // Symlink for JDK. 
+ if let Some(jdk_home) = &req.jdk_home { + paths.push(TypedPath::Link { + path: Path::new(".jdk"), + target: jdk_home, + }); } - } - paths.extend(workdir_symlinks.iter().map(|symlink| TypedPath::Link { - path: &symlink.src, - target: &symlink.dst, - })); - - // Symlink for JDK. - if let Some(jdk_home) = &req.jdk_home { - paths.push(TypedPath::Link { - path: Path::new(".jdk"), - target: jdk_home, - }); - } - - // The bazel remote execution API specifies that the parent directories for output files and - // output directories should be created before execution completes. - let parent_paths_to_create: HashSet<_> = req - .output_files - .iter() - .chain(req.output_directories.iter()) - .filter_map(|rel_path| rel_path.as_ref().parent()) - .filter(|parent| !parent.as_os_str().is_empty()) - .collect(); - paths.extend(parent_paths_to_create.into_iter().map(TypedPath::Dir)); - - // Finally, create a tree for all of the additional paths, and merge it with the input - // Digest. - let additions = DigestTrie::from_unique_paths(paths, &HashMap::new())?; - - store.merge(vec![input_digest, additions.into()]).await + + // The bazel remote execution API specifies that the parent directories for output files and + // output directories should be created before execution completes. + let parent_paths_to_create: HashSet<_> = req + .output_files + .iter() + .chain(req.output_directories.iter()) + .filter_map(|rel_path| rel_path.as_ref().parent()) + .filter(|parent| !parent.as_os_str().is_empty()) + .collect(); + paths.extend(parent_paths_to_create.into_iter().map(TypedPath::Dir)); + + // Finally, create a tree for all of the additional paths, and merge it with the input + // Digest. + let additions = DigestTrie::from_unique_paths(paths, &HashMap::new())?; + + store.merge(vec![input_digest, additions.into()]).await } /// Prepares the given workdir for use by the given Process. @@ -677,63 +679,63 @@ pub async fn prepare_workdir_digest( /// `exclusive_spawn` is required. /// pub async fn prepare_workdir( - workdir_path: PathBuf, - workdir_root_path: &Path, - req: &Process, - materialized_input_digest: DirectoryDigest, - store: &Store, - named_caches: &NamedCaches, - immutable_inputs: &ImmutableInputs, - named_caches_prefix: Option<&Path>, - immutable_inputs_prefix: Option<&Path>, + workdir_path: PathBuf, + workdir_root_path: &Path, + req: &Process, + materialized_input_digest: DirectoryDigest, + store: &Store, + named_caches: &NamedCaches, + immutable_inputs: &ImmutableInputs, + named_caches_prefix: Option<&Path>, + immutable_inputs_prefix: Option<&Path>, ) -> Result { - // Capture argv0 as the executable path so that we can test whether we have created it in the - // sandbox. - let maybe_executable_path = { - let mut executable_path = PathBuf::from(&req.argv[0]); - if executable_path.is_relative() { - if let Some(working_directory) = &req.working_directory { - executable_path = working_directory.as_ref().join(executable_path) - } - Some(workdir_path.join(executable_path)) - } else { - None - } - }; - - // Prepare the digest to use, and then materialize it. 
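// Illustrative sketch, not part of the original change: the "create parent directories
// for declared outputs" rule applied in `prepare_workdir_digest` above, reduced to `std`
// only. The helper name is hypothetical; the real code feeds the paths into a `DigestTrie`.
use std::collections::HashSet;
use std::path::{Path, PathBuf};

fn parent_dirs_to_create(output_files: &[&str], output_dirs: &[&str]) -> HashSet<PathBuf> {
    output_files
        .iter()
        .chain(output_dirs.iter())
        .filter_map(|rel_path| Path::new(rel_path).parent())
        // A top-level output like "treats.ext" has an empty parent and needs no directory.
        .filter(|parent| !parent.as_os_str().is_empty())
        .map(Path::to_path_buf)
        .collect()
}

// For output_files = ["cats/roland.ext", "treats.ext"] and output_dirs = ["birds/falcons"],
// this yields {"cats", "birds"}, matching the behaviour exercised by the
// `all_containing_directories_for_outputs_are_created` test further below.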
- in_workunit!("setup_sandbox", Level::Debug, |_workunit| async move { - let complete_input_digest = prepare_workdir_digest( - req, - materialized_input_digest, - store, - named_caches, - Some(immutable_inputs), - named_caches_prefix, - immutable_inputs_prefix, - ) - .await?; - - let mut mutable_paths = req.output_files.clone(); - mutable_paths.extend(req.output_directories.clone()); - store - .materialize_directory( - workdir_path, - workdir_root_path, - complete_input_digest, - false, - &mutable_paths, - Permissions::Writable, - ) - .await?; - - if let Some(executable_path) = maybe_executable_path { - Ok(tokio::fs::metadata(executable_path).await.is_ok()) - } else { - Ok(false) - } - }) - .await + // Capture argv0 as the executable path so that we can test whether we have created it in the + // sandbox. + let maybe_executable_path = { + let mut executable_path = PathBuf::from(&req.argv[0]); + if executable_path.is_relative() { + if let Some(working_directory) = &req.working_directory { + executable_path = working_directory.as_ref().join(executable_path) + } + Some(workdir_path.join(executable_path)) + } else { + None + } + }; + + // Prepare the digest to use, and then materialize it. + in_workunit!("setup_sandbox", Level::Debug, |_workunit| async move { + let complete_input_digest = prepare_workdir_digest( + req, + materialized_input_digest, + store, + named_caches, + Some(immutable_inputs), + named_caches_prefix, + immutable_inputs_prefix, + ) + .await?; + + let mut mutable_paths = req.output_files.clone(); + mutable_paths.extend(req.output_directories.clone()); + store + .materialize_directory( + workdir_path, + workdir_root_path, + complete_input_digest, + false, + &mutable_paths, + Permissions::Writable, + ) + .await?; + + if let Some(executable_path) = maybe_executable_path { + Ok(tokio::fs::metadata(executable_path).await.is_ok()) + } else { + Ok(false) + } + }) + .await } /// @@ -743,21 +745,21 @@ pub async fn prepare_workdir( /// decide whether to preserve it. /// pub fn create_sandbox( - executor: Executor, - base_directory: &Path, - description: &str, - keep_sandboxes: KeepSandboxes, + executor: Executor, + base_directory: &Path, + description: &str, + keep_sandboxes: KeepSandboxes, ) -> Result { - let workdir = tempfile::Builder::new() - .prefix("pants-sandbox-") - .tempdir_in(base_directory) - .map_err(|err| format!("Error making tempdir for local process execution: {err:?}"))?; - - let mut sandbox = AsyncDropSandbox(executor, workdir.path().to_owned(), Some(workdir)); - if keep_sandboxes == KeepSandboxes::Always { - sandbox.keep(description); - } - Ok(sandbox) + let workdir = tempfile::Builder::new() + .prefix("pants-sandbox-") + .tempdir_in(base_directory) + .map_err(|err| format!("Error making tempdir for local process execution: {err:?}"))?; + + let mut sandbox = AsyncDropSandbox(executor, workdir.path().to_owned(), Some(workdir)); + if keep_sandboxes == KeepSandboxes::Always { + sandbox.keep(description); + } + Ok(sandbox) } /// Dropping sandboxes can involve a lot of IO, so it is spawned to the background as a blocking @@ -766,92 +768,92 @@ pub fn create_sandbox( pub struct AsyncDropSandbox(Executor, PathBuf, Option); impl AsyncDropSandbox { - pub fn path(&self) -> &Path { - &self.1 - } - - /// - /// Consume the `TempDir` without deleting directory on the filesystem, meaning that the - /// temporary directory will no longer be automatically deleted when dropped. 
- /// - pub fn keep(&mut self, description: &str) { - if let Some(workdir) = self.2.take() { - let preserved_path = workdir.into_path(); - info!( - "Preserving local process execution dir {} for {}", - preserved_path.display(), - description, - ); + pub fn path(&self) -> &Path { + &self.1 + } + + /// + /// Consume the `TempDir` without deleting directory on the filesystem, meaning that the + /// temporary directory will no longer be automatically deleted when dropped. + /// + pub fn keep(&mut self, description: &str) { + if let Some(workdir) = self.2.take() { + let preserved_path = workdir.into_path(); + info!( + "Preserving local process execution dir {} for {}", + preserved_path.display(), + description, + ); + } } - } } impl Drop for AsyncDropSandbox { - fn drop(&mut self) { - if let Some(sandbox) = self.2.take() { - let _background_cleanup = self.0.native_spawn_blocking(|| std::mem::drop(sandbox)); + fn drop(&mut self) { + if let Some(sandbox) = self.2.take() { + let _background_cleanup = self.0.native_spawn_blocking(|| std::mem::drop(sandbox)); + } } - } } /// Create a file called __run.sh with the env, cwd and argv used by Pants to facilitate debugging. pub fn setup_run_sh_script( - sandbox_path: &Path, - env: &BTreeMap, - working_directory: &Option, - argv: &[String], - workdir_path: &Path, + sandbox_path: &Path, + env: &BTreeMap, + working_directory: &Option, + argv: &[String], + workdir_path: &Path, ) -> Result<(), String> { - let mut env_var_strings: Vec = vec![]; - for (key, value) in env.iter() { - let quoted_arg = bash::escape(value); - let arg_str = str::from_utf8("ed_arg) - .map_err(|e| format!("{e:?}"))? - .to_string(); - let formatted_assignment = format!("{key}={arg_str}"); - env_var_strings.push(formatted_assignment); - } - let stringified_env_vars: String = env_var_strings.join(" "); - - // Shell-quote every command-line argument, as necessary. - let mut full_command_line: Vec = vec![]; - for arg in argv.iter() { - let quoted_arg = bash::escape(arg); - let arg_str = str::from_utf8("ed_arg) - .map_err(|e| format!("{e:?}"))? - .to_string(); - full_command_line.push(arg_str); - } - - let stringified_cwd = { - let cwd = if let Some(ref working_directory) = working_directory { - workdir_path.join(working_directory) - } else { - workdir_path.to_owned() + let mut env_var_strings: Vec = vec![]; + for (key, value) in env.iter() { + let quoted_arg = bash::escape(value); + let arg_str = str::from_utf8("ed_arg) + .map_err(|e| format!("{e:?}"))? + .to_string(); + let formatted_assignment = format!("{key}={arg_str}"); + env_var_strings.push(formatted_assignment); + } + let stringified_env_vars: String = env_var_strings.join(" "); + + // Shell-quote every command-line argument, as necessary. + let mut full_command_line: Vec = vec![]; + for arg in argv.iter() { + let quoted_arg = bash::escape(arg); + let arg_str = str::from_utf8("ed_arg) + .map_err(|e| format!("{e:?}"))? + .to_string(); + full_command_line.push(arg_str); + } + + let stringified_cwd = { + let cwd = if let Some(ref working_directory) = working_directory { + workdir_path.join(working_directory) + } else { + workdir_path.to_owned() + }; + let quoted_cwd = bash::escape(cwd); + str::from_utf8("ed_cwd) + .map_err(|e| format!("{e:?}"))? + .to_string() }; - let quoted_cwd = bash::escape(cwd); - str::from_utf8("ed_cwd) - .map_err(|e| format!("{e:?}"))? 
- .to_string() - }; - - let stringified_command_line: String = full_command_line.join(" "); - let full_script = format!( - "#!/usr/bin/env bash + + let stringified_command_line: String = full_command_line.join(" "); + let full_script = format!( + "#!/usr/bin/env bash # This command line should execute the same process as pants did internally. cd {stringified_cwd} env -i {stringified_env_vars} {stringified_command_line} ", - ); - - let full_file_path = sandbox_path.join("__run.sh"); - - std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .mode(USER_EXECUTABLE_MODE) // Executable for user, read-only for others. - .open(full_file_path) - .map_err(|e| format!("{e:?}"))? - .write_all(full_script.as_bytes()) - .map_err(|e| format!("{e:?}")) + ); + + let full_file_path = sandbox_path.join("__run.sh"); + + std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .mode(USER_EXECUTABLE_MODE) // Executable for user, read-only for others. + .open(full_file_path) + .map_err(|e| format!("{e:?}"))? + .write_all(full_script.as_bytes()) + .map_err(|e| format!("{e:?}")) } diff --git a/src/rust/engine/process_execution/src/local_tests.rs b/src/rust/engine/process_execution/src/local_tests.rs index 8a0fc90d0ed..5eb5bf2dacc 100644 --- a/src/rust/engine/process_execution/src/local_tests.rs +++ b/src/rust/engine/process_execution/src/local_tests.rs @@ -17,801 +17,802 @@ use testutil::{owned_string_vec, relative_paths}; use workunit_store::{RunningWorkunit, WorkunitStore}; use crate::{ - local, local::KeepSandboxes, CacheName, CommandRunner as CommandRunnerTrait, Context, - FallibleProcessResultWithPlatform, InputDigests, NamedCaches, Process, ProcessError, - RelativePath, + local, local::KeepSandboxes, CacheName, CommandRunner as CommandRunnerTrait, Context, + FallibleProcessResultWithPlatform, InputDigests, NamedCaches, Process, ProcessError, + RelativePath, }; #[derive(PartialEq, Debug)] struct LocalTestResult { - original: FallibleProcessResultWithPlatform, - stdout_bytes: Vec, - stderr_bytes: Vec, + original: FallibleProcessResultWithPlatform, + stdout_bytes: Vec, + stderr_bytes: Vec, } #[tokio::test] #[cfg(unix)] async fn stdout() { - let result = run_command_locally(Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"]))) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + let result = run_command_locally(Process::new(owned_string_vec(&["/bin/echo", "-n", "foo"]))) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] #[cfg(unix)] async fn stdout_and_stderr_and_exit_code() { - let result = run_command_locally(Process::new(owned_string_vec(&[ - "/bin/bash", - "-c", - "echo -n foo ; echo >&2 -n bar ; exit 1", - ]))) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "foo".as_bytes()); - assert_eq!(result.stderr_bytes, "bar".as_bytes()); - assert_eq!(result.original.exit_code, 1); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + let result = run_command_locally(Process::new(owned_string_vec(&[ + "/bin/bash", + "-c", + "echo -n foo ; echo >&2 -n bar ; exit 1", + ]))) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "foo".as_bytes()); + 
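// Illustrative sketch, not part of the original change: roughly what `setup_run_sh_script`
// above writes into `__run.sh`, using a naive single-quote escape as a stand-in for the
// real `bash::escape`. Helper names are hypothetical; `test_directory_preservation`
// further below executes the real script.
use std::collections::BTreeMap;

// Naive POSIX single-quote escaping, only for illustration.
fn sh_quote(s: &str) -> String {
    format!("'{}'", s.replace('\'', r"'\''"))
}

fn render_run_sh(cwd: &str, env: &BTreeMap<String, String>, argv: &[String]) -> String {
    let env_vars = env
        .iter()
        .map(|(k, v)| format!("{k}={}", sh_quote(v)))
        .collect::<Vec<_>>()
        .join(" ");
    let command_line = argv
        .iter()
        .map(|arg| sh_quote(arg))
        .collect::<Vec<_>>()
        .join(" ");
    format!(
        "#!/usr/bin/env bash\n\
         # This command line should execute the same process as pants did internally.\n\
         cd {}\nenv -i {env_vars} {command_line}\n",
        sh_quote(cwd)
    )
}

// For cwd "/tmp/pants-sandbox-xyz", env {"FOO": "foo"} and argv ["/bin/echo", "-n", "hi"],
// the rendered script is roughly:
//   #!/usr/bin/env bash
//   # This command line should execute the same process as pants did internally.
//   cd '/tmp/pants-sandbox-xyz'
//   env -i FOO='foo' '/bin/echo' '-n' 'hi'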
assert_eq!(result.stderr_bytes, "bar".as_bytes()); + assert_eq!(result.original.exit_code, 1); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] #[cfg(unix)] async fn capture_exit_code_signal() { - // Launch a process that kills itself with a signal. - let result = run_command_locally(Process::new(owned_string_vec(&[ - "/bin/bash", - "-c", - "kill $$", - ]))) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, -15); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + // Launch a process that kills itself with a signal. + let result = run_command_locally(Process::new(owned_string_vec(&[ + "/bin/bash", + "-c", + "kill $$", + ]))) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, -15); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] #[cfg(unix)] async fn env() { - let mut env: BTreeMap = BTreeMap::new(); - env.insert("FOO".to_string(), "foo".to_string()); - env.insert("BAR".to_string(), "not foo".to_string()); - - let result = - run_command_locally(Process::new(owned_string_vec(&["/usr/bin/env"])).env(env.clone())) - .await - .unwrap(); - - let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); - let got_env: BTreeMap = stdout - .split('\n') - .filter(|line| !line.is_empty()) - .map(|line| line.splitn(2, '=')) - .map(|mut parts| { - ( - parts.next().unwrap().to_string(), - parts.next().unwrap_or("").to_string(), - ) - }) - .filter(|x| x.0 != "PATH") - .collect(); + let mut env: BTreeMap = BTreeMap::new(); + env.insert("FOO".to_string(), "foo".to_string()); + env.insert("BAR".to_string(), "not foo".to_string()); - assert_eq!(env, got_env); + let result = + run_command_locally(Process::new(owned_string_vec(&["/usr/bin/env"])).env(env.clone())) + .await + .unwrap(); + + let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); + let got_env: BTreeMap = stdout + .split('\n') + .filter(|line| !line.is_empty()) + .map(|line| line.splitn(2, '=')) + .map(|mut parts| { + ( + parts.next().unwrap().to_string(), + parts.next().unwrap_or("").to_string(), + ) + }) + .filter(|x| x.0 != "PATH") + .collect(); + + assert_eq!(env, got_env); } #[tokio::test] #[cfg(unix)] async fn env_is_deterministic() { - fn make_request() -> Process { - let mut env = BTreeMap::new(); - env.insert("FOO".to_string(), "foo".to_string()); - env.insert("BAR".to_string(), "not foo".to_string()); - Process::new(owned_string_vec(&["/usr/bin/env"])).env(env) - } + fn make_request() -> Process { + let mut env = BTreeMap::new(); + env.insert("FOO".to_string(), "foo".to_string()); + env.insert("BAR".to_string(), "not foo".to_string()); + Process::new(owned_string_vec(&["/usr/bin/env"])).env(env) + } - let result1 = run_command_locally(make_request()).await; - let result2 = run_command_locally(make_request()).await; + let result1 = run_command_locally(make_request()).await; + let result2 = run_command_locally(make_request()).await; - assert_eq!(result1.unwrap(), result2.unwrap()); + assert_eq!(result1.unwrap(), result2.unwrap()); } #[tokio::test] async fn binary_not_found() { - let err_string = run_command_locally(Process::new(owned_string_vec(&["echo", "-n", "foo"]))) - .await - .expect_err("Want Err"); - assert!(err_string.to_string().contains("Failed to execute")); - 
assert!(err_string.to_string().contains("echo")); + let err_string = run_command_locally(Process::new(owned_string_vec(&["echo", "-n", "foo"]))) + .await + .expect_err("Want Err"); + assert!(err_string.to_string().contains("Failed to execute")); + assert!(err_string.to_string().contains("echo")); } #[tokio::test] async fn output_files_none() { - let result = run_command_locally(Process::new(owned_string_vec(&[ - &find_bash(), - "-c", - "exit 0", - ]))) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + let result = run_command_locally(Process::new(owned_string_vec(&[ + &find_bash(), + "-c", + "exit 0", + ]))) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] async fn output_files_one() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - format!("echo -n {} > roland.ext", TestData::roland().string()), - ]) - .output_files(relative_paths(&["roland.ext"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!("echo -n {} > roland.ext", TestData::roland().string()), + ]) + .output_files(relative_paths(&["roland.ext"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test] async fn output_dirs() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - format!( - "/bin/mkdir cats && echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", - TestData::roland().string(), - TestData::catnip().string() - ), - ]) - .output_files(relative_paths(&["treats.ext"]).collect()) - .output_directories(relative_paths(&["cats"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::recursive().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!( + "/bin/mkdir cats && echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", + TestData::roland().string(), + TestData::catnip().string() + ), + ]) + .output_files(relative_paths(&["treats.ext"]).collect()) + .output_directories(relative_paths(&["cats"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::recursive().directory_digest() + ); } #[tokio::test] async fn output_files_many() { - let result = run_command_locally( - Process::new(vec![ - 
find_bash(), - "-c".to_owned(), - format!( - "echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", - TestData::roland().string(), - TestData::catnip().string() - ), - ]) - .output_files(relative_paths(&["cats/roland.ext", "treats.ext"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::recursive().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!( + "echo -n {} > cats/roland.ext ; echo -n {} > treats.ext", + TestData::roland().string(), + TestData::catnip().string() + ), + ]) + .output_files(relative_paths(&["cats/roland.ext", "treats.ext"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::recursive().directory_digest() + ); } #[tokio::test] async fn output_files_execution_failure() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - format!( - "echo -n {} > roland.ext ; exit 1", - TestData::roland().string() - ), - ]) - .output_files(relative_paths(&["roland.ext"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 1); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!( + "echo -n {} > roland.ext ; exit 1", + TestData::roland().string() + ), + ]) + .output_files(relative_paths(&["roland.ext"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 1); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test] async fn output_files_partial_output() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - format!("echo -n {} > roland.ext", TestData::roland().string()), - ]) - .output_files( - relative_paths(&["roland.ext", "susannah"]) - .into_iter() - .collect(), - ), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!("echo -n {} > roland.ext", TestData::roland().string()), + ]) + .output_files( + relative_paths(&["roland.ext", "susannah"]) + .into_iter() + .collect(), + ), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test] async fn output_overlapping_file_and_dir() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - format!("echo -n {} > 
cats/roland.ext", TestData::roland().string()), - ]) - .output_files(relative_paths(&["cats/roland.ext"]).collect()) - .output_directories(relative_paths(&["cats"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::nested().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!("echo -n {} > cats/roland.ext", TestData::roland().string()), + ]) + .output_files(relative_paths(&["cats/roland.ext"]).collect()) + .output_directories(relative_paths(&["cats"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::nested().directory_digest() + ); } #[tokio::test] async fn append_only_cache_created() { - let name = "geo"; - let dest_base = ".cache"; - let cache_name = CacheName::new(name.to_owned()).unwrap(); - let cache_dest = RelativePath::new(format!("{dest_base}/{name}")).unwrap(); - let result = run_command_locally( - Process::new(owned_string_vec(&["/bin/ls", dest_base])) - .append_only_caches(vec![(cache_name, cache_dest)].into_iter().collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, format!("{name}\n").as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + let name = "geo"; + let dest_base = ".cache"; + let cache_name = CacheName::new(name.to_owned()).unwrap(); + let cache_dest = RelativePath::new(format!("{dest_base}/{name}")).unwrap(); + let result = run_command_locally( + Process::new(owned_string_vec(&["/bin/ls", dest_base])) + .append_only_caches(vec![(cache_name, cache_dest)].into_iter().collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, format!("{name}\n").as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] async fn jdk_symlink() { - let preserved_work_tmpdir = TempDir::new().unwrap(); - let roland = TestData::roland().bytes(); - std::fs::write( - preserved_work_tmpdir.path().join("roland.ext"), - roland.clone(), - ) - .expect("Writing temporary file"); - - let mut process = Process::new(vec!["/bin/cat".to_owned(), ".jdk/roland.ext".to_owned()]); - process.timeout = one_second(); - process.description = "cat roland.ext".to_string(); - process.jdk_home = Some(preserved_work_tmpdir.path().to_path_buf()); - - let result = run_command_locally(process).await.unwrap(); - - assert_eq!(result.stdout_bytes, roland); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); + let preserved_work_tmpdir = TempDir::new().unwrap(); + let roland = TestData::roland().bytes(); + std::fs::write( + preserved_work_tmpdir.path().join("roland.ext"), + roland.clone(), + ) + .expect("Writing temporary file"); + + let mut process = Process::new(vec!["/bin/cat".to_owned(), ".jdk/roland.ext".to_owned()]); + process.timeout = one_second(); + process.description = "cat roland.ext".to_string(); + process.jdk_home = 
Some(preserved_work_tmpdir.path().to_path_buf()); + + let result = run_command_locally(process).await.unwrap(); + + assert_eq!(result.stdout_bytes, roland); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!(result.original.output_directory, *EMPTY_DIRECTORY_DIGEST); } #[tokio::test] #[cfg(unix)] async fn test_apply_chroot() { - let mut env: BTreeMap = BTreeMap::new(); - env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); + let mut env: BTreeMap = BTreeMap::new(); + env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); - let work_dir = TempDir::new().unwrap(); - let mut req = Process::new(owned_string_vec(&["/usr/bin/env"])).env(env.clone()); - local::apply_chroot(work_dir.path().to_str().unwrap(), &mut req); + let work_dir = TempDir::new().unwrap(); + let mut req = Process::new(owned_string_vec(&["/usr/bin/env"])).env(env.clone()); + local::apply_chroot(work_dir.path().to_str().unwrap(), &mut req); - let path = format!("/usr/bin:{}/bin", work_dir.path().to_str().unwrap()); + let path = format!("/usr/bin:{}/bin", work_dir.path().to_str().unwrap()); - assert_eq!(&path, req.env.get(&"PATH".to_string()).unwrap()); + assert_eq!(&path, req.env.get(&"PATH".to_string()).unwrap()); } #[tokio::test] async fn test_chroot_placeholder() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let mut env: BTreeMap = BTreeMap::new(); - env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); - - let work_tmpdir = TempDir::new().unwrap(); - let work_root = work_tmpdir.path().to_owned(); - - let result = run_command_locally_in_dir( - Process::new(vec!["/usr/bin/env".to_owned()]).env(env.clone()), - work_root.clone(), - KeepSandboxes::Always, - &mut workunit, - None, - None, - ) - .await - .unwrap(); - - let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); - let got_env: BTreeMap = stdout - .split('\n') - .filter(|line| !line.is_empty()) - .map(|line| line.splitn(2, '=')) - .map(|mut parts| { - ( - parts.next().unwrap().to_string(), - parts.next().unwrap_or("").to_string(), - ) - }) - .collect(); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let mut env: BTreeMap = BTreeMap::new(); + env.insert("PATH".to_string(), "/usr/bin:{chroot}/bin".to_string()); + + let work_tmpdir = TempDir::new().unwrap(); + let work_root = work_tmpdir.path().to_owned(); + + let result = run_command_locally_in_dir( + Process::new(vec!["/usr/bin/env".to_owned()]).env(env.clone()), + work_root.clone(), + KeepSandboxes::Always, + &mut workunit, + None, + None, + ) + .await + .unwrap(); - let path = format!("/usr/bin:{}", work_root.to_str().unwrap()); - assert!(got_env.get(&"PATH".to_string()).unwrap().starts_with(&path)); - assert!(got_env.get(&"PATH".to_string()).unwrap().ends_with("/bin")); + let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); + let got_env: BTreeMap = stdout + .split('\n') + .filter(|line| !line.is_empty()) + .map(|line| line.splitn(2, '=')) + .map(|mut parts| { + ( + parts.next().unwrap().to_string(), + parts.next().unwrap_or("").to_string(), + ) + }) + .collect(); + + let path = format!("/usr/bin:{}", work_root.to_str().unwrap()); + assert!(got_env.get(&"PATH".to_string()).unwrap().starts_with(&path)); + assert!(got_env.get(&"PATH".to_string()).unwrap().ends_with("/bin")); } #[tokio::test] async fn test_directory_preservation() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let preserved_work_tmpdir = TempDir::new().unwrap(); - let 
preserved_work_root = preserved_work_tmpdir.path().to_owned(); - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - - // Prepare the store to contain /cats/roland.ext, because the EPR needs to materialize it and then run - // from the ./cats directory. - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) - .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::nested().directory(), true) - .await - .expect("Error saving directory"); - - let cp = which("cp").expect("No cp on $PATH."); - let bash_contents = format!("echo $PWD && {} roland.ext ..", cp.display()); - let argv = vec![find_bash(), "-c".to_owned(), bash_contents.to_owned()]; - - let mut process = - Process::new(argv.clone()).output_files(relative_paths(&["roland.ext"]).collect()); - process.input_digests = - InputDigests::with_input_files(TestDirectory::nested().directory_digest()); - process.working_directory = Some(RelativePath::new("cats").unwrap()); - - let result = run_command_locally_in_dir( - process, - preserved_work_root.clone(), - KeepSandboxes::Always, - &mut workunit, - Some(store), - Some(executor), - ) - .await; - result.unwrap(); - - assert!(preserved_work_root.exists()); - - // Collect all of the top level sub-dirs under our test workdir. - let subdirs = testutil::file::list_dir(&preserved_work_root); - assert_eq!(subdirs.len(), 1); - - // Then look for a file like e.g. `/tmp/abc1234/pants-sandbox-7zt4pH/roland.ext` - let rolands_path = preserved_work_root.join(&subdirs[0]).join("roland.ext"); - assert!(&rolands_path.exists()); - - // Ensure that when a directory is preserved, a __run.sh file is created with the process's - // command line and environment variables. - let run_script_path = preserved_work_root.join(&subdirs[0]).join("__run.sh"); - assert!(&run_script_path.exists()); - - std::fs::remove_file(&rolands_path).expect("Failed to remove roland."); - - // Confirm the script when run directly sets up the proper CWD. - let mut child = std::process::Command::new(&run_script_path) - .spawn() - .expect("Failed to launch __run.sh"); - let status = child - .wait() - .expect("Failed to gather the result of __run.sh."); - assert_eq!(Some(0), status.code()); - assert!(rolands_path.exists()); - - // Ensure the bash command line is provided. - let bytes_quoted_command_line = bash::escape(&bash_contents); - let quoted_command_line = str::from_utf8(&bytes_quoted_command_line).unwrap(); - assert!(std::fs::read_to_string(&run_script_path) - .unwrap() - .contains(quoted_command_line)); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let preserved_work_tmpdir = TempDir::new().unwrap(); + let preserved_work_root = preserved_work_tmpdir.path().to_owned(); + + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + // Prepare the store to contain /cats/roland.ext, because the EPR needs to materialize it and then run + // from the ./cats directory. 
+ store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::nested().directory(), true) + .await + .expect("Error saving directory"); + + let cp = which("cp").expect("No cp on $PATH."); + let bash_contents = format!("echo $PWD && {} roland.ext ..", cp.display()); + let argv = vec![find_bash(), "-c".to_owned(), bash_contents.to_owned()]; + + let mut process = + Process::new(argv.clone()).output_files(relative_paths(&["roland.ext"]).collect()); + process.input_digests = + InputDigests::with_input_files(TestDirectory::nested().directory_digest()); + process.working_directory = Some(RelativePath::new("cats").unwrap()); + + let result = run_command_locally_in_dir( + process, + preserved_work_root.clone(), + KeepSandboxes::Always, + &mut workunit, + Some(store), + Some(executor), + ) + .await; + result.unwrap(); + + assert!(preserved_work_root.exists()); + + // Collect all of the top level sub-dirs under our test workdir. + let subdirs = testutil::file::list_dir(&preserved_work_root); + assert_eq!(subdirs.len(), 1); + + // Then look for a file like e.g. `/tmp/abc1234/pants-sandbox-7zt4pH/roland.ext` + let rolands_path = preserved_work_root.join(&subdirs[0]).join("roland.ext"); + assert!(&rolands_path.exists()); + + // Ensure that when a directory is preserved, a __run.sh file is created with the process's + // command line and environment variables. + let run_script_path = preserved_work_root.join(&subdirs[0]).join("__run.sh"); + assert!(&run_script_path.exists()); + + std::fs::remove_file(&rolands_path).expect("Failed to remove roland."); + + // Confirm the script when run directly sets up the proper CWD. + let mut child = std::process::Command::new(&run_script_path) + .spawn() + .expect("Failed to launch __run.sh"); + let status = child + .wait() + .expect("Failed to gather the result of __run.sh."); + assert_eq!(Some(0), status.code()); + assert!(rolands_path.exists()); + + // Ensure the bash command line is provided. + let bytes_quoted_command_line = bash::escape(&bash_contents); + let quoted_command_line = str::from_utf8(&bytes_quoted_command_line).unwrap(); + assert!(std::fs::read_to_string(&run_script_path) + .unwrap() + .contains(quoted_command_line)); } #[tokio::test] async fn test_directory_preservation_error() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let preserved_work_tmpdir = TempDir::new().unwrap(); - let preserved_work_root = preserved_work_tmpdir.path().to_owned(); - - assert!(preserved_work_root.exists()); - assert_eq!(testutil::file::list_dir(&preserved_work_root).len(), 0); - - run_command_locally_in_dir( - Process::new(vec!["doesnotexist".to_owned()]), - preserved_work_root.clone(), - KeepSandboxes::Always, - &mut workunit, - None, - None, - ) - .await - .expect_err("Want process to fail"); - - assert!(preserved_work_root.exists()); - // Collect all of the top level sub-dirs under our test workdir. 
- assert_eq!(testutil::file::list_dir(&preserved_work_root).len(), 1); + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let preserved_work_tmpdir = TempDir::new().unwrap(); + let preserved_work_root = preserved_work_tmpdir.path().to_owned(); + + assert!(preserved_work_root.exists()); + assert_eq!(testutil::file::list_dir(&preserved_work_root).len(), 0); + + run_command_locally_in_dir( + Process::new(vec!["doesnotexist".to_owned()]), + preserved_work_root.clone(), + KeepSandboxes::Always, + &mut workunit, + None, + None, + ) + .await + .expect_err("Want process to fail"); + + assert!(preserved_work_root.exists()); + // Collect all of the top level sub-dirs under our test workdir. + assert_eq!(testutil::file::list_dir(&preserved_work_root).len(), 1); } #[tokio::test] async fn all_containing_directories_for_outputs_are_created() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - format!( - // mkdir would normally fail, since birds/ doesn't yet exist, as would echo, since cats/ - // does not exist, but we create the containing directories for all outputs before the - // process executes. - "/bin/mkdir birds/falcons && echo -n {} > cats/roland.ext", - TestData::roland().string() - ), - ]) - .output_files(relative_paths(&["cats/roland.ext"]).collect()) - .output_directories(relative_paths(&["birds/falcons"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::nested_dir_and_file().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + format!( + // mkdir would normally fail, since birds/ doesn't yet exist, as would echo, since cats/ + // does not exist, but we create the containing directories for all outputs before the + // process executes. 
+ "/bin/mkdir birds/falcons && echo -n {} > cats/roland.ext", + TestData::roland().string() + ), + ]) + .output_files(relative_paths(&["cats/roland.ext"]).collect()) + .output_directories(relative_paths(&["birds/falcons"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::nested_dir_and_file().directory_digest() + ); } #[tokio::test] async fn output_empty_dir() { - let result = run_command_locally( - Process::new(vec![ - find_bash(), - "-c".to_owned(), - "/bin/mkdir falcons".to_string(), - ]) - .output_directories(relative_paths(&["falcons"]).collect()), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_falcons_dir().directory_digest() - ); + let result = run_command_locally( + Process::new(vec![ + find_bash(), + "-c".to_owned(), + "/bin/mkdir falcons".to_string(), + ]) + .output_directories(relative_paths(&["falcons"]).collect()), + ) + .await + .unwrap(); + + assert_eq!(result.stdout_bytes, "".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_falcons_dir().directory_digest() + ); } #[tokio::test] async fn timeout() { - let argv = vec![ - find_bash(), - "-c".to_owned(), - "/bin/echo -n 'Calculating...'; /bin/sleep 0.5; /bin/echo -n 'European Burmese'".to_string(), - ]; - - let mut process = Process::new(argv); - process.timeout = Some(Duration::from_millis(100)); - process.description = "sleepy-cat".to_string(); - - let result = run_command_locally(process).await.unwrap(); - - assert_eq!(result.original.exit_code, -15); - let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); - let stderr = String::from_utf8(result.stderr_bytes.to_vec()).unwrap(); - assert!(&stdout.contains("Calculating...")); - assert!(&stderr.contains("Exceeded timeout")); - assert!(&stderr.contains("sleepy-cat")); + let argv = vec![ + find_bash(), + "-c".to_owned(), + "/bin/echo -n 'Calculating...'; /bin/sleep 0.5; /bin/echo -n 'European Burmese'" + .to_string(), + ]; + + let mut process = Process::new(argv); + process.timeout = Some(Duration::from_millis(100)); + process.description = "sleepy-cat".to_string(); + + let result = run_command_locally(process).await.unwrap(); + + assert_eq!(result.original.exit_code, -15); + let stdout = String::from_utf8(result.stdout_bytes.to_vec()).unwrap(); + let stderr = String::from_utf8(result.stderr_bytes.to_vec()).unwrap(); + assert!(&stdout.contains("Calculating...")); + assert!(&stderr.contains("Exceeded timeout")); + assert!(&stderr.contains("sleepy-cat")); } #[tokio::test] async fn working_directory() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - - // Prepare the store to contain /cats/roland.ext, because the EPR needs to materialize it and - // then run from the ./cats directory. 
- store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) - .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::nested().directory(), true) + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + // Prepare the store to contain /cats/roland.ext, because the EPR needs to materialize it and + // then run from the ./cats directory. + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::nested().directory(), true) + .await + .expect("Error saving directory"); + + let work_dir = TempDir::new().unwrap(); + + let mut process = Process::new(vec![find_bash(), "-c".to_owned(), "/bin/ls".to_string()]); + process.working_directory = Some(RelativePath::new("cats").unwrap()); + process.output_directories = relative_paths(&["roland.ext"]).collect::>(); + process.input_digests = + InputDigests::with_input_files(TestDirectory::nested().directory_digest()); + process.timeout = one_second(); + process.description = "confused-cat".to_string(); + + let result = run_command_locally_in_dir( + process, + work_dir.path().to_owned(), + KeepSandboxes::Never, + &mut workunit, + Some(store), + Some(executor), + ) .await - .expect("Error saving directory"); - - let work_dir = TempDir::new().unwrap(); - - let mut process = Process::new(vec![find_bash(), "-c".to_owned(), "/bin/ls".to_string()]); - process.working_directory = Some(RelativePath::new("cats").unwrap()); - process.output_directories = relative_paths(&["roland.ext"]).collect::>(); - process.input_digests = - InputDigests::with_input_files(TestDirectory::nested().directory_digest()); - process.timeout = one_second(); - process.description = "confused-cat".to_string(); - - let result = run_command_locally_in_dir( - process, - work_dir.path().to_owned(), - KeepSandboxes::Never, - &mut workunit, - Some(store), - Some(executor), - ) - .await - .unwrap(); - - assert_eq!(result.stdout_bytes, "roland.ext\n".as_bytes()); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); - assert_eq!( - result.original.output_directory, - TestDirectory::containing_roland().directory_digest() - ); + .unwrap(); + + assert_eq!(result.stdout_bytes, "roland.ext\n".as_bytes()); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); + assert_eq!( + result.original.output_directory, + TestDirectory::containing_roland().directory_digest() + ); } #[tokio::test] async fn immutable_inputs() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let store_dir = TempDir::new().unwrap(); + let executor = 
task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::containing_falcons_dir().directory(), true) + .await + .expect("Error saving directory"); + + let work_dir = TempDir::new().unwrap(); + + let mut process = Process::new(vec![find_bash(), "-c".to_owned(), "/bin/ls".to_string()]); + process.input_digests = InputDigests::new( + &store, + TestDirectory::containing_falcons_dir().directory_digest(), + { + let mut map = BTreeMap::new(); + map.insert( + RelativePath::new("cats").unwrap(), + TestDirectory::containing_roland().directory_digest(), + ); + map + }, + BTreeSet::default(), + ) .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::containing_falcons_dir().directory(), true) + .unwrap(); + process.timeout = one_second(); + process.description = "confused-cat".to_string(); + + let result = run_command_locally_in_dir( + process, + work_dir.path().to_owned(), + KeepSandboxes::Never, + &mut workunit, + Some(store), + Some(executor), + ) .await - .expect("Error saving directory"); - - let work_dir = TempDir::new().unwrap(); - - let mut process = Process::new(vec![find_bash(), "-c".to_owned(), "/bin/ls".to_string()]); - process.input_digests = InputDigests::new( - &store, - TestDirectory::containing_falcons_dir().directory_digest(), - { - let mut map = BTreeMap::new(); - map.insert( - RelativePath::new("cats").unwrap(), - TestDirectory::containing_roland().directory_digest(), - ); - map - }, - BTreeSet::default(), - ) - .await - .unwrap(); - process.timeout = one_second(); - process.description = "confused-cat".to_string(); - - let result = run_command_locally_in_dir( - process, - work_dir.path().to_owned(), - KeepSandboxes::Never, - &mut workunit, - Some(store), - Some(executor), - ) - .await - .unwrap(); - - let stdout_lines = str::from_utf8(&result.stdout_bytes) - .unwrap() - .lines() - .collect::>(); - assert_eq!(stdout_lines, hashset! {"falcons", "cats"}); - assert_eq!(result.stderr_bytes, "".as_bytes()); - assert_eq!(result.original.exit_code, 0); + .unwrap(); + + let stdout_lines = str::from_utf8(&result.stdout_bytes) + .unwrap() + .lines() + .collect::>(); + assert_eq!(stdout_lines, hashset! {"falcons", "cats"}); + assert_eq!(result.stderr_bytes, "".as_bytes()); + assert_eq!(result.original.exit_code, 0); } #[tokio::test] async fn prepare_workdir_exclusive_relative() { - // Test that we detect that we should should exclusive spawn when a relative path that points - // outside of a working directory is used. 
- let _ = WorkunitStore::setup_for_tests(); - - let store_dir = TempDir::new().unwrap(); - let executor = task_executor::Executor::new(); - let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); - let (_caches_dir, named_caches, immutable_inputs) = - named_caches_and_immutable_inputs(store.clone()); - - store - .store_file_bytes(TestData::roland().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .store_file_bytes(TestData::catnip().bytes(), false) - .await - .expect("Error saving file bytes"); - store - .record_directory(&TestDirectory::recursive().directory(), true) + // Test that we detect that we should should exclusive spawn when a relative path that points + // outside of a working directory is used. + let _ = WorkunitStore::setup_for_tests(); + + let store_dir = TempDir::new().unwrap(); + let executor = task_executor::Executor::new(); + let store = Store::local_only(executor.clone(), store_dir.path()).unwrap(); + let (_caches_dir, named_caches, immutable_inputs) = + named_caches_and_immutable_inputs(store.clone()); + + store + .store_file_bytes(TestData::roland().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .store_file_bytes(TestData::catnip().bytes(), false) + .await + .expect("Error saving file bytes"); + store + .record_directory(&TestDirectory::recursive().directory(), true) + .await + .expect("Error saving directory"); + store + .record_directory(&TestDirectory::containing_roland().directory(), true) + .await + .expect("Error saving directory"); + + let work_dir = TempDir::new().unwrap(); + + // NB: This path is not marked executable, but that isn't (currently) relevant to the heuristic. + let mut process = Process::new(vec!["../treats.ext".to_owned()]) + .working_directory(Some(RelativePath::new("cats").unwrap())); + process.input_digests = InputDigests::new( + &store, + TestDirectory::recursive().directory_digest(), + BTreeMap::new(), + BTreeSet::new(), + ) .await - .expect("Error saving directory"); - store - .record_directory(&TestDirectory::containing_roland().directory(), true) + .unwrap(); + + let exclusive_spawn = local::prepare_workdir( + work_dir.path().to_owned(), + work_dir.path(), + &process, + TestDirectory::recursive().directory_digest(), + &store, + &named_caches, + &immutable_inputs, + None, + None, + ) .await - .expect("Error saving directory"); - - let work_dir = TempDir::new().unwrap(); - - // NB: This path is not marked executable, but that isn't (currently) relevant to the heuristic. 
- let mut process = Process::new(vec!["../treats.ext".to_owned()]) - .working_directory(Some(RelativePath::new("cats").unwrap())); - process.input_digests = InputDigests::new( - &store, - TestDirectory::recursive().directory_digest(), - BTreeMap::new(), - BTreeSet::new(), - ) - .await - .unwrap(); - - let exclusive_spawn = local::prepare_workdir( - work_dir.path().to_owned(), - work_dir.path(), - &process, - TestDirectory::recursive().directory_digest(), - &store, - &named_caches, - &immutable_inputs, - None, - None, - ) - .await - .unwrap(); - - assert!(exclusive_spawn); + .unwrap(); + + assert!(exclusive_spawn); } pub(crate) fn named_caches_and_immutable_inputs( - store: Store, + store: Store, ) -> (TempDir, NamedCaches, ImmutableInputs) { - let root = TempDir::new().unwrap(); - let root_path = root.path().to_owned(); - let named_cache_dir = root_path.join("named"); - - ( - root, - NamedCaches::new_local(named_cache_dir), - ImmutableInputs::new(store, &root_path).unwrap(), - ) + let root = TempDir::new().unwrap(); + let root_path = root.path().to_owned(); + let named_cache_dir = root_path.join("named"); + + ( + root, + NamedCaches::new_local(named_cache_dir), + ImmutableInputs::new(store, &root_path).unwrap(), + ) } async fn run_command_locally(req: Process) -> Result { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - let work_dir = TempDir::new().unwrap(); - let work_dir_path = work_dir.path().to_owned(); - run_command_locally_in_dir( - req, - work_dir_path, - KeepSandboxes::Never, - &mut workunit, - None, - None, - ) - .await + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + let work_dir = TempDir::new().unwrap(); + let work_dir_path = work_dir.path().to_owned(); + run_command_locally_in_dir( + req, + work_dir_path, + KeepSandboxes::Never, + &mut workunit, + None, + None, + ) + .await } async fn run_command_locally_in_dir( - req: Process, - dir: PathBuf, - cleanup: KeepSandboxes, - workunit: &mut RunningWorkunit, - store: Option, - executor: Option, + req: Process, + dir: PathBuf, + cleanup: KeepSandboxes, + workunit: &mut RunningWorkunit, + store: Option, + executor: Option, ) -> Result { - let store_dir = TempDir::new().unwrap(); - let executor = executor.unwrap_or_else(task_executor::Executor::new); - let store = - store.unwrap_or_else(|| Store::local_only(executor.clone(), store_dir.path()).unwrap()); - let (_caches_dir, named_caches, immutable_inputs) = - named_caches_and_immutable_inputs(store.clone()); - let runner = crate::local::CommandRunner::new( - store.clone(), - executor.clone(), - dir.clone(), - named_caches, - immutable_inputs, - cleanup, - ); - let original = runner.run(Context::default(), workunit, req).await?; - let stdout_bytes = store - .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) - .await?; - let stderr_bytes = store - .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) - .await?; - Ok(LocalTestResult { - original, - stdout_bytes, - stderr_bytes, - }) + let store_dir = TempDir::new().unwrap(); + let executor = executor.unwrap_or_else(task_executor::Executor::new); + let store = + store.unwrap_or_else(|| Store::local_only(executor.clone(), store_dir.path()).unwrap()); + let (_caches_dir, named_caches, immutable_inputs) = + named_caches_and_immutable_inputs(store.clone()); + let runner = crate::local::CommandRunner::new( + store.clone(), + executor.clone(), + dir.clone(), + named_caches, + immutable_inputs, + cleanup, + ); + let original = runner.run(Context::default(), workunit, req).await?; + let 
stdout_bytes = store + .load_file_bytes_with(original.stdout_digest, |bytes| bytes.to_vec()) + .await?; + let stderr_bytes = store + .load_file_bytes_with(original.stderr_digest, |bytes| bytes.to_vec()) + .await?; + Ok(LocalTestResult { + original, + stdout_bytes, + stderr_bytes, + }) } fn one_second() -> Option { - Some(Duration::from_millis(1000)) + Some(Duration::from_millis(1000)) } diff --git a/src/rust/engine/process_execution/src/named_caches.rs b/src/rust/engine/process_execution/src/named_caches.rs index 9f52e345259..2432179fe79 100644 --- a/src/rust/engine/process_execution/src/named_caches.rs +++ b/src/rust/engine/process_execution/src/named_caches.rs @@ -17,22 +17,22 @@ use store::WorkdirSymlink; pub struct CacheName(String); impl CacheName { - pub fn new(name: String) -> Result { - if name - .chars() - .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') - { - Ok(CacheName(name)) - } else { - Err(format!( + pub fn new(name: String) -> Result { + if name + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') + { + Ok(CacheName(name)) + } else { + Err(format!( "Cache names may only contain lowercase alphanumeric characters or underscores: got {name:?}" )) + } } - } - pub fn name(&self) -> &str { - &self.0 - } + pub fn name(&self) -> &str { + &self.0 + } } /// @@ -41,102 +41,104 @@ impl CacheName { /// Pants "owns" named caches, and may clear or otherwise prune them at any time. /// struct Inner { - /// The absolute path to the base of the directory storing named caches. This may be a local or - /// remote/virtualized path. - base_path: PathBuf, - /// An initializer function used to initialize a named cache at the given absolute path, once per - /// NamedCaches instance. - #[allow(clippy::type_complexity)] - initializer: Box futures::future::BoxFuture> + Send + Sync>, - /// Caches which have been initialized. - initialized: Mutex>>>, + /// The absolute path to the base of the directory storing named caches. This may be a local or + /// remote/virtualized path. + base_path: PathBuf, + /// An initializer function used to initialize a named cache at the given absolute path, once per + /// NamedCaches instance. + #[allow(clippy::type_complexity)] + initializer: Box futures::future::BoxFuture> + Send + Sync>, + /// Caches which have been initialized. + initialized: Mutex>>>, } #[derive(Clone)] pub struct NamedCaches(Arc); impl NamedCaches { - /// Create a NamedCache, potentially in a virtualized filesystem. Cache entries will be created - /// using the given initializer function. - pub fn new( - base_path: PathBuf, - initializer: impl Fn(&Path) -> futures::future::BoxFuture> - + Send - + Sync - + 'static, - ) -> Self { - Self(Arc::new(Inner { - base_path, - initializer: Box::new(initializer), - initialized: Mutex::default(), - })) - } - - /// Create a NamedCache in the local filesystem. 
- pub fn new_local(base_path: PathBuf) -> Self { - Self::new(base_path, |dst| { - tokio::fs::create_dir_all(dst) - .map_err(|e| format!("Failed to create path {}: {e}", dst.display())) - .boxed() - }) - } - - pub fn base_path(&self) -> &Path { - &self.0.base_path - } - - // This default suffix is also hard-coded into the Python options code in global_options.py - pub fn default_local_path() -> PathBuf { - default_cache_path().join("named_caches") - } - - fn cache_cell(&self, path: PathBuf) -> Arc> { - let mut cells = self.0.initialized.lock(); - if let Some(cell) = cells.get(&path) { - cell.clone() - } else { - let cell = Arc::new(OnceCell::new()); - cells.insert(path, cell.clone()); - cell + /// Create a NamedCache, potentially in a virtualized filesystem. Cache entries will be created + /// using the given initializer function. + pub fn new( + base_path: PathBuf, + initializer: impl Fn(&Path) -> futures::future::BoxFuture> + + Send + + Sync + + 'static, + ) -> Self { + Self(Arc::new(Inner { + base_path, + initializer: Box::new(initializer), + initialized: Mutex::default(), + })) } - } - - /// - /// Returns symlinks to create for the given set of NamedCaches, initializing them if necessary. - /// - pub async fn paths<'a>( - &'a self, - caches: &'a BTreeMap, - ) -> Result, String> { - // Collect the symlinks to create, and their destination cache cells. - let (symlinks, initialization_futures): (Vec<_>, Vec<_>) = { - caches - .iter() - .map(move |(cache_name, workdir_rel_path)| { - let symlink = WorkdirSymlink { - src: workdir_rel_path.clone(), - dst: self.0.base_path.join(&cache_name.0), - }; - - // Create the initialization future under the lock, but await it outside. - let dst: PathBuf = symlink.dst.clone(); - let named_caches: NamedCaches = self.clone(); - let initialization_future = async move { - named_caches - .cache_cell(dst.clone()) - .get_or_try_init(async move { (named_caches.0.initializer)(&dst).await }) - .await?; - Ok::<_, String>(()) - }; - - (symlink, initialization_future) + + /// Create a NamedCache in the local filesystem. + pub fn new_local(base_path: PathBuf) -> Self { + Self::new(base_path, |dst| { + tokio::fs::create_dir_all(dst) + .map_err(|e| format!("Failed to create path {}: {e}", dst.display())) + .boxed() }) - .unzip() - }; + } - // Ensure that all cache destinations have been created. - futures::future::try_join_all(initialization_futures).await?; + pub fn base_path(&self) -> &Path { + &self.0.base_path + } - Ok(symlinks) - } + // This default suffix is also hard-coded into the Python options code in global_options.py + pub fn default_local_path() -> PathBuf { + default_cache_path().join("named_caches") + } + + fn cache_cell(&self, path: PathBuf) -> Arc> { + let mut cells = self.0.initialized.lock(); + if let Some(cell) = cells.get(&path) { + cell.clone() + } else { + let cell = Arc::new(OnceCell::new()); + cells.insert(path, cell.clone()); + cell + } + } + + /// + /// Returns symlinks to create for the given set of NamedCaches, initializing them if necessary. + /// + pub async fn paths<'a>( + &'a self, + caches: &'a BTreeMap, + ) -> Result, String> { + // Collect the symlinks to create, and their destination cache cells. + let (symlinks, initialization_futures): (Vec<_>, Vec<_>) = { + caches + .iter() + .map(move |(cache_name, workdir_rel_path)| { + let symlink = WorkdirSymlink { + src: workdir_rel_path.clone(), + dst: self.0.base_path.join(&cache_name.0), + }; + + // Create the initialization future under the lock, but await it outside. 
+ let dst: PathBuf = symlink.dst.clone(); + let named_caches: NamedCaches = self.clone(); + let initialization_future = async move { + named_caches + .cache_cell(dst.clone()) + .get_or_try_init( + async move { (named_caches.0.initializer)(&dst).await }, + ) + .await?; + Ok::<_, String>(()) + }; + + (symlink, initialization_future) + }) + .unzip() + }; + + // Ensure that all cache destinations have been created. + futures::future::try_join_all(initialization_futures).await?; + + Ok(symlinks) + } } diff --git a/src/rust/engine/process_execution/src/named_caches_tests.rs b/src/rust/engine/process_execution/src/named_caches_tests.rs index 446cf151e10..14b1a1cb009 100644 --- a/src/rust/engine/process_execution/src/named_caches_tests.rs +++ b/src/rust/engine/process_execution/src/named_caches_tests.rs @@ -4,15 +4,15 @@ use crate::named_caches::CacheName; #[test] fn alphanumeric_lowercase_are_valid() { - let name = "__mynamed_cache_1"; - let cache_name = CacheName::new(name.to_string()); - assert!(cache_name.is_ok()); - assert_eq!(name, cache_name.unwrap().name()); + let name = "__mynamed_cache_1"; + let cache_name = CacheName::new(name.to_string()); + assert!(cache_name.is_ok()); + assert_eq!(name, cache_name.unwrap().name()); } #[test] fn uppercase_characters_are_invalid() { - let name = "mYnamedcache"; - let cache_name = CacheName::new(name.to_string()); - assert!(cache_name.is_err()); + let name = "mYnamedcache"; + let cache_name = CacheName::new(name.to_string()); + assert!(cache_name.is_err()); } diff --git a/src/rust/engine/process_execution/src/switched.rs b/src/rust/engine/process_execution/src/switched.rs index 513628dc745..198b7793de6 100644 --- a/src/rust/engine/process_execution/src/switched.rs +++ b/src/rust/engine/process_execution/src/switched.rs @@ -10,116 +10,117 @@ use workunit_store::RunningWorkunit; use crate::{CommandRunner, Context, FallibleProcessResultWithPlatform, Process, ProcessError}; pub struct SwitchedCommandRunner { - true_runner: T, - false_runner: F, - predicate: P, + true_runner: T, + false_runner: F, + predicate: P, } impl fmt::Debug for SwitchedCommandRunner { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.debug_struct("SwitchedCommandRunner") - .finish_non_exhaustive() - } + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("SwitchedCommandRunner") + .finish_non_exhaustive() + } } impl SwitchedCommandRunner where - P: Fn(&Process) -> bool + Send + Sync, + P: Fn(&Process) -> bool + Send + Sync, { - pub fn new(true_runner: T, false_runner: F, predicate: P) -> Self { - Self { - true_runner, - false_runner, - predicate, + pub fn new(true_runner: T, false_runner: F, predicate: P) -> Self { + Self { + true_runner, + false_runner, + predicate, + } } - } } #[async_trait] impl CommandRunner for SwitchedCommandRunner where - T: CommandRunner, - F: CommandRunner, - P: Fn(&Process) -> bool + Send + Sync, + T: CommandRunner, + F: CommandRunner, + P: Fn(&Process) -> bool + Send + Sync, { - async fn run( - &self, - context: Context, - workunit: &mut RunningWorkunit, - req: Process, - ) -> Result { - if (self.predicate)(&req) { - self.true_runner.run(context, workunit, req).await - } else { - self.false_runner.run(context, workunit, req).await - } - } - - async fn shutdown(&self) -> Result<(), String> { - let true_runner_shutdown_fut = self.true_runner.shutdown(); - let false_runner_shutdown_fut = self.false_runner.shutdown(); - futures::try_join!(true_runner_shutdown_fut, false_runner_shutdown_fut)?; - Ok(()) - } -} - -#[cfg(test)] -mod 
tests { - use async_trait::async_trait; - use workunit_store::{RunningWorkunit, WorkunitStore}; - - use crate::switched::SwitchedCommandRunner; - use crate::CommandRunner; - use crate::{Context, FallibleProcessResultWithPlatform, Process, ProcessError}; - - #[derive(Debug)] - struct MockCommandRunner(Result); - - #[async_trait] - impl CommandRunner for MockCommandRunner { async fn run( - &self, - _context: Context, - _workunit: &mut RunningWorkunit, - _req: Process, + &self, + context: Context, + workunit: &mut RunningWorkunit, + req: Process, ) -> Result { - self.0.clone() + if (self.predicate)(&req) { + self.true_runner.run(context, workunit, req).await + } else { + self.false_runner.run(context, workunit, req).await + } } async fn shutdown(&self) -> Result<(), String> { - Ok(()) + let true_runner_shutdown_fut = self.true_runner.shutdown(); + let false_runner_shutdown_fut = self.false_runner.shutdown(); + futures::try_join!(true_runner_shutdown_fut, false_runner_shutdown_fut)?; + Ok(()) } - } - - #[tokio::test] - async fn switched_command_runner() { - let (_, mut workunit) = WorkunitStore::setup_for_tests(); - - let left = MockCommandRunner(Err(ProcessError::Unclassified("left".to_string()))); - let right = MockCommandRunner(Err(ProcessError::Unclassified("right".to_string()))); - - let runner = SwitchedCommandRunner::new(left, right, |req| req.argv.get(0).unwrap() == "left"); - - let req = Process::new(vec!["left".to_string()]); - let err = runner - .run(Context::default(), &mut workunit, req) - .await - .expect_err("expected error"); - if let ProcessError::Unclassified(msg) = &err { - assert_eq!(msg, "left"); - } else { - panic!("unexpected value: {err:?}") +} + +#[cfg(test)] +mod tests { + use async_trait::async_trait; + use workunit_store::{RunningWorkunit, WorkunitStore}; + + use crate::switched::SwitchedCommandRunner; + use crate::CommandRunner; + use crate::{Context, FallibleProcessResultWithPlatform, Process, ProcessError}; + + #[derive(Debug)] + struct MockCommandRunner(Result); + + #[async_trait] + impl CommandRunner for MockCommandRunner { + async fn run( + &self, + _context: Context, + _workunit: &mut RunningWorkunit, + _req: Process, + ) -> Result { + self.0.clone() + } + + async fn shutdown(&self) -> Result<(), String> { + Ok(()) + } } - let req = Process::new(vec!["not-left".to_string()]); - let err = runner - .run(Context::default(), &mut workunit, req) - .await - .expect_err("expected error"); - if let ProcessError::Unclassified(msg) = &err { - assert_eq!(msg, "right"); - } else { - panic!("unexpected value: {err:?}") + #[tokio::test] + async fn switched_command_runner() { + let (_, mut workunit) = WorkunitStore::setup_for_tests(); + + let left = MockCommandRunner(Err(ProcessError::Unclassified("left".to_string()))); + let right = MockCommandRunner(Err(ProcessError::Unclassified("right".to_string()))); + + let runner = + SwitchedCommandRunner::new(left, right, |req| req.argv.get(0).unwrap() == "left"); + + let req = Process::new(vec!["left".to_string()]); + let err = runner + .run(Context::default(), &mut workunit, req) + .await + .expect_err("expected error"); + if let ProcessError::Unclassified(msg) = &err { + assert_eq!(msg, "left"); + } else { + panic!("unexpected value: {err:?}") + } + + let req = Process::new(vec!["not-left".to_string()]); + let err = runner + .run(Context::default(), &mut workunit, req) + .await + .expect_err("expected error"); + if let ProcessError::Unclassified(msg) = &err { + assert_eq!(msg, "right"); + } else { + panic!("unexpected value: 
{err:?}") + } } - } } diff --git a/src/rust/engine/process_execution/src/tests.rs b/src/rust/engine/process_execution/src/tests.rs index 89c78967f55..55de6e5b524 100644 --- a/src/rust/engine/process_execution/src/tests.rs +++ b/src/rust/engine/process_execution/src/tests.rs @@ -6,8 +6,8 @@ use std::hash::{Hash, Hasher}; use std::time::Duration; use crate::{ - Platform, Process, ProcessExecutionEnvironment, ProcessExecutionStrategy, ProcessResultMetadata, - ProcessResultSource, + Platform, Process, ProcessExecutionEnvironment, ProcessExecutionStrategy, + ProcessResultMetadata, ProcessResultSource, }; use prost_types::Timestamp; use protos::gen::build::bazel::remote::execution::v2 as remexec; @@ -16,138 +16,138 @@ use workunit_store::RunId; #[test] fn process_equality() { - // TODO: Tests like these would be cleaner with the builder pattern for the rust-side Process API. - - let process_generator = |description: String, timeout: Option| { - let mut p = Process::new(vec![]); - p.description = description; - p.timeout = timeout; - p - }; - - fn hash(hashable: &Hashable) -> u64 { - let mut hasher = DefaultHasher::new(); - hashable.hash(&mut hasher); - hasher.finish() - } - - let a = process_generator("One thing".to_string(), Some(Duration::new(0, 0))); - let b = process_generator("Another".to_string(), Some(Duration::new(0, 0))); - let c = process_generator("One thing".to_string(), Some(Duration::new(5, 0))); - let d = process_generator("One thing".to_string(), None); - - // Process should derive a PartialEq and Hash that ignores the description - assert_eq!(a, b); - assert_eq!(hash(&a), hash(&b)); - - // ..but not other fields. - assert_ne!(a, c); - assert_ne!(hash(&a), hash(&c)); - - // Absence of timeout is included in hash. - assert_ne!(a, d); - assert_ne!(hash(&a), hash(&d)); + // TODO: Tests like these would be cleaner with the builder pattern for the rust-side Process API. + + let process_generator = |description: String, timeout: Option| { + let mut p = Process::new(vec![]); + p.description = description; + p.timeout = timeout; + p + }; + + fn hash(hashable: &Hashable) -> u64 { + let mut hasher = DefaultHasher::new(); + hashable.hash(&mut hasher); + hasher.finish() + } + + let a = process_generator("One thing".to_string(), Some(Duration::new(0, 0))); + let b = process_generator("Another".to_string(), Some(Duration::new(0, 0))); + let c = process_generator("One thing".to_string(), Some(Duration::new(5, 0))); + let d = process_generator("One thing".to_string(), None); + + // Process should derive a PartialEq and Hash that ignores the description + assert_eq!(a, b); + assert_eq!(hash(&a), hash(&b)); + + // ..but not other fields. + assert_ne!(a, c); + assert_ne!(hash(&a), hash(&c)); + + // Absence of timeout is included in hash. 
+ assert_ne!(a, d); + assert_ne!(hash(&a), hash(&d)); } #[test] fn process_result_metadata_to_and_from_executed_action_metadata() { - let env = ProcessExecutionEnvironment { - name: None, - platform: Platform::Linux_x86_64, - strategy: ProcessExecutionStrategy::Local, - }; - let action_metadata = ExecutedActionMetadata { - worker_start_timestamp: Some(Timestamp { - seconds: 100, - nanos: 20, - }), - worker_completed_timestamp: Some(Timestamp { - seconds: 120, - nanos: 50, - }), - ..ExecutedActionMetadata::default() - }; - - let converted_process_result: ProcessResultMetadata = ProcessResultMetadata::new_from_metadata( - action_metadata, - ProcessResultSource::Ran, - env.clone(), - RunId(0), - ); - assert_eq!( - converted_process_result, - ProcessResultMetadata::new( - Some(concrete_time::Duration::new(20, 30)), - ProcessResultSource::Ran, - env.clone(), - RunId(0), - ) - ); - - // The conversion from `ExecutedActionMetadata` to `ProcessResultMetadata` is lossy. - let restored_action_metadata: ExecutedActionMetadata = converted_process_result.into(); - assert_eq!( - restored_action_metadata, - ExecutedActionMetadata { - worker_start_timestamp: Some(Timestamp { - seconds: 0, - nanos: 0, - }), - worker_completed_timestamp: Some(Timestamp { - seconds: 20, - nanos: 30, - }), - ..ExecutedActionMetadata::default() - } - ); - - // The relevant metadata may be missing from either type. - let empty = ProcessResultMetadata::new(None, ProcessResultSource::Ran, env.clone(), RunId(0)); - let action_metadata_missing: ProcessResultMetadata = ProcessResultMetadata::new_from_metadata( - ExecutedActionMetadata::default(), - ProcessResultSource::Ran, - env, - RunId(0), - ); - assert_eq!(action_metadata_missing, empty); - let process_result_missing: ExecutedActionMetadata = empty.into(); - assert_eq!(process_result_missing, ExecutedActionMetadata::default()); + let env = ProcessExecutionEnvironment { + name: None, + platform: Platform::Linux_x86_64, + strategy: ProcessExecutionStrategy::Local, + }; + let action_metadata = ExecutedActionMetadata { + worker_start_timestamp: Some(Timestamp { + seconds: 100, + nanos: 20, + }), + worker_completed_timestamp: Some(Timestamp { + seconds: 120, + nanos: 50, + }), + ..ExecutedActionMetadata::default() + }; + + let converted_process_result: ProcessResultMetadata = ProcessResultMetadata::new_from_metadata( + action_metadata, + ProcessResultSource::Ran, + env.clone(), + RunId(0), + ); + assert_eq!( + converted_process_result, + ProcessResultMetadata::new( + Some(concrete_time::Duration::new(20, 30)), + ProcessResultSource::Ran, + env.clone(), + RunId(0), + ) + ); + + // The conversion from `ExecutedActionMetadata` to `ProcessResultMetadata` is lossy. + let restored_action_metadata: ExecutedActionMetadata = converted_process_result.into(); + assert_eq!( + restored_action_metadata, + ExecutedActionMetadata { + worker_start_timestamp: Some(Timestamp { + seconds: 0, + nanos: 0, + }), + worker_completed_timestamp: Some(Timestamp { + seconds: 20, + nanos: 30, + }), + ..ExecutedActionMetadata::default() + } + ); + + // The relevant metadata may be missing from either type. 
+ let empty = ProcessResultMetadata::new(None, ProcessResultSource::Ran, env.clone(), RunId(0)); + let action_metadata_missing: ProcessResultMetadata = ProcessResultMetadata::new_from_metadata( + ExecutedActionMetadata::default(), + ProcessResultSource::Ran, + env, + RunId(0), + ); + assert_eq!(action_metadata_missing, empty); + let process_result_missing: ExecutedActionMetadata = empty.into(); + assert_eq!(process_result_missing, ExecutedActionMetadata::default()); } #[test] fn process_result_metadata_time_saved_from_cache() { - let env = ProcessExecutionEnvironment { - name: None, - platform: Platform::Linux_x86_64, - strategy: ProcessExecutionStrategy::Local, - }; - let mut metadata = ProcessResultMetadata::new( - Some(concrete_time::Duration::new(5, 150)), - ProcessResultSource::Ran, - env.clone(), - RunId(0), - ); - metadata.update_cache_hit_elapsed(Duration::new(1, 100)); - assert_eq!( - Duration::from(metadata.saved_by_cache.unwrap()), - Duration::new(4, 50) - ); - - // If the cache lookup took more time than the process, we return 0. - let mut metadata = ProcessResultMetadata::new( - Some(concrete_time::Duration::new(1, 0)), - ProcessResultSource::Ran, - env.clone(), - RunId(0), - ); - metadata.update_cache_hit_elapsed(Duration::new(5, 0)); - assert_eq!( - Duration::from(metadata.saved_by_cache.unwrap()), - Duration::new(0, 0) - ); - - // If the original process time wasn't recorded, we can't compute the time saved. - let mut metadata = ProcessResultMetadata::new(None, ProcessResultSource::Ran, env, RunId(0)); - metadata.update_cache_hit_elapsed(Duration::new(1, 100)); - assert_eq!(metadata.saved_by_cache, None); + let env = ProcessExecutionEnvironment { + name: None, + platform: Platform::Linux_x86_64, + strategy: ProcessExecutionStrategy::Local, + }; + let mut metadata = ProcessResultMetadata::new( + Some(concrete_time::Duration::new(5, 150)), + ProcessResultSource::Ran, + env.clone(), + RunId(0), + ); + metadata.update_cache_hit_elapsed(Duration::new(1, 100)); + assert_eq!( + Duration::from(metadata.saved_by_cache.unwrap()), + Duration::new(4, 50) + ); + + // If the cache lookup took more time than the process, we return 0. + let mut metadata = ProcessResultMetadata::new( + Some(concrete_time::Duration::new(1, 0)), + ProcessResultSource::Ran, + env.clone(), + RunId(0), + ); + metadata.update_cache_hit_elapsed(Duration::new(5, 0)); + assert_eq!( + Duration::from(metadata.saved_by_cache.unwrap()), + Duration::new(0, 0) + ); + + // If the original process time wasn't recorded, we can't compute the time saved. + let mut metadata = ProcessResultMetadata::new(None, ProcessResultSource::Ran, env, RunId(0)); + metadata.update_cache_hit_elapsed(Duration::new(1, 100)); + assert_eq!(metadata.saved_by_cache, None); } diff --git a/src/rust/engine/process_executor/src/main.rs b/src/rust/engine/process_executor/src/main.rs index 067205f2dd7..98304ae3e0f 100644 --- a/src/rust/engine/process_executor/src/main.rs +++ b/src/rust/engine/process_executor/src/main.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. 
#![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -37,8 +37,8 @@ use clap::StructOpt; use fs::{DirectoryDigest, Permissions, RelativePath}; use hashing::{Digest, Fingerprint}; use process_execution::{ - local::KeepSandboxes, CacheContentBehavior, Context, InputDigests, NamedCaches, Platform, - ProcessCacheScope, ProcessExecutionEnvironment, ProcessExecutionStrategy, + local::KeepSandboxes, CacheContentBehavior, Context, InputDigests, NamedCaches, Platform, + ProcessCacheScope, ProcessExecutionEnvironment, ProcessExecutionStrategy, }; use prost::Message; use protos::gen::build::bazel::remote::execution::v2::{Action, Command}; @@ -50,173 +50,173 @@ use workunit_store::{in_workunit, Level, WorkunitStore}; #[derive(Clone, Debug, Default)] struct ProcessMetadata { - instance_name: Option, - cache_key_gen_version: Option, + instance_name: Option, + cache_key_gen_version: Option, } #[derive(StructOpt)] struct CommandSpec { - #[structopt(last = true)] - argv: Vec, + #[structopt(last = true)] + argv: Vec, - /// Fingerprint (hex string) of the digest to use as the input file tree. - #[structopt(long)] - input_digest: Option, + /// Fingerprint (hex string) of the digest to use as the input file tree. + #[structopt(long)] + input_digest: Option, - /// Length of the proto-bytes whose digest to use as the input file tree. - #[structopt(long)] - input_digest_length: Option, + /// Length of the proto-bytes whose digest to use as the input file tree. + #[structopt(long)] + input_digest_length: Option, - /// Extra platform properties to set on the execution request during remote execution. - #[structopt(long)] - extra_platform_property: Vec, + /// Extra platform properties to set on the execution request during remote execution. + #[structopt(long)] + extra_platform_property: Vec, - /// Environment variables with which the process should be run. - #[structopt(long)] - env: Vec, + /// Environment variables with which the process should be run. + #[structopt(long)] + env: Vec, - /// Symlink a JDK from .jdk in the working directory. - /// For local execution, symlinks to the value of this flag. - /// For remote execution, just requests that some JDK is symlinked if this flag has any value. - /// will make this less weird in the future. - #[structopt(long)] - jdk: Option, + /// Symlink a JDK from .jdk in the working directory. + /// For local execution, symlinks to the value of this flag. + /// For remote execution, just requests that some JDK is symlinked if this flag has any value. + /// will make this less weird in the future. + #[structopt(long)] + jdk: Option, - /// Path to file that is considered to be output. 
- #[structopt(long)] - output_file_path: Vec, + /// Path to file that is considered to be output. + #[structopt(long)] + output_file_path: Vec, - /// Path to directory that is considered to be output. - #[structopt(long)] - output_directory_path: Vec, + /// Path to directory that is considered to be output. + #[structopt(long)] + output_directory_path: Vec, - /// Path to execute the binary at relative to its input digest root. - #[structopt(long)] - working_directory: Option, + /// Path to execute the binary at relative to its input digest root. + #[structopt(long)] + working_directory: Option, - #[structopt(long)] - concurrency_available: Option, + #[structopt(long)] + concurrency_available: Option, - #[structopt(long)] - cache_key_gen_version: Option, + #[structopt(long)] + cache_key_gen_version: Option, } #[derive(StructOpt)] struct ActionDigestSpec { - /// Fingerprint (hex string) of the digest of the action to run. - #[structopt(long)] - action_digest: Option, + /// Fingerprint (hex string) of the digest of the action to run. + #[structopt(long)] + action_digest: Option, - /// Length of the proto-bytes whose digest is the action to run. - #[structopt(long)] - action_digest_length: Option, + /// Length of the proto-bytes whose digest is the action to run. + #[structopt(long)] + action_digest_length: Option, } #[derive(StructOpt)] #[structopt(name = "process_executor", setting = clap::AppSettings::TrailingVarArg)] struct Opt { - #[structopt(flatten)] - command: CommandSpec, - - #[structopt(flatten)] - action_digest: ActionDigestSpec, - - #[structopt(long)] - buildbarn_url: Option, - - #[structopt(long)] - run_under: Option, - - /// The name of a directory (which may or may not exist), where the output tree will be materialized. - #[structopt(long)] - materialize_output_to: Option, - - /// Path to workdir. - #[structopt(long)] - work_dir: Option, - - ///Path to lmdb directory used for local file storage. - #[structopt(long)] - local_store_path: Option, - - /// Path to a directory to be used for named caches. - #[structopt(long)] - named_cache_path: Option, - - #[structopt(long)] - remote_instance_name: Option, - - /// The host:port of the gRPC server to connect to. Forces remote execution. - /// If unspecified, local execution will be performed. - #[structopt(long)] - server: Option, - - /// Path to file containing root certificate authority certificates for the execution server. - /// If not set, TLS will not be used when connecting to the execution server. - #[structopt(long)] - execution_root_ca_cert_file: Option, - - /// Path to file containing oauth bearer token for communication with the execution server. - /// If not set, no authorization will be provided to remote servers. - #[structopt(long)] - execution_oauth_bearer_token_path: Option, - - /// The host:port of the gRPC CAS server to connect to. - #[structopt(long)] - cas_server: Option, - - /// Path to file containing root certificate authority certificates for the CAS server. - /// If not set, TLS will not be used when connecting to the CAS server. - #[structopt(long)] - cas_root_ca_cert_file: Option, - - /// Path to file containing client certificates for the CAS server. - /// If not set, client authentication will not be used when connecting to the CAS server. - #[structopt(long)] - cas_client_certs_file: Option, - - /// Path to file containing client key for the CAS server. - /// If not set, client authentication will not be used when connecting to the CAS server. 
- #[structopt(long)] - cas_client_key_file: Option, - - /// Path to file containing oauth bearer token for communication with the CAS server. - /// If not set, no authorization will be provided to remote servers. - #[structopt(long)] - cas_oauth_bearer_token_path: Option, - - /// Number of bytes to include per-chunk when uploading bytes. - /// grpc imposes a hard message-size limit of around 4MB. - #[structopt(long, default_value = "3145728")] - upload_chunk_bytes: usize, - - /// Number of retries per request to the store service. - #[structopt(long, default_value = "3")] - store_rpc_retries: usize, - - /// Number of concurrent requests to the store service. - #[structopt(long, default_value = "128")] - store_rpc_concurrency: usize, - - /// Total size of blobs allowed to be sent in a single API call. - #[structopt(long, default_value = "4194304")] - store_batch_api_size_limit: usize, - - /// Number of concurrent requests to the execution service. - #[structopt(long, default_value = "128")] - execution_rpc_concurrency: usize, - - /// Number of concurrent requests to the cache service. - #[structopt(long, default_value = "128")] - cache_rpc_concurrency: usize, - - /// Overall timeout in seconds for each request from time of submission. - #[structopt(long, default_value = "600")] - overall_deadline_secs: u64, - - /// Extra header to pass on remote execution request. - #[structopt(long)] - header: Vec, + #[structopt(flatten)] + command: CommandSpec, + + #[structopt(flatten)] + action_digest: ActionDigestSpec, + + #[structopt(long)] + buildbarn_url: Option, + + #[structopt(long)] + run_under: Option, + + /// The name of a directory (which may or may not exist), where the output tree will be materialized. + #[structopt(long)] + materialize_output_to: Option, + + /// Path to workdir. + #[structopt(long)] + work_dir: Option, + + ///Path to lmdb directory used for local file storage. + #[structopt(long)] + local_store_path: Option, + + /// Path to a directory to be used for named caches. + #[structopt(long)] + named_cache_path: Option, + + #[structopt(long)] + remote_instance_name: Option, + + /// The host:port of the gRPC server to connect to. Forces remote execution. + /// If unspecified, local execution will be performed. + #[structopt(long)] + server: Option, + + /// Path to file containing root certificate authority certificates for the execution server. + /// If not set, TLS will not be used when connecting to the execution server. + #[structopt(long)] + execution_root_ca_cert_file: Option, + + /// Path to file containing oauth bearer token for communication with the execution server. + /// If not set, no authorization will be provided to remote servers. + #[structopt(long)] + execution_oauth_bearer_token_path: Option, + + /// The host:port of the gRPC CAS server to connect to. + #[structopt(long)] + cas_server: Option, + + /// Path to file containing root certificate authority certificates for the CAS server. + /// If not set, TLS will not be used when connecting to the CAS server. + #[structopt(long)] + cas_root_ca_cert_file: Option, + + /// Path to file containing client certificates for the CAS server. + /// If not set, client authentication will not be used when connecting to the CAS server. + #[structopt(long)] + cas_client_certs_file: Option, + + /// Path to file containing client key for the CAS server. + /// If not set, client authentication will not be used when connecting to the CAS server. 
+ #[structopt(long)] + cas_client_key_file: Option, + + /// Path to file containing oauth bearer token for communication with the CAS server. + /// If not set, no authorization will be provided to remote servers. + #[structopt(long)] + cas_oauth_bearer_token_path: Option, + + /// Number of bytes to include per-chunk when uploading bytes. + /// grpc imposes a hard message-size limit of around 4MB. + #[structopt(long, default_value = "3145728")] + upload_chunk_bytes: usize, + + /// Number of retries per request to the store service. + #[structopt(long, default_value = "3")] + store_rpc_retries: usize, + + /// Number of concurrent requests to the store service. + #[structopt(long, default_value = "128")] + store_rpc_concurrency: usize, + + /// Total size of blobs allowed to be sent in a single API call. + #[structopt(long, default_value = "4194304")] + store_batch_api_size_limit: usize, + + /// Number of concurrent requests to the execution service. + #[structopt(long, default_value = "128")] + execution_rpc_concurrency: usize, + + /// Number of concurrent requests to the cache service. + #[structopt(long, default_value = "128")] + cache_rpc_concurrency: usize, + + /// Overall timeout in seconds for each request from time of submission. + #[structopt(long, default_value = "600")] + overall_deadline_secs: u64, + + /// Extra header to pass on remote execution request. + #[structopt(long)] + header: Vec, } /// A binary which takes args of format: @@ -228,24 +228,24 @@ struct Opt { /// It does not perform $PATH lookup or shell expansion. #[tokio::main] async fn main() { - env_logger::init(); - let workunit_store = WorkunitStore::new(false, log::Level::Debug); - workunit_store.init_thread_state(None); + env_logger::init(); + let workunit_store = WorkunitStore::new(false, log::Level::Debug); + workunit_store.init_thread_state(None); - let args = Opt::from_args(); + let args = Opt::from_args(); - let mut headers: BTreeMap = collection_from_keyvalues(args.header.iter()); + let mut headers: BTreeMap = collection_from_keyvalues(args.header.iter()); - let executor = task_executor::Executor::new(); + let executor = task_executor::Executor::new(); - let local_store_path = args - .local_store_path - .clone() - .unwrap_or_else(Store::default_path); + let local_store_path = args + .local_store_path + .clone() + .unwrap_or_else(Store::default_path); - let local_only_store = - Store::local_only(executor.clone(), local_store_path).expect("Error making local store"); - let store = match (&args.server, &args.cas_server) { + let local_only_store = + Store::local_only(executor.clone(), local_store_path).expect("Error making local store"); + let store = match (&args.server, &args.cas_server) { (_, Some(cas_server)) => { let root_ca_certs = args .cas_root_ca_cert_file @@ -304,180 +304,179 @@ async fn main() { } .expect("Error making remote store"); - let (mut request, process_metadata) = make_request(&store, &args) - .await - .expect("Failed to construct request"); - - if let Some(run_under) = args.run_under { - let run_under = shlex::split(&run_under).expect("Could not shlex --run-under arg"); - request.argv = run_under - .into_iter() - .chain(request.argv.into_iter()) - .collect(); - } - let workdir = args.work_dir.unwrap_or_else(std::env::temp_dir); - - let runner: Box = match args.server { - Some(address) => { - let root_ca_certs = args - .execution_root_ca_cert_file - .map(|path| std::fs::read(path).expect("Error reading root CA certs file")); - - let client_certs = args - .cas_client_certs_file - .as_ref() - 
.map(|path| std::fs::read(path).expect("Error reading root client certs file")); - - let client_key = args - .cas_client_key_file - .as_ref() - .map(|path| std::fs::read(path).expect("Error reading client authentication key file")); - - let mtls_data = match (client_certs, client_key) { - (Some(certs), Some(key)) => Some((certs, key)), - (None, None) => None, - _ => { - panic!("Must specify both --cas-client-certs-file and --cas-client-key-file or neither") + let (mut request, process_metadata) = make_request(&store, &args) + .await + .expect("Failed to construct request"); + + if let Some(run_under) = args.run_under { + let run_under = shlex::split(&run_under).expect("Could not shlex --run-under arg"); + request.argv = run_under + .into_iter() + .chain(request.argv.into_iter()) + .collect(); + } + let workdir = args.work_dir.unwrap_or_else(std::env::temp_dir); + + let runner: Box = match args.server { + Some(address) => { + let root_ca_certs = args + .execution_root_ca_cert_file + .map(|path| std::fs::read(path).expect("Error reading root CA certs file")); + + let client_certs = args + .cas_client_certs_file + .as_ref() + .map(|path| std::fs::read(path).expect("Error reading root client certs file")); + + let client_key = args.cas_client_key_file.as_ref().map(|path| { + std::fs::read(path).expect("Error reading client authentication key file") + }); + + let mtls_data = match (client_certs, client_key) { + (Some(certs), Some(key)) => Some((certs, key)), + (None, None) => None, + _ => { + panic!("Must specify both --cas-client-certs-file and --cas-client-key-file or neither") + } + }; + + let tls_config = grpc_util::tls::Config::new(root_ca_certs, mtls_data) + .expect("failed parsing root CA certs"); + + if let Some(oauth_path) = args.execution_oauth_bearer_token_path { + let token = std::fs::read_to_string(oauth_path) + .expect("Error reading oauth bearer token file"); + headers.insert( + "authorization".to_owned(), + format!("Bearer {}", token.trim()), + ); + } + + let remote_runner = remote::remote::CommandRunner::new( + &address, + process_metadata.instance_name.clone(), + process_metadata.cache_key_gen_version.clone(), + None, + tls_config.clone(), + headers.clone(), + store.clone(), + executor.clone(), + Duration::from_secs(args.overall_deadline_secs), + Duration::from_millis(100), + args.execution_rpc_concurrency, + None, + ) + .await + .expect("Failed to make remote command runner"); + + let command_runner_box: Box = { + Box::new( + remote::remote_cache::CommandRunner::from_provider_options( + RemoteCacheRunnerOptions { + inner: Arc::new(remote_runner), + instance_name: process_metadata.instance_name.clone(), + process_cache_namespace: process_metadata.cache_key_gen_version.clone(), + executor, + store: store.clone(), + cache_read: true, + cache_write: true, + warnings_behavior: + remote::remote_cache::RemoteCacheWarningsBehavior::Backoff, + cache_content_behavior: CacheContentBehavior::Defer, + append_only_caches_base_path: args + .named_cache_path + .map(|p| p.to_string_lossy().to_string()), + }, + RemoteCacheProviderOptions { + instance_name: process_metadata.instance_name.clone(), + action_cache_address: address, + tls_config, + headers, + concurrency_limit: args.cache_rpc_concurrency, + rpc_timeout: Duration::from_secs(2), + }, + ) + .await + .expect("Failed to make remote cache command runner"), + ) + }; + + command_runner_box } - }; - - let tls_config = grpc_util::tls::Config::new(root_ca_certs, mtls_data) - .expect("failed parsing root CA certs"); - - if let Some(oauth_path) 
= args.execution_oauth_bearer_token_path { - let token = - std::fs::read_to_string(oauth_path).expect("Error reading oauth bearer token file"); - headers.insert( - "authorization".to_owned(), - format!("Bearer {}", token.trim()), - ); - } - - let remote_runner = remote::remote::CommandRunner::new( - &address, - process_metadata.instance_name.clone(), - process_metadata.cache_key_gen_version.clone(), - None, - tls_config.clone(), - headers.clone(), - store.clone(), - executor.clone(), - Duration::from_secs(args.overall_deadline_secs), - Duration::from_millis(100), - args.execution_rpc_concurrency, - None, - ) - .await - .expect("Failed to make remote command runner"); - - let command_runner_box: Box = { - Box::new( - remote::remote_cache::CommandRunner::from_provider_options( - RemoteCacheRunnerOptions { - inner: Arc::new(remote_runner), - instance_name: process_metadata.instance_name.clone(), - process_cache_namespace: process_metadata.cache_key_gen_version.clone(), - executor, - store: store.clone(), - cache_read: true, - cache_write: true, - warnings_behavior: remote::remote_cache::RemoteCacheWarningsBehavior::Backoff, - cache_content_behavior: CacheContentBehavior::Defer, - append_only_caches_base_path: args - .named_cache_path - .map(|p| p.to_string_lossy().to_string()), - }, - RemoteCacheProviderOptions { - instance_name: process_metadata.instance_name.clone(), - action_cache_address: address, - tls_config, - headers, - concurrency_limit: args.cache_rpc_concurrency, - rpc_timeout: Duration::from_secs(2), - }, - ) - .await - .expect("Failed to make remote cache command runner"), - ) - }; - - command_runner_box + None => Box::new(process_execution::local::CommandRunner::new( + store.clone(), + executor, + workdir.clone(), + NamedCaches::new_local( + args.named_cache_path + .unwrap_or_else(NamedCaches::default_local_path), + ), + ImmutableInputs::new(store.clone(), &workdir).unwrap(), + KeepSandboxes::Never, + )) as Box, + }; + + let result = in_workunit!("process_executor", Level::Info, |workunit| async move { + runner.run(Context::default(), workunit, request).await + }) + .await + .expect("Error executing"); + + if let Some(output) = args.materialize_output_to { + // NB: We use `output` as the root directory, because there is no need to + // memoize a check for whether some other parent directory is hardlinkable. + let output_root = output.clone(); + store + .materialize_directory( + output, + &output_root, + result.output_directory, + false, + &BTreeSet::new(), + Permissions::Writable, + ) + .await + .unwrap(); } - None => Box::new(process_execution::local::CommandRunner::new( - store.clone(), - executor, - workdir.clone(), - NamedCaches::new_local( - args - .named_cache_path - .unwrap_or_else(NamedCaches::default_local_path), - ), - ImmutableInputs::new(store.clone(), &workdir).unwrap(), - KeepSandboxes::Never, - )) as Box, - }; - - let result = in_workunit!("process_executor", Level::Info, |workunit| async move { - runner.run(Context::default(), workunit, request).await - }) - .await - .expect("Error executing"); - - if let Some(output) = args.materialize_output_to { - // NB: We use `output` as the root directory, because there is no need to - // memoize a check for whether some other parent directory is hardlinkable. 
- let output_root = output.clone(); - store - .materialize_directory( - output, - &output_root, - result.output_directory, - false, - &BTreeSet::new(), - Permissions::Writable, - ) - .await - .unwrap(); - } - let stdout: Vec = store - .load_file_bytes_with(result.stdout_digest, |bytes| bytes.to_vec()) - .await - .unwrap(); + let stdout: Vec = store + .load_file_bytes_with(result.stdout_digest, |bytes| bytes.to_vec()) + .await + .unwrap(); - let stderr: Vec = store - .load_file_bytes_with(result.stderr_digest, |bytes| bytes.to_vec()) - .await - .unwrap(); + let stderr: Vec = store + .load_file_bytes_with(result.stderr_digest, |bytes| bytes.to_vec()) + .await + .unwrap(); - print!("{}", String::from_utf8(stdout).unwrap()); - eprint!("{}", String::from_utf8(stderr).unwrap()); - exit(result.exit_code); + print!("{}", String::from_utf8(stdout).unwrap()); + eprint!("{}", String::from_utf8(stderr).unwrap()); + exit(result.exit_code); } async fn make_request( - store: &Store, - args: &Opt, + store: &Store, + args: &Opt, ) -> Result<(process_execution::Process, ProcessMetadata), String> { - let execution_environment = if args.server.is_some() { - let strategy = ProcessExecutionStrategy::RemoteExecution(collection_from_keyvalues( - args.command.extra_platform_property.iter(), - )); - ProcessExecutionEnvironment { - name: None, - // TODO: Make configurable. - platform: Platform::Linux_x86_64, - strategy, - } - } else { - ProcessExecutionEnvironment { - name: None, - platform: Platform::current().unwrap(), - strategy: ProcessExecutionStrategy::Local, - } - }; + let execution_environment = if args.server.is_some() { + let strategy = ProcessExecutionStrategy::RemoteExecution(collection_from_keyvalues( + args.command.extra_platform_property.iter(), + )); + ProcessExecutionEnvironment { + name: None, + // TODO: Make configurable. + platform: Platform::Linux_x86_64, + strategy, + } + } else { + ProcessExecutionEnvironment { + name: None, + platform: Platform::current().unwrap(), + strategy: ProcessExecutionStrategy::Local, + } + }; - match ( + match ( args.command.input_digest, args.command.input_digest_length, args.action_digest.action_digest, @@ -514,232 +513,235 @@ async fn make_request( } async fn make_request_from_flat_args( - store: &Store, - args: &Opt, - input_files: Digest, - execution_environment: ProcessExecutionEnvironment, + store: &Store, + args: &Opt, + input_files: Digest, + execution_environment: ProcessExecutionEnvironment, ) -> Result<(process_execution::Process, ProcessMetadata), String> { - let output_files = args - .command - .output_file_path - .iter() - .map(RelativePath::new) - .collect::, _>>()?; - let output_directories = args - .command - .output_directory_path - .iter() - .map(RelativePath::new) - .collect::, _>>()?; - - let working_directory = args - .command - .working_directory - .clone() - .map(|path| { - RelativePath::new(path) - .map_err(|err| format!("working-directory must be a relative path: {err:?}")) - }) - .transpose()?; - - // TODO: Add support for immutable inputs. 
- let input_digests = InputDigests::new( - store, - DirectoryDigest::from_persisted_digest(input_files), - BTreeMap::default(), - BTreeSet::default(), - ) - .await - .map_err(|e| format!("Could not create input digest for process: {e:?}"))?; - - let process = process_execution::Process { - argv: args.command.argv.clone(), - env: collection_from_keyvalues(args.command.env.iter()), - working_directory, - input_digests, - output_files, - output_directories, - timeout: Some(Duration::new(15 * 60, 0)), - description: "process_executor".to_string(), - level: Level::Info, - append_only_caches: BTreeMap::new(), - jdk_home: args.command.jdk.clone(), - execution_slot_variable: None, - concurrency_available: args.command.concurrency_available.unwrap_or(0), - cache_scope: ProcessCacheScope::Always, - execution_environment, - remote_cache_speculation_delay: Duration::from_millis(0), - attempt: 0, - }; - let metadata = ProcessMetadata { - instance_name: args.remote_instance_name.clone(), - cache_key_gen_version: args.command.cache_key_gen_version.clone(), - }; - Ok((process, metadata)) + let output_files = args + .command + .output_file_path + .iter() + .map(RelativePath::new) + .collect::, _>>()?; + let output_directories = args + .command + .output_directory_path + .iter() + .map(RelativePath::new) + .collect::, _>>()?; + + let working_directory = args + .command + .working_directory + .clone() + .map(|path| { + RelativePath::new(path) + .map_err(|err| format!("working-directory must be a relative path: {err:?}")) + }) + .transpose()?; + + // TODO: Add support for immutable inputs. + let input_digests = InputDigests::new( + store, + DirectoryDigest::from_persisted_digest(input_files), + BTreeMap::default(), + BTreeSet::default(), + ) + .await + .map_err(|e| format!("Could not create input digest for process: {e:?}"))?; + + let process = process_execution::Process { + argv: args.command.argv.clone(), + env: collection_from_keyvalues(args.command.env.iter()), + working_directory, + input_digests, + output_files, + output_directories, + timeout: Some(Duration::new(15 * 60, 0)), + description: "process_executor".to_string(), + level: Level::Info, + append_only_caches: BTreeMap::new(), + jdk_home: args.command.jdk.clone(), + execution_slot_variable: None, + concurrency_available: args.command.concurrency_available.unwrap_or(0), + cache_scope: ProcessCacheScope::Always, + execution_environment, + remote_cache_speculation_delay: Duration::from_millis(0), + attempt: 0, + }; + let metadata = ProcessMetadata { + instance_name: args.remote_instance_name.clone(), + cache_key_gen_version: args.command.cache_key_gen_version.clone(), + }; + Ok((process, metadata)) } #[allow(clippy::redundant_closure)] // False positives for prost::Message::decode: https://github.com/rust-lang/rust-clippy/issues/5939 async fn extract_request_from_action_digest( - store: &Store, - action_digest: Digest, - execution_environment: ProcessExecutionEnvironment, - instance_name: Option, - cache_key_gen_version: Option, + store: &Store, + action_digest: Digest, + execution_environment: ProcessExecutionEnvironment, + instance_name: Option, + cache_key_gen_version: Option, ) -> Result<(process_execution::Process, ProcessMetadata), String> { - let action = store - .load_file_bytes_with(action_digest, |bytes| Action::decode(bytes)) - .await - .map_err(|e| e.enrich("Could not load action proto from CAS").to_string())? 
- .map_err(|err| format!("Error deserializing action proto {action_digest:?}: {err:?}"))?; + let action = store + .load_file_bytes_with(action_digest, |bytes| Action::decode(bytes)) + .await + .map_err(|e| e.enrich("Could not load action proto from CAS").to_string())? + .map_err(|err| format!("Error deserializing action proto {action_digest:?}: {err:?}"))?; - let command_digest = - require_digest(&action.command_digest).map_err(|err| format!("Bad Command digest: {err:?}"))?; - let command = store - .load_file_bytes_with(command_digest, |bytes| Command::decode(bytes)) - .await - .map_err(|e| { - e.enrich("Could not load command proto from CAS") - .to_string() - })? - .map_err(|err| format!("Error deserializing command proto {command_digest:?}: {err:?}"))?; - let working_directory = if command.working_directory.is_empty() { - None - } else { - Some( - RelativePath::new(command.working_directory) - .map_err(|err| format!("working-directory must be a relative path: {err:?}"))?, - ) - }; + let command_digest = require_digest(&action.command_digest) + .map_err(|err| format!("Bad Command digest: {err:?}"))?; + let command = store + .load_file_bytes_with(command_digest, |bytes| Command::decode(bytes)) + .await + .map_err(|e| { + e.enrich("Could not load command proto from CAS") + .to_string() + })? + .map_err(|err| format!("Error deserializing command proto {command_digest:?}: {err:?}"))?; + let working_directory = if command.working_directory.is_empty() { + None + } else { + Some( + RelativePath::new(command.working_directory) + .map_err(|err| format!("working-directory must be a relative path: {err:?}"))?, + ) + }; - let input_digests = InputDigests::with_input_files(DirectoryDigest::from_persisted_digest( - require_digest(&action.input_root_digest) - .map_err(|err| format!("Bad input root digest: {err:?}"))?, - )); + let input_digests = InputDigests::with_input_files(DirectoryDigest::from_persisted_digest( + require_digest(&action.input_root_digest) + .map_err(|err| format!("Bad input root digest: {err:?}"))?, + )); - // In case the local Store doesn't have the input root Directory, - // have it fetch it and identify it as a Directory, so that it doesn't get confused about the unknown metadata. - store - .load_directory(input_digests.complete.as_digest()) - .await - .map_err(|e| e.to_string())?; - - let process = process_execution::Process { - argv: command.arguments, - env: command - .environment_variables - .iter() - .filter(|env| { - // Filter out environment variables which will be (re-)set by ExecutionRequest - // construction. 
- env.name != process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME - }) - .map(|env| (env.name.clone(), env.value.clone())) - .collect(), - working_directory, - input_digests, - output_files: command - .output_files - .iter() - .map(RelativePath::new) - .collect::>()?, - output_directories: command - .output_directories - .iter() - .map(RelativePath::new) - .collect::>()?, - timeout: action.timeout.map(|timeout| { - Duration::from_nanos(timeout.nanos as u64 + timeout.seconds as u64 * 1000000000) - }), - execution_slot_variable: None, - concurrency_available: 0, - description: "".to_string(), - level: Level::Error, - append_only_caches: BTreeMap::new(), - jdk_home: None, - cache_scope: ProcessCacheScope::Always, - execution_environment, - remote_cache_speculation_delay: Duration::from_millis(0), - attempt: 0, - }; - - let metadata = ProcessMetadata { - instance_name, - cache_key_gen_version, - }; - - Ok((process, metadata)) + // In case the local Store doesn't have the input root Directory, + // have it fetch it and identify it as a Directory, so that it doesn't get confused about the unknown metadata. + store + .load_directory(input_digests.complete.as_digest()) + .await + .map_err(|e| e.to_string())?; + + let process = process_execution::Process { + argv: command.arguments, + env: command + .environment_variables + .iter() + .filter(|env| { + // Filter out environment variables which will be (re-)set by ExecutionRequest + // construction. + env.name != process_execution::CACHE_KEY_TARGET_PLATFORM_ENV_VAR_NAME + }) + .map(|env| (env.name.clone(), env.value.clone())) + .collect(), + working_directory, + input_digests, + output_files: command + .output_files + .iter() + .map(RelativePath::new) + .collect::>()?, + output_directories: command + .output_directories + .iter() + .map(RelativePath::new) + .collect::>()?, + timeout: action.timeout.map(|timeout| { + Duration::from_nanos(timeout.nanos as u64 + timeout.seconds as u64 * 1000000000) + }), + execution_slot_variable: None, + concurrency_available: 0, + description: "".to_string(), + level: Level::Error, + append_only_caches: BTreeMap::new(), + jdk_home: None, + cache_scope: ProcessCacheScope::Always, + execution_environment, + remote_cache_speculation_delay: Duration::from_millis(0), + attempt: 0, + }; + + let metadata = ProcessMetadata { + instance_name, + cache_key_gen_version, + }; + + Ok((process, metadata)) } async fn extract_request_from_buildbarn_url( - store: &Store, - buildbarn_url: &str, - execution_environment: ProcessExecutionEnvironment, - cache_key_gen_version: Option, + store: &Store, + buildbarn_url: &str, + execution_environment: ProcessExecutionEnvironment, + cache_key_gen_version: Option, ) -> Result<(process_execution::Process, ProcessMetadata), String> { - let url_parts: Vec<&str> = buildbarn_url.trim_end_matches('/').split('/').collect(); - if url_parts.len() < 4 { - return Err("Buildbarn URL didn't have enough parts".to_owned()); - } - let interesting_parts = &url_parts[url_parts.len() - 4..url_parts.len()]; - let kind = interesting_parts[0]; - let instance = interesting_parts[1]; - - let action_digest = match kind { - "action" => { - let action_fingerprint = Fingerprint::from_hex_string(interesting_parts[2])?; - let action_digest_length: usize = interesting_parts[3] - .parse() - .map_err(|err| format!("Couldn't parse action digest length as a number: {err:?}"))?; - Digest::new(action_fingerprint, action_digest_length) + let url_parts: Vec<&str> = buildbarn_url.trim_end_matches('/').split('/').collect(); + if 
url_parts.len() < 4 { + return Err("Buildbarn URL didn't have enough parts".to_owned()); } - "uncached_action_result" => { - let action_result_fingerprint = Fingerprint::from_hex_string(interesting_parts[2])?; - let action_result_digest_length: usize = interesting_parts[3].parse().map_err(|err| { - format!("Couldn't parse uncached action digest result length as a number: {err:?}") - })?; - let action_result_digest = - Digest::new(action_result_fingerprint, action_result_digest_length); - - let action_result = store - .load_file_bytes_with(action_result_digest, |bytes| { - UncachedActionResult::decode(bytes) - }) - .await - .map_err(|e| e.enrich("Could not load action result proto").to_string())? - .map_err(|err| format!("Error deserializing action result proto: {err:?}"))?; + let interesting_parts = &url_parts[url_parts.len() - 4..url_parts.len()]; + let kind = interesting_parts[0]; + let instance = interesting_parts[1]; + + let action_digest = match kind { + "action" => { + let action_fingerprint = Fingerprint::from_hex_string(interesting_parts[2])?; + let action_digest_length: usize = interesting_parts[3].parse().map_err(|err| { + format!("Couldn't parse action digest length as a number: {err:?}") + })?; + Digest::new(action_fingerprint, action_digest_length) + } + "uncached_action_result" => { + let action_result_fingerprint = Fingerprint::from_hex_string(interesting_parts[2])?; + let action_result_digest_length: usize = + interesting_parts[3].parse().map_err(|err| { + format!( + "Couldn't parse uncached action digest result length as a number: {err:?}" + ) + })?; + let action_result_digest = + Digest::new(action_result_fingerprint, action_result_digest_length); + + let action_result = store + .load_file_bytes_with(action_result_digest, |bytes| { + UncachedActionResult::decode(bytes) + }) + .await + .map_err(|e| e.enrich("Could not load action result proto").to_string())? + .map_err(|err| format!("Error deserializing action result proto: {err:?}"))?; + + require_digest(&action_result.action_digest)? + } + _ => { + return Err(format!( + "Wrong kind in buildbarn URL; wanted action or uncached_action_result, got {kind}" + )); + } + }; - require_digest(&action_result.action_digest)? 
- } - _ => { - return Err(format!( - "Wrong kind in buildbarn URL; wanted action or uncached_action_result, got {kind}" - )); - } - }; - - extract_request_from_action_digest( - store, - action_digest, - execution_environment, - Some(instance.to_owned()), - cache_key_gen_version, - ) - .await + extract_request_from_action_digest( + store, + action_digest, + execution_environment, + Some(instance.to_owned()), + cache_key_gen_version, + ) + .await } fn collection_from_keyvalues(keyvalues: It) -> Col where - Str: AsRef, - It: Iterator, - Col: FromIterator<(String, String)>, + Str: AsRef, + It: Iterator, + Col: FromIterator<(String, String)>, { - keyvalues - .map(|kv| { - let mut parts = kv.as_ref().splitn(2, '='); - ( - parts.next().unwrap().to_string(), - parts.next().unwrap_or_default().to_string(), - ) - }) - .collect() + keyvalues + .map(|kv| { + let mut parts = kv.as_ref().splitn(2, '='); + ( + parts.next().unwrap().to_string(), + parts.next().unwrap_or_default().to_string(), + ) + }) + .collect() } diff --git a/src/rust/engine/protos/build.rs b/src/rust/engine/protos/build.rs index 58f1ab366bc..682813604ce 100644 --- a/src/rust/engine/protos/build.rs +++ b/src/rust/engine/protos/build.rs @@ -4,16 +4,16 @@ use prost_build::Config; fn main() -> Result<(), Box> { - let mut config = Config::new(); - config.bytes(["."]); - config.disable_comments([ - // the comments on these fields contain invalid HTML/Markdown (e.g. "clientip:" outside of a code segment) - "google.rpc.ResourceInfo.owner", - "google.rpc.QuotaFailure.Violation.subject", - ]); + let mut config = Config::new(); + config.bytes(["."]); + config.disable_comments([ + // the comments on these fields contain invalid HTML/Markdown (e.g. "clientip:" outside of a code segment) + "google.rpc.ResourceInfo.owner", + "google.rpc.QuotaFailure.Violation.subject", + ]); - tonic_build::configure() + tonic_build::configure() .build_client(true) .build_server(true) .compile_with_config( @@ -39,5 +39,5 @@ fn main() -> Result<(), Box> { ], )?; - Ok(()) + Ok(()) } diff --git a/src/rust/engine/protos/src/conversions.rs b/src/rust/engine/protos/src/conversions.rs index e0929c3ef62..4b01599e0a4 100644 --- a/src/rust/engine/protos/src/conversions.rs +++ b/src/rust/engine/protos/src/conversions.rs @@ -1,55 +1,57 @@ // Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). 
impl<'a> From<&'a hashing::Digest> for crate::gen::build::bazel::remote::execution::v2::Digest { - fn from(d: &'a hashing::Digest) -> Self { - Self { - hash: d.hash.to_hex(), - size_bytes: d.size_bytes as i64, + fn from(d: &'a hashing::Digest) -> Self { + Self { + hash: d.hash.to_hex(), + size_bytes: d.size_bytes as i64, + } } - } } impl From<hashing::Digest> for crate::gen::build::bazel::remote::execution::v2::Digest { - fn from(d: hashing::Digest) -> Self { - Self { - hash: d.hash.to_hex(), - size_bytes: d.size_bytes as i64, + fn from(d: hashing::Digest) -> Self { + Self { + hash: d.hash.to_hex(), + size_bytes: d.size_bytes as i64, + } } - } } impl<'a> TryFrom<&'a crate::gen::build::bazel::remote::execution::v2::Digest> for hashing::Digest { - type Error = String; + type Error = String; - fn try_from( - d: &crate::gen::build::bazel::remote::execution::v2::Digest, - ) -> Result<Self, Self::Error> { - hashing::Fingerprint::from_hex_string(&d.hash) - .map_err(|err| format!("Bad fingerprint in Digest {:?}: {:?}", &d.hash, err)) - .map(|fingerprint| hashing::Digest::new(fingerprint, d.size_bytes as usize)) - } + fn try_from( + d: &crate::gen::build::bazel::remote::execution::v2::Digest, + ) -> Result<Self, Self::Error> { + hashing::Fingerprint::from_hex_string(&d.hash) + .map_err(|err| format!("Bad fingerprint in Digest {:?}: {:?}", &d.hash, err)) + .map(|fingerprint| hashing::Digest::new(fingerprint, d.size_bytes as usize)) + } } impl TryFrom<crate::gen::build::bazel::remote::execution::v2::Digest> for hashing::Digest { - type Error = String; + type Error = String; - fn try_from( - d: crate::gen::build::bazel::remote::execution::v2::Digest, - ) -> Result<Self, Self::Error> { - hashing::Fingerprint::from_hex_string(&d.hash) - .map_err(|err| format!("Bad fingerprint in Digest {:?}: {:?}", &d.hash, err)) - .map(|fingerprint| hashing::Digest::new(fingerprint, d.size_bytes as usize)) - } + fn try_from( + d: crate::gen::build::bazel::remote::execution::v2::Digest, + ) -> Result<Self, Self::Error> { + hashing::Fingerprint::from_hex_string(&d.hash) + .map_err(|err| format!("Bad fingerprint in Digest {:?}: {:?}", &d.hash, err)) + .map(|fingerprint| hashing::Digest::new(fingerprint, d.size_bytes as usize)) + } } pub fn require_digest< - 'a, - D: Into<Option<&'a crate::gen::build::bazel::remote::execution::v2::Digest>>, + 'a, + D: Into<Option<&'a crate::gen::build::bazel::remote::execution::v2::Digest>>, >( - digest_opt: D, + digest_opt: D, ) -> Result<hashing::Digest, String> { - match digest_opt.into() { - Some(digest) => hashing::Digest::try_from(digest), - None => Err("Protocol violation: Digest missing from a Remote Execution API protobuf.".into()), - } + match digest_opt.into() { + Some(digest) => hashing::Digest::try_from(digest), + None => { + Err("Protocol violation: Digest missing from a Remote Execution API protobuf.".into()) + } + } } diff --git a/src/rust/engine/protos/src/conversions_tests.rs b/src/rust/engine/protos/src/conversions_tests.rs index 7282cd5ed82..871dbf5c92f 100644 --- a/src/rust/engine/protos/src/conversions_tests.rs +++ b/src/rust/engine/protos/src/conversions_tests.rs @@ -6,48 +6,48 @@ use crate::gen::build::bazel::remote::execution::v2 as remexec; #[test] fn from_our_digest() { - let our_digest = &hashing::Digest::new( - hashing::Fingerprint::from_hex_string( - "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", - ) - .unwrap(), - 10, - ); - let converted: remexec::Digest = our_digest.into(); - let want = remexec::Digest { - hash: "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff".to_owned(), - size_bytes: 10, - }; - assert_eq!(converted, want); + let our_digest = &hashing::Digest::new( + hashing::Fingerprint::from_hex_string( + "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", + ) + .unwrap(), + 10, + ); + let converted:
remexec::Digest = our_digest.into(); + let want = remexec::Digest { + hash: "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff".to_owned(), + size_bytes: 10, + }; + assert_eq!(converted, want); } #[test] fn from_bazel_digest() { - let bazel_digest = remexec::Digest { - hash: "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff".to_owned(), - size_bytes: 10, - }; - let converted: Result = (&bazel_digest).try_into(); - let want = hashing::Digest::new( - hashing::Fingerprint::from_hex_string( - "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", - ) - .unwrap(), - 10, - ); - assert_eq!(converted, Ok(want)); + let bazel_digest = remexec::Digest { + hash: "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff".to_owned(), + size_bytes: 10, + }; + let converted: Result = (&bazel_digest).try_into(); + let want = hashing::Digest::new( + hashing::Fingerprint::from_hex_string( + "0123456789abcdeffedcba98765432100000000000000000ffffffffffffffff", + ) + .unwrap(), + 10, + ); + assert_eq!(converted, Ok(want)); } #[test] fn from_bad_bazel_digest() { - let bazel_digest = remexec::Digest { - hash: "0".to_owned(), - size_bytes: 10, - }; - let converted: Result = (&bazel_digest).try_into(); - let err = converted.expect_err("Want Err converting bad digest"); - assert!( - err.starts_with("Bad fingerprint in Digest \"0\":"), - "Bad error message: {err}" - ); + let bazel_digest = remexec::Digest { + hash: "0".to_owned(), + size_bytes: 10, + }; + let converted: Result = (&bazel_digest).try_into(); + let err = converted.expect_err("Want Err converting bad digest"); + assert!( + err.starts_with("Bad fingerprint in Digest \"0\":"), + "Bad error message: {err}" + ); } diff --git a/src/rust/engine/protos/src/hashing.rs b/src/rust/engine/protos/src/hashing.rs index ccd4768c4f8..82eb68775d7 100644 --- a/src/rust/engine/protos/src/hashing.rs +++ b/src/rust/engine/protos/src/hashing.rs @@ -6,19 +6,19 @@ use crate::gen::pants::cache::dependency_inference_request::Metadata; use crate::gen::pants::cache::JavascriptInferenceMetadata; impl Hash for JavascriptInferenceMetadata { - fn hash(&self, state: &mut H) { - self.package_root.hash(state); - for pattern in &self.import_patterns { - pattern.pattern.hash(state); - pattern.replacements.hash(state); + fn hash(&self, state: &mut H) { + self.package_root.hash(state); + for pattern in &self.import_patterns { + pattern.pattern.hash(state); + pattern.replacements.hash(state); + } } - } } impl Hash for Metadata { - fn hash(&self, state: &mut H) { - match self { - Metadata::Js(m) => m.hash(state), + fn hash(&self, state: &mut H) { + match self { + Metadata::Js(m) => m.hash(state), + } } - } } diff --git a/src/rust/engine/protos/src/lib.rs b/src/rust/engine/protos/src/lib.rs index 35f21e006cd..5842b386be2 100644 --- a/src/rust/engine/protos/src/lib.rs +++ b/src/rust/engine/protos/src/lib.rs @@ -12,53 +12,53 @@ pub use conversions::require_digest; mod conversions_tests; pub mod gen { - // NOTE: Prost automatically relies on the existence of this nested module structure because - // it uses multiple `super` references (e.g., `super::super::super::Foo`) to traverse out of - // a module to refer to protos in other modules. 
- pub mod google { - pub mod bytestream { - tonic::include_proto!("google.bytestream"); - } - pub mod longrunning { - tonic::include_proto!("google.longrunning"); - } - pub mod rpc { - tonic::include_proto!("google.rpc"); + // NOTE: Prost automatically relies on the existence of this nested module structure because + // it uses multiple `super` references (e.g., `super::super::super::Foo`) to traverse out of + // a module to refer to protos in other modules. + pub mod google { + pub mod bytestream { + tonic::include_proto!("google.bytestream"); + } + pub mod longrunning { + tonic::include_proto!("google.longrunning"); + } + pub mod rpc { + tonic::include_proto!("google.rpc"); + } } - } - pub mod build { - pub mod bazel { - pub mod remote { - pub mod execution { - pub mod v2 { - tonic::include_proto!("build.bazel.remote.execution.v2"); + pub mod build { + pub mod bazel { + pub mod remote { + pub mod execution { + pub mod v2 { + tonic::include_proto!("build.bazel.remote.execution.v2"); - pub fn empty_digest() -> Digest { - Digest { + pub fn empty_digest() -> Digest { + Digest { hash: String::from( "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", ), size_bytes: 0, } + } + } + } + } + pub mod semver { + tonic::include_proto!("build.bazel.semver"); } - } } - } - pub mod semver { - tonic::include_proto!("build.bazel.semver"); - } } - } - pub mod buildbarn { - pub mod cas { - tonic::include_proto!("buildbarn.cas"); + pub mod buildbarn { + pub mod cas { + tonic::include_proto!("buildbarn.cas"); + } } - } - pub mod pants { - pub mod cache { - tonic::include_proto!("pants.cache"); + pub mod pants { + pub mod cache { + tonic::include_proto!("pants.cache"); + } } - } } mod verification; diff --git a/src/rust/engine/protos/src/verification.rs b/src/rust/engine/protos/src/verification.rs index 0801e006696..007a7b840bd 100644 --- a/src/rust/engine/protos/src/verification.rs +++ b/src/rust/engine/protos/src/verification.rs @@ -7,65 +7,65 @@ use hashing::Digest; use crate::gen::build::bazel::remote::execution::v2 as remote_execution; pub fn verify_directory_canonical( - digest: Digest, - directory: &remote_execution::Directory, + digest: Digest, + directory: &remote_execution::Directory, ) -> Result<(), String> { - verify_nodes(&directory.files, |n| &n.name, |n| n.digest.as_ref()) - .map_err(|e| format!("Invalid file in {digest:?}: {e}"))?; - verify_nodes(&directory.directories, |n| &n.name, |n| n.digest.as_ref()) - .map_err(|e| format!("Invalid directory in {digest:?}: {e}"))?; - let child_names: HashSet<&str> = directory - .files - .iter() - .map(|file_node| file_node.name.as_str()) - .chain( - directory - .directories + verify_nodes(&directory.files, |n| &n.name, |n| n.digest.as_ref()) + .map_err(|e| format!("Invalid file in {digest:?}: {e}"))?; + verify_nodes(&directory.directories, |n| &n.name, |n| n.digest.as_ref()) + .map_err(|e| format!("Invalid directory in {digest:?}: {e}"))?; + let child_names: HashSet<&str> = directory + .files .iter() - .map(|dir_node| dir_node.name.as_str()), - ) - .collect(); - if child_names.len() != directory.files.len() + directory.directories.len() { - return Err(format!( + .map(|file_node| file_node.name.as_str()) + .chain( + directory + .directories + .iter() + .map(|dir_node| dir_node.name.as_str()), + ) + .collect(); + if child_names.len() != directory.files.len() + directory.directories.len() { + return Err(format!( "Child paths must be unique, but a child path of {digest:?} was both a file and a directory: {directory:?}" )); - } - Ok(()) + } + 
Ok(()) } fn verify_nodes( - nodes: &[Node], - get_name: GetName, - get_digest: GetDigest, + nodes: &[Node], + get_name: GetName, + get_digest: GetDigest, ) -> Result<(), String> where - Node: prost::Message, - GetName: Fn(&Node) -> &str, - GetDigest: Fn(&Node) -> Option<&remote_execution::Digest>, + Node: prost::Message, + GetName: Fn(&Node) -> &str, + GetDigest: Fn(&Node) -> Option<&remote_execution::Digest>, { - let mut prev: Option<&Node> = None; - for node in nodes { - let name = get_name(node); - if name.is_empty() { - return Err(format!( - "A child name must not be empty, but {:?} had an empty name.", - get_digest(node), - )); - } else if name.contains('/') { - return Err(format!( - "All children must have one path segment, but found {name}" - )); - } - if let Some(p) = prev { - if name <= get_name(p) { - return Err(format!( - "Children must be sorted and unique, but {} was before {}", - get_name(p), - name, - )); - } + let mut prev: Option<&Node> = None; + for node in nodes { + let name = get_name(node); + if name.is_empty() { + return Err(format!( + "A child name must not be empty, but {:?} had an empty name.", + get_digest(node), + )); + } else if name.contains('/') { + return Err(format!( + "All children must have one path segment, but found {name}" + )); + } + if let Some(p) = prev { + if name <= get_name(p) { + return Err(format!( + "Children must be sorted and unique, but {} was before {}", + get_name(p), + name, + )); + } + } + prev = Some(node); } - prev = Some(node); - } - Ok(()) + Ok(()) } diff --git a/src/rust/engine/protos/src/verification_tests.rs b/src/rust/engine/protos/src/verification_tests.rs index d118218a91a..5c67eb649c6 100644 --- a/src/rust/engine/protos/src/verification_tests.rs +++ b/src/rust/engine/protos/src/verification_tests.rs @@ -15,184 +15,184 @@ const OTHER_DIRECTORY_SIZE: i64 = 0; #[test] fn empty_directory() { - assert_eq!( - Ok(()), - verify_directory_canonical(EMPTY_DIGEST, &Directory::default()) - ); + assert_eq!( + Ok(()), + verify_directory_canonical(EMPTY_DIGEST, &Directory::default()) + ); } #[test] fn canonical_directory() { - let directory = Directory { - files: vec![ - FileNode { - name: "roland".to_owned(), - digest: Some(Digest { - hash: HASH.to_owned(), - size_bytes: FILE_SIZE, - }), - ..FileNode::default() - }, - FileNode { - name: "simba".to_owned(), - digest: Some(Digest { - hash: HASH.to_owned(), - size_bytes: FILE_SIZE, - }), - ..FileNode::default() - }, - ], - directories: vec![ - DirectoryNode { - name: "cats".to_owned(), - digest: Some(Digest { - hash: DIRECTORY_HASH.to_owned(), - size_bytes: DIRECTORY_SIZE, - }), - }, - DirectoryNode { - name: "dogs".to_owned(), - digest: Some(Digest { - hash: OTHER_DIRECTORY_HASH.to_owned(), - size_bytes: OTHER_DIRECTORY_SIZE, - }), - }, - ], - ..Directory::default() - }; - - assert_eq!(Ok(()), verify_directory_canonical(EMPTY_DIGEST, &directory)); + let directory = Directory { + files: vec![ + FileNode { + name: "roland".to_owned(), + digest: Some(Digest { + hash: HASH.to_owned(), + size_bytes: FILE_SIZE, + }), + ..FileNode::default() + }, + FileNode { + name: "simba".to_owned(), + digest: Some(Digest { + hash: HASH.to_owned(), + size_bytes: FILE_SIZE, + }), + ..FileNode::default() + }, + ], + directories: vec![ + DirectoryNode { + name: "cats".to_owned(), + digest: Some(Digest { + hash: DIRECTORY_HASH.to_owned(), + size_bytes: DIRECTORY_SIZE, + }), + }, + DirectoryNode { + name: "dogs".to_owned(), + digest: Some(Digest { + hash: OTHER_DIRECTORY_HASH.to_owned(), + size_bytes: 
OTHER_DIRECTORY_SIZE, + }), + }, + ], + ..Directory::default() + }; + + assert_eq!(Ok(()), verify_directory_canonical(EMPTY_DIGEST, &directory)); } #[test] fn empty_child_name() { - let directory = Directory { - directories: vec![DirectoryNode { - name: "".to_owned(), - digest: Some(Digest { - hash: DIRECTORY_HASH.to_owned(), - size_bytes: DIRECTORY_SIZE, - }), - }], - ..Directory::default() - }; - - let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); - assert!( - error.contains("A child name must not be empty"), - "Bad error message: {error}" - ); + let directory = Directory { + directories: vec![DirectoryNode { + name: "".to_owned(), + digest: Some(Digest { + hash: DIRECTORY_HASH.to_owned(), + size_bytes: DIRECTORY_SIZE, + }), + }], + ..Directory::default() + }; + + let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); + assert!( + error.contains("A child name must not be empty"), + "Bad error message: {error}" + ); } #[test] fn multiple_path_segments_in_directory() { - let directory = Directory { - directories: vec![DirectoryNode { - name: "pets/cats".to_owned(), - digest: Some(Digest { - hash: DIRECTORY_HASH.to_owned(), - size_bytes: DIRECTORY_SIZE, - }), - }], - ..Directory::default() - }; - - let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); - assert!(error.contains("pets/cats"), "Bad error message: {error}"); + let directory = Directory { + directories: vec![DirectoryNode { + name: "pets/cats".to_owned(), + digest: Some(Digest { + hash: DIRECTORY_HASH.to_owned(), + size_bytes: DIRECTORY_SIZE, + }), + }], + ..Directory::default() + }; + + let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); + assert!(error.contains("pets/cats"), "Bad error message: {error}"); } #[test] fn multiple_path_segments_in_file() { - let directory = Directory { - files: vec![FileNode { - name: "cats/roland".to_owned(), - digest: Some(Digest { - hash: HASH.to_owned(), - size_bytes: FILE_SIZE, - }), - ..FileNode::default() - }], - ..Directory::default() - }; - - let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); - assert!(error.contains("cats/roland"), "Bad error message: {error}"); + let directory = Directory { + files: vec![FileNode { + name: "cats/roland".to_owned(), + digest: Some(Digest { + hash: HASH.to_owned(), + size_bytes: FILE_SIZE, + }), + ..FileNode::default() + }], + ..Directory::default() + }; + + let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); + assert!(error.contains("cats/roland"), "Bad error message: {error}"); } #[test] fn duplicate_path_in_directory() { - let directory = Directory { - directories: vec![ - DirectoryNode { - name: "cats".to_owned(), - digest: Some(Digest { - hash: DIRECTORY_HASH.to_owned(), - size_bytes: DIRECTORY_SIZE, - }), - }, - DirectoryNode { - name: "cats".to_owned(), - digest: Some(Digest { - hash: DIRECTORY_HASH.to_owned(), - size_bytes: DIRECTORY_SIZE, - }), - }, - ], - ..Directory::default() - }; - - let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); - assert!(error.contains("cats"), "Bad error message: {error}"); + let directory = Directory { + directories: vec![ + DirectoryNode { + name: "cats".to_owned(), + digest: Some(Digest { + hash: DIRECTORY_HASH.to_owned(), + size_bytes: DIRECTORY_SIZE, + }), + }, + DirectoryNode { + name: "cats".to_owned(), + digest: Some(Digest { + hash: 
DIRECTORY_HASH.to_owned(), + size_bytes: DIRECTORY_SIZE, + }), + }, + ], + ..Directory::default() + }; + + let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); + assert!(error.contains("cats"), "Bad error message: {error}"); } #[test] fn duplicate_path_in_file() { - let directory = Directory { - files: vec![ - FileNode { - name: "roland".to_owned(), - digest: Some(Digest { - hash: HASH.to_owned(), - size_bytes: FILE_SIZE, - }), - ..FileNode::default() - }, - FileNode { - name: "roland".to_owned(), - digest: Some(Digest { - hash: HASH.to_owned(), - size_bytes: FILE_SIZE, - }), - ..FileNode::default() - }, - ], - ..Directory::default() - }; - - let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); - assert!(error.contains("roland"), "Bad error message: {error}"); + let directory = Directory { + files: vec![ + FileNode { + name: "roland".to_owned(), + digest: Some(Digest { + hash: HASH.to_owned(), + size_bytes: FILE_SIZE, + }), + ..FileNode::default() + }, + FileNode { + name: "roland".to_owned(), + digest: Some(Digest { + hash: HASH.to_owned(), + size_bytes: FILE_SIZE, + }), + ..FileNode::default() + }, + ], + ..Directory::default() + }; + + let error = verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); + assert!(error.contains("roland"), "Bad error message: {error}"); } #[test] fn duplicate_path_in_file_and_directory() { - let directory = Directory { - files: vec![FileNode { - name: "roland".to_owned(), - digest: Some(Digest { - hash: HASH.to_owned(), - size_bytes: FILE_SIZE, - }), - ..FileNode::default() - }], - directories: vec![DirectoryNode { - name: "roland".to_owned(), - digest: Some(Digest { - hash: DIRECTORY_HASH.to_owned(), - size_bytes: DIRECTORY_SIZE, - }), - }], - ..Directory::default() - }; - - verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); + let directory = Directory { + files: vec![FileNode { + name: "roland".to_owned(), + digest: Some(Digest { + hash: HASH.to_owned(), + size_bytes: FILE_SIZE, + }), + ..FileNode::default() + }], + directories: vec![DirectoryNode { + name: "roland".to_owned(), + digest: Some(Digest { + hash: DIRECTORY_HASH.to_owned(), + size_bytes: DIRECTORY_SIZE, + }), + }], + ..Directory::default() + }; + + verify_directory_canonical(EMPTY_DIGEST, &directory).expect_err("Want error"); } diff --git a/src/rust/engine/remote_provider/remote_provider_opendal/src/action_cache_tests.rs b/src/rust/engine/remote_provider/remote_provider_opendal/src/action_cache_tests.rs index 5af8b14deb6..58d55604b42 100644 --- a/src/rust/engine/remote_provider/remote_provider_opendal/src/action_cache_tests.rs +++ b/src/rust/engine/remote_provider/remote_provider_opendal/src/action_cache_tests.rs @@ -18,94 +18,94 @@ use super::Provider; const BASE: &str = "opendal-testing-base"; fn test_path(digest: Digest) -> String { - let fingerprint = digest.hash.to_string(); - format!( - "{}/{}/{}/{}", - BASE, - &fingerprint[0..2], - &fingerprint[2..4], - fingerprint - ) + let fingerprint = digest.hash.to_string(); + format!( + "{}/{}/{}/{}", + BASE, + &fingerprint[0..2], + &fingerprint[2..4], + fingerprint + ) } fn remote_options() -> RemoteOptions { - RemoteOptions { - cas_address: "".to_owned(), - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes: 10000, - rpc_timeout: Duration::from_secs(5), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit: 10000, - } 
+ RemoteOptions { + cas_address: "".to_owned(), + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes: 10000, + rpc_timeout: Duration::from_secs(5), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit: 10000, + } } fn new_provider() -> Provider { - Provider::new(Memory::default(), BASE.to_owned(), remote_options()).unwrap() + Provider::new(Memory::default(), BASE.to_owned(), remote_options()).unwrap() } async fn write_test_data(provider: &Provider, digest: Digest, data: remexec::ActionResult) { - provider - .operator - .write(&test_path(digest), data.to_bytes()) - .await - .unwrap() + provider + .operator + .write(&test_path(digest), data.to_bytes()) + .await + .unwrap() } #[tokio::test] async fn get_action_result_existing() { - let provider = new_provider(); - - let action_digest = Digest::of_bytes(b"get_action_cache test"); - let action_result = remexec::ActionResult { - exit_code: 123, - ..Default::default() - }; - write_test_data(&provider, action_digest, action_result.clone()).await; - - assert_eq!( - provider.get_action_result(action_digest, "").await, - Ok(Some(action_result)) - ); + let provider = new_provider(); + + let action_digest = Digest::of_bytes(b"get_action_cache test"); + let action_result = remexec::ActionResult { + exit_code: 123, + ..Default::default() + }; + write_test_data(&provider, action_digest, action_result.clone()).await; + + assert_eq!( + provider.get_action_result(action_digest, "").await, + Ok(Some(action_result)) + ); } #[tokio::test] async fn get_action_result_missing() { - let provider = new_provider(); + let provider = new_provider(); - let action_digest = Digest::of_bytes(b"update_action_cache test"); + let action_digest = Digest::of_bytes(b"update_action_cache test"); - assert_eq!( - provider.get_action_result(action_digest, "").await, - Ok(None) - ); + assert_eq!( + provider.get_action_result(action_digest, "").await, + Ok(None) + ); } #[tokio::test] async fn update_action_cache() { - let provider = new_provider(); - - let action_digest = Digest::of_bytes(b"update_action_cache test"); - let action_result = remexec::ActionResult { - exit_code: 123, - ..Default::default() - }; - - provider - .update_action_result(action_digest, action_result.clone()) - .await - .unwrap(); - - let stored = provider - .operator - .read(&test_path(action_digest)) - .await - .unwrap(); - assert_eq!( - remexec::ActionResult::decode(Bytes::from(stored)).unwrap(), - action_result - ); + let provider = new_provider(); + + let action_digest = Digest::of_bytes(b"update_action_cache test"); + let action_result = remexec::ActionResult { + exit_code: 123, + ..Default::default() + }; + + provider + .update_action_result(action_digest, action_result.clone()) + .await + .unwrap(); + + let stored = provider + .operator + .read(&test_path(action_digest)) + .await + .unwrap(); + assert_eq!( + remexec::ActionResult::decode(Bytes::from(stored)).unwrap(), + action_result + ); } diff --git a/src/rust/engine/remote_provider/remote_provider_opendal/src/byte_store_tests.rs b/src/rust/engine/remote_provider/remote_provider_opendal/src/byte_store_tests.rs index d23e623ac22..23d72359241 100644 --- a/src/rust/engine/remote_provider/remote_provider_opendal/src/byte_store_tests.rs +++ b/src/rust/engine/remote_provider/remote_provider_opendal/src/byte_store_tests.rs @@ -15,274 +15,274 @@ use crate::Provider; const BASE: &str = "opendal-testing-base"; fn test_path(data: &TestData) -> String { - let 
fingerprint = data.fingerprint().to_string(); - format!( - "{}/{}/{}/{}", - BASE, - &fingerprint[0..2], - &fingerprint[2..4], - fingerprint - ) + let fingerprint = data.fingerprint().to_string(); + format!( + "{}/{}/{}/{}", + BASE, + &fingerprint[0..2], + &fingerprint[2..4], + fingerprint + ) } fn remote_options() -> RemoteOptions { - RemoteOptions { - cas_address: "".to_owned(), - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes: 10000, - rpc_timeout: Duration::from_secs(5), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit: 10000, - } + RemoteOptions { + cas_address: "".to_owned(), + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes: 10000, + rpc_timeout: Duration::from_secs(5), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit: 10000, + } } fn new_provider() -> Provider { - Provider::new(Memory::default(), BASE.to_owned(), remote_options()).unwrap() + Provider::new(Memory::default(), BASE.to_owned(), remote_options()).unwrap() } async fn write_test_data(provider: &Provider, data: &TestData) { - provider - .operator - .write(&test_path(&data), data.bytes()) - .await - .unwrap(); + provider + .operator + .write(&test_path(&data), data.bytes()) + .await + .unwrap(); } #[tokio::test] async fn load_existing() { - let testdata = TestData::roland(); - let provider = new_provider(); - write_test_data(&provider, &testdata).await; - - let mut destination = Vec::new(); - let found = provider - .load(testdata.digest(), &mut destination) - .await - .unwrap(); - assert!(found); - assert_eq!(destination, testdata.bytes()) + let testdata = TestData::roland(); + let provider = new_provider(); + write_test_data(&provider, &testdata).await; + + let mut destination = Vec::new(); + let found = provider + .load(testdata.digest(), &mut destination) + .await + .unwrap(); + assert!(found); + assert_eq!(destination, testdata.bytes()) } #[tokio::test] async fn load_missing() { - let testdata = TestData::roland(); - let provider = new_provider(); - - let mut destination = Vec::new(); - let found = provider - .load(testdata.digest(), &mut destination) - .await - .unwrap(); - assert!(!found); - assert!(destination.is_empty()) + let testdata = TestData::roland(); + let provider = new_provider(); + + let mut destination = Vec::new(); + let found = provider + .load(testdata.digest(), &mut destination) + .await + .unwrap(); + assert!(!found); + assert!(destination.is_empty()) } #[tokio::test] async fn load_empty() { - // The empty file can be loaded even when it's not "physically" in the remote provider. - let testdata = TestData::empty(); - let provider = new_provider(); - - let mut destination = Vec::new(); - let found = provider - .load(testdata.digest(), &mut destination) - .await - .unwrap(); - assert!(found); - assert_eq!(destination, testdata.bytes()); + // The empty file can be loaded even when it's not "physically" in the remote provider. 
+ let testdata = TestData::empty(); + let provider = new_provider(); + + let mut destination = Vec::new(); + let found = provider + .load(testdata.digest(), &mut destination) + .await + .unwrap(); + assert!(found); + assert_eq!(destination, testdata.bytes()); } #[tokio::test] async fn load_existing_wrong_digest_eror() { - let testdata = TestData::roland(); - let provider = new_provider(); - provider - .operator - .write(&test_path(&testdata), Bytes::from_static(b"not roland")) - .await - .unwrap(); - - let mut destination = Vec::new(); - let error = provider - .load(testdata.digest(), &mut destination) - .await - .expect_err("Want error"); - - assert!( - error.contains("Remote CAS gave wrong digest"), - "Bad error message, got: {error}" - ) + let testdata = TestData::roland(); + let provider = new_provider(); + provider + .operator + .write(&test_path(&testdata), Bytes::from_static(b"not roland")) + .await + .unwrap(); + + let mut destination = Vec::new(); + let error = provider + .load(testdata.digest(), &mut destination) + .await + .expect_err("Want error"); + + assert!( + error.contains("Remote CAS gave wrong digest"), + "Bad error message, got: {error}" + ) } #[tokio::test] async fn load_without_validation_existing() { - let testdata = TestData::roland(); - let bytes = Bytes::from_static(b"not roland"); - let provider = new_provider(); - provider - .operator - .write(&test_path(&testdata), bytes.clone()) - .await - .unwrap(); - - let mut destination = Vec::new(); - let found = provider - .load_without_validation(testdata.digest(), &mut destination) - .await - .unwrap(); - assert!(found); - assert_eq!(destination, bytes) + let testdata = TestData::roland(); + let bytes = Bytes::from_static(b"not roland"); + let provider = new_provider(); + provider + .operator + .write(&test_path(&testdata), bytes.clone()) + .await + .unwrap(); + + let mut destination = Vec::new(); + let found = provider + .load_without_validation(testdata.digest(), &mut destination) + .await + .unwrap(); + assert!(found); + assert_eq!(destination, bytes) } #[tokio::test] async fn load_without_validation_missing() { - let testdata = TestData::roland(); - let provider = new_provider(); - - let mut destination = Vec::new(); - let found = provider - .load_without_validation(testdata.digest(), &mut destination) - .await - .unwrap(); - assert!(!found); - assert!(destination.is_empty()) + let testdata = TestData::roland(); + let provider = new_provider(); + + let mut destination = Vec::new(); + let found = provider + .load_without_validation(testdata.digest(), &mut destination) + .await + .unwrap(); + assert!(!found); + assert!(destination.is_empty()) } async fn assert_store(provider: &Provider, testdata: &TestData) { - let result = provider.operator.read(&test_path(testdata)).await.unwrap(); - assert_eq!(result, testdata.bytes()); + let result = provider.operator.read(&test_path(testdata)).await.unwrap(); + assert_eq!(result, testdata.bytes()); } #[tokio::test] async fn store_bytes_data() { - let testdata = TestData::roland(); - let provider = new_provider(); + let testdata = TestData::roland(); + let provider = new_provider(); - provider - .store_bytes(testdata.digest(), testdata.bytes()) - .await - .unwrap(); + provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .unwrap(); - assert_store(&provider, &testdata).await; + assert_store(&provider, &testdata).await; } #[tokio::test] async fn store_bytes_empty() { - let testdata = TestData::empty(); - let provider = new_provider(); - - provider - 
.store_bytes(testdata.digest(), testdata.bytes()) - .await - .unwrap(); - - // We don't actually store an empty file. - assert!(!provider - .operator - .is_exist(&test_path(&testdata)) - .await - .unwrap()); + let testdata = TestData::empty(); + let provider = new_provider(); + + provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .unwrap(); + + // We don't actually store an empty file. + assert!(!provider + .operator + .is_exist(&test_path(&testdata)) + .await + .unwrap()); } #[tokio::test] async fn store_file_one_chunk() { - let testdata = TestData::roland(); - let provider = new_provider(); + let testdata = TestData::roland(); + let provider = new_provider(); - provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await - .unwrap(); - assert_store(&provider, &testdata).await; + provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .unwrap(); + assert_store(&provider, &testdata).await; } #[tokio::test] async fn store_file_multiple_chunks() { - let testdata = TestData::all_the_henries(); - let provider = new_provider(); + let testdata = TestData::all_the_henries(); + let provider = new_provider(); - // Our current chunk size is the tokio::io::copy default (8KiB at - // the time of writing). - assert!(testdata.len() > 8 * 1024); + // Our current chunk size is the tokio::io::copy default (8KiB at + // the time of writing). + assert!(testdata.len() > 8 * 1024); - provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await - .unwrap(); - assert_store(&provider, &testdata).await; + provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .unwrap(); + assert_store(&provider, &testdata).await; } #[tokio::test] async fn store_file_empty_file() { - let testdata = TestData::empty(); - let provider = new_provider(); + let testdata = TestData::empty(); + let provider = new_provider(); - provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await - .unwrap(); - - // We don't actually store an empty file. - assert!(!provider - .operator - .is_exist(&test_path(&testdata)) - .await - .unwrap()); + provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .unwrap(); + + // We don't actually store an empty file. 
+ assert!(!provider + .operator + .is_exist(&test_path(&testdata)) + .await + .unwrap()); } #[tokio::test] async fn list_missing_digests_none_missing() { - let testdata = TestData::roland(); - let provider = new_provider(); - write_test_data(&provider, &testdata).await; - - assert_eq!( - provider - .list_missing_digests(&mut vec![testdata.digest()].into_iter()) - .await, - Ok(HashSet::new()) - ) + let testdata = TestData::roland(); + let provider = new_provider(); + write_test_data(&provider, &testdata).await; + + assert_eq!( + provider + .list_missing_digests(&mut vec![testdata.digest()].into_iter()) + .await, + Ok(HashSet::new()) + ) } #[tokio::test] async fn list_missing_digests_some_missing() { - let testdata = TestData::roland(); - let digest = testdata.digest(); + let testdata = TestData::roland(); + let digest = testdata.digest(); - let provider = new_provider(); + let provider = new_provider(); - let mut digest_set = HashSet::new(); - digest_set.insert(digest); + let mut digest_set = HashSet::new(); + digest_set.insert(digest); - assert_eq!( - provider - .list_missing_digests(&mut vec![digest].into_iter()) - .await, - Ok(digest_set) - ) + assert_eq!( + provider + .list_missing_digests(&mut vec![digest].into_iter()) + .await, + Ok(digest_set) + ) } #[tokio::test] async fn list_missing_digests_empty_never_missing() { - let testdata = TestData::empty(); - let provider = new_provider(); - - assert_eq!( - provider - .list_missing_digests(&mut vec![testdata.digest()].into_iter()) - .await, - Ok(HashSet::new()) - ) + let testdata = TestData::empty(); + let provider = new_provider(); + + assert_eq!( + provider + .list_missing_digests(&mut vec![testdata.digest()].into_iter()) + .await, + Ok(HashSet::new()) + ) } diff --git a/src/rust/engine/remote_provider/remote_provider_opendal/src/lib.rs b/src/rust/engine/remote_provider/remote_provider_opendal/src/lib.rs index 9fd358d37b3..2cad1b8eaf5 100644 --- a/src/rust/engine/remote_provider/remote_provider_opendal/src/lib.rs +++ b/src/rust/engine/remote_provider/remote_provider_opendal/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -43,7 +43,7 @@ use tokio::fs::File; use workunit_store::ObservationMetric; use remote_provider_traits::{ - ActionCacheProvider, ByteStoreProvider, LoadDestination, RemoteOptions, + ActionCacheProvider, ByteStoreProvider, LoadDestination, RemoteOptions, }; #[cfg(test)] @@ -55,289 +55,289 @@ const GITHUB_ACTIONS_CACHE_VERSION: &str = "pants-1"; #[derive(Debug, Clone, Copy)] pub enum LoadMode { - Validate, - NoValidate, + Validate, + NoValidate, } pub struct Provider { - operator: Operator, - base_path: String, + operator: Operator, + base_path: String, } impl Provider { - pub fn new( - builder: B, - scope: String, - options: RemoteOptions, - ) -> Result { - let operator = Operator::new(builder) - .map_err(|e| { + pub fn new( + builder: B, + scope: String, + options: RemoteOptions, + ) -> Result { + let operator = Operator::new(builder) + .map_err(|e| { + format!( + "failed to initialise {} remote store provider: {e}", + B::SCHEME + ) + })? + .layer(ConcurrentLimitLayer::new(options.rpc_concurrency_limit)) + .layer( + // TODO: record Metric::RemoteStoreRequestTimeouts for timeouts + TimeoutLayer::new() + .with_timeout(options.rpc_timeout) + // TimeoutLayer requires specifying a non-zero minimum transfer speed too. + .with_speed(1), + ) + // TODO: RetryLayer doesn't seem to retry stores, but we should + .layer(RetryLayer::new().with_max_times(options.rpc_retries + 1)) + .finish(); + + let base_path = match options.instance_name { + Some(instance_name) => format!("{instance_name}/{scope}"), + None => scope, + }; + + Ok(Provider { + operator, + base_path, + }) + } + + pub fn fs(path: &str, scope: String, options: RemoteOptions) -> Result { + let mut builder = opendal::services::Fs::default(); + builder.root(path).enable_path_check(); + Provider::new(builder, scope, options) + } + + pub fn github_actions_cache( + url: &str, + scope: String, + options: RemoteOptions, + ) -> Result { + let mut builder = opendal::services::Ghac::default(); + + builder.version(GITHUB_ACTIONS_CACHE_VERSION); + builder.endpoint(url); + + // extract the token from the `authorization: Bearer ...` header because OpenDAL's Ghac service + // reasons about it separately (although does just stick it in its own `authorization: Bearer + // ...` header internally). + let header_help_blurb = "Using GitHub Actions Cache remote cache requires a token set in a `authorization: Bearer ...` header, set via [GLOBAL].remote_store_headers or [GLOBAL].remote_oauth_bearer_token_path"; + let Some(auth_header_value) = options.headers.get(AUTHORIZATION.as_str()) else { + let existing_headers = options.headers.keys().collect::>(); + return Err(format!( + "Expected to find '{}' header, but only found: {:?}. {}", + AUTHORIZATION, existing_headers, header_help_blurb, + )); + }; + + let Some(token) = auth_header_value.strip_prefix("Bearer ") else { + return Err(format!( + "Expected '{}' header to start with `Bearer `, found value starting with {:?}. {}", + AUTHORIZATION, + // only show the first few characters to not accidentally leak (all of) a secret, but + // still give the user something to start debugging + &auth_header_value[..4], + header_help_blurb, + )); + }; + + builder.runtime_token(token); + + Provider::new(builder, scope, options) + } + + fn path(&self, fingerprint: Fingerprint) -> String { + // We include the first two bytes as parent directories to make listings less wide. 
format!( - "failed to initialise {} remote store provider: {e}", - B::SCHEME + "{}/{:02x}/{:02x}/{}", + self.base_path, fingerprint.0[0], fingerprint.0[1], fingerprint ) - })? - .layer(ConcurrentLimitLayer::new(options.rpc_concurrency_limit)) - .layer( - // TODO: record Metric::RemoteStoreRequestTimeouts for timeouts - TimeoutLayer::new() - .with_timeout(options.rpc_timeout) - // TimeoutLayer requires specifying a non-zero minimum transfer speed too. - .with_speed(1), - ) - // TODO: RetryLayer doesn't seem to retry stores, but we should - .layer(RetryLayer::new().with_max_times(options.rpc_retries + 1)) - .finish(); - - let base_path = match options.instance_name { - Some(instance_name) => format!("{instance_name}/{scope}"), - None => scope, - }; - - Ok(Provider { - operator, - base_path, - }) - } - - pub fn fs(path: &str, scope: String, options: RemoteOptions) -> Result { - let mut builder = opendal::services::Fs::default(); - builder.root(path).enable_path_check(); - Provider::new(builder, scope, options) - } - - pub fn github_actions_cache( - url: &str, - scope: String, - options: RemoteOptions, - ) -> Result { - let mut builder = opendal::services::Ghac::default(); - - builder.version(GITHUB_ACTIONS_CACHE_VERSION); - builder.endpoint(url); - - // extract the token from the `authorization: Bearer ...` header because OpenDAL's Ghac service - // reasons about it separately (although does just stick it in its own `authorization: Bearer - // ...` header internally). - let header_help_blurb = "Using GitHub Actions Cache remote cache requires a token set in a `authorization: Bearer ...` header, set via [GLOBAL].remote_store_headers or [GLOBAL].remote_oauth_bearer_token_path"; - let Some(auth_header_value) = options.headers.get(AUTHORIZATION.as_str()) else { - let existing_headers = options.headers.keys().collect::>(); - return Err(format!( - "Expected to find '{}' header, but only found: {:?}. {}", - AUTHORIZATION, existing_headers, header_help_blurb, - )); - }; - - let Some(token) = auth_header_value.strip_prefix("Bearer ") else { - return Err(format!( - "Expected '{}' header to start with `Bearer `, found value starting with {:?}. {}", - AUTHORIZATION, - // only show the first few characters to not accidentally leak (all of) a secret, but - // still give the user something to start debugging - &auth_header_value[..4], - header_help_blurb, - )); - }; - - builder.runtime_token(token); - - Provider::new(builder, scope, options) - } - - fn path(&self, fingerprint: Fingerprint) -> String { - // We include the first two bytes as parent directories to make listings less wide. - format!( - "{}/{:02x}/{:02x}/{}", - self.base_path, fingerprint.0[0], fingerprint.0[1], fingerprint - ) - } - - async fn load_raw( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - mode: LoadMode, - ) -> Result { - // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we just magic - // it up here, and ignore it when storing. - if digest == EMPTY_DIGEST { - // `destination` starts off empty, so is already in the right state. 
- return Ok(true); } - let path = self.path(digest.hash); - let start = Instant::now(); - let mut reader = match self.operator.reader(&path).await { - Ok(reader) => reader, - Err(e) if e.kind() == opendal::ErrorKind::NotFound => return Ok(false), - Err(e) => return Err(format!("failed to read {}: {}", path, e)), - }; - - if let Some(workunit_store_handle) = workunit_store::get_workunit_store_handle() { - // TODO: this pretends that the time-to-first-byte can be approximated by "time to create - // reader", which is often not really true. - let timing: Result = Instant::now().duration_since(start).as_micros().try_into(); - if let Ok(obs) = timing { - workunit_store_handle - .store - .record_observation(ObservationMetric::RemoteStoreTimeToFirstByteMicros, obs); - } - } + async fn load_raw( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + mode: LoadMode, + ) -> Result { + // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we just magic + // it up here, and ignore it when storing. + if digest == EMPTY_DIGEST { + // `destination` starts off empty, so is already in the right state. + return Ok(true); + } - match mode { - LoadMode::Validate => { - let correct_digest = async_verified_copy(digest, false, &mut reader, destination) - .await - .map_err(|e| format!("failed to read {}: {}", path, e))?; + let path = self.path(digest.hash); + let start = Instant::now(); + let mut reader = match self.operator.reader(&path).await { + Ok(reader) => reader, + Err(e) if e.kind() == opendal::ErrorKind::NotFound => return Ok(false), + Err(e) => return Err(format!("failed to read {}: {}", path, e)), + }; + + if let Some(workunit_store_handle) = workunit_store::get_workunit_store_handle() { + // TODO: this pretends that the time-to-first-byte can be approximated by "time to create + // reader", which is often not really true. + let timing: Result = + Instant::now().duration_since(start).as_micros().try_into(); + if let Ok(obs) = timing { + workunit_store_handle + .store + .record_observation(ObservationMetric::RemoteStoreTimeToFirstByteMicros, obs); + } + } - if !correct_digest { - // TODO: include the actual digest here - return Err(format!("Remote CAS gave wrong digest: expected {digest:?}")); + match mode { + LoadMode::Validate => { + let correct_digest = async_verified_copy(digest, false, &mut reader, destination) + .await + .map_err(|e| format!("failed to read {}: {}", path, e))?; + + if !correct_digest { + // TODO: include the actual digest here + return Err(format!("Remote CAS gave wrong digest: expected {digest:?}")); + } + } + LoadMode::NoValidate => { + tokio::io::copy(&mut reader, destination) + .await + .map_err(|e| format!("failed to read {}: {}", path, e))?; + } } - } - LoadMode::NoValidate => { - tokio::io::copy(&mut reader, destination) - .await - .map_err(|e| format!("failed to read {}: {}", path, e))?; - } + Ok(true) + } + + /// Load `digest` trusting the contents from the remote, without validating that the digest + /// matches the downloaded bytes. + /// + /// This can/should be used for cases where the digest isn't the digest of the contents + /// (e.g. action cache). + pub async fn load_without_validation( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + ) -> Result { + self.load_raw(digest, destination, LoadMode::NoValidate) + .await } - Ok(true) - } - - /// Load `digest` trusting the contents from the remote, without validating that the digest - /// matches the downloaded bytes. 
- /// - /// This can/should be used for cases where the digest isn't the digest of the contents - /// (e.g. action cache). - pub async fn load_without_validation( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - ) -> Result { - self - .load_raw(digest, destination, LoadMode::NoValidate) - .await - } } #[async_trait] impl ByteStoreProvider for Provider { - async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String> { - // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we don't - // store it here, and magic it up when loading. - if digest == EMPTY_DIGEST { - return Ok(()); - } + async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String> { + // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we don't + // store it here, and magic it up when loading. + if digest == EMPTY_DIGEST { + return Ok(()); + } - let path = self.path(digest.hash); + let path = self.path(digest.hash); - match self.operator.write(&path, bytes).await { - Ok(()) => Ok(()), - // The item already exists, i.e. these bytes have already been stored. For example, - // concurrent executions that are caching the same bytes. This makes the assumption that - // which ever execution won the race to create the item successfully finishes the write, and - // so no wait + retry (or similar) here. - Err(e) if e.kind() == opendal::ErrorKind::AlreadyExists => Ok(()), - Err(e) => Err(format!("failed to write bytes to {path}: {e}")), + match self.operator.write(&path, bytes).await { + Ok(()) => Ok(()), + // The item already exists, i.e. these bytes have already been stored. For example, + // concurrent executions that are caching the same bytes. This makes the assumption that + // which ever execution won the race to create the item successfully finishes the write, and + // so no wait + retry (or similar) here. + Err(e) if e.kind() == opendal::ErrorKind::AlreadyExists => Ok(()), + Err(e) => Err(format!("failed to write bytes to {path}: {e}")), + } } - } - async fn store_file(&self, digest: Digest, mut file: File) -> Result<(), String> { - // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we don't - // store it here, and magic it up when loading. - if digest == EMPTY_DIGEST { - return Ok(()); - } + async fn store_file(&self, digest: Digest, mut file: File) -> Result<(), String> { + // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we don't + // store it here, and magic it up when loading. + if digest == EMPTY_DIGEST { + return Ok(()); + } - let path = self.path(digest.hash); - - let mut writer = match self.operator.writer(&path).await { - Ok(writer) => writer, - // The item already exists, i.e. these bytes have already been stored. For example, - // concurrent executions that are caching the same bytes. This makes the assumption that - // which ever execution won the race to create the item successfully finishes the write, and - // so no wait + retry (or similar) here. 
- Err(e) if e.kind() == opendal::ErrorKind::AlreadyExists => return Ok(()), - Err(e) => return Err(format!("failed to start write to {path}: {e} {}", e.kind())), - }; - - // TODO: it would be good to pass through options.chunk_size_bytes here - match tokio::io::copy(&mut file, &mut writer).await { - Ok(_) => writer.close().await.map_err(|e| { - format!("Uploading file with digest {digest:?} to {path}: failed to commit: {e}") - }), - Err(e) => { - let abort_err = writer.abort().await.err().map_or("".to_owned(), |e| { - format!(" (additional error while aborting = {e})") - }); - Err(format!( + let path = self.path(digest.hash); + + let mut writer = match self.operator.writer(&path).await { + Ok(writer) => writer, + // The item already exists, i.e. these bytes have already been stored. For example, + // concurrent executions that are caching the same bytes. This makes the assumption that + // which ever execution won the race to create the item successfully finishes the write, and + // so no wait + retry (or similar) here. + Err(e) if e.kind() == opendal::ErrorKind::AlreadyExists => return Ok(()), + Err(e) => return Err(format!("failed to start write to {path}: {e} {}", e.kind())), + }; + + // TODO: it would be good to pass through options.chunk_size_bytes here + match tokio::io::copy(&mut file, &mut writer).await { + Ok(_) => writer.close().await.map_err(|e| { + format!("Uploading file with digest {digest:?} to {path}: failed to commit: {e}") + }), + Err(e) => { + let abort_err = writer.abort().await.err().map_or("".to_owned(), |e| { + format!(" (additional error while aborting = {e})") + }); + Err(format!( "Uploading file with digest {digest:?} to {path}: failed to copy: {e}{abort_err}" )) - } + } + } + } + + async fn load( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + ) -> Result { + self.load_raw(digest, destination, LoadMode::Validate).await + } + + async fn list_missing_digests( + &self, + digests: &mut (dyn Iterator + Send), + ) -> Result, String> { + // NB. this is doing individual requests and thus may be expensive. + let existences = future::try_join_all(digests.map(|digest| async move { + // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we don't + // store it, but can still magic it up when loading, i.e. it is never missing. + if digest == EMPTY_DIGEST { + return Ok(None); + } + + let path = self.path(digest.hash); + match self.operator.is_exist(&path).await { + Ok(true) => Ok(None), + Ok(false) => Ok(Some(digest)), + Err(e) => Err(format!("failed to query {}: {}", path, e)), + } + })) + .await?; + + Ok(existences.into_iter().flatten().collect()) } - } - - async fn load( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - ) -> Result { - self.load_raw(digest, destination, LoadMode::Validate).await - } - - async fn list_missing_digests( - &self, - digests: &mut (dyn Iterator + Send), - ) -> Result, String> { - // NB. this is doing individual requests and thus may be expensive. - let existences = future::try_join_all(digests.map(|digest| async move { - // Some providers (e.g. GitHub Actions Cache) don't like storing an empty file, so we don't - // store it, but can still magic it up when loading, i.e. it is never missing. 
- if digest == EMPTY_DIGEST {
- return Ok(None);
- }
-
- let path = self.path(digest.hash);
- match self.operator.is_exist(&path).await {
- Ok(true) => Ok(None),
- Ok(false) => Ok(Some(digest)),
- Err(e) => Err(format!("failed to query {}: {}", path, e)),
- }
- }))
- .await?;
-
- Ok(existences.into_iter().flatten().collect())
- }
}
#[async_trait]
impl ActionCacheProvider for Provider {
- async fn update_action_result(
- &self,
- action_digest: Digest,
- action_result: ActionResult,
- ) -> Result<(), String> {
- let bytes = action_result.to_bytes();
- self.store_bytes(action_digest, bytes).await
- }
- async fn get_action_result(
- &self,
- action_digest: Digest,
- _build_id: &str,
- ) -> Result<Option<ActionResult>, String> {
- let mut destination = Vec::new();
-
- match self
- .load_without_validation(action_digest, &mut destination)
- .await?
- {
- false => Ok(None),
- true => {
- let bytes = Bytes::from(destination);
- Ok(Some(ActionResult::decode(bytes).map_err(|e| {
- format!("failed to decode action result for digest {action_digest:?}: {e}")
- })?))
- }
+ async fn update_action_result(
+ &self,
+ action_digest: Digest,
+ action_result: ActionResult,
+ ) -> Result<(), String> {
+ let bytes = action_result.to_bytes();
+ self.store_bytes(action_digest, bytes).await
+ }
+ async fn get_action_result(
+ &self,
+ action_digest: Digest,
+ _build_id: &str,
+ ) -> Result<Option<ActionResult>, String> {
+ let mut destination = Vec::new();
+
+ match self
+ .load_without_validation(action_digest, &mut destination)
+ .await?
+ {
+ false => Ok(None),
+ true => {
+ let bytes = Bytes::from(destination);
+ Ok(Some(ActionResult::decode(bytes).map_err(|e| {
+ format!("failed to decode action result for digest {action_digest:?}: {e}")
+ })?))
+ }
+ }
}
- }
}
diff --git a/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache.rs b/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache.rs
index be46284da53..c6c54d9188c 100644
--- a/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache.rs
+++ b/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache.rs
@@ -18,100 +18,100 @@ use tonic::{Code, Request};
use crate::apply_headers;
pub struct Provider {
- instance_name: Option<String>,
- action_cache_client: Arc<ActionCacheClient<LayeredService>>,
+ instance_name: Option<String>,
+ action_cache_client: Arc<ActionCacheClient<LayeredService>>,
}
impl Provider {
- pub async fn new(
- RemoteCacheProviderOptions {
- instance_name,
- action_cache_address,
- tls_config,
- headers,
- concurrency_limit,
- rpc_timeout,
- }: RemoteCacheProviderOptions,
- ) -> Result<Provider, String> {
- let needs_tls = action_cache_address.starts_with("https://");
+ pub async fn new(
+ RemoteCacheProviderOptions {
+ instance_name,
+ action_cache_address,
+ tls_config,
+ headers,
+ concurrency_limit,
+ rpc_timeout,
+ }: RemoteCacheProviderOptions,
+ ) -> Result<Provider, String> {
+ let needs_tls = action_cache_address.starts_with("https://");
- let tls_client_config = needs_tls.then(|| tls_config.try_into()).transpose()?;
+ let tls_client_config = needs_tls.then(|| tls_config.try_into()).transpose()?;
- let channel =
- grpc_util::create_channel(&action_cache_address, tls_client_config.as_ref()).await?;
- let http_headers = headers_to_http_header_map(&headers)?;
- let channel = layered_service(
- channel,
- concurrency_limit,
- http_headers,
- Some((rpc_timeout, Metric::RemoteCacheRequestTimeouts)),
- );
- let action_cache_client = Arc::new(ActionCacheClient::new(channel));
+ let channel =
+ grpc_util::create_channel(&action_cache_address, tls_client_config.as_ref()).await?;
+ let http_headers = headers_to_http_header_map(&headers)?;
+
let channel = layered_service( + channel, + concurrency_limit, + http_headers, + Some((rpc_timeout, Metric::RemoteCacheRequestTimeouts)), + ); + let action_cache_client = Arc::new(ActionCacheClient::new(channel)); - Ok(Provider { - instance_name, - action_cache_client, - }) - } + Ok(Provider { + instance_name, + action_cache_client, + }) + } } #[async_trait] impl ActionCacheProvider for Provider { - async fn update_action_result( - &self, - action_digest: Digest, - action_result: ActionResult, - ) -> Result<(), String> { - let client = self.action_cache_client.as_ref().clone(); - retry_call( - client, - move |mut client, _| { - let update_action_cache_request = remexec::UpdateActionResultRequest { - instance_name: self.instance_name.clone().unwrap_or_else(|| "".to_owned()), - action_digest: Some(action_digest.into()), - action_result: Some(action_result.clone()), - ..remexec::UpdateActionResultRequest::default() - }; + async fn update_action_result( + &self, + action_digest: Digest, + action_result: ActionResult, + ) -> Result<(), String> { + let client = self.action_cache_client.as_ref().clone(); + retry_call( + client, + move |mut client, _| { + let update_action_cache_request = remexec::UpdateActionResultRequest { + instance_name: self.instance_name.clone().unwrap_or_else(|| "".to_owned()), + action_digest: Some(action_digest.into()), + action_result: Some(action_result.clone()), + ..remexec::UpdateActionResultRequest::default() + }; - async move { - client - .update_action_result(update_action_cache_request) - .await - } - }, - status_is_retryable, - ) - .await - .map_err(status_to_str)?; + async move { + client + .update_action_result(update_action_cache_request) + .await + } + }, + status_is_retryable, + ) + .await + .map_err(status_to_str)?; - Ok(()) - } + Ok(()) + } - async fn get_action_result( - &self, - action_digest: Digest, - build_id: &str, - ) -> Result, String> { - let client = self.action_cache_client.as_ref().clone(); - let response = retry_call( - client, - move |mut client, _| { - let request = remexec::GetActionResultRequest { - action_digest: Some(action_digest.into()), - instance_name: self.instance_name.clone().unwrap_or_default(), - ..remexec::GetActionResultRequest::default() - }; - let request = apply_headers(Request::new(request), build_id); - async move { client.get_action_result(request).await } - }, - status_is_retryable, - ) - .await; + async fn get_action_result( + &self, + action_digest: Digest, + build_id: &str, + ) -> Result, String> { + let client = self.action_cache_client.as_ref().clone(); + let response = retry_call( + client, + move |mut client, _| { + let request = remexec::GetActionResultRequest { + action_digest: Some(action_digest.into()), + instance_name: self.instance_name.clone().unwrap_or_default(), + ..remexec::GetActionResultRequest::default() + }; + let request = apply_headers(Request::new(request), build_id); + async move { client.get_action_result(request).await } + }, + status_is_retryable, + ) + .await; - match response { - Ok(response) => Ok(Some(response.into_inner())), - Err(status) if status.code() == Code::NotFound => Ok(None), - Err(status) => Err(status_to_str(status)), + match response { + Ok(response) => Ok(Some(response.into_inner())), + Err(status) if status.code() == Code::NotFound => Ok(None), + Err(status) => Err(status_to_str(status)), + } } - } } diff --git a/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache_tests.rs b/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache_tests.rs 
index 81256f383f7..5e1431ec134 100644 --- a/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache_tests.rs +++ b/src/rust/engine/remote_provider/remote_provider_reapi/src/action_cache_tests.rs @@ -10,111 +10,110 @@ use remote_provider_traits::{ActionCacheProvider, RemoteCacheProviderOptions}; use super::action_cache::Provider; async fn new_provider(cas: &StubCAS) -> Provider { - Provider::new(RemoteCacheProviderOptions { - instance_name: None, - action_cache_address: cas.address(), - tls_config: Default::default(), - headers: BTreeMap::new(), - concurrency_limit: 256, - rpc_timeout: Duration::from_secs(2), - }) - .await - .unwrap() + Provider::new(RemoteCacheProviderOptions { + instance_name: None, + action_cache_address: cas.address(), + tls_config: Default::default(), + headers: BTreeMap::new(), + concurrency_limit: 256, + rpc_timeout: Duration::from_secs(2), + }) + .await + .unwrap() } #[tokio::test] async fn get_action_result_existing() { - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; - - let action_digest = Digest::of_bytes(b"get_action_cache test"); - let action_result = remexec::ActionResult { - exit_code: 123, - ..Default::default() - }; - cas - .action_cache - .action_map - .lock() - .insert(action_digest.hash, action_result.clone()); - - assert_eq!( - provider.get_action_result(action_digest, "").await, - Ok(Some(action_result)) - ); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; + + let action_digest = Digest::of_bytes(b"get_action_cache test"); + let action_result = remexec::ActionResult { + exit_code: 123, + ..Default::default() + }; + cas.action_cache + .action_map + .lock() + .insert(action_digest.hash, action_result.clone()); + + assert_eq!( + provider.get_action_result(action_digest, "").await, + Ok(Some(action_result)) + ); } #[tokio::test] async fn get_action_result_missing() { - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; - let action_digest = Digest::of_bytes(b"update_action_cache test"); + let action_digest = Digest::of_bytes(b"update_action_cache test"); - assert_eq!( - provider.get_action_result(action_digest, "").await, - Ok(None) - ); + assert_eq!( + provider.get_action_result(action_digest, "").await, + Ok(None) + ); } #[tokio::test] async fn get_action_result_grpc_error() { - let cas = StubCAS::builder().ac_always_errors().build(); - let provider = new_provider(&cas).await; + let cas = StubCAS::builder().ac_always_errors().build(); + let provider = new_provider(&cas).await; - let action_digest = Digest::of_bytes(b"get_action_result_grpc_error test"); + let action_digest = Digest::of_bytes(b"get_action_result_grpc_error test"); - let error = provider - .get_action_result(action_digest, "") - .await - .expect_err("Want err"); + let error = provider + .get_action_result(action_digest, "") + .await + .expect_err("Want err"); - assert!( - error.contains("unavailable"), - "Bad error message, got: {error}" - ); + assert!( + error.contains("unavailable"), + "Bad error message, got: {error}" + ); } #[tokio::test] async fn update_action_cache() { - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; - - let action_digest = Digest::of_bytes(b"update_action_cache test"); - let action_result = remexec::ActionResult { - exit_code: 123, - ..Default::default() - }; - - provider - .update_action_result(action_digest, action_result.clone()) - .await - .unwrap(); - - assert_eq!( - 
cas.action_cache.action_map.lock()[&action_digest.hash],
- action_result
- );
+ let cas = StubCAS::empty();
+ let provider = new_provider(&cas).await;
+
+ let action_digest = Digest::of_bytes(b"update_action_cache test");
+ let action_result = remexec::ActionResult {
+ exit_code: 123,
+ ..Default::default()
+ };
+
+ provider
+ .update_action_result(action_digest, action_result.clone())
+ .await
+ .unwrap();
+
+ assert_eq!(
+ cas.action_cache.action_map.lock()[&action_digest.hash],
+ action_result
+ );
}
#[tokio::test]
async fn update_action_cache_grpc_error() {
- let cas = StubCAS::builder().ac_always_errors().build();
- let provider = new_provider(&cas).await;
-
- let action_digest = Digest::of_bytes(b"update_action_cache_grpc_error test");
- let action_result = remexec::ActionResult {
- exit_code: 123,
- ..Default::default()
- };
-
- let error = provider
- .update_action_result(action_digest, action_result.clone())
- .await
- .expect_err("Want err");
-
- assert!(
- error.contains("unavailable"),
- "Bad error message, got: {error}"
- );
+ let cas = StubCAS::builder().ac_always_errors().build();
+ let provider = new_provider(&cas).await;
+
+ let action_digest = Digest::of_bytes(b"update_action_cache_grpc_error test");
+ let action_result = remexec::ActionResult {
+ exit_code: 123,
+ ..Default::default()
+ };
+
+ let error = provider
+ .update_action_result(action_digest, action_result.clone())
+ .await
+ .expect_err("Want err");
+
+ assert!(
+ error.contains("unavailable"),
+ "Bad error message, got: {error}"
+ );
}
diff --git a/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store.rs b/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store.rs
index daa9943448d..09e28c79d3a 100644
--- a/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store.rs
+++ b/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store.rs
@@ -13,15 +13,15 @@ use bytes::Bytes;
use futures::{FutureExt, StreamExt};
use grpc_util::retry::{retry_call, status_is_retryable};
use grpc_util::{
- headers_to_http_header_map, layered_service, status_ref_to_str, status_to_str, LayeredService,
+ headers_to_http_header_map, layered_service, status_ref_to_str, status_to_str, LayeredService,
};
use hashing::{Digest, Hasher};
use protos::gen::build::bazel::remote::execution::v2 as remexec;
use protos::gen::google::bytestream::byte_stream_client::ByteStreamClient;
use remexec::{
- capabilities_client::CapabilitiesClient,
- content_addressable_storage_client::ContentAddressableStorageClient, BatchUpdateBlobsRequest,
- ServerCapabilities,
+ capabilities_client::CapabilitiesClient,
+ content_addressable_storage_client::ContentAddressableStorageClient, BatchUpdateBlobsRequest,
+ ServerCapabilities,
};
use tokio::fs::File;
use tokio::io::{AsyncRead, AsyncSeekExt, AsyncWriteExt};
@@ -32,263 +32,261 @@ use workunit_store::{Metric, ObservationMetric};
use remote_provider_traits::{ByteStoreProvider, LoadDestination, RemoteOptions};
pub struct Provider {
- instance_name: Option<String>,
- chunk_size_bytes: usize,
- _rpc_attempts: usize,
- byte_stream_client: Arc<ByteStreamClient<LayeredService>>,
- cas_client: Arc<ContentAddressableStorageClient<LayeredService>>,
- capabilities_cell: Arc<OnceCell<ServerCapabilities>>,
- capabilities_client: Arc<CapabilitiesClient<LayeredService>>,
- batch_api_size_limit: usize,
+ instance_name: Option<String>,
+ chunk_size_bytes: usize,
+ _rpc_attempts: usize,
+ byte_stream_client: Arc<ByteStreamClient<LayeredService>>,
+ cas_client: Arc<ContentAddressableStorageClient<LayeredService>>,
+ capabilities_cell: Arc<OnceCell<ServerCapabilities>>,
+ capabilities_client: Arc<CapabilitiesClient<LayeredService>>,
+ batch_api_size_limit: usize,
}
/// Represents an error from accessing a remote bytestore.
#[derive(Debug)] enum ByteStoreError { - /// gRPC error - Grpc(Status), + /// gRPC error + Grpc(Status), - /// Other errors - Other(String), + /// Other errors + Other(String), } impl ByteStoreError { - fn is_retryable(&self) -> bool { - match self { - ByteStoreError::Grpc(status) => status_is_retryable(status), - ByteStoreError::Other(_) => false, + fn is_retryable(&self) -> bool { + match self { + ByteStoreError::Grpc(status) => status_is_retryable(status), + ByteStoreError::Other(_) => false, + } } - } } impl fmt::Display for ByteStoreError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ByteStoreError::Grpc(status) => fmt::Display::fmt(&status_ref_to_str(status), f), - ByteStoreError::Other(msg) => fmt::Display::fmt(msg, f), + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ByteStoreError::Grpc(status) => fmt::Display::fmt(&status_ref_to_str(status), f), + ByteStoreError::Other(msg) => fmt::Display::fmt(msg, f), + } } - } } impl std::error::Error for ByteStoreError {} impl Provider { - // TODO: Consider extracting these options to a struct with `impl Default`, similar to - // `super::LocalOptions`. - pub async fn new(options: RemoteOptions) -> Result { - let tls_client_config = options - .cas_address - .starts_with("https://") - .then(|| options.tls_config.try_into()) - .transpose()?; - - let channel = - grpc_util::create_channel(&options.cas_address, tls_client_config.as_ref()).await?; - let http_headers = headers_to_http_header_map(&options.headers)?; - let channel = layered_service( - channel, - options.rpc_concurrency_limit, - http_headers, - Some((options.rpc_timeout, Metric::RemoteStoreRequestTimeouts)), - ); - - let byte_stream_client = Arc::new(ByteStreamClient::new(channel.clone())); - - let cas_client = Arc::new(ContentAddressableStorageClient::new(channel.clone())); - - let capabilities_client = Arc::new(CapabilitiesClient::new(channel)); - - Ok(Provider { - instance_name: options.instance_name, - chunk_size_bytes: options.chunk_size_bytes, - _rpc_attempts: options.rpc_retries + 1, - byte_stream_client, - cas_client, - capabilities_cell: options - .capabilities_cell_opt - .unwrap_or_else(|| Arc::new(OnceCell::new())), - capabilities_client, - batch_api_size_limit: options.batch_api_size_limit, - }) - } - - async fn store_bytes_batch(&self, digest: Digest, bytes: Bytes) -> Result<(), ByteStoreError> { - let request = BatchUpdateBlobsRequest { - instance_name: self.instance_name.clone().unwrap_or_default(), - requests: vec![remexec::batch_update_blobs_request::Request { - digest: Some(digest.into()), - data: bytes, - compressor: remexec::compressor::Value::Identity as i32, - }], - }; - - let mut client = self.cas_client.as_ref().clone(); - client - .batch_update_blobs(request) - .await - .map_err(ByteStoreError::Grpc)?; - Ok(()) - } - - async fn store_source_stream( - &self, - digest: Digest, - source: Arc>, - ) -> Result<(), ByteStoreError> { - let len = digest.size_bytes; - let instance_name = self.instance_name.clone().unwrap_or_default(); - let resource_name = format!( - "{}{}uploads/{}/blobs/{}/{}", - &instance_name, - if instance_name.is_empty() { "" } else { "/" }, - uuid::Uuid::new_v4(), - digest.hash, - digest.size_bytes, - ); - - let mut client = self.byte_stream_client.as_ref().clone(); - - // we have to communicate the (first) error reading the underlying reader out of band - let error_occurred = Arc::new(parking_lot::Mutex::new(None)); - let error_occurred_stream = error_occurred.clone(); - - let 
chunk_size_bytes = self.chunk_size_bytes; - let stream = async_stream::stream! { - if len == 0 { - // if the reader is empty, the ReaderStream gives no elements, but we have to write at least - // one. - yield protos::gen::google::bytestream::WriteRequest { - resource_name: resource_name.clone(), - write_offset: 0, - finish_write: true, - data: Bytes::new(), + // TODO: Consider extracting these options to a struct with `impl Default`, similar to + // `super::LocalOptions`. + pub async fn new(options: RemoteOptions) -> Result { + let tls_client_config = options + .cas_address + .starts_with("https://") + .then(|| options.tls_config.try_into()) + .transpose()?; + + let channel = + grpc_util::create_channel(&options.cas_address, tls_client_config.as_ref()).await?; + let http_headers = headers_to_http_header_map(&options.headers)?; + let channel = layered_service( + channel, + options.rpc_concurrency_limit, + http_headers, + Some((options.rpc_timeout, Metric::RemoteStoreRequestTimeouts)), + ); + + let byte_stream_client = Arc::new(ByteStreamClient::new(channel.clone())); + + let cas_client = Arc::new(ContentAddressableStorageClient::new(channel.clone())); + + let capabilities_client = Arc::new(CapabilitiesClient::new(channel)); + + Ok(Provider { + instance_name: options.instance_name, + chunk_size_bytes: options.chunk_size_bytes, + _rpc_attempts: options.rpc_retries + 1, + byte_stream_client, + cas_client, + capabilities_cell: options + .capabilities_cell_opt + .unwrap_or_else(|| Arc::new(OnceCell::new())), + capabilities_client, + batch_api_size_limit: options.batch_api_size_limit, + }) + } + + async fn store_bytes_batch(&self, digest: Digest, bytes: Bytes) -> Result<(), ByteStoreError> { + let request = BatchUpdateBlobsRequest { + instance_name: self.instance_name.clone().unwrap_or_default(), + requests: vec![remexec::batch_update_blobs_request::Request { + digest: Some(digest.into()), + data: bytes, + compressor: remexec::compressor::Value::Identity as i32, + }], }; - return; - } - - // Read the source in appropriately sized chunks. - // NB. it is possible that this doesn't fill each chunk fully (i.e. may not send - // `chunk_size_bytes` in each request). For the usual sources, this should be unlikely. - let mut source = source.lock().await; - let reader_stream = tokio_util::io::ReaderStream::with_capacity(&mut *source, chunk_size_bytes); - let mut num_seen_bytes = 0; - - for await read_result in reader_stream { - match read_result { - Ok(data) => { - let write_offset = num_seen_bytes as i64; - num_seen_bytes += data.len(); + + let mut client = self.cas_client.as_ref().clone(); + client + .batch_update_blobs(request) + .await + .map_err(ByteStoreError::Grpc)?; + Ok(()) + } + + async fn store_source_stream( + &self, + digest: Digest, + source: Arc>, + ) -> Result<(), ByteStoreError> { + let len = digest.size_bytes; + let instance_name = self.instance_name.clone().unwrap_or_default(); + let resource_name = format!( + "{}{}uploads/{}/blobs/{}/{}", + &instance_name, + if instance_name.is_empty() { "" } else { "/" }, + uuid::Uuid::new_v4(), + digest.hash, + digest.size_bytes, + ); + + let mut client = self.byte_stream_client.as_ref().clone(); + + // we have to communicate the (first) error reading the underlying reader out of band + let error_occurred = Arc::new(parking_lot::Mutex::new(None)); + let error_occurred_stream = error_occurred.clone(); + + let chunk_size_bytes = self.chunk_size_bytes; + let stream = async_stream::stream! 
{ + if len == 0 { + // if the reader is empty, the ReaderStream gives no elements, but we have to write at least + // one. yield protos::gen::google::bytestream::WriteRequest { resource_name: resource_name.clone(), - write_offset, - finish_write: num_seen_bytes == len, - data, - } - }, - Err(err) => { - // reading locally hit an error, so store it for re-processing below - *error_occurred_stream.lock() = Some(err); - // cut off here, no point continuing - break; + write_offset: 0, + finish_write: true, + data: Bytes::new(), + }; + return; } - } - } - }; - - // NB: We must box the future to avoid a stack overflow. - // Explicit type annotation is a workaround for https://github.com/rust-lang/rust/issues/64552 - let future: std::pin::Pin< - Box> + Send>, - > = Box::pin(client.write(Request::new(stream)).map(move |r| { - if let Some(ref read_err) = *error_occurred.lock() { - // check if reading `source` locally hit an error: if so, propagate that error (there will - // likely be a remote error too, because our write will be too short, but the local error is - // the interesting root cause) - return Err(ByteStoreError::Other(format!( - "Uploading file with digest {:?}: failed to read local source: {}", - digest, read_err - ))); - } - - match r { - Err(err) => Err(ByteStoreError::Grpc(err)), - Ok(response) => { - let response = response.into_inner(); - if response.committed_size == len as i64 { - Ok(()) - } else { - Err(ByteStoreError::Other(format!( - "Uploading file with digest {:?}: want committed size {} but got {}", - digest, len, response.committed_size - ))) + + // Read the source in appropriately sized chunks. + // NB. it is possible that this doesn't fill each chunk fully (i.e. may not send + // `chunk_size_bytes` in each request). For the usual sources, this should be unlikely. + let mut source = source.lock().await; + let reader_stream = tokio_util::io::ReaderStream::with_capacity(&mut *source, chunk_size_bytes); + let mut num_seen_bytes = 0; + + for await read_result in reader_stream { + match read_result { + Ok(data) => { + let write_offset = num_seen_bytes as i64; + num_seen_bytes += data.len(); + yield protos::gen::google::bytestream::WriteRequest { + resource_name: resource_name.clone(), + write_offset, + finish_write: num_seen_bytes == len, + data, + } + }, + Err(err) => { + // reading locally hit an error, so store it for re-processing below + *error_occurred_stream.lock() = Some(err); + // cut off here, no point continuing + break; + } + } } - } - } - })); - future.await - } - - async fn get_capabilities(&self) -> Result<&remexec::ServerCapabilities, ByteStoreError> { - let capabilities_fut = async { - let mut request = remexec::GetCapabilitiesRequest::default(); - if let Some(s) = self.instance_name.as_ref() { - request.instance_name = s.clone(); - } - - let mut client = self.capabilities_client.as_ref().clone(); - client - .get_capabilities(request) - .await - .map(|r| r.into_inner()) - .map_err(ByteStoreError::Grpc) - }; - - self - .capabilities_cell - .get_or_try_init(capabilities_fut) - .await - } + }; + + // NB: We must box the future to avoid a stack overflow. 
+ // Explicit type annotation is a workaround for https://github.com/rust-lang/rust/issues/64552 + let future: std::pin::Pin< + Box> + Send>, + > = Box::pin(client.write(Request::new(stream)).map(move |r| { + if let Some(ref read_err) = *error_occurred.lock() { + // check if reading `source` locally hit an error: if so, propagate that error (there will + // likely be a remote error too, because our write will be too short, but the local error is + // the interesting root cause) + return Err(ByteStoreError::Other(format!( + "Uploading file with digest {:?}: failed to read local source: {}", + digest, read_err + ))); + } + + match r { + Err(err) => Err(ByteStoreError::Grpc(err)), + Ok(response) => { + let response = response.into_inner(); + if response.committed_size == len as i64 { + Ok(()) + } else { + Err(ByteStoreError::Other(format!( + "Uploading file with digest {:?}: want committed size {} but got {}", + digest, len, response.committed_size + ))) + } + } + } + })); + future.await + } + + async fn get_capabilities(&self) -> Result<&remexec::ServerCapabilities, ByteStoreError> { + let capabilities_fut = async { + let mut request = remexec::GetCapabilitiesRequest::default(); + if let Some(s) = self.instance_name.as_ref() { + request.instance_name = s.clone(); + } + + let mut client = self.capabilities_client.as_ref().clone(); + client + .get_capabilities(request) + .await + .map(|r| r.into_inner()) + .map_err(ByteStoreError::Grpc) + }; + + self.capabilities_cell + .get_or_try_init(capabilities_fut) + .await + } } #[async_trait] impl ByteStoreProvider for Provider { - async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String> { - let len = digest.size_bytes; - - let max_batch_total_size_bytes = { - let capabilities = self.get_capabilities().await.map_err(|e| e.to_string())?; - - capabilities - .cache_capabilities - .as_ref() - .map(|c| c.max_batch_total_size_bytes as usize) - .unwrap_or_default() - }; - - let batch_api_allowed_by_local_config = len <= self.batch_api_size_limit; - let batch_api_allowed_by_server_config = - max_batch_total_size_bytes == 0 || len < max_batch_total_size_bytes; - - retry_call( - bytes, - move |bytes, _| async move { - if batch_api_allowed_by_local_config && batch_api_allowed_by_server_config { - self.store_bytes_batch(digest, bytes).await - } else { - self - .store_source_stream(digest, Arc::new(Mutex::new(Cursor::new(bytes)))) - .await - } - }, - ByteStoreError::is_retryable, - ) - .await - .map_err(|e| e.to_string()) - } + async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String> { + let len = digest.size_bytes; + + let max_batch_total_size_bytes = { + let capabilities = self.get_capabilities().await.map_err(|e| e.to_string())?; + + capabilities + .cache_capabilities + .as_ref() + .map(|c| c.max_batch_total_size_bytes as usize) + .unwrap_or_default() + }; + + let batch_api_allowed_by_local_config = len <= self.batch_api_size_limit; + let batch_api_allowed_by_server_config = + max_batch_total_size_bytes == 0 || len < max_batch_total_size_bytes; + + retry_call( + bytes, + move |bytes, _| async move { + if batch_api_allowed_by_local_config && batch_api_allowed_by_server_config { + self.store_bytes_batch(digest, bytes).await + } else { + self.store_source_stream(digest, Arc::new(Mutex::new(Cursor::new(bytes)))) + .await + } + }, + ByteStoreError::is_retryable, + ) + .await + .map_err(|e| e.to_string()) + } - async fn store_file(&self, digest: Digest, file: File) -> Result<(), String> { - let source = 
Arc::new(Mutex::new(file)); - retry_call( + async fn store_file(&self, digest: Digest, file: File) -> Result<(), String> { + let source = Arc::new(Mutex::new(file)); + retry_call( source, move |source, retry_attempt| async move { if retry_attempt > 0 { @@ -309,116 +307,119 @@ impl ByteStoreProvider for Provider { ) .await .map_err(|e| e.to_string()) - } - - async fn load( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - ) -> Result { - let instance_name = self.instance_name.clone().unwrap_or_default(); - let resource_name = format!( - "{}{}blobs/{}/{}", - &instance_name, - if instance_name.is_empty() { "" } else { "/" }, - digest.hash, - digest.size_bytes - ); - - let request = protos::gen::google::bytestream::ReadRequest { - resource_name, - read_offset: 0, - // 0 means no limit. - read_limit: 0, - }; - let client = self.byte_stream_client.as_ref().clone(); - - let destination = Arc::new(Mutex::new(destination)); - - retry_call( - (client, request, destination), - move |(mut client, request, destination), retry_attempt| { - async move { - let mut start_opt = Some(Instant::now()); - let response = client.read(request).await?; - - let mut stream = response.into_inner().inspect(|_| { - // Record the observed time to receive the first response for this read. - if let Some(start) = start_opt.take() { - if let Some(workunit_store_handle) = workunit_store::get_workunit_store_handle() { - let timing: Result = - Instant::now().duration_since(start).as_micros().try_into(); - if let Ok(obs) = timing { - workunit_store_handle - .store - .record_observation(ObservationMetric::RemoteStoreTimeToFirstByteMicros, obs); - } - } - } - }); - - let mut writer = destination.lock().await; - let mut hasher = Hasher::new(); - if retry_attempt > 0 { - // if we're retrying, we need to clear out the destination to start the whole write - // fresh - writer.reset().await?; - } - while let Some(response) = stream.next().await { - let response = response?; - writer.write_all(&response.data).await?; - hasher.update(&response.data); - } - writer.shutdown().await?; + } - let actual_digest = hasher.finish(); - if actual_digest != digest { - // Return an `internal` status to attempt retry. - return Err(Status::internal(format!( + async fn load( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + ) -> Result { + let instance_name = self.instance_name.clone().unwrap_or_default(); + let resource_name = format!( + "{}{}blobs/{}/{}", + &instance_name, + if instance_name.is_empty() { "" } else { "/" }, + digest.hash, + digest.size_bytes + ); + + let request = protos::gen::google::bytestream::ReadRequest { + resource_name, + read_offset: 0, + // 0 means no limit. + read_limit: 0, + }; + let client = self.byte_stream_client.as_ref().clone(); + + let destination = Arc::new(Mutex::new(destination)); + + retry_call( + (client, request, destination), + move |(mut client, request, destination), retry_attempt| { + async move { + let mut start_opt = Some(Instant::now()); + let response = client.read(request).await?; + + let mut stream = response.into_inner().inspect(|_| { + // Record the observed time to receive the first response for this read. 
+ if let Some(start) = start_opt.take() { + if let Some(workunit_store_handle) = + workunit_store::get_workunit_store_handle() + { + let timing: Result = + Instant::now().duration_since(start).as_micros().try_into(); + if let Ok(obs) = timing { + workunit_store_handle.store.record_observation( + ObservationMetric::RemoteStoreTimeToFirstByteMicros, + obs, + ); + } + } + } + }); + + let mut writer = destination.lock().await; + let mut hasher = Hasher::new(); + if retry_attempt > 0 { + // if we're retrying, we need to clear out the destination to start the whole write + // fresh + writer.reset().await?; + } + while let Some(response) = stream.next().await { + let response = response?; + writer.write_all(&response.data).await?; + hasher.update(&response.data); + } + writer.shutdown().await?; + + let actual_digest = hasher.finish(); + if actual_digest != digest { + // Return an `internal` status to attempt retry. + return Err(Status::internal(format!( "Remote CAS gave wrong digest: expected {digest:?}, got {actual_digest:?}" ))); - } + } - Ok(()) - } - .map(|read_result| match read_result { - Ok(()) => Ok(true), - Err(status) if status.code() == Code::NotFound => Ok(false), - Err(err) => Err(err), - }) - }, - status_is_retryable, - ) - .await - .map_err(|e| e.to_string()) - } - - async fn list_missing_digests( - &self, - digests: &mut (dyn Iterator + Send), - ) -> Result, String> { - let request = remexec::FindMissingBlobsRequest { - instance_name: self.instance_name.as_ref().cloned().unwrap_or_default(), - blob_digests: digests.into_iter().map(|d| d.into()).collect::>(), - }; - - let client = self.cas_client.as_ref().clone(); - let response = retry_call( - client, - move |mut client, _| { - let request = request.clone(); - async move { client.find_missing_blobs(request).await } - }, - status_is_retryable, - ) - .await - .map_err(status_to_str)?; - - response - .into_inner() - .missing_blob_digests - .iter() - .map(|digest| digest.try_into()) - .collect::, _>>() - } + Ok(()) + } + .map(|read_result| match read_result { + Ok(()) => Ok(true), + Err(status) if status.code() == Code::NotFound => Ok(false), + Err(err) => Err(err), + }) + }, + status_is_retryable, + ) + .await + .map_err(|e| e.to_string()) + } + + async fn list_missing_digests( + &self, + digests: &mut (dyn Iterator + Send), + ) -> Result, String> { + let request = remexec::FindMissingBlobsRequest { + instance_name: self.instance_name.as_ref().cloned().unwrap_or_default(), + blob_digests: digests.into_iter().map(|d| d.into()).collect::>(), + }; + + let client = self.cas_client.as_ref().clone(); + let response = retry_call( + client, + move |mut client, _| { + let request = request.clone(); + async move { client.find_missing_blobs(request).await } + }, + status_is_retryable, + ) + .await + .map_err(status_to_str)?; + + response + .into_inner() + .missing_blob_digests + .iter() + .map(|digest| digest.try_into()) + .collect::, _>>() + } } diff --git a/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store_tests.rs b/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store_tests.rs index 443eb0d979f..cb03c1dc887 100644 --- a/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store_tests.rs +++ b/src/rust/engine/remote_provider/remote_provider_reapi/src/byte_store_tests.rs @@ -20,278 +20,278 @@ const MEGABYTES: usize = 1024 * 1024; const STORE_BATCH_API_SIZE_LIMIT: usize = 4 * MEGABYTES; fn remote_options( - cas_address: String, - chunk_size_bytes: usize, - batch_api_size_limit: usize, + cas_address: 
String, + chunk_size_bytes: usize, + batch_api_size_limit: usize, ) -> RemoteOptions { - RemoteOptions { - cas_address, - instance_name: None, - tls_config: tls::Config::default(), - headers: BTreeMap::new(), - chunk_size_bytes, - rpc_timeout: Duration::from_secs(5), - rpc_retries: 1, - rpc_concurrency_limit: 256, - capabilities_cell_opt: None, - batch_api_size_limit, - } + RemoteOptions { + cas_address, + instance_name: None, + tls_config: tls::Config::default(), + headers: BTreeMap::new(), + chunk_size_bytes, + rpc_timeout: Duration::from_secs(5), + rpc_retries: 1, + rpc_concurrency_limit: 256, + capabilities_cell_opt: None, + batch_api_size_limit, + } } async fn new_provider(cas: &StubCAS) -> Provider { - Provider::new(remote_options( - cas.address(), - 10 * MEGABYTES, - STORE_BATCH_API_SIZE_LIMIT, - )) - .await - .unwrap() + Provider::new(remote_options( + cas.address(), + 10 * MEGABYTES, + STORE_BATCH_API_SIZE_LIMIT, + )) + .await + .unwrap() } async fn load_test(chunk_size: usize) { - let _ = WorkunitStore::setup_for_tests(); - let testdata = TestData::roland(); - let cas = StubCAS::builder() - .chunk_size_bytes(chunk_size) - .file(&testdata) - .build(); - - let provider = new_provider(&cas).await; - let mut destination = Vec::new(); - - let found = provider - .load(testdata.digest(), &mut destination) - .await - .unwrap(); - - assert!(found); - assert_eq!(destination, testdata.bytes()); + let _ = WorkunitStore::setup_for_tests(); + let testdata = TestData::roland(); + let cas = StubCAS::builder() + .chunk_size_bytes(chunk_size) + .file(&testdata) + .build(); + + let provider = new_provider(&cas).await; + let mut destination = Vec::new(); + + let found = provider + .load(testdata.digest(), &mut destination) + .await + .unwrap(); + + assert!(found); + assert_eq!(destination, testdata.bytes()); } #[tokio::test] async fn load_existing_less_than_one_chunk() { - load_test(TestData::roland().bytes().len() + 1).await; + load_test(TestData::roland().bytes().len() + 1).await; } #[tokio::test] async fn load_existing_exactly_one_chunk() { - load_test(TestData::roland().bytes().len()).await; + load_test(TestData::roland().bytes().len()).await; } #[tokio::test] async fn load_existing_multiple_chunks_exact() { - load_test(1).await; + load_test(1).await; } #[tokio::test] async fn load_existing_multiple_chunks_nonfactor() { - load_test(9).await; + load_test(9).await; } #[tokio::test] async fn load_missing() { - let testdata = TestData::roland(); - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; - let mut destination: Vec = Vec::new(); - - let found = provider - .load(testdata.digest(), &mut destination) - .await - .unwrap(); - - assert!(!found); - assert!(destination.is_empty()); + let testdata = TestData::roland(); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; + let mut destination: Vec = Vec::new(); + + let found = provider + .load(testdata.digest(), &mut destination) + .await + .unwrap(); + + assert!(!found); + assert!(destination.is_empty()); } #[tokio::test] async fn load_grpc_error() { - let testdata = TestData::roland(); - let cas = StubCAS::cas_always_errors(); + let testdata = TestData::roland(); + let cas = StubCAS::cas_always_errors(); - let provider = new_provider(&cas).await; - let mut destination = Vec::new(); + let provider = new_provider(&cas).await; + let mut destination = Vec::new(); - let error = provider - .load(testdata.digest(), &mut destination) - .await - .expect_err("Want error"); - - assert!( - error.contains("StubCAS is 
configured to always fail"), - "Bad error message, got: {error}" - ); - // retries: - assert_eq!( - cas.request_counts.lock().get(&RequestType::BSRead), - Some(&3) - ); + let error = provider + .load(testdata.digest(), &mut destination) + .await + .expect_err("Want error"); + + assert!( + error.contains("StubCAS is configured to always fail"), + "Bad error message, got: {error}" + ); + // retries: + assert_eq!( + cas.request_counts.lock().get(&RequestType::BSRead), + Some(&3) + ); } #[tokio::test] async fn load_existing_wrong_digest_error() { - let testdata = TestData::roland(); - let cas = StubCAS::builder() - .unverified_content( - TestData::roland().fingerprint(), - Bytes::from_static(b"not roland"), - ) - .build(); - - let provider = new_provider(&cas).await; - let mut destination = Vec::new(); + let testdata = TestData::roland(); + let cas = StubCAS::builder() + .unverified_content( + TestData::roland().fingerprint(), + Bytes::from_static(b"not roland"), + ) + .build(); + + let provider = new_provider(&cas).await; + let mut destination = Vec::new(); + + let error = provider + .load(testdata.digest(), &mut destination) + .await + .expect_err("Want error"); - let error = provider - .load(testdata.digest(), &mut destination) - .await - .expect_err("Want error"); - - assert!( - error.contains("Remote CAS gave wrong digest"), - "Bad error message, got: {error}" - ) + assert!( + error.contains("Remote CAS gave wrong digest"), + "Bad error message, got: {error}" + ) } fn assert_cas_store(cas: &StubCAS, testdata: &TestData, chunks: usize, chunk_size: usize) { - let blobs = cas.blobs.lock(); - assert_eq!(blobs.get(&testdata.fingerprint()), Some(&testdata.bytes())); - - let write_message_sizes = cas.write_message_sizes.lock(); - assert_eq!(write_message_sizes.len(), chunks); - for &size in write_message_sizes.iter() { - assert!( - size <= chunk_size, - "Size {} should have been <= {}", - size, - chunk_size - ); - } + let blobs = cas.blobs.lock(); + assert_eq!(blobs.get(&testdata.fingerprint()), Some(&testdata.bytes())); + + let write_message_sizes = cas.write_message_sizes.lock(); + assert_eq!(write_message_sizes.len(), chunks); + for &size in write_message_sizes.iter() { + assert!( + size <= chunk_size, + "Size {} should have been <= {}", + size, + chunk_size + ); + } } #[tokio::test] async fn store_file_one_chunk() { - let testdata = TestData::roland(); - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; - - provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await - .unwrap(); + let testdata = TestData::roland(); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; - assert_cas_store(&cas, &testdata, 1, 1024) + provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .unwrap(); + + assert_cas_store(&cas, &testdata, 1, 1024) } #[tokio::test] async fn store_file_multiple_chunks() { - let testdata = TestData::all_the_henries(); - - let cas = StubCAS::empty(); - let chunk_size = 10 * 1024; - let provider = Provider::new(remote_options( - cas.address(), - chunk_size, - 0, // disable batch API, force streaming API - )) - .await - .unwrap(); - - provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) + let testdata = TestData::all_the_henries(); + + let cas = StubCAS::empty(); + let chunk_size = 10 * 1024; + let provider = Provider::new(remote_options( + cas.address(), + chunk_size, + 0, // disable batch API, force streaming 
API + )) .await .unwrap(); - assert_cas_store(&cas, &testdata, 98, chunk_size) + provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .unwrap(); + + assert_cas_store(&cas, &testdata, 98, chunk_size) } #[tokio::test] async fn store_file_empty_file() { - let testdata = TestData::empty(); - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; - - provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await - .unwrap(); + let testdata = TestData::empty(); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; - assert_cas_store(&cas, &testdata, 1, 1024) + provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .unwrap(); + + assert_cas_store(&cas, &testdata, 1, 1024) } #[tokio::test] async fn store_file_grpc_error() { - let testdata = TestData::roland(); - let cas = StubCAS::cas_always_errors(); - let provider = new_provider(&cas).await; - - let error = provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) - .await - .expect_err("Want err"); - assert!( - error.contains("StubCAS is configured to always fail"), - "Bad error message, got: {error}" - ); - - // retries: - assert_eq!( - cas.request_counts.lock().get(&RequestType::BSWrite), - Some(&3) - ); + let testdata = TestData::roland(); + let cas = StubCAS::cas_always_errors(); + let provider = new_provider(&cas).await; + + let error = provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .expect_err("Want err"); + assert!( + error.contains("StubCAS is configured to always fail"), + "Bad error message, got: {error}" + ); + + // retries: + assert_eq!( + cas.request_counts.lock().get(&RequestType::BSWrite), + Some(&3) + ); } #[tokio::test] async fn store_file_connection_error() { - let testdata = TestData::roland(); - let provider = Provider::new(remote_options( - "http://doesnotexist.example".to_owned(), - 10 * MEGABYTES, - STORE_BATCH_API_SIZE_LIMIT, - )) - .await - .unwrap(); - - let error = provider - .store_file( - testdata.digest(), - mk_tempfile(Some(&testdata.bytes())).await, - ) + let testdata = TestData::roland(); + let provider = Provider::new(remote_options( + "http://doesnotexist.example".to_owned(), + 10 * MEGABYTES, + STORE_BATCH_API_SIZE_LIMIT, + )) .await - .expect_err("Want err"); - assert!( - error.contains("Unavailable: \"error trying to connect: dns error"), - "Bad error message, got: {error}" - ); + .unwrap(); + + let error = provider + .store_file( + testdata.digest(), + mk_tempfile(Some(&testdata.bytes())).await, + ) + .await + .expect_err("Want err"); + assert!( + error.contains("Unavailable: \"error trying to connect: dns error"), + "Bad error message, got: {error}" + ); } #[tokio::test] async fn store_file_source_read_error_immediately() { - let testdata = TestData::roland(); - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; + let testdata = TestData::roland(); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; - let temp_dir = TempDir::new().unwrap(); - let file_that_is_a_dir = File::open(temp_dir.path()).await.unwrap(); + let temp_dir = TempDir::new().unwrap(); + let file_that_is_a_dir = File::open(temp_dir.path()).await.unwrap(); - let error = provider - .store_file(testdata.digest(), file_that_is_a_dir) - .await - .expect_err("Want err"); - assert!( - error.contains("Is a directory"), - "Bad error 
message, got: {error}", - ) + let error = provider + .store_file(testdata.digest(), file_that_is_a_dir) + .await + .expect_err("Want err"); + assert!( + error.contains("Is a directory"), + "Bad error message, got: {error}", + ) } // TODO: it would also be good to validate the behaviour if the file reads start failing later @@ -299,181 +299,179 @@ async fn store_file_source_read_error_immediately() { #[tokio::test] async fn store_bytes_one_chunk() { - let testdata = TestData::roland(); - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; + let testdata = TestData::roland(); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; - provider - .store_bytes(testdata.digest(), testdata.bytes()) - .await - .unwrap(); + provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .unwrap(); - assert_cas_store(&cas, &testdata, 1, 1024) + assert_cas_store(&cas, &testdata, 1, 1024) } #[tokio::test] async fn store_bytes_multiple_chunks() { - let testdata = TestData::all_the_henries(); - - let cas = StubCAS::empty(); - let chunk_size = 10 * 1024; - let provider = Provider::new(remote_options( - cas.address(), - chunk_size, - 0, // disable batch API, force streaming API - )) - .await - .unwrap(); - - provider - .store_bytes(testdata.digest(), testdata.bytes()) + let testdata = TestData::all_the_henries(); + + let cas = StubCAS::empty(); + let chunk_size = 10 * 1024; + let provider = Provider::new(remote_options( + cas.address(), + chunk_size, + 0, // disable batch API, force streaming API + )) .await .unwrap(); - assert_cas_store(&cas, &testdata, 98, chunk_size) + provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .unwrap(); + + assert_cas_store(&cas, &testdata, 98, chunk_size) } #[tokio::test] async fn store_bytes_empty_file() { - let testdata = TestData::empty(); - let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; + let testdata = TestData::empty(); + let cas = StubCAS::empty(); + let provider = new_provider(&cas).await; - provider - .store_bytes(testdata.digest(), testdata.bytes()) - .await - .unwrap(); + provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .unwrap(); - assert_cas_store(&cas, &testdata, 1, 1024) + assert_cas_store(&cas, &testdata, 1, 1024) } #[tokio::test] async fn store_bytes_batch_grpc_error() { - let testdata = TestData::roland(); - let cas = StubCAS::cas_always_errors(); - let provider = new_provider(&cas).await; + let testdata = TestData::roland(); + let cas = StubCAS::cas_always_errors(); + let provider = new_provider(&cas).await; + + let error = provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .expect_err("Want err"); + assert!( + error.contains("StubCAS is configured to always fail"), + "Bad error message, got: {error}" + ); - let error = provider - .store_bytes(testdata.digest(), testdata.bytes()) - .await - .expect_err("Want err"); - assert!( - error.contains("StubCAS is configured to always fail"), - "Bad error message, got: {error}" - ); - - // retries: - assert_eq!( - cas - .request_counts - .lock() - .get(&RequestType::CASBatchUpdateBlobs), - Some(&3) - ); + // retries: + assert_eq!( + cas.request_counts + .lock() + .get(&RequestType::CASBatchUpdateBlobs), + Some(&3) + ); } #[tokio::test] async fn store_bytes_write_stream_grpc_error() { - let testdata = TestData::all_the_henries(); - let cas = StubCAS::cas_always_errors(); - let chunk_size = 10 * 1024; - let provider = Provider::new(remote_options( - cas.address(), - chunk_size, - 0, // 
disable batch API, force streaming API - )) - .await - .unwrap(); - - let error = provider - .store_bytes(testdata.digest(), testdata.bytes()) + let testdata = TestData::all_the_henries(); + let cas = StubCAS::cas_always_errors(); + let chunk_size = 10 * 1024; + let provider = Provider::new(remote_options( + cas.address(), + chunk_size, + 0, // disable batch API, force streaming API + )) .await - .expect_err("Want err"); - assert!( - error.contains("StubCAS is configured to always fail"), - "Bad error message, got: {error}" - ); - - // retries: - assert_eq!( - cas.request_counts.lock().get(&RequestType::BSWrite), - Some(&3) - ); + .unwrap(); + + let error = provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .expect_err("Want err"); + assert!( + error.contains("StubCAS is configured to always fail"), + "Bad error message, got: {error}" + ); + + // retries: + assert_eq!( + cas.request_counts.lock().get(&RequestType::BSWrite), + Some(&3) + ); } #[tokio::test] async fn store_bytes_connection_error() { - let testdata = TestData::roland(); - let provider = Provider::new(remote_options( - "http://doesnotexist.example".to_owned(), - 10 * MEGABYTES, - STORE_BATCH_API_SIZE_LIMIT, - )) - .await - .unwrap(); - - let error = provider - .store_bytes(testdata.digest(), testdata.bytes()) + let testdata = TestData::roland(); + let provider = Provider::new(remote_options( + "http://doesnotexist.example".to_owned(), + 10 * MEGABYTES, + STORE_BATCH_API_SIZE_LIMIT, + )) .await - .expect_err("Want err"); - assert!( - error.contains("Unavailable: \"error trying to connect: dns error"), - "Bad error message, got: {error}" - ); + .unwrap(); + + let error = provider + .store_bytes(testdata.digest(), testdata.bytes()) + .await + .expect_err("Want err"); + assert!( + error.contains("Unavailable: \"error trying to connect: dns error"), + "Bad error message, got: {error}" + ); } #[tokio::test] async fn list_missing_digests_none_missing() { - let testdata = TestData::roland(); - let _ = WorkunitStore::setup_for_tests(); - let cas = StubCAS::builder().file(&testdata).build(); + let testdata = TestData::roland(); + let _ = WorkunitStore::setup_for_tests(); + let cas = StubCAS::builder().file(&testdata).build(); - let provider = new_provider(&cas).await; + let provider = new_provider(&cas).await; - assert_eq!( - provider - .list_missing_digests(&mut vec![testdata.digest()].into_iter()) - .await, - Ok(HashSet::new()) - ) + assert_eq!( + provider + .list_missing_digests(&mut vec![testdata.digest()].into_iter()) + .await, + Ok(HashSet::new()) + ) } #[tokio::test] async fn list_missing_digests_some_missing() { - let cas = StubCAS::empty(); + let cas = StubCAS::empty(); - let provider = new_provider(&cas).await; - let digest = TestData::roland().digest(); + let provider = new_provider(&cas).await; + let digest = TestData::roland().digest(); - let mut digest_set = HashSet::new(); - digest_set.insert(digest); + let mut digest_set = HashSet::new(); + digest_set.insert(digest); - assert_eq!( - provider - .list_missing_digests(&mut vec![digest].into_iter()) - .await, - Ok(digest_set) - ) + assert_eq!( + provider + .list_missing_digests(&mut vec![digest].into_iter()) + .await, + Ok(digest_set) + ) } #[tokio::test] async fn list_missing_digests_grpc_error() { - let cas = StubCAS::cas_always_errors(); - let provider = new_provider(&cas).await; + let cas = StubCAS::cas_always_errors(); + let provider = new_provider(&cas).await; - let error = provider - .list_missing_digests(&mut 
vec![TestData::roland().digest()].into_iter()) - .await - .expect_err("Want error"); - assert!( - error.contains("StubCAS is configured to always fail"), - "Bad error message, got: {error}" - ); - // retries: - assert_eq!( - cas - .request_counts - .lock() - .get(&RequestType::CASFindMissingBlobs), - Some(&3) - ); + let error = provider + .list_missing_digests(&mut vec![TestData::roland().digest()].into_iter()) + .await + .expect_err("Want error"); + assert!( + error.contains("StubCAS is configured to always fail"), + "Bad error message, got: {error}" + ); + // retries: + assert_eq!( + cas.request_counts + .lock() + .get(&RequestType::CASFindMissingBlobs), + Some(&3) + ); } diff --git a/src/rust/engine/remote_provider/remote_provider_reapi/src/lib.rs b/src/rust/engine/remote_provider/remote_provider_reapi/src/lib.rs index 255962f5716..d91e895b68d 100644 --- a/src/rust/engine/remote_provider/remote_provider_reapi/src/lib.rs +++ b/src/rust/engine/remote_provider/remote_provider_reapi/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -40,20 +40,20 @@ pub mod byte_store_tests; /// Apply REAPI request metadata header to a `tonic::Request`. 
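The helper below attaches REAPI `RequestMetadata` (tool name "pants" plus the current build id) to an outgoing gRPC request under the reserved `requestmetadata-bin` key. A minimal usage sketch, assuming a `build_id: &str` is already in scope; the patch elides the generic message type of `Request`, so the concrete request type here is purely illustrative:

    // Build any REAPI request, then tag it before sending it to the server.
    let request = tonic::Request::new(remexec::FindMissingBlobsRequest::default());
    let request = apply_headers(request, build_id);
    // The server can now attribute this call to the "pants" tool and this build id.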
pub fn apply_headers(mut request: Request, build_id: &str) -> Request { - let reapi_request_metadata = remexec::RequestMetadata { - tool_details: Some(remexec::ToolDetails { - tool_name: "pants".into(), - ..remexec::ToolDetails::default() - }), - tool_invocation_id: build_id.to_string(), - ..remexec::RequestMetadata::default() - }; + let reapi_request_metadata = remexec::RequestMetadata { + tool_details: Some(remexec::ToolDetails { + tool_name: "pants".into(), + ..remexec::ToolDetails::default() + }), + tool_invocation_id: build_id.to_string(), + ..remexec::RequestMetadata::default() + }; - let md = request.metadata_mut(); - md.insert_bin( - "google.devtools.remoteexecution.v1test.requestmetadata-bin", - BinaryMetadataValue::try_from(reapi_request_metadata.to_bytes()).unwrap(), - ); + let md = request.metadata_mut(); + md.insert_bin( + "google.devtools.remoteexecution.v1test.requestmetadata-bin", + BinaryMetadataValue::try_from(reapi_request_metadata.to_bytes()).unwrap(), + ); - request + request } diff --git a/src/rust/engine/remote_provider/remote_provider_traits/src/lib.rs b/src/rust/engine/remote_provider/remote_provider_traits/src/lib.rs index ce714327527..1298c7ff27b 100644 --- a/src/rust/engine/remote_provider/remote_provider_traits/src/lib.rs +++ b/src/rust/engine/remote_provider/remote_provider_traits/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -41,94 +41,94 @@ use tokio::io::{AsyncSeekExt, AsyncWrite}; // TODO: Consider providing `impl Default`, similar to `remote::LocalOptions`. 
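Until such a `Default` exists, each construction site spells out the full field set, as the `remote_options` test helper earlier in this patch does. A minimal sketch of building the options for a gRPC CAS, with purely illustrative values (the patch elides the key/value types of `headers`, assumed here to be `String`/`String`):

    let options = RemoteOptions {
        cas_address: "grpc://localhost:8980".to_owned(),
        instance_name: None,
        headers: BTreeMap::new(),
        tls_config: grpc_util::tls::Config::default(),
        chunk_size_bytes: 1024 * 1024,
        rpc_timeout: Duration::from_secs(5),
        rpc_retries: 2,
        rpc_concurrency_limit: 256,
        capabilities_cell_opt: None,
        batch_api_size_limit: 4 * 1024 * 1024,
    };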
#[derive(Clone)] pub struct RemoteOptions { - // TODO: this is currently framed for the REAPI provider, with some options used by others, would - // be good to generalise - pub cas_address: String, - pub instance_name: Option, - pub headers: BTreeMap, - pub tls_config: grpc_util::tls::Config, - pub chunk_size_bytes: usize, - pub rpc_timeout: Duration, - pub rpc_retries: usize, - pub rpc_concurrency_limit: usize, - pub capabilities_cell_opt: Option>>, - pub batch_api_size_limit: usize, + // TODO: this is currently framed for the REAPI provider, with some options used by others, would + // be good to generalise + pub cas_address: String, + pub instance_name: Option, + pub headers: BTreeMap, + pub tls_config: grpc_util::tls::Config, + pub chunk_size_bytes: usize, + pub rpc_timeout: Duration, + pub rpc_retries: usize, + pub rpc_concurrency_limit: usize, + pub capabilities_cell_opt: Option>>, + pub batch_api_size_limit: usize, } #[derive(Clone)] pub struct RemoteCacheProviderOptions { - // TODO: this is currently framed for the REAPI provider, with some options used by others, would - // be good to generalise - // TODO: this is structurally very similar to `RemoteOptions`: maybe they should be the same? (see - // comment in `choose_action_cache_provider` too) - pub instance_name: Option, - pub action_cache_address: String, - pub tls_config: grpc_util::tls::Config, - pub headers: BTreeMap, - pub concurrency_limit: usize, - pub rpc_timeout: Duration, + // TODO: this is currently framed for the REAPI provider, with some options used by others, would + // be good to generalise + // TODO: this is structurally very similar to `RemoteOptions`: maybe they should be the same? (see + // comment in `choose_action_cache_provider` too) + pub instance_name: Option, + pub action_cache_address: String, + pub tls_config: grpc_util::tls::Config, + pub headers: BTreeMap, + pub concurrency_limit: usize, + pub rpc_timeout: Duration, } #[async_trait] pub trait ByteStoreProvider: Sync + Send + 'static { - /// Store the bytes readable from `file` into the remote store - async fn store_file(&self, digest: Digest, file: File) -> Result<(), String>; + /// Store the bytes readable from `file` into the remote store + async fn store_file(&self, digest: Digest, file: File) -> Result<(), String>; - /// Store the bytes in `bytes` into the remote store, as an optimisation of `store_file` when the - /// bytes are already in memory - async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String>; + /// Store the bytes in `bytes` into the remote store, as an optimisation of `store_file` when the + /// bytes are already in memory + async fn store_bytes(&self, digest: Digest, bytes: Bytes) -> Result<(), String>; - /// Load the data stored (if any) in the remote store for `digest` into `destination`. Returns - /// true when found, false when not. - async fn load( - &self, - digest: Digest, - destination: &mut dyn LoadDestination, - ) -> Result; + /// Load the data stored (if any) in the remote store for `digest` into `destination`. Returns + /// true when found, false when not. + async fn load( + &self, + digest: Digest, + destination: &mut dyn LoadDestination, + ) -> Result; - /// Return any digests from `digests` that are not (currently) available in the remote store. - async fn list_missing_digests( - &self, - digests: &mut (dyn Iterator + Send), - ) -> Result, String>; + /// Return any digests from `digests` that are not (currently) available in the remote store. 
+ async fn list_missing_digests( + &self, + digests: &mut (dyn Iterator + Send), + ) -> Result, String>; } /// Places that write the result of a remote `load` #[async_trait] pub trait LoadDestination: AsyncWrite + Send + Sync + Unpin + 'static { - /// Clear out the writer and start again, if there's been previous contents written - async fn reset(&mut self) -> std::io::Result<()>; + /// Clear out the writer and start again, if there's been previous contents written + async fn reset(&mut self) -> std::io::Result<()>; } #[async_trait] impl LoadDestination for tokio::fs::File { - async fn reset(&mut self) -> std::io::Result<()> { - self.rewind().await?; - self.set_len(0).await - } + async fn reset(&mut self) -> std::io::Result<()> { + self.rewind().await?; + self.set_len(0).await + } } #[async_trait] impl LoadDestination for Vec { - async fn reset(&mut self) -> std::io::Result<()> { - self.clear(); - Ok(()) - } + async fn reset(&mut self) -> std::io::Result<()> { + self.clear(); + Ok(()) + } } /// This `ActionCacheProvider` trait captures the operations required to be able to cache command /// executions remotely. #[async_trait] pub trait ActionCacheProvider: Sync + Send + 'static { - async fn update_action_result( - &self, - action_digest: Digest, - action_result: ActionResult, - ) -> Result<(), String>; + async fn update_action_result( + &self, + action_digest: Digest, + action_result: ActionResult, + ) -> Result<(), String>; - async fn get_action_result( - &self, - action_digest: Digest, - build_id: &str, - ) -> Result, String>; + async fn get_action_result( + &self, + action_digest: Digest, + build_id: &str, + ) -> Result, String>; } diff --git a/src/rust/engine/remote_provider/src/lib.rs b/src/rust/engine/remote_provider/src/lib.rs index 788a7535e32..6a6b45c42cb 100644 --- a/src/rust/engine/remote_provider/src/lib.rs +++ b/src/rust/engine/remote_provider/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -30,96 +30,96 @@ use std::sync::Arc; // Re-export these so that consumers don't have to know about the exact arrangement of underlying // crates. 
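A hedged sketch of the consumer-facing flow these re-exports support, using the `choose_byte_store_provider` entry point defined just below (assumes an `options: RemoteOptions` value and a known `digest` are already in scope):

    // Pick a provider implementation based on the address scheme, then stream a
    // blob into memory; `Vec<u8>` implements `LoadDestination`, so it can be the sink.
    let provider = choose_byte_store_provider(options).await?;
    let mut destination: Vec<u8> = Vec::new();
    let found = provider.load(digest, &mut destination).await?;
    if !found {
        // The digest is not present in the remote store; `destination` stays empty.
    }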
pub use remote_provider_traits::{ - ActionCacheProvider, ByteStoreProvider, LoadDestination, RemoteCacheProviderOptions, - RemoteOptions, + ActionCacheProvider, ByteStoreProvider, LoadDestination, RemoteCacheProviderOptions, + RemoteOptions, }; const REAPI_ADDRESS_SCHEMAS: [&str; 4] = ["grpc://", "grpcs://", "http://", "https://"]; // TODO(#19902): a unified view of choosing a provider would be nice pub async fn choose_byte_store_provider( - options: RemoteOptions, + options: RemoteOptions, ) -> Result, String> { - let address = options.cas_address.clone(); - if REAPI_ADDRESS_SCHEMAS.iter().any(|s| address.starts_with(s)) { - Ok(Arc::new( - remote_provider_reapi::byte_store::Provider::new(options).await?, - )) - } else if let Some(path) = address.strip_prefix("file://") { - // It's a bit weird to support local "file://" for a 'remote' store... but this is handy for - // testing. - Ok(Arc::new(remote_provider_opendal::Provider::fs( - path, - "byte-store".to_owned(), - options, - )?)) - } else if let Some(url) = address.strip_prefix("github-actions-cache+") { - // This is relying on python validating that it was set as `github-actions-cache+https://...` so - // incorrect values could easily slip through here and cause downstream confusion. We're - // intending to change the approach (https://github.com/pantsbuild/pants/issues/19902) so this - // is tolerable for now. - Ok(Arc::new( - remote_provider_opendal::Provider::github_actions_cache( - url, - "byte-store".to_owned(), - options, - )?, - )) - } else { - Err(format!( + let address = options.cas_address.clone(); + if REAPI_ADDRESS_SCHEMAS.iter().any(|s| address.starts_with(s)) { + Ok(Arc::new( + remote_provider_reapi::byte_store::Provider::new(options).await?, + )) + } else if let Some(path) = address.strip_prefix("file://") { + // It's a bit weird to support local "file://" for a 'remote' store... but this is handy for + // testing. + Ok(Arc::new(remote_provider_opendal::Provider::fs( + path, + "byte-store".to_owned(), + options, + )?)) + } else if let Some(url) = address.strip_prefix("github-actions-cache+") { + // This is relying on python validating that it was set as `github-actions-cache+https://...` so + // incorrect values could easily slip through here and cause downstream confusion. We're + // intending to change the approach (https://github.com/pantsbuild/pants/issues/19902) so this + // is tolerable for now. + Ok(Arc::new( + remote_provider_opendal::Provider::github_actions_cache( + url, + "byte-store".to_owned(), + options, + )?, + )) + } else { + Err(format!( "Cannot initialise remote byte store provider with address {address}, as the scheme is not supported", )) - } + } } pub async fn choose_action_cache_provider( - options: RemoteCacheProviderOptions, + options: RemoteCacheProviderOptions, ) -> Result, String> { - let address = options.action_cache_address.clone(); + let address = options.action_cache_address.clone(); - // TODO: we shouldn't need to gin up a whole copy of this struct; it'd be better to have the two - // set of remoting options managed together. 
- let remote_options = RemoteOptions { - cas_address: address.clone(), - instance_name: options.instance_name.clone(), - headers: options.headers.clone(), - tls_config: options.tls_config.clone(), - rpc_timeout: options.rpc_timeout, - rpc_concurrency_limit: options.concurrency_limit, - // TODO: these should either be passed through or not synthesized here - chunk_size_bytes: 0, - rpc_retries: 0, - capabilities_cell_opt: None, - batch_api_size_limit: 0, - }; + // TODO: we shouldn't need to gin up a whole copy of this struct; it'd be better to have the two + // set of remoting options managed together. + let remote_options = RemoteOptions { + cas_address: address.clone(), + instance_name: options.instance_name.clone(), + headers: options.headers.clone(), + tls_config: options.tls_config.clone(), + rpc_timeout: options.rpc_timeout, + rpc_concurrency_limit: options.concurrency_limit, + // TODO: these should either be passed through or not synthesized here + chunk_size_bytes: 0, + rpc_retries: 0, + capabilities_cell_opt: None, + batch_api_size_limit: 0, + }; - if REAPI_ADDRESS_SCHEMAS.iter().any(|s| address.starts_with(s)) { - Ok(Arc::new( - remote_provider_reapi::action_cache::Provider::new(options).await?, - )) - } else if let Some(path) = address.strip_prefix("file://") { - // It's a bit weird to support local "file://" for a 'remote' store... but this is handy for - // testing. - Ok(Arc::new(remote_provider_opendal::Provider::fs( - path, - "action-cache".to_owned(), - remote_options, - )?)) - } else if let Some(url) = address.strip_prefix("github-actions-cache+") { - // This is relying on python validating that it was set as `github-actions-cache+https://...` so - // incorrect values could easily slip through here and cause downstream confusion. We're - // intending to change the approach (https://github.com/pantsbuild/pants/issues/19902) so this - // is tolerable for now. - Ok(Arc::new( - remote_provider_opendal::Provider::github_actions_cache( - url, - "action-cache".to_owned(), - remote_options, - )?, - )) - } else { - Err(format!( + if REAPI_ADDRESS_SCHEMAS.iter().any(|s| address.starts_with(s)) { + Ok(Arc::new( + remote_provider_reapi::action_cache::Provider::new(options).await?, + )) + } else if let Some(path) = address.strip_prefix("file://") { + // It's a bit weird to support local "file://" for a 'remote' store... but this is handy for + // testing. + Ok(Arc::new(remote_provider_opendal::Provider::fs( + path, + "action-cache".to_owned(), + remote_options, + )?)) + } else if let Some(url) = address.strip_prefix("github-actions-cache+") { + // This is relying on python validating that it was set as `github-actions-cache+https://...` so + // incorrect values could easily slip through here and cause downstream confusion. We're + // intending to change the approach (https://github.com/pantsbuild/pants/issues/19902) so this + // is tolerable for now. 
+ Ok(Arc::new( + remote_provider_opendal::Provider::github_actions_cache( + url, + "action-cache".to_owned(), + remote_options, + )?, + )) + } else { + Err(format!( "Cannot initialise remote action cache provider with address {address}, as the scheme is not supported", )) - } + } } diff --git a/src/rust/engine/rule_graph/src/builder.rs b/src/rust/engine/rule_graph/src/builder.rs index 50c470e6477..057e0b39960 100644 --- a/src/rust/engine/rule_graph/src/builder.rs +++ b/src/rust/engine/rule_graph/src/builder.rs @@ -3,7 +3,7 @@ use crate::rules::{DependencyKey, ParamTypes, Query, Rule, RuleId}; use crate::{ - params_str, Entry, EntryWithDeps, Reentry, RootEntry, RuleEdges, RuleEntry, RuleGraph, + params_str, Entry, EntryWithDeps, Reentry, RootEntry, RuleEdges, RuleEntry, RuleGraph, }; use std::collections::{BTreeMap, VecDeque}; @@ -18,71 +18,73 @@ use petgraph::Direction; #[derive(Debug, Eq, PartialEq, Hash, Clone)] enum Node { - // A root node in the rule graph. - Query(Query), - // An inner node in the rule graph. - Rule(R), - // An inner node in the rule graph which must first locate its `in_scope_params`, and will then - // execute the given Query. - // - // This is a leaf rather than an actual connection to the Query node to avoid introducing - // unnecessary graph cycles. - Reentry(Query, ParamTypes), - // A leaf node in the rule graph which is satisfied by consuming a single parameter. - Param(R::TypeId), + // A root node in the rule graph. + Query(Query), + // An inner node in the rule graph. + Rule(R), + // An inner node in the rule graph which must first locate its `in_scope_params`, and will then + // execute the given Query. + // + // This is a leaf rather than an actual connection to the Query node to avoid introducing + // unnecessary graph cycles. + Reentry(Query, ParamTypes), + // A leaf node in the rule graph which is satisfied by consuming a single parameter. + Param(R::TypeId), } impl std::fmt::Display for Node { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Node::Query(q) => write!(f, "{q}"), - Node::Rule(r) => write!(f, "{r}"), - Node::Param(p) => write!(f, "Param({p})"), - Node::Reentry(q, in_scope) => write!(f, "Reentry({}, {})", q.product, params_str(in_scope)), + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Node::Query(q) => write!(f, "{q}"), + Node::Rule(r) => write!(f, "{r}"), + Node::Param(p) => write!(f, "Param({p})"), + Node::Reentry(q, in_scope) => { + write!(f, "Reentry({}, {})", q.product, params_str(in_scope)) + } + } } - } } impl Node { - fn dependency_keys(&self) -> Vec> { - // TODO: Give Query an internal DependencyKey to avoid cloning here. - match self { - Node::Rule(r) => r.dependency_keys().into_iter().cloned().collect(), - Node::Reentry(_, in_scope_params) => in_scope_params - .iter() - .cloned() - .map(DependencyKey::new) - .collect(), - Node::Query(q) => vec![DependencyKey::new(q.product)], - Node::Param(_) => vec![], + fn dependency_keys(&self) -> Vec> { + // TODO: Give Query an internal DependencyKey to avoid cloning here. 
+ match self { + Node::Rule(r) => r.dependency_keys().into_iter().cloned().collect(), + Node::Reentry(_, in_scope_params) => in_scope_params + .iter() + .cloned() + .map(DependencyKey::new) + .collect(), + Node::Query(q) => vec![DependencyKey::new(q.product)], + Node::Param(_) => vec![], + } } - } - - /// - /// Add the parameter types which are always required to satisfy this Node (regardless of what - /// its dependencies require) to the given set. - /// - fn add_inherent_in_set(&self, in_set: &mut ParamTypes) { - match self { - Node::Reentry(query, in_scope_params) => { - // Reentry nodes include in_sets computed from their Query and their dependencies. - in_set.extend( - query - .params - .iter() - .filter(|p| !in_scope_params.contains(p)) - .cloned(), - ); - } - Node::Param(p) => { - // Params are always leaves with an in-set of their own value, and no out-set. - in_set.insert(*p); - } - Node::Rule(_) | Node::Query(_) => { - // Rules and Queries only have in_sets computed from their dependencies. - } + + /// + /// Add the parameter types which are always required to satisfy this Node (regardless of what + /// its dependencies require) to the given set. + /// + fn add_inherent_in_set(&self, in_set: &mut ParamTypes) { + match self { + Node::Reentry(query, in_scope_params) => { + // Reentry nodes include in_sets computed from their Query and their dependencies. + in_set.extend( + query + .params + .iter() + .filter(|p| !in_scope_params.contains(p)) + .cloned(), + ); + } + Node::Param(p) => { + // Params are always leaves with an in-set of their own value, and no out-set. + in_set.insert(*p); + } + Node::Rule(_) | Node::Query(_) => { + // Rules and Queries only have in_sets computed from their dependencies. + } + } } - } } /// @@ -91,33 +93,33 @@ impl Node { /// #[derive(Debug, Eq, PartialEq, Hash, Clone)] struct ParamsLabeled { - node: Node, - // Params that are actually consumed by transitive dependencies. - in_set: ParamTypes, - // Params that the Node's transitive dependents have available for consumption. - out_set: ParamTypes, + node: Node, + // Params that are actually consumed by transitive dependencies. + in_set: ParamTypes, + // Params that the Node's transitive dependents have available for consumption. 
+ out_set: ParamTypes, } impl ParamsLabeled { - fn new(node: Node, out_set: ParamTypes) -> ParamsLabeled { - ParamsLabeled { - node, - in_set: ParamTypes::new(), - out_set, + fn new(node: Node, out_set: ParamTypes) -> ParamsLabeled { + ParamsLabeled { + node, + in_set: ParamTypes::new(), + out_set, + } } - } } impl std::fmt::Display for ParamsLabeled { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "ParamsLabeled(node: {}, in: ({}), out: ({}))", - self.node, - params_str(&self.in_set), - params_str(&self.out_set) - ) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "ParamsLabeled(node: {}, in: ({}), out: ({}))", + self.node, + params_str(&self.in_set), + params_str(&self.out_set) + ) + } } /// @@ -131,529 +133,541 @@ impl std::fmt::Display for ParamsLabeled { struct MaybeDeleted(T, Option); impl MaybeDeleted { - fn new(t: T) -> MaybeDeleted { - MaybeDeleted(t, None) - } - - fn inner(&self) -> Option<&T> { - if self.is_deleted() { - None - } else { - Some(&self.0) + fn new(t: T) -> MaybeDeleted { + MaybeDeleted(t, None) + } + + fn inner(&self) -> Option<&T> { + if self.is_deleted() { + None + } else { + Some(&self.0) + } } - } - fn deleted_reason(&self) -> Option<&Reason> { - self.1.as_ref() - } + fn deleted_reason(&self) -> Option<&Reason> { + self.1.as_ref() + } - fn is_deleted(&self) -> bool { - self.1.is_some() - } + fn is_deleted(&self) -> bool { + self.1.is_some() + } - fn mark_deleted(&mut self, reason: Reason) { - self.1 = Some(reason); - } + fn mark_deleted(&mut self, reason: Reason) { + self.1 = Some(reason); + } } impl std::fmt::Display for MaybeDeleted { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Some(ref reason) = self.1 { - write!(f, "Deleted(reason: {:?}, {})", reason, self.0) - } else { - write!(f, "{}", self.0) + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(ref reason) = self.1 { + write!(f, "Deleted(reason: {:?}, {})", reason, self.0) + } else { + write!(f, "{}", self.0) + } } - } } #[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] enum NodePrunedReason { - Ambiguous, - Monomorphized, - NoDependents, - NoSourceOfParam, - NoValidCombinationsOfDependencies, + Ambiguous, + Monomorphized, + NoDependents, + NoSourceOfParam, + NoValidCombinationsOfDependencies, } #[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] enum EdgePrunedReason { - DoesNotConsumeProvidedParam, - SmallerParamSetAvailable, + DoesNotConsumeProvidedParam, + SmallerParamSetAvailable, } // Nodes labeled with out_sets. type Graph = - DiGraph<(Node, ParamTypes<::TypeId>), DependencyKey<::TypeId>, u32>; + DiGraph<(Node, ParamTypes<::TypeId>), DependencyKey<::TypeId>, u32>; // Nodes labeled with out_sets and possibly marked deleted. type OutLabeledGraph = DiGraph< - MaybeDeleted<(Node, ParamTypes<::TypeId>), NodePrunedReason>, - DependencyKey<::TypeId>, - u32, + MaybeDeleted<(Node, ParamTypes<::TypeId>), NodePrunedReason>, + DependencyKey<::TypeId>, + u32, >; // Nodes labeled with both an out_set and in_set, and possibly marked deleted. type LabeledGraph = DiGraph< - MaybeDeleted, NodePrunedReason>, - DependencyKey<::TypeId>, - u32, + MaybeDeleted, NodePrunedReason>, + DependencyKey<::TypeId>, + u32, >; // Nodes labeled with both out_sets and in_sets, and both edges and nodes possibly marked deleted. 
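Nodes and edges are tombstoned with `MaybeDeleted` rather than removed outright: the recorded reasons feed later error reporting, and marking is much cheaper than deleting entries from a petgraph graph. A small sketch of the wrapper's behaviour in isolation (hedged: these helpers are private to this module, so this is illustrative only):

    let mut node = MaybeDeleted::new("Rule(example)");
    assert!(node.inner().is_some());
    // Prune the node in place instead of removing it from the graph.
    node.mark_deleted(NodePrunedReason::NoSourceOfParam);
    assert!(node.is_deleted());
    assert_eq!(node.inner(), None);
    assert_eq!(node.deleted_reason(), Some(&NodePrunedReason::NoSourceOfParam));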
type MonomorphizedGraph = DiGraph< - MaybeDeleted, NodePrunedReason>, - MaybeDeleted::TypeId>, EdgePrunedReason>, - u32, + MaybeDeleted, NodePrunedReason>, + MaybeDeleted::TypeId>, EdgePrunedReason>, + u32, >; // Node labeled with in_sets. type InLabeledGraph = - DiGraph<(Node, ParamTypes<::TypeId>), DependencyKey<::TypeId>, u32>; + DiGraph<(Node, ParamTypes<::TypeId>), DependencyKey<::TypeId>, u32>; /// /// Given the set of Rules and Queries, produce a RuleGraph that allows dependency nodes /// to be found statically. /// pub struct Builder { - rules: BTreeMap>, - queries: IndexSet>, - params: ParamTypes, + rules: BTreeMap>, + queries: IndexSet>, + params: ParamTypes, } impl Builder { - pub fn new(rules: IndexSet, mut queries: IndexSet>) -> Builder { - // Extend the Queries with those assumed by Reentry nodes. - queries.extend(rules.iter().flat_map(|rule| { - rule - .dependency_keys() - .into_iter() - .filter_map(|dk| dk.as_reentry_query()) - })); - - // Group rules by product/return type. - let mut rules_by_type = BTreeMap::new(); - for rule in rules { - rules_by_type - .entry(rule.product()) - .or_insert_with(Vec::new) - .push(rule); - } + pub fn new(rules: IndexSet, mut queries: IndexSet>) -> Builder { + // Extend the Queries with those assumed by Reentry nodes. + queries.extend(rules.iter().flat_map(|rule| { + rule.dependency_keys() + .into_iter() + .filter_map(|dk| dk.as_reentry_query()) + })); + + // Group rules by product/return type. + let mut rules_by_type = BTreeMap::new(); + for rule in rules { + rules_by_type + .entry(rule.product()) + .or_insert_with(Vec::new) + .push(rule); + } - // The set of all input Params in the graph: ie, those provided either via Queries, or via - // a Rule with a DependencyKey that provides a Param. - let params = queries - .iter() - .flat_map(|query| query.params.iter().cloned()) - .chain( - rules_by_type - .values() - .flatten() - .flat_map(|rule| rule.dependency_keys()) - .flat_map(|dk| dk.provided_params.iter().cloned()), - ) - .collect::>(); + // The set of all input Params in the graph: ie, those provided either via Queries, or via + // a Rule with a DependencyKey that provides a Param. + let params = queries + .iter() + .flat_map(|query| query.params.iter().cloned()) + .chain( + rules_by_type + .values() + .flatten() + .flat_map(|rule| rule.dependency_keys()) + .flat_map(|dk| dk.provided_params.iter().cloned()), + ) + .collect::>(); - Builder { - rules: rules_by_type, - queries, - params, - } - } - - pub fn graph(self) -> Result, String> { - // 0. validate that the rules all have unique rule ids. - self.validate_rule_ids()?; - // 1. build a polymorphic graph, where nodes might have multiple legal sources of dependencies - let initial_polymorphic_graph = self.initial_polymorphic(); - // 2. run live variable analysis on the polymorphic graph to gather a conservative (ie, overly - // large) set of used Params. - let polymorphic_live_params_graph = self.live_param_labeled_graph(initial_polymorphic_graph); - // 3. monomorphize by partitioning a node (and its dependents) for each valid combination of its - // dependencies while mantaining liveness sets. - let monomorphic_live_params_graph = Self::monomorphize(polymorphic_live_params_graph); - // 4. choose the best dependencies via in/out sets. fail if: - // * invalid required param at a Query - // * take smallest option, fail for equal-sized sets - let pruned_edges_graph = self.prune_edges(monomorphic_live_params_graph)?; - // 5. 
generate the final graph for nodes reachable from queries - self.finalize(pruned_edges_graph) - } - - /// - /// Validate that all rules have unique RuleIds. - /// - fn validate_rule_ids(&self) -> Result<(), String> { - let mut invalid_rule_ids: Vec<&RuleId> = self - .rules - .values() - .flatten() - .map(|rule| rule.id()) - .duplicates() - .collect(); - match invalid_rule_ids.len() { - 0 => Ok(()), - _ => { - invalid_rule_ids.sort(); - Err(format!( - "The following rule ids were each used by more than one rule: {}", - invalid_rule_ids.iter().join(", ") - )) - } + Builder { + rules: rules_by_type, + queries, + params, + } } - } - - /// - /// Builds a polymorphic graph while computing an out_set for each node in the graph by accounting - /// for which `Param`s are available at each use site. During this phase, nodes may have multiple - /// dependency edges per `DependencyKey`, which is what makes them "polymorphic". Each of the - /// possible ways to compute a dependency will likely have different input `Param` requirements, - /// and each node in this phase represents all of those possibilities. - /// - fn initial_polymorphic(&self) -> OutLabeledGraph { - let mut graph: Graph = DiGraph::new(); - - // Initialize the graph with nodes for Queries, Params, and Reentries. - let queries = self - .queries - .iter() - .map(|query| { - ( - query, - graph.add_node(( - Node::Query(query.clone()), - query.params.iter().cloned().collect(), - )), - ) - }) - .collect::>(); - let params = self - .params - .iter() - .cloned() - .map(|param| { - ( - param, - graph.add_node((Node::Param(param), ParamTypes::new())), - ) - }) - .collect::>(); - let rules_by_id: HashMap<&RuleId, &R> = - self.rules.values().flatten().map(|r| (r.id(), r)).collect(); + pub fn graph(self) -> Result, String> { + // 0. validate that the rules all have unique rule ids. + self.validate_rule_ids()?; + // 1. build a polymorphic graph, where nodes might have multiple legal sources of dependencies + let initial_polymorphic_graph = self.initial_polymorphic(); + // 2. run live variable analysis on the polymorphic graph to gather a conservative (ie, overly + // large) set of used Params. + let polymorphic_live_params_graph = + self.live_param_labeled_graph(initial_polymorphic_graph); + // 3. monomorphize by partitioning a node (and its dependents) for each valid combination of its + // dependencies while mantaining liveness sets. + let monomorphic_live_params_graph = Self::monomorphize(polymorphic_live_params_graph); + // 4. choose the best dependencies via in/out sets. fail if: + // * invalid required param at a Query + // * take smallest option, fail for equal-sized sets + let pruned_edges_graph = self.prune_edges(monomorphic_live_params_graph)?; + // 5. generate the final graph for nodes reachable from queries + self.finalize(pruned_edges_graph) + } - // Rules and Reentries are created on the fly based on the out_set of dependents. - let mut rules: HashMap<(R, ParamTypes), NodeIndex> = HashMap::default(); - #[allow(clippy::type_complexity)] - let mut reentries: HashMap< - ( - Query, - ParamTypes, - ParamTypes, - ), - NodeIndex, - > = HashMap::default(); - let mut satisfiable_nodes: HashSet> = HashSet::default(); - let mut unsatisfiable_nodes: HashMap, Vec>> = - HashMap::default(); - - // Starting from Queries, visit all reachable nodes in the graph. 
- let mut visited = HashSet::default(); - let mut to_visit = queries.values().cloned().collect::>(); - let mut iteration = 0; - while let Some(node_id) = to_visit.pop() { - if !visited.visit(node_id) { - continue; - } - iteration += 1; - if iteration % 1000 == 0 { - log::trace!( - "initial_polymorphic iteration {}: {} nodes", - iteration, - graph.node_count() - ); - } - - // Collect the candidates that might satisfy the dependency keys of the node (if it has any). - let candidates_by_key = graph[node_id] - .0 - .dependency_keys() - .into_iter() - .map(|dependency_key| { - if let Some(in_scope_params) = dependency_key.in_scope_params.as_ref() { - // If a DependencyKey has `in_scope_params`, it is solved by re-entering the graph with - // a Query. - let query = Query::new( - dependency_key.product, - dependency_key - .provided_params - .iter() - .chain(in_scope_params.iter()) - .cloned(), - ); - let in_scope_params = in_scope_params.into_iter().cloned().collect(); - return (dependency_key, vec![Node::Reentry(query, in_scope_params)]); - } - - let mut candidates = Vec::new(); - if let Some(rule_id) = &dependency_key.rule_id { - // New call-by-name semantics. - candidates.extend(rules_by_id.get(rule_id).map(|&r| Node::Rule(r.clone()))); - // TODO: Once we are entirely call-by-name, we can get rid of the entire edifice - // of multiple candidates and the unsatisfiable_nodes mechanism, and modify this - // function to return a Result, which will be Err if there is no rule with a - // matching RuleId for some node. - assert!(candidates.len() < 2); - } else { - // Old call-by-type semantics. - if dependency_key.provided_params.is_empty() - && graph[node_id].1.contains(&dependency_key.product()) - && params.contains_key(&dependency_key.product()) - { - candidates.push(Node::Param(dependency_key.product())); + /// + /// Validate that all rules have unique RuleIds. + /// + fn validate_rule_ids(&self) -> Result<(), String> { + let mut invalid_rule_ids: Vec<&RuleId> = self + .rules + .values() + .flatten() + .map(|rule| rule.id()) + .duplicates() + .collect(); + match invalid_rule_ids.len() { + 0 => Ok(()), + _ => { + invalid_rule_ids.sort(); + Err(format!( + "The following rule ids were each used by more than one rule: {}", + invalid_rule_ids.iter().join(", ") + )) } + } + } - if let Some(rules) = self.rules.get(&dependency_key.product()) { - candidates.extend(rules.iter().map(|r| Node::Rule(r.clone()))); - }; - } - - (dependency_key, candidates) - }) - .collect::>(); + /// + /// Builds a polymorphic graph while computing an out_set for each node in the graph by accounting + /// for which `Param`s are available at each use site. During this phase, nodes may have multiple + /// dependency edges per `DependencyKey`, which is what makes them "polymorphic". Each of the + /// possible ways to compute a dependency will likely have different input `Param` requirements, + /// and each node in this phase represents all of those possibilities. + /// + fn initial_polymorphic(&self) -> OutLabeledGraph { + let mut graph: Graph = DiGraph::new(); + + // Initialize the graph with nodes for Queries, Params, and Reentries. 
+ let queries = self + .queries + .iter() + .map(|query| { + ( + query, + graph.add_node(( + Node::Query(query.clone()), + query.params.iter().cloned().collect(), + )), + ) + }) + .collect::>(); + let params = self + .params + .iter() + .cloned() + .map(|param| { + ( + param, + graph.add_node((Node::Param(param), ParamTypes::new())), + ) + }) + .collect::>(); - // If any dependency keys could not be satisfied, continue. - let unsatisfiable_keys = candidates_by_key - .iter() - .filter_map(|(dependency_key, candidates)| { - if candidates.is_empty() { - Some(dependency_key.clone()) - } else { - None - } - }) - .collect::>(); - if !unsatisfiable_keys.is_empty() { - unsatisfiable_nodes.insert(node_id, unsatisfiable_keys); - continue; - } - - // Determine which Params are unambiguously consumed by this node: we eagerly remove them - // from the out_set of all other dependencies to shrink the total number of unique nodes - // created during this phase. The rest will be chosen during monomorphize, where the out_set - // shrinks further. - let consumed_from_out_set = candidates_by_key - .values() - .filter_map(|candidates| { - if candidates.len() != 1 { - None - } else if let Node::Param(p) = candidates[0] { - Some(p) - } else { - None - } - }) - .collect::>(); + let rules_by_id: HashMap<&RuleId, &R> = + self.rules.values().flatten().map(|r| (r.id(), r)).collect(); - // Create nodes for each of the candidates using the computed out_set. - let out_set = graph[node_id] - .1 - .iter() - .filter(|p| !consumed_from_out_set.contains(p)) - .cloned() - .collect::>(); - for (dependency_key, candidates) in candidates_by_key { - for candidate in candidates { - match candidate { - Node::Param(p) => { - graph.add_edge(node_id, *params.get(&p).unwrap(), dependency_key.clone()); + // Rules and Reentries are created on the fly based on the out_set of dependents. + let mut rules: HashMap<(R, ParamTypes), NodeIndex> = HashMap::default(); + #[allow(clippy::type_complexity)] + let mut reentries: HashMap< + ( + Query, + ParamTypes, + ParamTypes, + ), + NodeIndex, + > = HashMap::default(); + let mut satisfiable_nodes: HashSet> = HashSet::default(); + let mut unsatisfiable_nodes: HashMap, Vec>> = + HashMap::default(); + + // Starting from Queries, visit all reachable nodes in the graph. + let mut visited = HashSet::default(); + let mut to_visit = queries.values().cloned().collect::>(); + let mut iteration = 0; + while let Some(node_id) = to_visit.pop() { + if !visited.visit(node_id) { + continue; } - Node::Reentry(query, in_scope_params) => { - let out_set = { - let mut out_set = out_set.clone(); - out_set.extend(dependency_key.provided_params.iter().cloned()); - out_set - }; - let reentry_id = reentries - .entry((query.clone(), in_scope_params.clone(), out_set.clone())) - .or_insert_with(|| { - graph.add_node((Node::Reentry(query.clone(), in_scope_params), out_set)) - }); - graph.add_edge(node_id, *reentry_id, dependency_key.clone()); - to_visit.push(*reentry_id); + iteration += 1; + if iteration % 1000 == 0 { + log::trace!( + "initial_polymorphic iteration {}: {} nodes", + iteration, + graph.node_count() + ); } - Node::Rule(rule) => { - // If the key provides a Param for the Rule to consume, include it in the out_set for - // the dependency node. 
- let out_set = { - let mut out_set = out_set.clone(); - out_set.extend(dependency_key.provided_params.iter().cloned()); - out_set - }; - let rule_id = rules - .entry((rule.clone(), out_set.clone())) - .or_insert_with(|| graph.add_node((Node::Rule(rule.clone()), out_set))); - graph.add_edge(node_id, *rule_id, dependency_key.clone()); - to_visit.push(*rule_id); + + // Collect the candidates that might satisfy the dependency keys of the node (if it has any). + let candidates_by_key = graph[node_id] + .0 + .dependency_keys() + .into_iter() + .map(|dependency_key| { + if let Some(in_scope_params) = dependency_key.in_scope_params.as_ref() { + // If a DependencyKey has `in_scope_params`, it is solved by re-entering the graph with + // a Query. + let query = Query::new( + dependency_key.product, + dependency_key + .provided_params + .iter() + .chain(in_scope_params.iter()) + .cloned(), + ); + let in_scope_params = in_scope_params.into_iter().cloned().collect(); + return (dependency_key, vec![Node::Reentry(query, in_scope_params)]); + } + + let mut candidates = Vec::new(); + if let Some(rule_id) = &dependency_key.rule_id { + // New call-by-name semantics. + candidates.extend(rules_by_id.get(rule_id).map(|&r| Node::Rule(r.clone()))); + // TODO: Once we are entirely call-by-name, we can get rid of the entire edifice + // of multiple candidates and the unsatisfiable_nodes mechanism, and modify this + // function to return a Result, which will be Err if there is no rule with a + // matching RuleId for some node. + assert!(candidates.len() < 2); + } else { + // Old call-by-type semantics. + if dependency_key.provided_params.is_empty() + && graph[node_id].1.contains(&dependency_key.product()) + && params.contains_key(&dependency_key.product()) + { + candidates.push(Node::Param(dependency_key.product())); + } + + if let Some(rules) = self.rules.get(&dependency_key.product()) { + candidates.extend(rules.iter().map(|r| Node::Rule(r.clone()))); + }; + } + + (dependency_key, candidates) + }) + .collect::>(); + + // If any dependency keys could not be satisfied, continue. + let unsatisfiable_keys = candidates_by_key + .iter() + .filter_map(|(dependency_key, candidates)| { + if candidates.is_empty() { + Some(dependency_key.clone()) + } else { + None + } + }) + .collect::>(); + if !unsatisfiable_keys.is_empty() { + unsatisfiable_nodes.insert(node_id, unsatisfiable_keys); + continue; } - Node::Query(_) => unreachable!("A Query may not be a dependency."), - } + + // Determine which Params are unambiguously consumed by this node: we eagerly remove them + // from the out_set of all other dependencies to shrink the total number of unique nodes + // created during this phase. The rest will be chosen during monomorphize, where the out_set + // shrinks further. + let consumed_from_out_set = candidates_by_key + .values() + .filter_map(|candidates| { + if candidates.len() != 1 { + None + } else if let Node::Param(p) = candidates[0] { + Some(p) + } else { + None + } + }) + .collect::>(); + + // Create nodes for each of the candidates using the computed out_set. 
+ let out_set = graph[node_id] + .1 + .iter() + .filter(|p| !consumed_from_out_set.contains(p)) + .cloned() + .collect::>(); + for (dependency_key, candidates) in candidates_by_key { + for candidate in candidates { + match candidate { + Node::Param(p) => { + graph.add_edge( + node_id, + *params.get(&p).unwrap(), + dependency_key.clone(), + ); + } + Node::Reentry(query, in_scope_params) => { + let out_set = { + let mut out_set = out_set.clone(); + out_set.extend(dependency_key.provided_params.iter().cloned()); + out_set + }; + let reentry_id = reentries + .entry((query.clone(), in_scope_params.clone(), out_set.clone())) + .or_insert_with(|| { + graph.add_node(( + Node::Reentry(query.clone(), in_scope_params), + out_set, + )) + }); + graph.add_edge(node_id, *reentry_id, dependency_key.clone()); + to_visit.push(*reentry_id); + } + Node::Rule(rule) => { + // If the key provides a Param for the Rule to consume, include it in the out_set for + // the dependency node. + let out_set = { + let mut out_set = out_set.clone(); + out_set.extend(dependency_key.provided_params.iter().cloned()); + out_set + }; + let rule_id = rules + .entry((rule.clone(), out_set.clone())) + .or_insert_with(|| { + graph.add_node((Node::Rule(rule.clone()), out_set)) + }); + graph.add_edge(node_id, *rule_id, dependency_key.clone()); + to_visit.push(*rule_id); + } + Node::Query(_) => unreachable!("A Query may not be a dependency."), + } + } + } + + satisfiable_nodes.insert(graph[node_id].0.clone()); } - } - satisfiable_nodes.insert(graph[node_id].0.clone()); + // Mark all unsatisfiable nodes deleted. + graph.map( + |node_id, node| { + let mut result = MaybeDeleted::new(node.clone()); + if unsatisfiable_nodes.contains_key(&node_id) { + result.mark_deleted(NodePrunedReason::NoSourceOfParam); + } + result + }, + |_, edge_weight| edge_weight.clone(), + ) } - // Mark all unsatisfiable nodes deleted. - graph.map( - |node_id, node| { - let mut result = MaybeDeleted::new(node.clone()); - if unsatisfiable_nodes.contains_key(&node_id) { - result.mark_deleted(NodePrunedReason::NoSourceOfParam); - } - result - }, - |_, edge_weight| edge_weight.clone(), - ) - } - - /// - /// Splits Rules in the graph that have multiple valid sources of a dependency, and recalculates - /// their in/out sets to attempt to re-join with other copies of the Rule with identical sets. - /// Similar to `live_param_labeled_graph`, this is an analysis that propagates both up and down - /// the graph (and maintains the in/out sets while doing so). Visiting a node might cause us to - /// split it and re-calculate the in/out sets for each split; we then visit the nodes affected - /// by the split to ensure that they are updated as well, and so on. - /// - /// During this phase, the out_set of a node is used to determine which Params are legal to - /// consume in each subgraph: as this information propagates down the graph, Param dependencies - /// might be eliminated, which results in corresponding changes to the in_set which flow back - /// up the graph. As the in_sets shrink, we shrink the out_sets as well to avoid creating - /// redundant nodes: although the params might still technically be declared by the dependents, we - /// can be sure that any not contained in the in_set are not used. 
- /// - /// Any node that has only invalid sources of a dependency (such as those that do not consume a - /// provided param, or those that consume a Param that is not present in their scope) will be - /// removed (which may also cause its dependents to be removed, for the same reason). This is safe - /// to do at any time during the monomorphize run, because the in/out sets are adjusted in tandem - /// based on the current dependencies/dependents. - /// - /// The exit condition for this phase is that all valid combinations of dependencies have the - /// same minimal in_set. This occurs when all splits that would result in smaller sets of - /// dependencies for a node have been executed. Note though that it might still be the case that - /// a node has multiple sources of a particular dependency with the _same_ param requirements. - /// This represents ambiguity that must be handled (likely by erroring) in later phases. - /// - fn monomorphize(graph: LabeledGraph) -> MonomorphizedGraph { - // The monomorphized graph contains nodes and edges that have been marked deleted, because: - // 1. we expose which nodes and edges were deleted to allow for improved error messages - // 2. it is slow to delete nodes/edges with petgraph: marking them is much cheaper. - // Initialize with no deleted nodes/edges. - let mut graph: MonomorphizedGraph = graph.map( - |_node_id, node| node.clone(), - |_edge_id, edge_weight| MaybeDeleted::new(edge_weight.clone()), - ); - - // In order to reduce the number of permutations rapidly, we make a best effort attempt to - // visit a node before any of its dependencies using DFS-post-order. We need to visit all - // nodes in the graph, but because monomorphizing a node enqueues its dependents we may - // visit some of them multiple times. - // - // DFS converges much more quickly than BFS. We use an IndexSet to preserve the initial walk - // order while still removing duplicates. The nodes that should be visited last will remain at - // the bottom of the set/stack, and will stay there until things above them have been removed. - let mut to_visit = { - let mut dfs = DfsPostOrder { - stack: graph.externals(Direction::Incoming).collect(), - discovered: graph.visit_map(), - finished: graph.visit_map(), - }; - let mut to_visit = Vec::new(); - while let Some(node_id) = dfs.next(&graph) { - to_visit.push(node_id); - } - // The IndexSet acts like a stack (ie, we can only remove from the end) so we reverse the DFS - // order to ensure that the last nodes in the DFS end up at the bottom of the stack. - to_visit.into_iter().rev().collect::>() - }; - - // Both the in_set and out_set shrink as this phase continues and nodes are monomorphized. - // Because the sets only shrink, we are able to eliminate direct dependencies based on their - // absence from the sets. - // - // If a node has been monomorphized and all of its dependencies have minimal in_sets, then we - // can assume that its in_set is minimal too: it has _stopped_ shrinking. That means that we can - // additionally prune dependencies transitively in cases where in_sets contain things that are - // not in a node's out_set (since the out_set will not grow, and the minimal in_set represents - // the node's true requirements). - let mut minimal_in_set = HashSet::default(); - - // Should be called after a Node has been successfully reduced (regardless of whether it became - // monomorphic) to maybe mark it minimal. 
- let maybe_mark_minimal_in_set = |minimal_in_set: &mut HashSet>, - graph: &MonomorphizedGraph, - node_id: NodeIndex| { - let dependencies_are_minimal = graph - .edges_directed(node_id, Direction::Outgoing) - .filter_map(|edge_ref| { - if graph[edge_ref.target()].is_deleted() || edge_ref.weight().is_deleted() { - None - } else { - Some(edge_ref.target()) - } - }) - .all(|dependency_id| node_id == dependency_id || minimal_in_set.contains(&dependency_id)); - if dependencies_are_minimal { - minimal_in_set.insert(node_id); - } - }; - - // If a node splits the same way multiple times without becoming minimal, we mark it ambiguous - // the second time. - let mut suspected_ambiguous = HashSet::default(); - - let mut iteration = 0; - let mut maybe_in_loop = HashSet::default(); - let mut looping = false; - while let Some(node_id) = to_visit.pop() { - let node = if let Some(node) = graph[node_id].inner() { - node - } else { - continue; - }; - match node.node { - Node::Rule(_) | Node::Reentry { .. } => { - // Fall through to visit the Rule or Reentry node. - } - Node::Param(_) => { - // Ensure that the leaf is marked minimal, but don't bother to visit. - minimal_in_set.insert(node_id); - continue; - } - Node::Query(_) => { - // Don't bother to visit. - continue; - } - } - - // TODO: This value is mostly arbitrary, but should be increased to allow for solving the - // largest known rulesets that we've encountered. It should really only be triggered in - // case of implementation bugs (as we would prefer for a solution to fail via the usual - // pathways if it can). - // - // See https://github.com/pantsbuild/pants/issues/11269 for plans to improve this - // implementation. - iteration += 1; - if iteration > 10000000 { - looping = true; - } - if iteration % 1000 == 0 { - let live_count = graph - .node_references() - .filter(|node_ref| !node_ref.weight().is_deleted()) - .count(); - let minimal_count = graph - .node_references() - .filter(|node_ref| { - !node_ref.weight().is_deleted() && minimal_in_set.contains(&node_ref.id()) - }) - .count(); - log::trace!( + /// + /// Splits Rules in the graph that have multiple valid sources of a dependency, and recalculates + /// their in/out sets to attempt to re-join with other copies of the Rule with identical sets. + /// Similar to `live_param_labeled_graph`, this is an analysis that propagates both up and down + /// the graph (and maintains the in/out sets while doing so). Visiting a node might cause us to + /// split it and re-calculate the in/out sets for each split; we then visit the nodes affected + /// by the split to ensure that they are updated as well, and so on. + /// + /// During this phase, the out_set of a node is used to determine which Params are legal to + /// consume in each subgraph: as this information propagates down the graph, Param dependencies + /// might be eliminated, which results in corresponding changes to the in_set which flow back + /// up the graph. As the in_sets shrink, we shrink the out_sets as well to avoid creating + /// redundant nodes: although the params might still technically be declared by the dependents, we + /// can be sure that any not contained in the in_set are not used. + /// + /// Any node that has only invalid sources of a dependency (such as those that do not consume a + /// provided param, or those that consume a Param that is not present in their scope) will be + /// removed (which may also cause its dependents to be removed, for the same reason). 
This is safe + /// to do at any time during the monomorphize run, because the in/out sets are adjusted in tandem + /// based on the current dependencies/dependents. + /// + /// The exit condition for this phase is that all valid combinations of dependencies have the + /// same minimal in_set. This occurs when all splits that would result in smaller sets of + /// dependencies for a node have been executed. Note though that it might still be the case that + /// a node has multiple sources of a particular dependency with the _same_ param requirements. + /// This represents ambiguity that must be handled (likely by erroring) in later phases. + /// + fn monomorphize(graph: LabeledGraph) -> MonomorphizedGraph { + // The monomorphized graph contains nodes and edges that have been marked deleted, because: + // 1. we expose which nodes and edges were deleted to allow for improved error messages + // 2. it is slow to delete nodes/edges with petgraph: marking them is much cheaper. + // Initialize with no deleted nodes/edges. + let mut graph: MonomorphizedGraph = graph.map( + |_node_id, node| node.clone(), + |_edge_id, edge_weight| MaybeDeleted::new(edge_weight.clone()), + ); + + // In order to reduce the number of permutations rapidly, we make a best effort attempt to + // visit a node before any of its dependencies using DFS-post-order. We need to visit all + // nodes in the graph, but because monomorphizing a node enqueues its dependents we may + // visit some of them multiple times. + // + // DFS converges much more quickly than BFS. We use an IndexSet to preserve the initial walk + // order while still removing duplicates. The nodes that should be visited last will remain at + // the bottom of the set/stack, and will stay there until things above them have been removed. + let mut to_visit = { + let mut dfs = DfsPostOrder { + stack: graph.externals(Direction::Incoming).collect(), + discovered: graph.visit_map(), + finished: graph.visit_map(), + }; + let mut to_visit = Vec::new(); + while let Some(node_id) = dfs.next(&graph) { + to_visit.push(node_id); + } + // The IndexSet acts like a stack (ie, we can only remove from the end) so we reverse the DFS + // order to ensure that the last nodes in the DFS end up at the bottom of the stack. + to_visit.into_iter().rev().collect::>() + }; + + // Both the in_set and out_set shrink as this phase continues and nodes are monomorphized. + // Because the sets only shrink, we are able to eliminate direct dependencies based on their + // absence from the sets. + // + // If a node has been monomorphized and all of its dependencies have minimal in_sets, then we + // can assume that its in_set is minimal too: it has _stopped_ shrinking. That means that we can + // additionally prune dependencies transitively in cases where in_sets contain things that are + // not in a node's out_set (since the out_set will not grow, and the minimal in_set represents + // the node's true requirements). + let mut minimal_in_set = HashSet::default(); + + // Should be called after a Node has been successfully reduced (regardless of whether it became + // monomorphic) to maybe mark it minimal. 
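// A std-only sketch of the worklist setup described above: collect a DFS post-order
// from the roots, reverse it, and load it into an order-preserving, deduplicating
// stack. The real code uses petgraph's DfsPostOrder and an IndexSet; a tiny adjacency
// map plus a Vec and HashSet stand in for them here, purely for illustration.
use std::collections::{HashMap, HashSet};

fn post_order(
    node: &'static str,
    edges: &HashMap<&'static str, Vec<&'static str>>,
    seen: &mut HashSet<&'static str>,
    out: &mut Vec<&'static str>,
) {
    if !seen.insert(node) {
        return;
    }
    for &dep in edges.get(node).map(Vec::as_slice).unwrap_or(&[]) {
        post_order(dep, edges, seen, out);
    }
    // Post-order: a node is emitted only after all of its dependencies.
    out.push(node);
}

fn main() {
    // Query -> RuleA -> RuleB -> Param; RuleA also depends directly on Param.
    let edges: HashMap<_, Vec<_>> = HashMap::from([
        ("Query", vec!["RuleA"]),
        ("RuleA", vec!["RuleB", "Param"]),
        ("RuleB", vec!["Param"]),
    ]);
    let (mut seen, mut order) = (HashSet::new(), Vec::new());
    post_order("Query", &edges, &mut seen, &mut order);
    assert_eq!(order, vec!["Param", "RuleB", "RuleA", "Query"]);

    // Reversed and loaded into the stack, popping from the end replays the post-order,
    // while the companion set keeps any later re-enqueue of a node from duplicating it.
    let mut to_visit: Vec<_> = order.iter().rev().copied().collect();
    let mut enqueued: HashSet<_> = to_visit.iter().copied().collect();
    while let Some(node) = to_visit.pop() {
        enqueued.remove(node);
        println!("visiting {node}");
        // A real visit would re-enqueue affected neighbors here, e.g.:
        // if enqueued.insert(neighbor) { to_visit.push(neighbor); }
    }
}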
+ let maybe_mark_minimal_in_set = + |minimal_in_set: &mut HashSet>, + graph: &MonomorphizedGraph, + node_id: NodeIndex| { + let dependencies_are_minimal = graph + .edges_directed(node_id, Direction::Outgoing) + .filter_map(|edge_ref| { + if graph[edge_ref.target()].is_deleted() || edge_ref.weight().is_deleted() { + None + } else { + Some(edge_ref.target()) + } + }) + .all(|dependency_id| { + node_id == dependency_id || minimal_in_set.contains(&dependency_id) + }); + if dependencies_are_minimal { + minimal_in_set.insert(node_id); + } + }; + + // If a node splits the same way multiple times without becoming minimal, we mark it ambiguous + // the second time. + let mut suspected_ambiguous = HashSet::default(); + + let mut iteration = 0; + let mut maybe_in_loop = HashSet::default(); + let mut looping = false; + while let Some(node_id) = to_visit.pop() { + let node = if let Some(node) = graph[node_id].inner() { + node + } else { + continue; + }; + match node.node { + Node::Rule(_) | Node::Reentry { .. } => { + // Fall through to visit the Rule or Reentry node. + } + Node::Param(_) => { + // Ensure that the leaf is marked minimal, but don't bother to visit. + minimal_in_set.insert(node_id); + continue; + } + Node::Query(_) => { + // Don't bother to visit. + continue; + } + } + + // TODO: This value is mostly arbitrary, but should be increased to allow for solving the + // largest known rulesets that we've encountered. It should really only be triggered in + // case of implementation bugs (as we would prefer for a solution to fail via the usual + // pathways if it can). + // + // See https://github.com/pantsbuild/pants/issues/11269 for plans to improve this + // implementation. + iteration += 1; + if iteration > 10000000 { + looping = true; + } + if iteration % 1000 == 0 { + let live_count = graph + .node_references() + .filter(|node_ref| !node_ref.weight().is_deleted()) + .count(); + let minimal_count = graph + .node_references() + .filter(|node_ref| { + !node_ref.weight().is_deleted() && minimal_in_set.contains(&node_ref.id()) + }) + .count(); + log::trace!( "rule_graph monomorphize: iteration {}: live: {}, minimal: {}, to_visit: {}, total: {}", iteration, live_count, @@ -661,58 +675,59 @@ impl Builder { to_visit.len(), graph.node_count(), ); - } - - // Group dependencies by DependencyKey. - #[allow(clippy::type_complexity)] - let dependencies_by_key: Vec, NodeIndex)>> = - Self::edges_by_dependency_key(&graph, node_id, false) - .into_values() - .map(|edge_refs| { - edge_refs - .iter() - .map(|edge_ref| (edge_ref.weight().0.clone(), edge_ref.target())) - .collect() - }) - .collect(); - - // A node with no declared dependencies is always already minimal. - if dependencies_by_key.is_empty() { - minimal_in_set.insert(node_id); - - // But we ensure that its out_set is accurate before continuing. - if node.out_set != node.in_set { - graph.node_weight_mut(node_id).unwrap().0.out_set = graph[node_id].0.in_set.clone(); - } - continue; - } - - // Group dependents by out_set. - #[allow(clippy::type_complexity)] - let dependents_by_out_set: HashMap< - ParamTypes, - Vec<(DependencyKey, _)>, - > = { - let mut dbos = HashMap::default(); - for edge_ref in graph.edges_directed(node_id, Direction::Incoming) { - if edge_ref.weight().is_deleted() || graph[edge_ref.source()].is_deleted() { - continue; - } - - // Compute the out_set of this dependent, plus the provided param, if any. 
- let mut out_set = graph[edge_ref.source()].0.out_set.clone(); - out_set.extend(edge_ref.weight().0.provided_params.iter().cloned()); - dbos - .entry(out_set) - .or_insert_with(Vec::new) - .push((edge_ref.weight().0.clone(), edge_ref.source())); - } - dbos - }; - let had_dependents = !dependents_by_out_set.is_empty(); + } - let trace_str = if looping { - format!( + // Group dependencies by DependencyKey. + #[allow(clippy::type_complexity)] + let dependencies_by_key: Vec< + Vec<(DependencyKey, NodeIndex)>, + > = Self::edges_by_dependency_key(&graph, node_id, false) + .into_values() + .map(|edge_refs| { + edge_refs + .iter() + .map(|edge_ref| (edge_ref.weight().0.clone(), edge_ref.target())) + .collect() + }) + .collect(); + + // A node with no declared dependencies is always already minimal. + if dependencies_by_key.is_empty() { + minimal_in_set.insert(node_id); + + // But we ensure that its out_set is accurate before continuing. + if node.out_set != node.in_set { + graph.node_weight_mut(node_id).unwrap().0.out_set = + graph[node_id].0.in_set.clone(); + } + continue; + } + + // Group dependents by out_set. + #[allow(clippy::type_complexity)] + let dependents_by_out_set: HashMap< + ParamTypes, + Vec<(DependencyKey, _)>, + > = { + let mut dbos = HashMap::default(); + for edge_ref in graph.edges_directed(node_id, Direction::Incoming) { + if edge_ref.weight().is_deleted() || graph[edge_ref.source()].is_deleted() { + continue; + } + + // Compute the out_set of this dependent, plus the provided param, if any. + let mut out_set = graph[edge_ref.source()].0.out_set.clone(); + out_set.extend(edge_ref.weight().0.provided_params.iter().cloned()); + dbos.entry(out_set) + .or_insert_with(Vec::new) + .push((edge_ref.weight().0.clone(), edge_ref.source())); + } + dbos + }; + let had_dependents = !dependents_by_out_set.is_empty(); + + let trace_str = if looping { + format!( "creating monomorphizations (from {} dependent sets and {:?} dependencies) for {:?}: {} with {:#?} and {:#?}", dependents_by_out_set.len(), dependencies_by_key @@ -734,133 +749,133 @@ impl Builder { .map(params_str) .collect::>(), ) - } else { - "".to_owned() - }; - - // Generate the monomorphizations of this Node, where each key is a potential node to - // create, and the dependents and dependencies to give it (respectively). - let mut monomorphizations = HashMap::default(); - for (out_set, dependents) in dependents_by_out_set { - for (node, dependencies) in Self::monomorphizations( - &graph, - node_id, - out_set.clone(), - &minimal_in_set, - &dependencies_by_key, - ) { - let entry = monomorphizations - .entry(node) - .or_insert_with(|| (HashSet::default(), HashSet::default())); - entry.0.extend(dependents.iter().cloned()); - entry.1.extend(dependencies); - } - } - - // The goal of this phase is to shrink the in_sets as much as possible via dependency changes. - // - // The base case for a node then is that its dependencies cannot be partitioned to produce - // disjoint in_sets, and that the in/out sets are accurate based on any transitive changes - // above or below this node. If both of these conditions are satisified, the node is valid. - // - // If a node splits in a way that results in an identical node once, we mark it suspected - // ambiguous: if it does so again, we mark it deleted as ambiguous. 
- let is_suspected_ambiguous = - if let Some((_, dependencies)) = monomorphizations.get(&graph[node_id].0) { - // We generated an identical node: if there was only one output node and its dependencies were - // also identical, then we have nooped. - let had_original_dependencies = dependencies - == &graph - .edges_directed(node_id, Direction::Outgoing) - .filter_map(|edge_ref| { - if graph[edge_ref.target()].is_deleted() { - None - } else { - edge_ref - .weight() - .inner() - .map(|dk| (dk.clone(), edge_ref.target())) + } else { + "".to_owned() + }; + + // Generate the monomorphizations of this Node, where each key is a potential node to + // create, and the dependents and dependencies to give it (respectively). + let mut monomorphizations = HashMap::default(); + for (out_set, dependents) in dependents_by_out_set { + for (node, dependencies) in Self::monomorphizations( + &graph, + node_id, + out_set.clone(), + &minimal_in_set, + &dependencies_by_key, + ) { + let entry = monomorphizations + .entry(node) + .or_insert_with(|| (HashSet::default(), HashSet::default())); + entry.0.extend(dependents.iter().cloned()); + entry.1.extend(dependencies); } - }) - .collect::>(); - if had_original_dependencies && monomorphizations.len() == 1 { - // This node cannot be reduced. If its dependencies had minimal in_sets, then it now also - // has a minimal in_set. - maybe_mark_minimal_in_set(&mut minimal_in_set, &graph, node_id); - if looping { - log::trace!( - "not able to reduce {:?}: {} (had {} monomorphizations)", - node_id, - graph[node_id], - monomorphizations.len() - ); } - continue; - } - - // If more than one node was generated, but one of them had the original dependencies, then - // the node is potentially ambiguous. - had_original_dependencies - } else { - false - }; - if looping { - log::trace!("{}", trace_str); + // The goal of this phase is to shrink the in_sets as much as possible via dependency changes. + // + // The base case for a node then is that its dependencies cannot be partitioned to produce + // disjoint in_sets, and that the in/out sets are accurate based on any transitive changes + // above or below this node. If both of these conditions are satisified, the node is valid. + // + // If a node splits in a way that results in an identical node once, we mark it suspected + // ambiguous: if it does so again, we mark it deleted as ambiguous. + let is_suspected_ambiguous = + if let Some((_, dependencies)) = monomorphizations.get(&graph[node_id].0) { + // We generated an identical node: if there was only one output node and its dependencies were + // also identical, then we have nooped. + let had_original_dependencies = dependencies + == &graph + .edges_directed(node_id, Direction::Outgoing) + .filter_map(|edge_ref| { + if graph[edge_ref.target()].is_deleted() { + None + } else { + edge_ref + .weight() + .inner() + .map(|dk| (dk.clone(), edge_ref.target())) + } + }) + .collect::>(); + if had_original_dependencies && monomorphizations.len() == 1 { + // This node cannot be reduced. If its dependencies had minimal in_sets, then it now also + // has a minimal in_set. + maybe_mark_minimal_in_set(&mut minimal_in_set, &graph, node_id); + if looping { + log::trace!( + "not able to reduce {:?}: {} (had {} monomorphizations)", + node_id, + graph[node_id], + monomorphizations.len() + ); + } + continue; + } + + // If more than one node was generated, but one of them had the original dependencies, then + // the node is potentially ambiguous. 
+ had_original_dependencies + } else { + false + }; - maybe_in_loop.insert(node_id); - if maybe_in_loop.len() > 5 { - let subgraph = graph.filter_map( - |node_id, node| { - if maybe_in_loop.contains(&node_id) { - Some(format!("{node_id:?}: {node}")) - } else { - None - } - }, - |_, edge_weight| Some(edge_weight.clone()), - ); + if looping { + log::trace!("{}", trace_str); + + maybe_in_loop.insert(node_id); + if maybe_in_loop.len() > 5 { + let subgraph = graph.filter_map( + |node_id, node| { + if maybe_in_loop.contains(&node_id) { + Some(format!("{node_id:?}: {node}")) + } else { + None + } + }, + |_, edge_weight| Some(edge_weight.clone()), + ); + + panic!( + "Loop subgraph: {}", + petgraph::dot::Dot::with_config(&subgraph, &[]) + ); + } + } - panic!( - "Loop subgraph: {}", - petgraph::dot::Dot::with_config(&subgraph, &[]) - ); - } - } - - // Needs changes. Mark this node deleted. - let ambiguous = is_suspected_ambiguous && suspected_ambiguous.contains(&node_id); - graph - .node_weight_mut(node_id) - .unwrap() - .mark_deleted(if ambiguous { - NodePrunedReason::Ambiguous - } else if !monomorphizations.is_empty() { - NodePrunedReason::Monomorphized - } else if had_dependents { - NodePrunedReason::NoValidCombinationsOfDependencies - } else { - NodePrunedReason::NoDependents - }); - // And schedule visits for all dependents and dependencies. - to_visit.extend(graph.neighbors_undirected(node_id)); - - // Generate a replacement node for each monomorphization of this rule. - for (new_node, (dependents, dependencies)) in monomorphizations { - let is_suspected_ambiguous_node = if is_suspected_ambiguous { - let is_identical = new_node == graph[node_id].0; - if ambiguous && is_identical { - // This is the identical copy of an ambiguous node: the original node has been deleted - // as ambiguous, and we skip creating the new copy. - continue; - } - is_identical - } else { - false - }; + // Needs changes. Mark this node deleted. + let ambiguous = is_suspected_ambiguous && suspected_ambiguous.contains(&node_id); + graph + .node_weight_mut(node_id) + .unwrap() + .mark_deleted(if ambiguous { + NodePrunedReason::Ambiguous + } else if !monomorphizations.is_empty() { + NodePrunedReason::Monomorphized + } else if had_dependents { + NodePrunedReason::NoValidCombinationsOfDependencies + } else { + NodePrunedReason::NoDependents + }); + // And schedule visits for all dependents and dependencies. + to_visit.extend(graph.neighbors_undirected(node_id)); + + // Generate a replacement node for each monomorphization of this rule. + for (new_node, (dependents, dependencies)) in monomorphizations { + let is_suspected_ambiguous_node = if is_suspected_ambiguous { + let is_identical = new_node == graph[node_id].0; + if ambiguous && is_identical { + // This is the identical copy of an ambiguous node: the original node has been deleted + // as ambiguous, and we skip creating the new copy. + continue; + } + is_identical + } else { + false + }; - if looping { - log::trace!( + if looping { + log::trace!( " generating {:#?}, with {} dependents and {} dependencies ({} minimal) which consumes: {:#?}", new_node, dependents.len(), @@ -871,768 +886,788 @@ impl Builder { .map(|(dk, di)| (dk.to_string(), graph[*di].to_string(),)) .collect::>() ); - } + } - let replacement_id = graph.add_node(MaybeDeleted::new(new_node)); - if is_suspected_ambiguous_node { - // We suspect that this node is ambiguous, but aren't sure yet: if it splits again the - // same way in the future, it will be deleted as ambiguous. 
- suspected_ambiguous.insert(replacement_id); - } + let replacement_id = graph.add_node(MaybeDeleted::new(new_node)); + if is_suspected_ambiguous_node { + // We suspect that this node is ambiguous, but aren't sure yet: if it splits again the + // same way in the future, it will be deleted as ambiguous. + suspected_ambiguous.insert(replacement_id); + } - if looping { - log::trace!("node: creating: {:?}", replacement_id); - } + if looping { + log::trace!("node: creating: {:?}", replacement_id); + } - // Give all dependents edges to the new node. - for (dependency_key, dependent_id) in &dependents { - // Add a new edge. - let mut edge = MaybeDeleted::new(dependency_key.clone()); - for p in &dependency_key.provided_params { - // NB: If the edge is invalid because it does not consume the provide param, we - // create it as deleted with that reason. - if !graph[replacement_id].0.in_set.contains(p) { - edge.mark_deleted(EdgePrunedReason::DoesNotConsumeProvidedParam); - } - } - if looping { - log::trace!("dependent edge: adding: ({:?}, {})", dependent_id, edge); - } - graph.add_edge(*dependent_id, replacement_id, edge); - } + // Give all dependents edges to the new node. + for (dependency_key, dependent_id) in &dependents { + // Add a new edge. + let mut edge = MaybeDeleted::new(dependency_key.clone()); + for p in &dependency_key.provided_params { + // NB: If the edge is invalid because it does not consume the provide param, we + // create it as deleted with that reason. + if !graph[replacement_id].0.in_set.contains(p) { + edge.mark_deleted(EdgePrunedReason::DoesNotConsumeProvidedParam); + } + } + if looping { + log::trace!("dependent edge: adding: ({:?}, {})", dependent_id, edge); + } + graph.add_edge(*dependent_id, replacement_id, edge); + } - // And give the replacement node edges to this combination of dependencies. - for (dependency_key, dependency_id) in dependencies { - // NB: When a node depends on itself, we adjust the destination of that self-edge to point to - // the new node. - let dependency_id = if dependency_id == node_id { - replacement_id - } else { - dependency_id - }; - if looping { - log::trace!("dependency edge: adding: ({dependency_key:?}, {dependency_id:?})"); - } - graph.add_edge( - replacement_id, - dependency_id, - MaybeDeleted::new(dependency_key), - ); + // And give the replacement node edges to this combination of dependencies. + for (dependency_key, dependency_id) in dependencies { + // NB: When a node depends on itself, we adjust the destination of that self-edge to point to + // the new node. + let dependency_id = if dependency_id == node_id { + replacement_id + } else { + dependency_id + }; + if looping { + log::trace!( + "dependency edge: adding: ({dependency_key:?}, {dependency_id:?})" + ); + } + graph.add_edge( + replacement_id, + dependency_id, + MaybeDeleted::new(dependency_key), + ); + } + + // Now that all edges have been created, maybe mark it minimal and potentially ambiguous. + maybe_mark_minimal_in_set(&mut minimal_in_set, &graph, replacement_id); + } } - // Now that all edges have been created, maybe mark it minimal and potentially ambiguous. - maybe_mark_minimal_in_set(&mut minimal_in_set, &graph, replacement_id); - } + graph } - graph - } - - /// - /// Execute live variable analysis to determine which Params are used by each node. - /// - /// See https://en.wikipedia.org/wiki/Live_variable_analysis - /// - fn live_param_labeled_graph(&self, graph: OutLabeledGraph) -> LabeledGraph { - // Add in_sets for each node, with all sets empty initially. 
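// A minimal, std-only sketch of the live-variable-style fixed point described above:
// each node's in_set is the union of its dependencies' in_sets, minus any params the
// connecting edge "provides", plus params the node consumes directly; changes are
// propagated to dependents until nothing changes. The types here are hypothetical
// simplifications of the crate's Node/ParamTypes.
use std::collections::{BTreeSet, VecDeque};

type Param = &'static str;
type NodeId = usize;

struct Node {
    consumes: BTreeSet<Param>,
    // (dependency node, params provided to it along that edge)
    deps: Vec<(NodeId, BTreeSet<Param>)>,
    dependents: Vec<NodeId>,
}

fn in_sets(nodes: &[Node]) -> Vec<BTreeSet<Param>> {
    let mut in_sets: Vec<BTreeSet<Param>> = vec![BTreeSet::new(); nodes.len()];
    let mut to_visit: VecDeque<NodeId> = (0..nodes.len()).collect();
    while let Some(id) = to_visit.pop_front() {
        let mut in_set = nodes[id].consumes.clone();
        for (dep, provided) in &nodes[id].deps {
            // A provided param is "declared" on the edge, so it is not required from above.
            in_set.extend(in_sets[*dep].difference(provided).copied());
        }
        if in_set != in_sets[id] {
            // The in_set changed: revisit dependents so the change flows up the graph.
            to_visit.extend(nodes[id].dependents.iter().copied());
            in_sets[id] = in_set;
        }
    }
    in_sets
}

fn main() {
    // 0: Query -> 1: RuleA -(provides "Snapshot")-> 2: RuleB (consumes "Path" and "Snapshot")
    let nodes = vec![
        Node { consumes: BTreeSet::new(), deps: vec![(1, BTreeSet::new())], dependents: vec![] },
        Node {
            consumes: BTreeSet::new(),
            deps: vec![(2, BTreeSet::from(["Snapshot"]))],
            dependents: vec![0],
        },
        Node {
            consumes: BTreeSet::from(["Path", "Snapshot"]),
            deps: vec![],
            dependents: vec![1],
        },
    ];
    let computed = in_sets(&nodes);
    // "Snapshot" is provided along RuleA's edge, so only "Path" propagates up to the Query.
    assert_eq!(computed[0], BTreeSet::from(["Path"]));
}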
- let mut graph: LabeledGraph = graph.map( - |_node_id, node| { - MaybeDeleted( - ParamsLabeled::new((node.0).0.clone(), (node.0).1.clone()), - node.1, - ) - }, - |_edge_id, edge_weight| edge_weight.clone(), - ); - - // Information flows up (the in_sets) this graph. - let mut to_visit = graph - .node_references() - .map(|nr| nr.id()) - .collect::>(); - while let Some(node_id) = to_visit.pop_front() { - if graph[node_id].is_deleted() { - continue; - } - - // Compute an initial in_set from the Node's dependencies. - let mut in_set = Self::dependencies_in_set( - node_id, - graph - .edges_directed(node_id, Direction::Outgoing) - .filter(|edge_ref| !graph[edge_ref.target()].is_deleted()) - .map(|edge_ref| { - ( - edge_ref.weight().clone(), - edge_ref.target(), - &graph[edge_ref.target()].0.in_set, - ) - }), - ); + /// + /// Execute live variable analysis to determine which Params are used by each node. + /// + /// See https://en.wikipedia.org/wiki/Live_variable_analysis + /// + fn live_param_labeled_graph(&self, graph: OutLabeledGraph) -> LabeledGraph { + // Add in_sets for each node, with all sets empty initially. + let mut graph: LabeledGraph = graph.map( + |_node_id, node| { + MaybeDeleted( + ParamsLabeled::new((node.0).0.clone(), (node.0).1.clone()), + node.1, + ) + }, + |_edge_id, edge_weight| edge_weight.clone(), + ); + + // Information flows up (the in_sets) this graph. + let mut to_visit = graph + .node_references() + .map(|nr| nr.id()) + .collect::>(); + while let Some(node_id) = to_visit.pop_front() { + if graph[node_id].is_deleted() { + continue; + } + + // Compute an initial in_set from the Node's dependencies. + let mut in_set = Self::dependencies_in_set( + node_id, + graph + .edges_directed(node_id, Direction::Outgoing) + .filter(|edge_ref| !graph[edge_ref.target()].is_deleted()) + .map(|edge_ref| { + ( + edge_ref.weight().clone(), + edge_ref.target(), + &graph[edge_ref.target()].0.in_set, + ) + }), + ); - // Then extend it with Node-specific params. - graph[node_id].0.node.add_inherent_in_set(&mut in_set); + // Then extend it with Node-specific params. + graph[node_id].0.node.add_inherent_in_set(&mut in_set); - if in_set != graph[node_id].0.in_set { - to_visit.extend(graph.neighbors_directed(node_id, Direction::Incoming)); - graph[node_id].0.in_set = in_set; - } + if in_set != graph[node_id].0.in_set { + to_visit.extend(graph.neighbors_directed(node_id, Direction::Incoming)); + graph[node_id].0.in_set = in_set; + } + } + + graph } - graph - } - - /// - /// After nodes have been monomorphized, they have the smallest dependency sets possible. - /// In cases where a node still has more than one source of a dependency, this phase statically - /// decides which source of each DependencyKey to use, and prunes edges to the rest. - /// - /// If a node has too many dependencies (in an ambiguous way) or too few, this phase will fail - /// slowly to collect all errored nodes (including those that have been deleted), and render the - /// most specific error possible. - /// - fn prune_edges(&self, mut graph: MonomorphizedGraph) -> Result, String> { - // Walk from roots, choosing one source for each DependencyKey of each node. - let mut visited = graph.visit_map(); - let mut errored = HashMap::default(); - // NB: We visit any node that is enqueued, even if it is deleted. - let mut to_visit = graph - .node_references() - .filter_map(|node_ref| match node_ref.weight().0 { - ParamsLabeled { - node: Node::Query(_), - .. 
- } => Some(node_ref.id()), - _ => None, - }) - .collect::>(); - - while let Some(node_id) = to_visit.pop() { - if !visited.visit(node_id) { - continue; - } - // See the note above about deleted nodes. - let node = &graph[node_id].0.node; - - let edges_by_dependency_key = Self::edges_by_dependency_key(&graph, node_id, true); - let mut edges_to_delete = Vec::new(); - for (dependency_key, edge_refs) in edges_by_dependency_key { - // Filter out any dependencies that are not satisfiable for this node based on its type and - // in/out sets and any that were deleted/invalid. - let relevant_edge_refs: Vec<_> = match node { - Node::Query(q) => { - // Only dependencies with in_sets that are a subset of our params can be used. - edge_refs - .iter() - .filter(|edge_ref| { - !edge_ref.weight().is_deleted() && !graph[edge_ref.target()].is_deleted() - }) - .filter(|edge_ref| { - let dependency_in_set = &graph[edge_ref.target()].0.in_set; - dependency_in_set.is_subset(&q.params) - }) - .collect() - } - Node::Rule(_) | Node::Reentry { .. } => { - // If there is a provided param, only dependencies that consume it can be used. - edge_refs - .iter() - .filter(|edge_ref| { - !edge_ref.weight().is_deleted() && !graph[edge_ref.target()].is_deleted() - }) - .filter(|edge_ref| { - dependency_key - .provided_params - .iter() - .all(|p| graph[edge_ref.target()].0.in_set.contains(p)) - }) - .collect() - } - p @ Node::Param(_) => { - panic!( - "A Param should not have dependencies: {:?} had {:#?}", - p, - edge_refs - .iter() - .map(|edge_ref| format!("{}", graph[edge_ref.target()].0.node)) - .collect::>() - ); - } - }; + /// + /// After nodes have been monomorphized, they have the smallest dependency sets possible. + /// In cases where a node still has more than one source of a dependency, this phase statically + /// decides which source of each DependencyKey to use, and prunes edges to the rest. + /// + /// If a node has too many dependencies (in an ambiguous way) or too few, this phase will fail + /// slowly to collect all errored nodes (including those that have been deleted), and render the + /// most specific error possible. + /// + fn prune_edges(&self, mut graph: MonomorphizedGraph) -> Result, String> { + // Walk from roots, choosing one source for each DependencyKey of each node. + let mut visited = graph.visit_map(); + let mut errored = HashMap::default(); + // NB: We visit any node that is enqueued, even if it is deleted. + let mut to_visit = graph + .node_references() + .filter_map(|node_ref| match node_ref.weight().0 { + ParamsLabeled { + node: Node::Query(_), + .. + } => Some(node_ref.id()), + _ => None, + }) + .collect::>(); - // We prefer the dependency with the smallest set of input Params, as that minimizes Rule - // identities in the graph and biases toward receiving values from dependencies (which do not - // affect our identity) rather than dependents. 
- #[allow(clippy::comparison_chain)] - let chosen_edges = { - let mut minimum_param_set_size = ::std::usize::MAX; - let mut chosen_edges = Vec::new(); - for edge_ref in relevant_edge_refs { - let param_set_size = graph[edge_ref.target()].0.in_set.len(); - if param_set_size < minimum_param_set_size { - chosen_edges.clear(); - chosen_edges.push(edge_ref); - minimum_param_set_size = param_set_size; - } else if param_set_size == minimum_param_set_size { - chosen_edges.push(edge_ref); + while let Some(node_id) = to_visit.pop() { + if !visited.visit(node_id) { + continue; } - } - chosen_edges - }; - match chosen_edges.len() { - 1 => { - // Schedule this dependency to be visited, and mark edges to all other choices deleted. - let chosen_edge = chosen_edges[0]; - to_visit.push(chosen_edge.target()); - edges_to_delete.extend( - edge_refs - .iter() - .map(|edge_ref| edge_ref.id()) - .filter(|edge_ref_id| *edge_ref_id != chosen_edge.id()), - ); - } - 0 => { - // If there were no live nodes for this DependencyKey, traverse into any nodes - // that were deleted. We do not traverse deleted edges, as they represent this node - // having eliminated the dependency for a specific reason that we should render here. - if edge_refs - .iter() - .all(|edge_ref| graph[edge_ref.target()].deleted_reason().is_some()) - { - to_visit.extend( - edge_refs - .iter() - .filter(|edge_ref| { - matches!( + // See the note above about deleted nodes. + let node = &graph[node_id].0.node; + + let edges_by_dependency_key = Self::edges_by_dependency_key(&graph, node_id, true); + let mut edges_to_delete = Vec::new(); + for (dependency_key, edge_refs) in edges_by_dependency_key { + // Filter out any dependencies that are not satisfiable for this node based on its type and + // in/out sets and any that were deleted/invalid. + let relevant_edge_refs: Vec<_> = match node { + Node::Query(q) => { + // Only dependencies with in_sets that are a subset of our params can be used. + edge_refs + .iter() + .filter(|edge_ref| { + !edge_ref.weight().is_deleted() + && !graph[edge_ref.target()].is_deleted() + }) + .filter(|edge_ref| { + let dependency_in_set = &graph[edge_ref.target()].0.in_set; + dependency_in_set.is_subset(&q.params) + }) + .collect() + } + Node::Rule(_) | Node::Reentry { .. } => { + // If there is a provided param, only dependencies that consume it can be used. + edge_refs + .iter() + .filter(|edge_ref| { + !edge_ref.weight().is_deleted() + && !graph[edge_ref.target()].is_deleted() + }) + .filter(|edge_ref| { + dependency_key + .provided_params + .iter() + .all(|p| graph[edge_ref.target()].0.in_set.contains(p)) + }) + .collect() + } + p @ Node::Param(_) => { + panic!( + "A Param should not have dependencies: {:?} had {:#?}", + p, + edge_refs + .iter() + .map(|edge_ref| format!("{}", graph[edge_ref.target()].0.node)) + .collect::>() + ); + } + }; + + // We prefer the dependency with the smallest set of input Params, as that minimizes Rule + // identities in the graph and biases toward receiving values from dependencies (which do not + // affect our identity) rather than dependents. 
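// A small sketch of the tie-breaking rule described above: among the candidates for a
// dependency, keep those whose in_set is smallest; exactly one winner means the other
// edges can be pruned, zero means an unsatisfied dependency, and more than one is an
// ambiguity to report. The `Choice` outcome and string-named candidates are illustrative
// only, not types from this crate.
use std::collections::BTreeSet;

#[derive(Debug, PartialEq)]
enum Choice {
    Chosen(&'static str),
    NoSource,
    Ambiguous(Vec<&'static str>),
}

fn choose(candidates: &[(&'static str, BTreeSet<&'static str>)]) -> Choice {
    // Keep only the candidates whose in_set size equals the minimum.
    let min = candidates.iter().map(|(_, in_set)| in_set.len()).min();
    let smallest: Vec<&'static str> = match min {
        None => return Choice::NoSource,
        Some(min) => candidates
            .iter()
            .filter(|(_, in_set)| in_set.len() == min)
            .map(|(name, _)| *name)
            .collect(),
    };
    match smallest.as_slice() {
        [only] => Choice::Chosen(*only),
        _ => Choice::Ambiguous(smallest),
    }
}

fn main() {
    let candidates = [
        ("rule_using_path_and_platform", BTreeSet::from(["Path", "Platform"])),
        ("rule_using_path", BTreeSet::from(["Path"])),
    ];
    // Fewer required params wins: values flow from dependencies rather than dependents.
    assert_eq!(choose(&candidates), Choice::Chosen("rule_using_path"));
    assert_eq!(choose(&[]), Choice::NoSource);
}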
+ #[allow(clippy::comparison_chain)] + let chosen_edges = { + let mut minimum_param_set_size = ::std::usize::MAX; + let mut chosen_edges = Vec::new(); + for edge_ref in relevant_edge_refs { + let param_set_size = graph[edge_ref.target()].0.in_set.len(); + if param_set_size < minimum_param_set_size { + chosen_edges.clear(); + chosen_edges.push(edge_ref); + minimum_param_set_size = param_set_size; + } else if param_set_size == minimum_param_set_size { + chosen_edges.push(edge_ref); + } + } + chosen_edges + }; + match chosen_edges.len() { + 1 => { + // Schedule this dependency to be visited, and mark edges to all other choices deleted. + let chosen_edge = chosen_edges[0]; + to_visit.push(chosen_edge.target()); + edges_to_delete.extend( + edge_refs + .iter() + .map(|edge_ref| edge_ref.id()) + .filter(|edge_ref_id| *edge_ref_id != chosen_edge.id()), + ); + } + 0 => { + // If there were no live nodes for this DependencyKey, traverse into any nodes + // that were deleted. We do not traverse deleted edges, as they represent this node + // having eliminated the dependency for a specific reason that we should render here. + if edge_refs + .iter() + .all(|edge_ref| graph[edge_ref.target()].deleted_reason().is_some()) + { + to_visit.extend( + edge_refs + .iter() + .filter(|edge_ref| { + matches!( graph[edge_ref.target()].deleted_reason(), Some(NodePrunedReason::Ambiguous) | Some(NodePrunedReason::NoSourceOfParam) | Some(NodePrunedReason::NoValidCombinationsOfDependencies) ) - }) - .map(|edge_ref| edge_ref.target()), - ); + }) + .map(|edge_ref| edge_ref.target()), + ); + } + errored.entry(node_id).or_insert_with(Vec::new).push( + self.render_no_source_of_dependency_error( + &graph, + node, + dependency_key, + edge_refs, + ), + ); + } + _ => { + // Render and visit only the chosen candidates to see why they were ambiguous. + to_visit.extend(chosen_edges.iter().map(|edge_ref| edge_ref.target())); + errored + .entry(node_id) + .or_insert_with(Vec::new) + .push(format!( + "Too many sources of dependency {} for {}: {:#?}", + dependency_key, + node, + chosen_edges + .iter() + .map(|edge_ref| { + format!( + "{} (for {})", + graph[edge_ref.target()].0.node, + params_str(&graph[edge_ref.target()].0.in_set) + ) + }) + .collect::>() + )); + } + } } - errored.entry(node_id).or_insert_with(Vec::new).push( - self.render_no_source_of_dependency_error(&graph, node, dependency_key, edge_refs), - ); - } - _ => { - // Render and visit only the chosen candidates to see why they were ambiguous. - to_visit.extend(chosen_edges.iter().map(|edge_ref| edge_ref.target())); - errored - .entry(node_id) - .or_insert_with(Vec::new) - .push(format!( - "Too many sources of dependency {} for {}: {:#?}", - dependency_key, - node, - chosen_edges - .iter() - .map(|edge_ref| { - format!( - "{} (for {})", - graph[edge_ref.target()].0.node, - params_str(&graph[edge_ref.target()].0.in_set) - ) - }) - .collect::>() - )); - } - } - } - for edge_to_delete in edges_to_delete { - graph[edge_to_delete].mark_deleted(EdgePrunedReason::SmallerParamSetAvailable); - } - - // Validate masked params. 
- if let Node::Rule(rule) = &graph[node_id].0.node { - for masked_param in rule.masked_params() { - if graph[node_id].0.in_set.contains(&masked_param) { - let in_set = params_str(&graph[node_id].0.in_set); - let dependencies = graph - .edges_directed(node_id, Direction::Outgoing) - .filter(|edge_ref| { - !edge_ref.weight().is_deleted() - && !edge_ref.weight().0.provides(&masked_param) - && graph[edge_ref.target()].0.in_set.contains(&masked_param) - }) - .map(|edge_ref| { - let dep_id = edge_ref.target(); - format!( - "{} for {}", - graph[dep_id].0.node, - params_str(&graph[dep_id].0.in_set) - ) - }) - .collect::>() - .join("\n "); - errored - .entry(node_id) - .or_insert_with(Vec::new) - .push(format!( + for edge_to_delete in edges_to_delete { + graph[edge_to_delete].mark_deleted(EdgePrunedReason::SmallerParamSetAvailable); + } + + // Validate masked params. + if let Node::Rule(rule) = &graph[node_id].0.node { + for masked_param in rule.masked_params() { + if graph[node_id].0.in_set.contains(&masked_param) { + let in_set = params_str(&graph[node_id].0.in_set); + let dependencies = graph + .edges_directed(node_id, Direction::Outgoing) + .filter(|edge_ref| { + !edge_ref.weight().is_deleted() + && !edge_ref.weight().0.provides(&masked_param) + && graph[edge_ref.target()].0.in_set.contains(&masked_param) + }) + .map(|edge_ref| { + let dep_id = edge_ref.target(); + format!( + "{} for {}", + graph[dep_id].0.node, + params_str(&graph[dep_id].0.in_set) + ) + }) + .collect::>() + .join("\n "); + errored + .entry(node_id) + .or_insert_with(Vec::new) + .push(format!( "Rule `{rule} (for {in_set})` masked the parameter type `{masked_param}`, but \ it was required by some dependencies:\n {dependencies}" )); - } + } + } + } } - } - } - if errored.is_empty() { - // Finally, return a new graph with all deleted data discarded. - Ok(graph.filter_map( - |_node_id, node| { - node - .inner() - .map(|node| (node.node.clone(), node.in_set.clone())) - }, - |_edge_id, edge| edge.inner().cloned(), - )) - } else { - // Render the most specific errors. - Err(Self::render_prune_errors(&graph, errored)) + if errored.is_empty() { + // Finally, return a new graph with all deleted data discarded. + Ok(graph.filter_map( + |_node_id, node| { + node.inner() + .map(|node| (node.node.clone(), node.in_set.clone())) + }, + |_edge_id, edge| edge.inner().cloned(), + )) + } else { + // Render the most specific errors. + Err(Self::render_prune_errors(&graph, errored)) + } } - } - - #[allow(clippy::type_complexity)] - fn render_no_source_of_dependency_error( - &self, - graph: &MonomorphizedGraph, - node: &Node, - dependency_key: DependencyKey, - edge_refs: Vec, EdgePrunedReason>, u32>>, - ) -> String { - if self.rules.contains_key(&dependency_key.product()) { - format!( - "No source of dependency {} for {}. All potential sources were eliminated: {:#?}", - dependency_key, - node, - edge_refs - .iter() - .map(|edge_ref| { - // An edge being deleted carries more information than a node being deleted, because a - // deleted edge from X to Y describes specifically why X cannot use Y. 
- let node = &graph[edge_ref.target()]; - let reason = edge_ref - .weight() - .deleted_reason() - .map(|r| format!("{r:?}")) - .or_else(|| node.deleted_reason().map(|r| format!("{r:?}"))); - let reason_suffix = if let Some(reason) = reason { - format!("{reason}: ") - } else { - "".to_owned() - }; + + #[allow(clippy::type_complexity)] + fn render_no_source_of_dependency_error( + &self, + graph: &MonomorphizedGraph, + node: &Node, + dependency_key: DependencyKey, + edge_refs: Vec< + EdgeReference, EdgePrunedReason>, u32>, + >, + ) -> String { + if self.rules.contains_key(&dependency_key.product()) { format!( - "{}{} (for {})", - reason_suffix, - node.0.node, - params_str(&node.0.in_set) + "No source of dependency {} for {}. All potential sources were eliminated: {:#?}", + dependency_key, + node, + edge_refs + .iter() + .map(|edge_ref| { + // An edge being deleted carries more information than a node being deleted, because a + // deleted edge from X to Y describes specifically why X cannot use Y. + let node = &graph[edge_ref.target()]; + let reason = edge_ref + .weight() + .deleted_reason() + .map(|r| format!("{r:?}")) + .or_else(|| node.deleted_reason().map(|r| format!("{r:?}"))); + let reason_suffix = if let Some(reason) = reason { + format!("{reason}: ") + } else { + "".to_owned() + }; + format!( + "{}{} (for {})", + reason_suffix, + node.0.node, + params_str(&node.0.in_set) + ) + }) + .collect::>() ) - }) - .collect::>() - ) - } else if dependency_key.provided_params.is_empty() { - format!( - "No installed rules return the type {}, and it was not provided by potential \ + } else if dependency_key.provided_params.is_empty() { + format!( + "No installed rules return the type {}, and it was not provided by potential \ callers of {}.\nIf that type should be computed by a rule, ensure that that \ rule is installed.\nIf it should be provided by a caller, ensure that it is included \ in any relevant Query or Get.", - dependency_key.product(), - node, - ) - } else { - format!( + dependency_key.product(), + node, + ) + } else { + format!( "No installed rules return the type {} to satisfy {} for {}.\nEnsure that the rule you are \ expecting to use is installed.", dependency_key.product(), dependency_key, node, ) + } } - } - - fn render_prune_errors( - graph: &MonomorphizedGraph, - errored: HashMap, Vec>, - ) -> String { - // Leaf errors have no dependencies in the errored map. - let mut leaf_errors = errored - .iter() - .filter(|(&node_id, _)| { - !graph - .neighbors_directed(node_id, Direction::Outgoing) - .any(|dependency_id| errored.contains_key(&dependency_id) && node_id != dependency_id) - }) - .flat_map(|(_, errors)| { - let mut errors = errors.clone(); - errors.sort(); - errors.into_iter().map(|e| e.trim().replace('\n', "\n ")) - }) - .collect::>(); - - leaf_errors.sort(); - leaf_errors.dedup(); - - let subgraph = graph.filter_map( - |node_id, node| { - errored - .get(&node_id) - .map(|errors| format!("{}:\n{}", node, errors.join("\n"))) - }, - |_, edge_weight| Some(edge_weight.clone()), - ); - - log::trace!( - "// errored subgraph:\n{}", - petgraph::dot::Dot::with_config(&subgraph, &[]) - ); - - format!( - "Encountered {} rule graph error{}:\n {}", - leaf_errors.len(), - if leaf_errors.len() == 1 { "" } else { "s" }, - leaf_errors.join("\n "), - ) - } - - /// - /// Takes a Graph that has been pruned to eliminate unambiguous choices: any duplicate edges at - /// this point are errors. 
- /// - fn finalize(self, graph: InLabeledGraph) -> Result, String> { - let entry_for = |node_id| -> Entry { - let (node, in_set): &(Node, ParamTypes<_>) = &graph[node_id]; - match node { - Node::Rule(rule) => Entry::WithDeps(Intern::new(EntryWithDeps::Rule(RuleEntry { - params: in_set.clone(), - rule: rule.clone(), - }))), - Node::Query(q) => Entry::WithDeps(Intern::new(EntryWithDeps::Root(RootEntry(q.clone())))), - Node::Param(p) => Entry::Param(*p), - Node::Reentry(q, _) => Entry::WithDeps(Intern::new(EntryWithDeps::Reentry(Reentry { - params: in_set.clone(), - query: q.clone(), - }))), - } - }; - - // Visit the reachable portion of the graph to create Edges, starting from roots. - let mut rule_dependency_edges = HashMap::default(); - let mut visited = graph.visit_map(); - let mut to_visit = graph.externals(Direction::Incoming).collect::>(); - while let Some(node_id) = to_visit.pop() { - if !visited.visit(node_id) { - continue; - } - - // Create an entry for the node, and schedule its dependencies to be visited. - let entry = entry_for(node_id); - to_visit.extend(graph.neighbors_directed(node_id, Direction::Outgoing)); - - // Convert the graph edges into RuleEdges: graph pruning should already have confirmed that - // there was one dependency per DependencyKey. - let dependencies = graph - .edges_directed(node_id, Direction::Outgoing) - .map(|edge_ref| { - ( - edge_ref.weight().clone(), - Intern::new(entry_for(edge_ref.target())), - ) - }) - .collect::>>>(); - match entry { - Entry::WithDeps(wd) => { - rule_dependency_edges.insert(wd, RuleEdges { dependencies }); - } - Entry::Param(p) => { - if !dependencies.is_empty() { - return Err(format!( + fn render_prune_errors( + graph: &MonomorphizedGraph, + errored: HashMap, Vec>, + ) -> String { + // Leaf errors have no dependencies in the errored map. + let mut leaf_errors = errored + .iter() + .filter(|(&node_id, _)| { + !graph + .neighbors_directed(node_id, Direction::Outgoing) + .any(|dependency_id| { + errored.contains_key(&dependency_id) && node_id != dependency_id + }) + }) + .flat_map(|(_, errors)| { + let mut errors = errors.clone(); + errors.sort(); + errors.into_iter().map(|e| e.trim().replace('\n', "\n ")) + }) + .collect::>(); + + leaf_errors.sort(); + leaf_errors.dedup(); + + let subgraph = graph.filter_map( + |node_id, node| { + errored + .get(&node_id) + .map(|errors| format!("{}:\n{}", node, errors.join("\n"))) + }, + |_, edge_weight| Some(edge_weight.clone()), + ); + + log::trace!( + "// errored subgraph:\n{}", + petgraph::dot::Dot::with_config(&subgraph, &[]) + ); + + format!( + "Encountered {} rule graph error{}:\n {}", + leaf_errors.len(), + if leaf_errors.len() == 1 { "" } else { "s" }, + leaf_errors.join("\n "), + ) + } + + /// + /// Takes a Graph that has been pruned to eliminate unambiguous choices: any duplicate edges at + /// this point are errors. 
+ /// + fn finalize(self, graph: InLabeledGraph) -> Result, String> { + let entry_for = |node_id| -> Entry { + let (node, in_set): &(Node, ParamTypes<_>) = &graph[node_id]; + match node { + Node::Rule(rule) => Entry::WithDeps(Intern::new(EntryWithDeps::Rule(RuleEntry { + params: in_set.clone(), + rule: rule.clone(), + }))), + Node::Query(q) => { + Entry::WithDeps(Intern::new(EntryWithDeps::Root(RootEntry(q.clone())))) + } + Node::Param(p) => Entry::Param(*p), + Node::Reentry(q, _) => { + Entry::WithDeps(Intern::new(EntryWithDeps::Reentry(Reentry { + params: in_set.clone(), + query: q.clone(), + }))) + } + } + }; + + // Visit the reachable portion of the graph to create Edges, starting from roots. + let mut rule_dependency_edges = HashMap::default(); + let mut visited = graph.visit_map(); + let mut to_visit = graph.externals(Direction::Incoming).collect::>(); + while let Some(node_id) = to_visit.pop() { + if !visited.visit(node_id) { + continue; + } + + // Create an entry for the node, and schedule its dependencies to be visited. + let entry = entry_for(node_id); + to_visit.extend(graph.neighbors_directed(node_id, Direction::Outgoing)); + + // Convert the graph edges into RuleEdges: graph pruning should already have confirmed that + // there was one dependency per DependencyKey. + let dependencies = graph + .edges_directed(node_id, Direction::Outgoing) + .map(|edge_ref| { + ( + edge_ref.weight().clone(), + Intern::new(entry_for(edge_ref.target())), + ) + }) + .collect::>>>(); + + match entry { + Entry::WithDeps(wd) => { + rule_dependency_edges.insert(wd, RuleEdges { dependencies }); + } + Entry::Param(p) => { + if !dependencies.is_empty() { + return Err(format!( "Param {p} should not have had dependencies, but had: {dependencies:#?}", )); - } + } + } + } } - } + + Ok(RuleGraph { + queries: self.queries.into_iter().collect(), + rule_dependency_edges, + // TODO + unreachable_rules: Vec::default(), + }) } - Ok(RuleGraph { - queries: self.queries.into_iter().collect(), - rule_dependency_edges, - // TODO - unreachable_rules: Vec::default(), - }) - } - - /// - /// Groups the DependencyKeys of the given node (regardless of whether it is deleted) including - /// any empty groups for any keys that were declared by the node but didn't have edges. - /// - #[allow(clippy::type_complexity)] - fn edges_by_dependency_key( - graph: &MonomorphizedGraph, - node_id: NodeIndex, - include_deleted_dependencies: bool, - ) -> BTreeMap< - DependencyKey, - Vec, EdgePrunedReason>, u32>>, - > { - let node = &graph[node_id].0.node; - let mut edges_by_dependency_key = node - .dependency_keys() - .into_iter() - .map(|dk| (dk, vec![])) - .collect::>(); - for edge_ref in graph.edges_directed(node_id, Direction::Outgoing) { - if !include_deleted_dependencies - && (edge_ref.weight().is_deleted() || graph[edge_ref.target()].is_deleted()) - { - continue; - } - - let dependency_key = &edge_ref.weight().0; - edges_by_dependency_key + /// + /// Groups the DependencyKeys of the given node (regardless of whether it is deleted) including + /// any empty groups for any keys that were declared by the node but didn't have edges. 
+ /// + #[allow(clippy::type_complexity)] + fn edges_by_dependency_key( + graph: &MonomorphizedGraph, + node_id: NodeIndex, + include_deleted_dependencies: bool, + ) -> BTreeMap< + DependencyKey, + Vec, EdgePrunedReason>, u32>>, + > { + let node = &graph[node_id].0.node; + let mut edges_by_dependency_key = node + .dependency_keys() + .into_iter() + .map(|dk| (dk, vec![])) + .collect::>(); + for edge_ref in graph.edges_directed(node_id, Direction::Outgoing) { + if !include_deleted_dependencies + && (edge_ref.weight().is_deleted() || graph[edge_ref.target()].is_deleted()) + { + continue; + } + + let dependency_key = &edge_ref.weight().0; + edges_by_dependency_key .get_mut(dependency_key) .unwrap_or_else(|| { panic!("{node} did not declare a dependency {dependency_key}, but had an edge for it."); }) .push(edge_ref); - } - edges_by_dependency_key - } - - /// - /// Calculates the in_set required to satisfy the given dependency via the given DependencyKey. - /// - fn dependency_in_set<'a>( - node_id: NodeIndex, - dependency_key: &'a DependencyKey, - dependency_id: NodeIndex, - dependency_in_set: &'a ParamTypes, - ) -> Box + 'a> { - // The in_sets of the dependency, less any Params "provided" (ie "declared variables" - // in the context of live variable analysis) by the relevant DependencyKey. - if dependency_id == node_id { - // A self-edge to this node does not contribute Params to its own liveness set, for two - // reasons: - // 1. it should always be a noop. - // 2. any time it is _not_ a noop, it is probably because we're busying updating the - // liveness set, and the node contributing to its own set ends up using a stale - // result. - return Box::new(std::iter::empty()); + } + edges_by_dependency_key } - if dependency_key.provided_params.is_empty() { - Box::new(dependency_in_set.iter().cloned()) - } else { - // If the DependencyKey "provides" the Param, it does not count toward our in-set. - Box::new( - dependency_in_set - .iter() - .filter(move |p| !dependency_key.provides(*p)) - .cloned(), - ) - } - } - - /// - /// Calculates the in_set required to satisfy the given set of dependency edges with their - /// in_sets. - /// - fn dependencies_in_set<'a>( - node_id: NodeIndex, - dependency_edges: impl Iterator< - Item = ( - DependencyKey, - NodeIndex, - &'a ParamTypes, - ), - >, - ) -> ParamTypes { - // Union the in_sets of our dependencies, less any Params "provided" (ie "declared variables" - // in the context of live variable analysis) by the relevant DependencyKeys. - let mut in_set = ParamTypes::new(); - for (dependency_key, dependency_id, dependency_in_set) in dependency_edges { - in_set.extend(Self::dependency_in_set( - node_id, - &dependency_key, - dependency_id, - dependency_in_set, - )); - } - in_set - } - - /// - /// Given a node and a mapping of all legal sources of each of its dependencies, generates a - /// simplified node for each legal set. - /// - /// Note that because ambiguities are preserved (to allow for useful errors - /// post-monomorphization), the output is a set of dependencies which might contain multiple - /// entries per DependencyKey. - /// - /// Unfortunately, we cannot eliminate dependencies based on their in_sets not being a subset of - /// the out_set, because it's possible that the in_sets have not shrunk (transitively) to their - /// true requirements yet. See the doc string of `monomorphize`. 
We _are_ able to reject - /// dependencies that _directly_ depend on something that is not present though: either via a - /// direct dependency on a Param node that is not present in the out_set, or a DependencyKey's - /// provided_param that is not in the in_set of a combination. - /// - #[allow(clippy::type_complexity)] - fn monomorphizations( - graph: &MonomorphizedGraph, - node_id: NodeIndex, - out_set: ParamTypes, - minimal_in_set: &HashSet>, - deps: &[Vec<(DependencyKey, NodeIndex)>], - ) -> HashMap, HashSet<(DependencyKey, NodeIndex)>> { - let mut combinations = HashMap::default(); - - // We start by computing per-dependency in_sets, and filtering out dependencies that will be - // illegal in any possible combination. - let filtered_deps: Vec< - Vec<( - DependencyKey, - NodeIndex, - ParamTypes, - )>, - > = deps - .iter() - .map(|choices| { - choices - .iter() - .filter(|(_, dependency_id)| { - // If the candidate is a Param, it must be present in the out_set. - if let Node::Param(ref p) = graph[*dependency_id].0.node { - out_set.contains(p) - } else { - true - } - }) - .map(|(dependency_key, dependency_id)| { - let dependency_in_set = Self::dependency_in_set( - node_id, - dependency_key, - *dependency_id, - &graph[*dependency_id].0.in_set, + /// + /// Calculates the in_set required to satisfy the given dependency via the given DependencyKey. + /// + fn dependency_in_set<'a>( + node_id: NodeIndex, + dependency_key: &'a DependencyKey, + dependency_id: NodeIndex, + dependency_in_set: &'a ParamTypes, + ) -> Box + 'a> { + // The in_sets of the dependency, less any Params "provided" (ie "declared variables" + // in the context of live variable analysis) by the relevant DependencyKey. + if dependency_id == node_id { + // A self-edge to this node does not contribute Params to its own liveness set, for two + // reasons: + // 1. it should always be a noop. + // 2. any time it is _not_ a noop, it is probably because we're busying updating the + // liveness set, and the node contributing to its own set ends up using a stale + // result. + return Box::new(std::iter::empty()); + } + + if dependency_key.provided_params.is_empty() { + Box::new(dependency_in_set.iter().cloned()) + } else { + // If the DependencyKey "provides" the Param, it does not count toward our in-set. + Box::new( + dependency_in_set + .iter() + .filter(move |p| !dependency_key.provides(*p)) + .cloned(), ) - .collect::>(); - (dependency_key.clone(), *dependency_id, dependency_in_set) - }) - .collect() - }) - .collect(); - - // Then generate the combinations of possibly valid deps. - for combination in combinations_of_one(&filtered_deps) { - // Union the pre-filtered per-dependency in_sets. - let in_set = { - let mut in_set = - combination - .iter() - .fold(ParamTypes::new(), |mut in_set, (_, _, dep_in_set)| { - in_set.extend(dep_in_set.iter().cloned()); - in_set - }); - graph[node_id].0.node.add_inherent_in_set(&mut in_set); + } + } + + /// + /// Calculates the in_set required to satisfy the given set of dependency edges with their + /// in_sets. + /// + fn dependencies_in_set<'a>( + node_id: NodeIndex, + dependency_edges: impl Iterator< + Item = ( + DependencyKey, + NodeIndex, + &'a ParamTypes, + ), + >, + ) -> ParamTypes { + // Union the in_sets of our dependencies, less any Params "provided" (ie "declared variables" + // in the context of live variable analysis) by the relevant DependencyKeys. 
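// An illustrative, std-only version of the per-dependency in_set contribution described
// above: a self-edge contributes nothing, and any param "provided" along the edge is a
// declared variable and so is subtracted from the dependency's requirements. The types
// are hypothetical simplifications of the crate's DependencyKey/ParamTypes.
use std::collections::BTreeSet;

type Param = &'static str;
type NodeId = usize;

fn dependency_in_set(
    node_id: NodeId,
    provided_params: &BTreeSet<Param>,
    dependency_id: NodeId,
    dependency_in_set: &BTreeSet<Param>,
) -> BTreeSet<Param> {
    if dependency_id == node_id {
        // A self-edge never contributes to the node's own liveness set.
        return BTreeSet::new();
    }
    dependency_in_set
        .iter()
        .filter(|p| !provided_params.contains(*p))
        .copied()
        .collect()
}

fn main() {
    let dep_requires = BTreeSet::from(["Path", "Platform"]);
    // "Platform" is provided along the edge, so only "Path" is required from the caller.
    let contributed = dependency_in_set(0, &BTreeSet::from(["Platform"]), 1, &dep_requires);
    assert_eq!(contributed, BTreeSet::from(["Path"]));
    // A self-edge (same node id) contributes nothing.
    assert!(dependency_in_set(0, &BTreeSet::new(), 0, &dep_requires).is_empty());
}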
+ let mut in_set = ParamTypes::new(); + for (dependency_key, dependency_id, dependency_in_set) in dependency_edges { + in_set.extend(Self::dependency_in_set( + node_id, + &dependency_key, + dependency_id, + dependency_in_set, + )); + } in_set - }; + } - // Confirm that this combination of deps is satisfiable in terms of the in_set. - let in_set_satisfiable = combination - .iter() - .all(|(dependency_key, dependency_id, _)| { - let dependency_in_set = if *dependency_id == node_id { - // Is a self edge: use the in_set that we're considering creating. - &in_set - } else { - &graph[*dependency_id].0.in_set - }; - - // Any param provided by this key must be consumed. - dependency_key - .provided_params + /// + /// Given a node and a mapping of all legal sources of each of its dependencies, generates a + /// simplified node for each legal set. + /// + /// Note that because ambiguities are preserved (to allow for useful errors + /// post-monomorphization), the output is a set of dependencies which might contain multiple + /// entries per DependencyKey. + /// + /// Unfortunately, we cannot eliminate dependencies based on their in_sets not being a subset of + /// the out_set, because it's possible that the in_sets have not shrunk (transitively) to their + /// true requirements yet. See the doc string of `monomorphize`. We _are_ able to reject + /// dependencies that _directly_ depend on something that is not present though: either via a + /// direct dependency on a Param node that is not present in the out_set, or a DependencyKey's + /// provided_param that is not in the in_set of a combination. + /// + #[allow(clippy::type_complexity)] + fn monomorphizations( + graph: &MonomorphizedGraph, + node_id: NodeIndex, + out_set: ParamTypes, + minimal_in_set: &HashSet>, + deps: &[Vec<(DependencyKey, NodeIndex)>], + ) -> HashMap, HashSet<(DependencyKey, NodeIndex)>> { + let mut combinations = HashMap::default(); + + // We start by computing per-dependency in_sets, and filtering out dependencies that will be + // illegal in any possible combination. + let filtered_deps: Vec< + Vec<( + DependencyKey, + NodeIndex, + ParamTypes, + )>, + > = deps .iter() - .all(|p| dependency_in_set.contains(p)) - }); - if !in_set_satisfiable { - continue; - } - - // Compute the out_set for this combination. Any Params that are consumed here are removed - // from the out_set that Rule dependencies will be allowed to consume. Params that weren't - // present in the out_set were already filtered near the top of this method. - let out_set = { - let mut out_set = out_set.clone(); - for (_, dependency_id, _) in &combination { - if let Node::Param(p) = graph[*dependency_id].0.node { - out_set.remove(&p); - } + .map(|choices| { + choices + .iter() + .filter(|(_, dependency_id)| { + // If the candidate is a Param, it must be present in the out_set. + if let Node::Param(ref p) = graph[*dependency_id].0.node { + out_set.contains(p) + } else { + true + } + }) + .map(|(dependency_key, dependency_id)| { + let dependency_in_set = Self::dependency_in_set( + node_id, + dependency_key, + *dependency_id, + &graph[*dependency_id].0.in_set, + ) + .collect::>(); + (dependency_key.clone(), *dependency_id, dependency_in_set) + }) + .collect() + }) + .collect(); + + // Then generate the combinations of possibly valid deps. + for combination in combinations_of_one(&filtered_deps) { + // Union the pre-filtered per-dependency in_sets. 
+ let in_set = { + let mut in_set = + combination + .iter() + .fold(ParamTypes::new(), |mut in_set, (_, _, dep_in_set)| { + in_set.extend(dep_in_set.iter().cloned()); + in_set + }); + graph[node_id].0.node.add_inherent_in_set(&mut in_set); + in_set + }; + + // Confirm that this combination of deps is satisfiable in terms of the in_set. + let in_set_satisfiable = + combination + .iter() + .all(|(dependency_key, dependency_id, _)| { + let dependency_in_set = if *dependency_id == node_id { + // Is a self edge: use the in_set that we're considering creating. + &in_set + } else { + &graph[*dependency_id].0.in_set + }; + + // Any param provided by this key must be consumed. + dependency_key + .provided_params + .iter() + .all(|p| dependency_in_set.contains(p)) + }); + if !in_set_satisfiable { + continue; + } + + // Compute the out_set for this combination. Any Params that are consumed here are removed + // from the out_set that Rule dependencies will be allowed to consume. Params that weren't + // present in the out_set were already filtered near the top of this method. + let out_set = { + let mut out_set = out_set.clone(); + for (_, dependency_id, _) in &combination { + if let Node::Param(p) = graph[*dependency_id].0.node { + out_set.remove(&p); + } + } + out_set + }; + + // We can eliminate this candidate if any dependencies have minimal in_sets which contain + // values not present in the computed out_set (meaning that they consume a Param that isn't + // in scope). If their in_sets are not minimal, then they might shrink further in the future, + // and so we cannot eliminate them quite yet. + let out_set_satisfiable = + combination + .iter() + .all(|(_, dependency_id, dependency_in_set)| { + matches!(graph[*dependency_id].0.node, Node::Param(_)) + || !minimal_in_set.contains(dependency_id) + || dependency_in_set.difference(&out_set).next().is_none() + }); + if !out_set_satisfiable { + continue; + } + + // If we've made it this far, we're worth recording. Huzzah! + let entry = ParamsLabeled { + node: graph[node_id].0.node.clone(), + in_set: in_set.clone(), + // NB: See the method doc. Although our dependents could technically still provide a + // larger set of params, anything not in the in_set is not consumed in this subgraph, + // and the out_set shrinks correspondingly to avoid creating redundant nodes. + out_set: out_set.intersection(&in_set).cloned().collect(), + }; + combinations + .entry(entry) + .or_insert_with(HashSet::default) + .extend(combination.into_iter().map(|(dk, di, _)| (dk.clone(), *di))); } - out_set - }; - - // We can eliminate this candidate if any dependencies have minimal in_sets which contain - // values not present in the computed out_set (meaning that they consume a Param that isn't - // in scope). If their in_sets are not minimal, then they might shrink further in the future, - // and so we cannot eliminate them quite yet. - let out_set_satisfiable = combination - .iter() - .all(|(_, dependency_id, dependency_in_set)| { - matches!(graph[*dependency_id].0.node, Node::Param(_)) - || !minimal_in_set.contains(dependency_id) - || dependency_in_set.difference(&out_set).next().is_none() - }); - if !out_set_satisfiable { - continue; - } - - // If we've made it this far, we're worth recording. Huzzah! - let entry = ParamsLabeled { - node: graph[node_id].0.node.clone(), - in_set: in_set.clone(), - // NB: See the method doc. 
Although our dependents could technically still provide a - // larger set of params, anything not in the in_set is not consumed in this subgraph, - // and the out_set shrinks correspondingly to avoid creating redundant nodes. - out_set: out_set.intersection(&in_set).cloned().collect(), - }; - combinations - .entry(entry) - .or_insert_with(HashSet::default) - .extend(combination.into_iter().map(|(dk, di, _)| (dk.clone(), *di))); - } - combinations - } + combinations + } } /// /// Generate all combinations of one element from each input vector. /// pub(crate) fn combinations_of_one(input: &[Vec]) -> Box> + '_> { - combinations_of_one_helper(input, input.len()) + combinations_of_one_helper(input, input.len()) } fn combinations_of_one_helper( - input: &[Vec], - combination_len: usize, + input: &[Vec], + combination_len: usize, ) -> Box> + '_> { - match input.len() { - 0 => Box::new(std::iter::empty()), - 1 => Box::new(input[0].iter().map(move |item| { - let mut output = Vec::with_capacity(combination_len); - output.push(item); - output - })), - len => { - let last_idx = len - 1; - Box::new(input[last_idx].iter().flat_map(move |item| { - combinations_of_one_helper(&input[..last_idx], combination_len).map(move |mut prefix| { - prefix.push(item); - prefix - }) - })) + match input.len() { + 0 => Box::new(std::iter::empty()), + 1 => Box::new(input[0].iter().map(move |item| { + let mut output = Vec::with_capacity(combination_len); + output.push(item); + output + })), + len => { + let last_idx = len - 1; + Box::new(input[last_idx].iter().flat_map(move |item| { + combinations_of_one_helper(&input[..last_idx], combination_len).map( + move |mut prefix| { + prefix.push(item); + prefix + }, + ) + })) + } } - } } diff --git a/src/rust/engine/rule_graph/src/lib.rs b/src/rust/engine/rule_graph/src/lib.rs index c820c0cd3d9..39dcab54901 100644 --- a/src/rust/engine/rule_graph/src/lib.rs +++ b/src/rust/engine/rule_graph/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. #![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. 
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -37,19 +37,19 @@ use internment::Intern; pub use crate::builder::Builder; pub use crate::rules::{ - DependencyKey, DisplayForGraph, DisplayForGraphArgs, ParamTypes, Query, Rule, RuleId, TypeId, + DependencyKey, DisplayForGraph, DisplayForGraphArgs, ParamTypes, Query, Rule, RuleId, TypeId, }; #[derive(Eq, Hash, PartialEq, Clone, Debug)] struct UnreachableError { - rule: R, - diagnostic: Diagnostic, + rule: R, + diagnostic: Diagnostic, } impl UnreachableError { - #[allow(dead_code)] - fn new(rule: R) -> UnreachableError { - UnreachableError { + #[allow(dead_code)] + fn new(rule: R) -> UnreachableError { + UnreachableError { rule, diagnostic: Diagnostic { params: ParamTypes::default(), @@ -57,37 +57,37 @@ impl UnreachableError { details: vec![], }, } - } + } } #[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)] pub enum EntryWithDeps { - Root(RootEntry), - Rule(RuleEntry), - Reentry(Reentry), + Root(RootEntry), + Rule(RuleEntry), + Reentry(Reentry), } impl EntryWithDeps { - pub fn rule(&self) -> Option { - match self { - EntryWithDeps::Rule(RuleEntry { rule, .. }) => Some(rule.clone()), - EntryWithDeps::Root(_) | EntryWithDeps::Reentry(_) => None, + pub fn rule(&self) -> Option { + match self { + EntryWithDeps::Rule(RuleEntry { rule, .. }) => Some(rule.clone()), + EntryWithDeps::Root(_) | EntryWithDeps::Reentry(_) => None, + } } - } - pub fn params(&self) -> &ParamTypes { - match self { - EntryWithDeps::Rule(ref ie) => &ie.params, - EntryWithDeps::Root(ref re) => &re.0.params, - EntryWithDeps::Reentry(ref re) => &re.params, + pub fn params(&self) -> &ParamTypes { + match self { + EntryWithDeps::Rule(ref ie) => &ie.params, + EntryWithDeps::Root(ref re) => &re.0.params, + EntryWithDeps::Reentry(ref re) => &re.params, + } } - } } #[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)] pub enum Entry { - Param(R::TypeId), - WithDeps(Intern>), + Param(R::TypeId), + WithDeps(Intern>), } #[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)] @@ -95,29 +95,29 @@ pub struct RootEntry(Query); #[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)] pub struct Reentry { - params: ParamTypes, - pub query: Query, + params: ParamTypes, + pub query: Query, } #[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)] pub struct RuleEntry { - params: ParamTypes, - rule: R, + params: ParamTypes, + rule: R, } impl RuleEntry { - pub fn rule(&self) -> &R { - &self.rule - } + pub fn rule(&self) -> &R { + &self.rule + } } type RuleDependencyEdges = HashMap>, RuleEdges>; #[derive(Eq, Hash, PartialEq, Clone, Debug)] struct Diagnostic { - params: ParamTypes, - reason: String, - details: Vec<(Entry, Option<&'static str>)>, + params: ParamTypes, + reason: String, + details: Vec<(Entry, Option<&'static str>)>, } /// @@ -125,413 +125,413 @@ struct Diagnostic { /// #[derive(Debug)] pub struct RuleGraph { - queries: Vec>, - rule_dependency_edges: RuleDependencyEdges, - unreachable_rules: Vec>, + queries: Vec>, + rule_dependency_edges: RuleDependencyEdges, + unreachable_rules: Vec>, } // TODO: We can't derive this due to https://github.com/rust-lang/rust/issues/26925, which // unnecessarily requires `Rule: Default`. 
impl Default for RuleGraph { - fn default() -> Self { - RuleGraph { - queries: Vec::default(), - rule_dependency_edges: RuleDependencyEdges::default(), - unreachable_rules: Vec::default(), + fn default() -> Self { + RuleGraph { + queries: Vec::default(), + rule_dependency_edges: RuleDependencyEdges::default(), + unreachable_rules: Vec::default(), + } } - } } fn params_str(params: &ParamTypes) -> String { - T::display(params.iter().cloned()) + T::display(params.iter().cloned()) } pub fn entry_str(entry: &Entry) -> String { - entry.fmt_for_graph(DisplayForGraphArgs { multiline: false }) + entry.fmt_for_graph(DisplayForGraphArgs { multiline: false }) } #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)] pub struct GraphVizEntryWithAttrs { - entry_str: String, - attrs_str: Option, + entry_str: String, + attrs_str: Option, } pub enum Palette { - Olive, - Gray, - Orange, - Blue, + Olive, + Gray, + Orange, + Blue, } impl Palette { - // https://c.eev.ee/kouyou/ is wonderful for selecting lovely color juxtapositions across multiple - // different color axes. - fn color_string(&self) -> String { - // These color values are all in HSV. See https://www.graphviz.org/doc/info/colors.html for - // other methods of specifying - // colors. https://renenyffenegger.ch/notes/tools/Graphviz/attributes/_color/index may also be - // useful. - match self { - Self::Olive => "0.2214,0.7179,0.8528".to_string(), - Self::Gray => "0.576,0,0.6242".to_string(), - Self::Orange => "0.08,0.5,0.976".to_string(), - Self::Blue => "0.5,1,0.9".to_string(), + // https://c.eev.ee/kouyou/ is wonderful for selecting lovely color juxtapositions across multiple + // different color axes. + fn color_string(&self) -> String { + // These color values are all in HSV. See https://www.graphviz.org/doc/info/colors.html for + // other methods of specifying + // colors. https://renenyffenegger.ch/notes/tools/Graphviz/attributes/_color/index may also be + // useful. 
+ match self { + Self::Olive => "0.2214,0.7179,0.8528".to_string(), + Self::Gray => "0.576,0,0.6242".to_string(), + Self::Orange => "0.08,0.5,0.976".to_string(), + Self::Blue => "0.5,1,0.9".to_string(), + } } - } } impl DisplayForGraph for Palette { - fn fmt_for_graph(&self, _: DisplayForGraphArgs) -> String { - format!("[color=\"{}\",style=filled]", self.color_string()) - } + fn fmt_for_graph(&self, _: DisplayForGraphArgs) -> String { + format!("[color=\"{}\",style=filled]", self.color_string()) + } } impl DisplayForGraph for Entry { - fn fmt_for_graph(&self, display_args: DisplayForGraphArgs) -> String { - match self { - Entry::WithDeps(e) => e.fmt_for_graph(display_args), - Entry::Param(type_id) => format!("Param({type_id})"), + fn fmt_for_graph(&self, display_args: DisplayForGraphArgs) -> String { + match self { + Entry::WithDeps(e) => e.fmt_for_graph(display_args), + Entry::Param(type_id) => format!("Param({type_id})"), + } } - } } impl DisplayForGraph for EntryWithDeps { - fn fmt_for_graph(&self, display_args: DisplayForGraphArgs) -> String { - match self { - &EntryWithDeps::Rule(RuleEntry { - ref rule, - ref params, - }) => format!( - "{}{}for {}", - rule.fmt_for_graph(display_args), - display_args.line_separator(), - params_str(params) - ), - EntryWithDeps::Root(root) => format!( - "Query({}){}for {}", - root.0.product, - display_args.line_separator(), - params_str(&root.0.params) - ), - EntryWithDeps::Reentry(reentry) => format!( - "Reentry({}){}for {}", - reentry.query.product, - display_args.line_separator(), - params_str(&reentry.params) - ), + fn fmt_for_graph(&self, display_args: DisplayForGraphArgs) -> String { + match self { + &EntryWithDeps::Rule(RuleEntry { + ref rule, + ref params, + }) => format!( + "{}{}for {}", + rule.fmt_for_graph(display_args), + display_args.line_separator(), + params_str(params) + ), + EntryWithDeps::Root(root) => format!( + "Query({}){}for {}", + root.0.product, + display_args.line_separator(), + params_str(&root.0.params) + ), + EntryWithDeps::Reentry(reentry) => format!( + "Reentry({}){}for {}", + reentry.query.product, + display_args.line_separator(), + params_str(&reentry.params) + ), + } } - } } /// /// Apply coloration to several nodes. /// pub fn visualize_entry( - entry: &Entry, - display_args: DisplayForGraphArgs, + entry: &Entry, + display_args: DisplayForGraphArgs, ) -> GraphVizEntryWithAttrs { - let entry_str = entry.fmt_for_graph(display_args); - let attrs_str = match entry { - Entry::WithDeps(e) => { - // Color "singleton" entries (with no params)! - if e.params().is_empty() { - Some(Palette::Olive.fmt_for_graph(display_args)) - } else { - // Color "intrinsic" entries (provided by the rust codebase)! - e.rule() - .and_then(|r| r.color()) - .map(|color| color.fmt_for_graph(display_args)) - } - } - &Entry::Param(_) => { - // Color "Param"s. - Some(Palette::Orange.fmt_for_graph(display_args)) + let entry_str = entry.fmt_for_graph(display_args); + let attrs_str = match entry { + Entry::WithDeps(e) => { + // Color "singleton" entries (with no params)! + if e.params().is_empty() { + Some(Palette::Olive.fmt_for_graph(display_args)) + } else { + // Color "intrinsic" entries (provided by the rust codebase)! + e.rule() + .and_then(|r| r.color()) + .map(|color| color.fmt_for_graph(display_args)) + } + } + &Entry::Param(_) => { + // Color "Param"s. 
+ Some(Palette::Orange.fmt_for_graph(display_args)) + } + }; + GraphVizEntryWithAttrs { + entry_str, + attrs_str, } - }; - GraphVizEntryWithAttrs { - entry_str, - attrs_str, - } } fn entry_with_deps_str(entry: &EntryWithDeps) -> String { - entry.fmt_for_graph(DisplayForGraphArgs { multiline: false }) + entry.fmt_for_graph(DisplayForGraphArgs { multiline: false }) } impl RuleGraph { - pub fn new( - rules: IndexSet, - queries: IndexSet>, - ) -> Result, String> { - Builder::new(rules, queries).graph() - } - - pub fn find_root_edges>( - &self, - param_inputs: I, - product: R::TypeId, - ) -> Result, String> { - let (_, edges) = self.find_root(param_inputs, product)?; - Ok(edges) - } - - /// - /// Create a copy of this RuleGraph filtered to only the subgraph below the root matched by the - /// given product and params. - /// - pub fn subgraph>( - &self, - param_inputs: I, - product: R::TypeId, - ) -> Result, String> { - let (root, _) = self.find_root(param_inputs, product)?; - - // Walk the graph, starting from root entries. - let mut entry_stack: Vec<_> = vec![root]; - let mut reachable = HashMap::default(); - while let Some(entry) = entry_stack.pop() { - if reachable.contains_key(&entry) { - continue; - } - - if let Some(edges) = self.rule_dependency_edges.get(&entry) { - reachable.insert(entry, edges.clone()); - - entry_stack.extend(edges.all_dependencies().filter_map(|e| match e.as_ref() { - Entry::WithDeps(e) => Some(e), - _ => None, - })); - } else { - return Err(format!("Unknown entry in RuleGraph: {entry:?}")); - } + pub fn new( + rules: IndexSet, + queries: IndexSet>, + ) -> Result, String> { + Builder::new(rules, queries).graph() } - Ok(RuleGraph { - queries: self.queries.clone(), - rule_dependency_edges: reachable, - unreachable_rules: Vec::default(), - }) - } - - /// - /// Returns all types consumed by rules within this RuleGraph. - /// - pub fn consumed_types(&self) -> HashSet { - self - .rule_dependency_edges - .iter() - .flat_map(|(entry, edges)| { - entry - .params() - .iter() - .cloned() - .chain(edges.dependencies.keys().map(|k| k.product())) - }) - .collect() - } - - /// - /// Find the entrypoint in this RuleGraph for the given product and params. - /// - pub fn find_root>( - &self, - param_inputs: I, - product: R::TypeId, - ) -> Result<(Intern>, RuleEdges), String> { - let params: ParamTypes<_> = param_inputs.into_iter().collect(); - - // Attempt to find an exact match. - let maybe_root = Intern::new(EntryWithDeps::Root(RootEntry(Query { - product, - params: params.clone(), - }))); - if let Some(edges) = self.rule_dependency_edges.get(&maybe_root) { - return Ok((maybe_root, edges.clone())); + pub fn find_root_edges>( + &self, + param_inputs: I, + product: R::TypeId, + ) -> Result, String> { + let (_, edges) = self.find_root(param_inputs, product)?; + Ok(edges) } - // Otherwise, scan for partial/subset matches. - // TODO: Is it worth indexing this by product type? - let subset_matches = self - .rule_dependency_edges - .iter() - .filter_map(|(entry, edges)| match entry.as_ref() { - EntryWithDeps::Root(ref root_entry) - if root_entry.0.product == product && root_entry.0.params.is_subset(¶ms) => - { - Some((entry, edges)) - } - _ => None, - }) - .collect::>(); - - match subset_matches.len() { - 1 => { - let (root_entry, edges) = subset_matches[0]; - Ok((*root_entry, edges.clone())) - } - 0 => { - // The Params were all registered as RootRules, but the combination wasn't legal. 
- let mut suggestions: Vec<_> = self - .rule_dependency_edges - .keys() - .filter_map(|entry| match entry.as_ref() { - EntryWithDeps::Root(ref root_entry) if root_entry.0.product == product => { - Some(format!("Params({})", params_str(&root_entry.0.params))) + /// + /// Create a copy of this RuleGraph filtered to only the subgraph below the root matched by the + /// given product and params. + /// + pub fn subgraph>( + &self, + param_inputs: I, + product: R::TypeId, + ) -> Result, String> { + let (root, _) = self.find_root(param_inputs, product)?; + + // Walk the graph, starting from root entries. + let mut entry_stack: Vec<_> = vec![root]; + let mut reachable = HashMap::default(); + while let Some(entry) = entry_stack.pop() { + if reachable.contains_key(&entry) { + continue; } - _ => None, - }) - .collect(); - let suggestions_str = if suggestions.is_empty() { - format!( - "return the type {}. Try registering QueryRule({} for {}).", - product, + + if let Some(edges) = self.rule_dependency_edges.get(&entry) { + reachable.insert(entry, edges.clone()); + + entry_stack.extend(edges.all_dependencies().filter_map(|e| match e.as_ref() { + Entry::WithDeps(e) => Some(e), + _ => None, + })); + } else { + return Err(format!("Unknown entry in RuleGraph: {entry:?}")); + } + } + + Ok(RuleGraph { + queries: self.queries.clone(), + rule_dependency_edges: reachable, + unreachable_rules: Vec::default(), + }) + } + + /// + /// Returns all types consumed by rules within this RuleGraph. + /// + pub fn consumed_types(&self) -> HashSet { + self.rule_dependency_edges + .iter() + .flat_map(|(entry, edges)| { + entry + .params() + .iter() + .cloned() + .chain(edges.dependencies.keys().map(|k| k.product())) + }) + .collect() + } + + /// + /// Find the entrypoint in this RuleGraph for the given product and params. + /// + pub fn find_root>( + &self, + param_inputs: I, + product: R::TypeId, + ) -> Result<(Intern>, RuleEdges), String> { + let params: ParamTypes<_> = param_inputs.into_iter().collect(); + + // Attempt to find an exact match. + let maybe_root = Intern::new(EntryWithDeps::Root(RootEntry(Query { product, - params_str(¶ms), - ) - } else { - suggestions.sort(); - format!( + params: params.clone(), + }))); + if let Some(edges) = self.rule_dependency_edges.get(&maybe_root) { + return Ok((maybe_root, edges.clone())); + } + + // Otherwise, scan for partial/subset matches. + // TODO: Is it worth indexing this by product type? + let subset_matches = self + .rule_dependency_edges + .iter() + .filter_map(|(entry, edges)| match entry.as_ref() { + EntryWithDeps::Root(ref root_entry) + if root_entry.0.product == product + && root_entry.0.params.is_subset(¶ms) => + { + Some((entry, edges)) + } + _ => None, + }) + .collect::>(); + + match subset_matches.len() { + 1 => { + let (root_entry, edges) = subset_matches[0]; + Ok((*root_entry, edges.clone())) + } + 0 => { + // The Params were all registered as RootRules, but the combination wasn't legal. + let mut suggestions: Vec<_> = self + .rule_dependency_edges + .keys() + .filter_map(|entry| match entry.as_ref() { + EntryWithDeps::Root(ref root_entry) if root_entry.0.product == product => { + Some(format!("Params({})", params_str(&root_entry.0.params))) + } + _ => None, + }) + .collect(); + let suggestions_str = if suggestions.is_empty() { + format!( + "return the type {}. 
Try registering QueryRule({} for {}).", + product, + product, + params_str(¶ms), + ) + } else { + suggestions.sort(); + format!( "can compute {} given input Params({}), but it can be produced using:\n {}", product, params_str(¶ms), suggestions.join("\n ") ) - }; - Err(format!("No installed QueryRules {suggestions_str}",)) - } - _ => { - let match_strs = subset_matches - .into_iter() - .map(|(e, _)| entry_with_deps_str(e)) - .collect::>(); - Err(format!( - "More than one set of @rules can compute {} for input Params({}):\n {}", - product, - params_str(¶ms), - match_strs.join("\n "), - )) - } + }; + Err(format!("No installed QueryRules {suggestions_str}",)) + } + _ => { + let match_strs = subset_matches + .into_iter() + .map(|(e, _)| entry_with_deps_str(e)) + .collect::>(); + Err(format!( + "More than one set of @rules can compute {} for input Params({}):\n {}", + product, + params_str(¶ms), + match_strs.join("\n "), + )) + } + } } - } - - /// - /// TODO: It's not clear what is preventing `Node` implementations from ending up with non-Inner - /// entries, but it would be good to make it typesafe instead. - /// - pub fn edges_for_inner(&self, entry: &Entry) -> Option> { - if let Entry::WithDeps(ref e) = entry { - self.rule_dependency_edges.get(e).cloned() - } else { - panic!("not an inner entry! {entry:?}") + + /// + /// TODO: It's not clear what is preventing `Node` implementations from ending up with non-Inner + /// entries, but it would be good to make it typesafe instead. + /// + pub fn edges_for_inner(&self, entry: &Entry) -> Option> { + if let Entry::WithDeps(ref e) = entry { + self.rule_dependency_edges.get(e).cloned() + } else { + panic!("not an inner entry! {entry:?}") + } } - } - pub fn validate_reachability(&self) -> Result<(), String> { - if self.unreachable_rules.is_empty() { - return Ok(()); + pub fn validate_reachability(&self) -> Result<(), String> { + if self.unreachable_rules.is_empty() { + return Ok(()); + } + + // TODO: This method is currently a noop: see https://github.com/pantsbuild/pants/issues/10649. + Ok(()) } - // TODO: This method is currently a noop: see https://github.com/pantsbuild/pants/issues/10649. 
- Ok(()) - } - - pub fn visualize(&self, f: &mut dyn io::Write) -> io::Result<()> { - let display_args = DisplayForGraphArgs { multiline: true }; - let mut queries_strs = self - .queries - .iter() - .map(|q| q.to_string()) - .collect::>(); - queries_strs.sort(); - writeln!(f, "digraph {{")?; - writeln!(f, " /*")?; - writeln!(f, " queries:")?; - writeln!( - f, - "{}", - queries_strs - .iter() - .map(|q| format!(" {}", q)) - .collect::>() - .join(",\n") - )?; - writeln!(f, " */")?; - writeln!(f, " // root entries")?; - let mut root_rule_strs = self - .rule_dependency_edges - .iter() - .filter_map(|(k, deps)| match k.as_ref() { - EntryWithDeps::Root(_) => { - let root_str = k.fmt_for_graph(display_args); - let mut dep_entries = deps - .all_dependencies() - .map(|d| visualize_entry(d, display_args)) - .collect::>(); - dep_entries.sort(); - let deps_with_attrs = dep_entries + pub fn visualize(&self, f: &mut dyn io::Write) -> io::Result<()> { + let display_args = DisplayForGraphArgs { multiline: true }; + let mut queries_strs = self + .queries .iter() - .cloned() - .filter(|d| d.attrs_str.is_some()) - .map(|d| format!("\"{}\" {}", d.entry_str, d.attrs_str.unwrap())) - .collect::>() - .join("\n"); - Some(format!( - " \"{}\" {}\n{} \"{}\" -> {{{}}}", - root_str, - Palette::Blue.fmt_for_graph(display_args), - deps_with_attrs, - root_str, - dep_entries - .iter() - .cloned() - .map(|d| format!("\"{}\"", d.entry_str)) - .collect::>() - .join(" ") - )) - } - _ => None, - }) - .collect::>(); - root_rule_strs.sort(); - writeln!(f, "{}", root_rule_strs.join("\n"))?; - - writeln!(f, " // internal entries")?; - let mut internal_rule_strs = self - .rule_dependency_edges - .iter() - .filter_map(|(k, deps)| match k.as_ref() { - &EntryWithDeps::Rule(_) => { - let mut dep_entries = deps - .all_dependencies() - .map(|d| visualize_entry(d, display_args)) - .collect::>(); - dep_entries.sort(); - let deps_with_attrs = dep_entries + .map(|q| q.to_string()) + .collect::>(); + queries_strs.sort(); + writeln!(f, "digraph {{")?; + writeln!(f, " /*")?; + writeln!(f, " queries:")?; + writeln!( + f, + "{}", + queries_strs + .iter() + .map(|q| format!(" {}", q)) + .collect::>() + .join(",\n") + )?; + writeln!(f, " */")?; + writeln!(f, " // root entries")?; + let mut root_rule_strs = self + .rule_dependency_edges .iter() - .cloned() - .filter(|d| d.attrs_str.is_some()) - .map(|d| format!("\"{}\" {}", d.entry_str, d.attrs_str.unwrap())) - .collect::>() - .join("\n"); - Some(format!( - "{} \"{}\" -> {{{}}}", - deps_with_attrs, - k.fmt_for_graph(display_args), - dep_entries - .iter() - .cloned() - .map(|d| format!("\"{}\"", d.entry_str)) - .collect::>() - .join(" "), - )) - } - _ => None, - }) - .collect::>(); - internal_rule_strs.sort(); - writeln!(f, "{}", internal_rule_strs.join("\n"))?; - writeln!(f, "}}") - } + .filter_map(|(k, deps)| match k.as_ref() { + EntryWithDeps::Root(_) => { + let root_str = k.fmt_for_graph(display_args); + let mut dep_entries = deps + .all_dependencies() + .map(|d| visualize_entry(d, display_args)) + .collect::>(); + dep_entries.sort(); + let deps_with_attrs = dep_entries + .iter() + .cloned() + .filter(|d| d.attrs_str.is_some()) + .map(|d| format!("\"{}\" {}", d.entry_str, d.attrs_str.unwrap())) + .collect::>() + .join("\n"); + Some(format!( + " \"{}\" {}\n{} \"{}\" -> {{{}}}", + root_str, + Palette::Blue.fmt_for_graph(display_args), + deps_with_attrs, + root_str, + dep_entries + .iter() + .cloned() + .map(|d| format!("\"{}\"", d.entry_str)) + .collect::>() + .join(" ") + )) + } + _ => None, + }) 
+ .collect::>(); + root_rule_strs.sort(); + writeln!(f, "{}", root_rule_strs.join("\n"))?; + + writeln!(f, " // internal entries")?; + let mut internal_rule_strs = self + .rule_dependency_edges + .iter() + .filter_map(|(k, deps)| match k.as_ref() { + &EntryWithDeps::Rule(_) => { + let mut dep_entries = deps + .all_dependencies() + .map(|d| visualize_entry(d, display_args)) + .collect::>(); + dep_entries.sort(); + let deps_with_attrs = dep_entries + .iter() + .cloned() + .filter(|d| d.attrs_str.is_some()) + .map(|d| format!("\"{}\" {}", d.entry_str, d.attrs_str.unwrap())) + .collect::>() + .join("\n"); + Some(format!( + "{} \"{}\" -> {{{}}}", + deps_with_attrs, + k.fmt_for_graph(display_args), + dep_entries + .iter() + .cloned() + .map(|d| format!("\"{}\"", d.entry_str)) + .collect::>() + .join(" "), + )) + } + _ => None, + }) + .collect::>(); + internal_rule_strs.sort(); + writeln!(f, "{}", internal_rule_strs.join("\n"))?; + writeln!(f, "}}") + } } /// @@ -539,27 +539,27 @@ impl RuleGraph { /// #[derive(Eq, PartialEq, Clone, Debug)] pub struct RuleEdges { - dependencies: HashMap, Intern>>, + dependencies: HashMap, Intern>>, } impl RuleEdges { - pub fn entry_for(&self, dependency_key: &DependencyKey) -> Option>> { - self.dependencies.get(dependency_key).cloned() - } + pub fn entry_for(&self, dependency_key: &DependencyKey) -> Option>> { + self.dependencies.get(dependency_key).cloned() + } - pub fn all_dependencies(&self) -> impl Iterator>> { - self.dependencies.values() - } + pub fn all_dependencies(&self) -> impl Iterator>> { + self.dependencies.values() + } } // TODO: We can't derive this due to https://github.com/rust-lang/rust/issues/26925, which // unnecessarily requires `Rule: Default`. impl Default for RuleEdges { - fn default() -> Self { - RuleEdges { - dependencies: HashMap::default(), + fn default() -> Self { + RuleEdges { + dependencies: HashMap::default(), + } } - } } #[cfg(test)] diff --git a/src/rust/engine/rule_graph/src/rules.rs b/src/rust/engine/rule_graph/src/rules.rs index b222f8997a6..59dee12d5cb 100644 --- a/src/rust/engine/rule_graph/src/rules.rs +++ b/src/rust/engine/rule_graph/src/rules.rs @@ -13,14 +13,14 @@ use super::{params_str, Palette}; pub type ParamTypes = BTreeSet; pub trait TypeId: - Clone + Copy + Debug + DeepSizeOf + Display + Hash + Eq + Ord + Sized + Send + Sync + 'static + Clone + Copy + Debug + DeepSizeOf + Display + Hash + Eq + Ord + Sized + Send + Sync + 'static { - /// - /// Render a string for a collection of TypeIds. - /// - fn display(type_ids: I) -> String - where - I: Iterator; + /// + /// Render a string for a collection of TypeIds. + /// + fn display(type_ids: I) -> String + where + I: Iterator; } // Identifies a specific Rule when called by name. @@ -30,152 +30,151 @@ pub trait TypeId: pub struct RuleId(String); impl RuleId { - pub fn new(id: &str) -> Self { - Self(id.into()) - } + pub fn new(id: &str) -> Self { + Self(id.into()) + } - pub fn from_string(s: String) -> Self { - Self(s) - } + pub fn from_string(s: String) -> Self { + Self(s) + } } impl Display for RuleId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } } // NB: Most of our expected usecases for multiple-provided-parameters involve two parameters, hence // the SmallVec sizing here. See also `Self::provides`. 
#[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug, PartialOrd, Ord)] pub struct DependencyKey { - // The id of the rule represented by this DependencyKey, if provided at the callsite - // (e.g., via call-by-name semantics). - pub rule_id: Option, - - pub product: T, - // The param types which are introduced into scope at the callsite ("provided"). - pub provided_params: SmallVec<[T; 2]>, - // The param types which must already be in scope at the callsite, regardless of whether they - // are consumed in order to produce the product type. - // - // If a `DependencyKey` declares any `in_scope_params`, then _only_ those params (and provided - // params) are available to the callee. - pub in_scope_params: Option>, + // The id of the rule represented by this DependencyKey, if provided at the callsite + // (e.g., via call-by-name semantics). + pub rule_id: Option, + + pub product: T, + // The param types which are introduced into scope at the callsite ("provided"). + pub provided_params: SmallVec<[T; 2]>, + // The param types which must already be in scope at the callsite, regardless of whether they + // are consumed in order to produce the product type. + // + // If a `DependencyKey` declares any `in_scope_params`, then _only_ those params (and provided + // params) are available to the callee. + pub in_scope_params: Option>, } impl DependencyKey { - pub fn new(product: T) -> Self { - DependencyKey { - rule_id: None, - product, - provided_params: SmallVec::default(), - in_scope_params: None, + pub fn new(product: T) -> Self { + DependencyKey { + rule_id: None, + product, + provided_params: SmallVec::default(), + in_scope_params: None, + } + } + + pub fn for_known_rule(rule_id: RuleId, product: T) -> Self { + DependencyKey { + rule_id: Some(rule_id), + product, + provided_params: SmallVec::default(), + in_scope_params: None, + } } - } - - pub fn for_known_rule(rule_id: RuleId, product: T) -> Self { - DependencyKey { - rule_id: Some(rule_id), - product, - provided_params: SmallVec::default(), - in_scope_params: None, + + pub fn provided_params>(self, provided_params: I) -> Self { + let mut provided_params = provided_params.into_iter().collect::>(); + provided_params.sort(); + + #[cfg(debug_assertions)] + { + let original_len = provided_params.len(); + provided_params.dedup(); + if original_len != provided_params.len() { + panic!("Expected unique provided params."); + } + } + + Self { + provided_params, + ..self + } } - } - - pub fn provided_params>(self, provided_params: I) -> Self { - let mut provided_params = provided_params.into_iter().collect::>(); - provided_params.sort(); - - #[cfg(debug_assertions)] - { - let original_len = provided_params.len(); - provided_params.dedup(); - if original_len != provided_params.len() { - panic!("Expected unique provided params."); - } + + pub fn in_scope_params>(self, in_scope_params: I) -> Self { + let mut in_scope_params = in_scope_params.into_iter().collect::>(); + in_scope_params.sort(); + + #[cfg(debug_assertions)] + { + let original_len = in_scope_params.len(); + in_scope_params.dedup(); + if original_len != in_scope_params.len() { + panic!("Expected unique in_scope params."); + } + } + + Self { + in_scope_params: Some(in_scope_params), + ..self + } } - Self { - provided_params, - ..self + /// + /// Returns the product (output) type for this dependency. 
+ /// + pub fn product(&self) -> T { + self.product } - } - - pub fn in_scope_params>(self, in_scope_params: I) -> Self { - let mut in_scope_params = in_scope_params.into_iter().collect::>(); - in_scope_params.sort(); - - #[cfg(debug_assertions)] - { - let original_len = in_scope_params.len(); - in_scope_params.dedup(); - if original_len != in_scope_params.len() { - panic!("Expected unique in_scope params."); - } + + /// + /// True if this DependencyKey provides the given type. + /// + /// NB: This is a linear scan, but that should be fine for small numbers of provided + /// params: see the struct doc. + /// + pub fn provides(&self, t: &T) -> bool { + self.provided_params.contains(t) } - Self { - in_scope_params: Some(in_scope_params), - ..self + /// + /// If this DependencyKey has in_scope_params, returns an equivalent Query, + /// + pub fn as_reentry_query(&self) -> Option> { + self.in_scope_params.as_ref().map(|in_scope_params| { + Query::new( + self.product, + self.provided_params + .iter() + .chain(in_scope_params.iter()) + .cloned(), + ) + }) } - } - - /// - /// Returns the product (output) type for this dependency. - /// - pub fn product(&self) -> T { - self.product - } - - /// - /// True if this DependencyKey provides the given type. - /// - /// NB: This is a linear scan, but that should be fine for small numbers of provided - /// params: see the struct doc. - /// - pub fn provides(&self, t: &T) -> bool { - self.provided_params.contains(t) - } - - /// - /// If this DependencyKey has in_scope_params, returns an equivalent Query, - /// - pub fn as_reentry_query(&self) -> Option> { - self.in_scope_params.as_ref().map(|in_scope_params| { - Query::new( - self.product, - self - .provided_params - .iter() - .chain(in_scope_params.iter()) - .cloned(), - ) - }) - } } impl Display for DependencyKey { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Some(rule_id) = &self.rule_id { - write!( - f, - "{}({:?}, ...) -> {}", - rule_id, self.provided_params, self.product - ) - } else if self.provided_params.is_empty() { - write!(f, "{}", self.product) - } else { - write!(f, "Get({}, {:?})", self.product, self.provided_params) + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(rule_id) = &self.rule_id { + write!( + f, + "{}({:?}, ...) -> {}", + rule_id, self.provided_params, self.product + ) + } else if self.provided_params.is_empty() { + write!(f, "{}", self.product) + } else { + write!(f, "Get({}, {:?})", self.product, self.provided_params) + } } - } } pub trait DisplayForGraph { - /// - /// Return a pretty-printed representation of this Rule's graph node, suitable for graphviz. - /// - fn fmt_for_graph(&self, display_args: DisplayForGraphArgs) -> String; + /// + /// Return a pretty-printed representation of this Rule's graph node, suitable for graphviz. 
+ /// + fn fmt_for_graph(&self, display_args: DisplayForGraphArgs) -> String; } /// @@ -183,91 +182,91 @@ pub trait DisplayForGraph { /// #[derive(Clone, Copy)] pub struct DisplayForGraphArgs { - pub multiline: bool, + pub multiline: bool, } impl DisplayForGraphArgs { - pub fn line_separator(&self) -> &'static str { - if self.multiline { - "\n" - } else { - " " + pub fn line_separator(&self) -> &'static str { + if self.multiline { + "\n" + } else { + " " + } } - } - pub fn optional_line_separator(&self) -> &'static str { - if self.multiline { - "\n" - } else { - "" + pub fn optional_line_separator(&self) -> &'static str { + if self.multiline { + "\n" + } else { + "" + } } - } } pub trait Rule: - Clone + Debug + Display + Hash + Eq + Sized + DisplayForGraph + Send + Sync + 'static + Clone + Debug + Display + Hash + Eq + Sized + DisplayForGraph + Send + Sync + 'static { - type TypeId: TypeId; - - /// - /// Returns the id of this Rule. - /// - fn id(&self) -> &RuleId; - - /// - /// Returns the product (output) type for this Rule. - /// - fn product(&self) -> Self::TypeId; - - /// - /// Return keys for the dependencies of this Rule. - /// - fn dependency_keys(&self) -> Vec<&DependencyKey>; - - /// - /// Returns types which this rule is not allowed to consume from the calling scope. - /// - fn masked_params(&self) -> Vec; - - /// - /// True if this rule implementation should be required to be reachable in the RuleGraph. - /// - fn require_reachable(&self) -> bool; - - /// - /// Return any specific color this rule should be drawn with on the visualized graph. Note that - /// this coloration setting may be superseded by other factors. - /// - fn color(&self) -> Option; + type TypeId: TypeId; + + /// + /// Returns the id of this Rule. + /// + fn id(&self) -> &RuleId; + + /// + /// Returns the product (output) type for this Rule. + /// + fn product(&self) -> Self::TypeId; + + /// + /// Return keys for the dependencies of this Rule. + /// + fn dependency_keys(&self) -> Vec<&DependencyKey>; + + /// + /// Returns types which this rule is not allowed to consume from the calling scope. + /// + fn masked_params(&self) -> Vec; + + /// + /// True if this rule implementation should be required to be reachable in the RuleGraph. + /// + fn require_reachable(&self) -> bool; + + /// + /// Return any specific color this rule should be drawn with on the visualized graph. Note that + /// this coloration setting may be superseded by other factors. 
+ /// + fn color(&self) -> Option; } #[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)] pub struct Query { - pub product: T, - pub params: ParamTypes, + pub product: T, + pub params: ParamTypes, } impl Query { - pub fn new>(product: T, params: I) -> Query { - Query { - product, - params: params.into_iter().collect(), + pub fn new>(product: T, params: I) -> Query { + Query { + product, + params: params.into_iter().collect(), + } } - } } impl Display for Query { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - self.fmt_for_graph(DisplayForGraphArgs { multiline: false }) - ) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + self.fmt_for_graph(DisplayForGraphArgs { multiline: false }) + ) + } } impl DisplayForGraph for Query { - fn fmt_for_graph(&self, _: DisplayForGraphArgs) -> String { - format!("Query({} for {})", self.product, params_str(&self.params)) - } + fn fmt_for_graph(&self, _: DisplayForGraphArgs) -> String { + format!("Query({} for {})", self.product, params_str(&self.params)) + } } diff --git a/src/rust/engine/rule_graph/src/tests.rs b/src/rust/engine/rule_graph/src/tests.rs index cb565531a3c..d2add6788c6 100644 --- a/src/rust/engine/rule_graph/src/tests.rs +++ b/src/rust/engine/rule_graph/src/tests.rs @@ -9,1101 +9,1115 @@ use crate::{DependencyKey, Palette, Query, RuleGraph, RuleId}; #[test] fn combinations_of_one_test() { - let combo = |input: Vec>| -> Vec> { - combinations_of_one(&input) - .map(|output| output.into_iter().cloned().collect()) - .collect() - }; - let empty: Vec> = vec![]; - - // Any empty set means the whole result is empty. - assert_eq!(empty, combo(vec![])); - assert_eq!(empty, combo(vec![vec![1, 2], vec![]])); - assert_eq!(empty, combo(vec![vec![], vec![1, 2]])); - - assert_eq!(vec![vec![1]], combo(vec![vec![1]])); - assert_eq!( - vec![vec![1, 3], vec![2, 3]], - combo(vec![vec![1, 2], vec![3]]) - ); - assert_eq!( - vec![vec![1, 2, 4], vec![1, 3, 4]], - combo(vec![vec![1], vec![2, 3], vec![4]]) - ); + let combo = |input: Vec>| -> Vec> { + combinations_of_one(&input) + .map(|output| output.into_iter().cloned().collect()) + .collect() + }; + let empty: Vec> = vec![]; + + // Any empty set means the whole result is empty. 
+ assert_eq!(empty, combo(vec![])); + assert_eq!(empty, combo(vec![vec![1, 2], vec![]])); + assert_eq!(empty, combo(vec![vec![], vec![1, 2]])); + + assert_eq!(vec![vec![1]], combo(vec![vec![1]])); + assert_eq!( + vec![vec![1, 3], vec![2, 3]], + combo(vec![vec![1, 2], vec![3]]) + ); + assert_eq!( + vec![vec![1, 2, 4], vec![1, 3, 4]], + combo(vec![vec![1], vec![2, 3], vec![4]]) + ); } #[test] fn validation() { - let rules = indexset![ - Rule::new("a", "get_a", vec![DependencyKey::new("b")]), - Rule::new("a", "get_a", vec![DependencyKey::new("c")]), - Rule::new("b", "get_b", vec![DependencyKey::new("c")]), - Rule::new("t", "get_t", vec![DependencyKey::new("u")]), - Rule::new("t", "get_t", vec![DependencyKey::new("v")]) - ]; - let queries = indexset![Query::new("a", vec!["b"])]; - assert!(RuleGraph::new(rules, queries) - .err() - .unwrap() - .contains("The following rule ids were each used by more than one rule: get_a, get_t")); + let rules = indexset![ + Rule::new("a", "get_a", vec![DependencyKey::new("b")]), + Rule::new("a", "get_a", vec![DependencyKey::new("c")]), + Rule::new("b", "get_b", vec![DependencyKey::new("c")]), + Rule::new("t", "get_t", vec![DependencyKey::new("u")]), + Rule::new("t", "get_t", vec![DependencyKey::new("v")]) + ]; + let queries = indexset![Query::new("a", vec!["b"])]; + assert!(RuleGraph::new(rules, queries) + .err() + .unwrap() + .contains("The following rule ids were each used by more than one rule: get_a, get_t")); } #[test] fn basic() { - let rules = indexset![Rule::new("a", "a_from_b", vec![DependencyKey::new("b")])]; - let queries = indexset![Query::new("a", vec!["b"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); + let rules = indexset![Rule::new("a", "a_from_b", vec![DependencyKey::new("b")])]; + let queries = indexset![Query::new("a", vec!["b"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["b"], "a").unwrap(); + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["b"], "a").unwrap(); } #[test] fn singleton() { - let rules = indexset![Rule::new("a", "a_singleton", vec![])]; - let queries = indexset![Query::new("a", vec![])]; - let graph = RuleGraph::new(rules, queries).unwrap(); + let rules = indexset![Rule::new("a", "a_singleton", vec![])]; + let queries = indexset![Query::new("a", vec![])]; + let graph = RuleGraph::new(rules, queries).unwrap(); - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec![""], "a").unwrap(); + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec![""], "a").unwrap(); } #[test] fn insufficient_query() { - let rules = indexset![Rule::new("a", "a_from_b", vec![DependencyKey::new("b")])]; - let queries = indexset![Query::new("a", vec![])]; + let rules = indexset![Rule::new("a", "a_from_b", vec![DependencyKey::new("b")])]; + let queries = indexset![Query::new("a", vec![])]; - assert!(RuleGraph::new(rules, queries) - .err() - .unwrap() - .contains("No installed rules return the type b")); + assert!(RuleGraph::new(rules, queries) + .err() + .unwrap() + .contains("No installed rules return the type b")); } #[test] fn no_rules() { - let rules: IndexSet = indexset![]; - let queries = indexset![Query::new("a", vec![])]; + let rules: IndexSet = indexset![]; + let queries = indexset![Query::new("a", vec![])]; - assert!(RuleGraph::new(rules, queries) - .err() - .unwrap() - .contains("No installed rules return the type a")); + assert!(RuleGraph::new(rules, queries) + .err() + .unwrap() + .contains("No 
installed rules return the type a")); } #[test] fn ambiguity() { - let rules = indexset![ - Rule::new("a", "a_from_b", vec![DependencyKey::new("b")]), - Rule::new("a", "a_from_c", vec![DependencyKey::new("c")]), - ]; - let queries = indexset![Query::new("a", vec!["b", "c"])]; - - assert!(RuleGraph::new(rules, queries) - .err() - .unwrap() - .contains("Encountered 1 rule graph error:\n Too many")); + let rules = indexset![ + Rule::new("a", "a_from_b", vec![DependencyKey::new("b")]), + Rule::new("a", "a_from_c", vec![DependencyKey::new("c")]), + ]; + let queries = indexset![Query::new("a", vec!["b", "c"])]; + + assert!(RuleGraph::new(rules, queries) + .err() + .unwrap() + .contains("Encountered 1 rule graph error:\n Too many")); } #[test] fn by_name_simple() { - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::for_known_rule(RuleId::new("b_from_c"), "b")] - ), - Rule::new("b", "b_from_c", vec![DependencyKey::new("c")]), - Rule::new("b", "b_from_d", vec![DependencyKey::new("d")]), - ]; - let queries = indexset![Query::new("a", vec!["c", "d"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["c", "d"], "a").unwrap(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::for_known_rule(RuleId::new("b_from_c"), "b")] + ), + Rule::new("b", "b_from_c", vec![DependencyKey::new("c")]), + Rule::new("b", "b_from_d", vec![DependencyKey::new("d")]), + ]; + let queries = indexset![Query::new("a", vec!["c", "d"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["c", "d"], "a").unwrap(); } #[test] fn masked_params() { - // The middle rule masks "e", so even though the query provides it, construction should fail. - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b").provided_params(vec!["c"])] - ), - Rule::new( - "b", - "b_from_c", - vec![ - DependencyKey::new("c"), - DependencyKey::new("c").provided_params(vec!["d"]) - ], - ) - .masked_params(vec!["e"]), - Rule::new( - "c", - "c_from_d", - vec![DependencyKey::new("d"), DependencyKey::new("e")], - ), - ]; - let queries = indexset![Query::new("a", vec!["e"])]; - - let res = RuleGraph::new(rules, queries).err().unwrap(); - assert!(res.contains( - "Encountered 1 rule graph error:\n \ + // The middle rule masks "e", so even though the query provides it, construction should fail. 
+ let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b").provided_params(vec!["c"])] + ), + Rule::new( + "b", + "b_from_c", + vec![ + DependencyKey::new("c"), + DependencyKey::new("c").provided_params(vec!["d"]) + ], + ) + .masked_params(vec!["e"]), + Rule::new( + "c", + "c_from_d", + vec![DependencyKey::new("d"), DependencyKey::new("e")], + ), + ]; + let queries = indexset![Query::new("a", vec!["e"])]; + + let res = RuleGraph::new(rules, queries).err().unwrap(); + assert!(res.contains( + "Encountered 1 rule graph error:\n \ Rule `b_from_c(2) -> b (for c+e)` masked the parameter type `e`, but it" - )); + )); } #[test] fn nested_single() { - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b").provided_params(vec!["c"])] - ), - Rule::new( - "b", - "b_from_c", - vec![DependencyKey::new("c"), DependencyKey::new("d")], - ), - ]; - let queries = indexset![Query::new("a", vec!["d"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["d"], "a").unwrap(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b").provided_params(vec!["c"])] + ), + Rule::new( + "b", + "b_from_c", + vec![DependencyKey::new("c"), DependencyKey::new("d")], + ), + ]; + let queries = indexset![Query::new("a", vec!["d"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["d"], "a").unwrap(); } #[test] fn nested_multiple() { - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b").provided_params(vec!["c"])] - ), - Rule::new( - "b", - "b_from_c", - vec![DependencyKey::new("c"), DependencyKey::new("d")], - ), - Rule::new( - "b", - "b_from_other_unreachable", - vec![DependencyKey::new("d")], - ), - ]; - let queries = indexset![Query::new("a", vec!["d"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["d"], "a").unwrap(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b").provided_params(vec!["c"])] + ), + Rule::new( + "b", + "b_from_c", + vec![DependencyKey::new("c"), DependencyKey::new("d")], + ), + Rule::new( + "b", + "b_from_other_unreachable", + vec![DependencyKey::new("d")], + ), + ]; + let queries = indexset![Query::new("a", vec!["d"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["d"], "a").unwrap(); } #[test] fn self_cycle_simple() { - let rules = indexset![Rule::new( - "Fib", - "fib", - vec![ - DependencyKey::new("int"), - DependencyKey::new("Fib").provided_params(vec!["int"]), - ], - )]; - let queries = indexset![ - Query::new("Fib", vec!["int"]), - Query::new("Fib", vec!["Fib"]), - ]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["int"], "Fib").unwrap(); - graph.find_root_edges(vec!["Fib"], "Fib").unwrap(); + let rules = indexset![Rule::new( + "Fib", + "fib", + vec![ + DependencyKey::new("int"), + DependencyKey::new("Fib").provided_params(vec!["int"]), + ], + )]; + let queries = indexset![ + Query::new("Fib", vec!["int"]), + Query::new("Fib", vec!["Fib"]), + ]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["int"], "Fib").unwrap(); + graph.find_root_edges(vec!["Fib"], 
"Fib").unwrap(); } #[test] fn self_cycle_with_external_dep() { - let rules = indexset![ - Rule::new( - "Thing", - "transitive_thing", - vec![ - DependencyKey::new("int"), - // We expect this to be a self-cycle. - DependencyKey::new("Thing").provided_params(vec!["int"]), - // And this to be satisfied by the second rule, even though we already have an int in scope. - DependencyKey::new("int").provided_params(vec!["ExternalDep"]), - ], - ), - Rule::new( - "int", - "external_dep", - vec![DependencyKey::new("ExternalDep")], - ), - ]; - let queries = indexset![Query::new("Thing", vec!["int"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["int"], "Thing").unwrap(); + let rules = indexset![ + Rule::new( + "Thing", + "transitive_thing", + vec![ + DependencyKey::new("int"), + // We expect this to be a self-cycle. + DependencyKey::new("Thing").provided_params(vec!["int"]), + // And this to be satisfied by the second rule, even though we already have an int in scope. + DependencyKey::new("int").provided_params(vec!["ExternalDep"]), + ], + ), + Rule::new( + "int", + "external_dep", + vec![DependencyKey::new("ExternalDep")], + ), + ]; + let queries = indexset![Query::new("Thing", vec!["int"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["int"], "Thing").unwrap(); } #[test] fn multiple_provided() { - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b").provided_params(vec!["c", "d"])] - ), - Rule::new( - "b", - "b_from_c_and_d", - vec![DependencyKey::new("c"), DependencyKey::new("d"),], - ), - ]; - let queries = indexset![Query::new("a", vec![])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec![], "a").unwrap(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b").provided_params(vec!["c", "d"])] + ), + Rule::new( + "b", + "b_from_c_and_d", + vec![DependencyKey::new("c"), DependencyKey::new("d"),], + ), + ]; + let queries = indexset![Query::new("a", vec![])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec![], "a").unwrap(); } #[test] fn in_scope_basic() { - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b") - .provided_params(vec!["c"]) - .in_scope_params(vec!["d"])] - ), - Rule::new("b", "b_from_c_and_d", vec![DependencyKey::new("c")],), - ]; - - // Valid when the param is in scope at the callsite (even if it isn't consumed). - let queries = indexset![Query::new("a", vec!["d"])]; - let graph = RuleGraph::new(rules.clone(), queries).unwrap(); - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["d"], "a").unwrap(); - - // Invalid otherwise when the param is not in scope. - let queries = indexset![Query::new("a", vec![])]; - RuleGraph::new(rules, queries).err().unwrap(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b") + .provided_params(vec!["c"]) + .in_scope_params(vec!["d"])] + ), + Rule::new("b", "b_from_c_and_d", vec![DependencyKey::new("c")],), + ]; + + // Valid when the param is in scope at the callsite (even if it isn't consumed). 
+ let queries = indexset![Query::new("a", vec!["d"])]; + let graph = RuleGraph::new(rules.clone(), queries).unwrap(); + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["d"], "a").unwrap(); + + // Invalid otherwise when the param is not in scope. + let queries = indexset![Query::new("a", vec![])]; + RuleGraph::new(rules, queries).err().unwrap(); } #[test] fn in_scope_filtered() { - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b") - .provided_params(vec!["c"]) - .in_scope_params(vec!["d"])] - ), - Rule::new( - "b", - "b_from_c_and_d", - vec![DependencyKey::new("c"), DependencyKey::new("e")], - ), - ]; - - // Even though both "d" and "e" are in scope at the Query, only "c" and "d" are in scope - // below `a_from_b`. - let queries = indexset![Query::new("a", vec!["d", "e"])]; - RuleGraph::new(rules, queries).err().unwrap(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b") + .provided_params(vec!["c"]) + .in_scope_params(vec!["d"])] + ), + Rule::new( + "b", + "b_from_c_and_d", + vec![DependencyKey::new("c"), DependencyKey::new("e")], + ), + ]; + + // Even though both "d" and "e" are in scope at the Query, only "c" and "d" are in scope + // below `a_from_b`. + let queries = indexset![Query::new("a", vec!["d", "e"])]; + RuleGraph::new(rules, queries).err().unwrap(); } #[test] fn in_scope_computed() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b") - .provided_params(vec!["c"]) - .in_scope_params(vec!["d"])] - ), - Rule::new("b", "b_from_c", vec![DependencyKey::new("c")],), - Rule::new("d", "d", vec![],), - ]; - - // Valid when the `in_scope` param can be computed or is a singleton. - let queries = indexset![Query::new("a", vec![])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec![], "a").unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b") + .provided_params(vec!["c"]) + .in_scope_params(vec!["d"])] + ), + Rule::new("b", "b_from_c", vec![DependencyKey::new("c")],), + Rule::new("d", "d", vec![],), + ]; + + // Valid when the `in_scope` param can be computed or is a singleton. + let queries = indexset![Query::new("a", vec![])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec![], "a").unwrap(); } #[test] fn in_scope_provided() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "a", - "a_from_b", - vec![DependencyKey::new("b") - .provided_params(vec!["c", "d"]) - .in_scope_params(vec!["d"])] - ), - Rule::new( - "b", - "b_from_c", - vec![DependencyKey::new("c"), DependencyKey::new("d")], - ), - ]; - - // Valid when the `in_scope` param can be computed or is a singleton. - let queries = indexset![Query::new("a", vec![])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec![], "a").unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "a", + "a_from_b", + vec![DependencyKey::new("b") + .provided_params(vec!["c", "d"]) + .in_scope_params(vec!["d"])] + ), + Rule::new( + "b", + "b_from_c", + vec![DependencyKey::new("c"), DependencyKey::new("d")], + ), + ]; + + // Valid when the `in_scope` param can be computed or is a singleton. 
+ let queries = indexset![Query::new("a", vec![])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec![], "a").unwrap(); } #[test] fn ambiguous_cycle() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "Root", - "me1", - vec![ - DependencyKey::new("ME").provided_params(vec!["P"]), - DependencyKey::new("ME").provided_params(vec!["MPP"]), - ], - ), - Rule::new("ME", "me2", vec![DependencyKey::new("FERR")]), - Rule::new( - "FERR", - "ferr", - vec![DependencyKey::new("PD"), DependencyKey::new("FPR")], - ), - Rule::new("PD", "pd_for_p", vec![DependencyKey::new("P")]), - Rule::new("PD", "pd_for_mpp", vec![DependencyKey::new("MPP")]), - Rule::new("FPR", "fpr_for_p", vec![DependencyKey::new("P")]), - Rule::new("FPR", "fpr_for_mpp", vec![DependencyKey::new("MPP")]), - ]; - let queries = indexset![Query::new("Root", vec![])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec![], "Root").unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "Root", + "me1", + vec![ + DependencyKey::new("ME").provided_params(vec!["P"]), + DependencyKey::new("ME").provided_params(vec!["MPP"]), + ], + ), + Rule::new("ME", "me2", vec![DependencyKey::new("FERR")]), + Rule::new( + "FERR", + "ferr", + vec![DependencyKey::new("PD"), DependencyKey::new("FPR")], + ), + Rule::new("PD", "pd_for_p", vec![DependencyKey::new("P")]), + Rule::new("PD", "pd_for_mpp", vec![DependencyKey::new("MPP")]), + Rule::new("FPR", "fpr_for_p", vec![DependencyKey::new("P")]), + Rule::new("FPR", "fpr_for_mpp", vec![DependencyKey::new("MPP")]), + ]; + let queries = indexset![Query::new("Root", vec![])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec![], "Root").unwrap(); } #[test] fn natural_loop() { - let rules = indexset![ - Rule::new( - "A", - "a", - vec![ - DependencyKey::new("D"), - DependencyKey::new("B").provided_params(vec!["E"]) - ], - ), - Rule::new( - "B", - "b", - vec![ - DependencyKey::new("E"), - DependencyKey::new("C").provided_params(vec!["F"]) - ], - ), - Rule::new( - "C", - "c", - vec![ - DependencyKey::new("F"), - DependencyKey::new("A").provided_params(vec!["D"]) - ], - ), - ]; - let queries = indexset![Query::new("A", vec!["D"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["D"], "A").unwrap(); + let rules = indexset![ + Rule::new( + "A", + "a", + vec![ + DependencyKey::new("D"), + DependencyKey::new("B").provided_params(vec!["E"]) + ], + ), + Rule::new( + "B", + "b", + vec![ + DependencyKey::new("E"), + DependencyKey::new("C").provided_params(vec!["F"]) + ], + ), + Rule::new( + "C", + "c", + vec![ + DependencyKey::new("F"), + DependencyKey::new("A").provided_params(vec!["D"]) + ], + ), + ]; + let queries = indexset![Query::new("A", vec!["D"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["D"], "A").unwrap(); } #[test] fn multi_path_cycle() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "A", - "sao", - vec![ - DependencyKey::new("AWO").provided_params(vec!["AS"]), - DependencyKey::new("AWO").provided_params(vec!["FS"]), - ], - ), - Rule::new("AWO", "awofs", vec![DependencyKey::new("FS")]), - Rule::new( - "AWO", - "awoas", - 
vec![DependencyKey::new("AS"), DependencyKey::new("A")], - ), - ]; - let queries = indexset![Query::new("A", vec![])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec![], "A").unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "A", + "sao", + vec![ + DependencyKey::new("AWO").provided_params(vec!["AS"]), + DependencyKey::new("AWO").provided_params(vec!["FS"]), + ], + ), + Rule::new("AWO", "awofs", vec![DependencyKey::new("FS")]), + Rule::new( + "AWO", + "awoas", + vec![DependencyKey::new("AS"), DependencyKey::new("A")], + ), + ]; + let queries = indexset![Query::new("A", vec![])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec![], "A").unwrap(); } #[test] fn mutual_recursion() { - let rules = indexset![ - Rule::new( - "IsEven", - "is_even", - vec![ - DependencyKey::new("int"), - DependencyKey::new("IsOdd").provided_params(vec!["int"]), - ], - ), - Rule::new( - "IsOdd", - "is_odd", - vec![ - DependencyKey::new("int"), - DependencyKey::new("IsEven").provided_params(vec!["int"]), - ], - ), - ]; - let queries = indexset![ - Query::new("IsEven", vec!["int"]), - Query::new("IsOdd", vec!["int"]), - ]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["int"], "IsEven").unwrap(); - graph.find_root_edges(vec!["int"], "IsOdd").unwrap(); + let rules = indexset![ + Rule::new( + "IsEven", + "is_even", + vec![ + DependencyKey::new("int"), + DependencyKey::new("IsOdd").provided_params(vec!["int"]), + ], + ), + Rule::new( + "IsOdd", + "is_odd", + vec![ + DependencyKey::new("int"), + DependencyKey::new("IsEven").provided_params(vec!["int"]), + ], + ), + ]; + let queries = indexset![ + Query::new("IsEven", vec!["int"]), + Query::new("IsOdd", vec!["int"]), + ]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["int"], "IsEven").unwrap(); + graph.find_root_edges(vec!["int"], "IsOdd").unwrap(); } #[test] fn wide() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "Output", - "one", - vec![DependencyKey::new("Output").provided_params(vec!["A"])] - ), - Rule::new( - "Output", - "two", - vec![ - DependencyKey::new("A"), - DependencyKey::new("Output").provided_params(vec!["B"]) - ], - ), - Rule::new( - "Output", - "three", - vec![ - DependencyKey::new("B"), - DependencyKey::new("Output").provided_params(vec!["C"]) - ], - ), - Rule::new( - "Output", - "four", - vec![DependencyKey::new("C"), DependencyKey::new("D")], - ), - ]; - let queries = indexset![Query::new("Output", vec!["D"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); - graph.find_root_edges(vec!["D"], "Output").unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "Output", + "one", + vec![DependencyKey::new("Output").provided_params(vec!["A"])] + ), + Rule::new( + "Output", + "two", + vec![ + DependencyKey::new("A"), + DependencyKey::new("Output").provided_params(vec!["B"]) + ], + ), + Rule::new( + "Output", + "three", + vec![ + DependencyKey::new("B"), + DependencyKey::new("Output").provided_params(vec!["C"]) + ], + ), + Rule::new( + "Output", + "four", + vec![DependencyKey::new("C"), DependencyKey::new("D")], + ), + ]; + let queries = indexset![Query::new("Output", vec!["D"])]; + let 
graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); + graph.find_root_edges(vec!["D"], "Output").unwrap(); } #[test] fn reduced_source_roots() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new("SourceRootConfig", "construct_scope_source", vec![]), - Rule::new( - "OptionalSourceRootsResult", - "get_optional_source_roots", - vec![ - DependencyKey::new("SourceRootsRequest"), - DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), - ], - ), - Rule::new( - "Digest", - "remove_digest_prefix", - vec![DependencyKey::new("RemovePrefix")], - ), - Rule::new( - "Snapshot", - "snapshot_from_digest", - vec![DependencyKey::new("Digest")], - ), - Rule::new( - "Digest", - "digest_subset", - vec![DependencyKey::new("DigestSubset")], - ), - Rule::new( - "Digest", - "digest_from_pathglobs", - vec![DependencyKey::new("PathGlobs")], - ), - Rule::new( - "Digest", - "merge_digests", - vec![DependencyKey::new("MergeDigests")], - ), - Rule::new( - "SourceRootsResult", - "get_source_roots", - vec![ - DependencyKey::new("SourceRootsRequest"), - DependencyKey::new("OptionalSourceRootsResult").provided_params(vec!["SourceRootsRequest"]), - ], - ), - Rule::new( - "OptionalSourceRoot", - "get_optional_source_root", - vec![ - DependencyKey::new("SourceRootRequest"), - DependencyKey::new("SourceRootConfig"), - DependencyKey::new("Snapshot").provided_params(vec!["PathGlobs"]), - DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), - ], - ), - Rule::new( - "StrippedSourceFiles", - "strip_source_roots", - vec![ - DependencyKey::new("SourceFiles"), - DependencyKey::new("Snapshot").provided_params(vec!["DigestSubset"]), - DependencyKey::new("SourceRootsResult").provided_params(vec!["SourceRootsRequest"]), - DependencyKey::new("Snapshot").provided_params(vec!["RemovePrefix"]), - DependencyKey::new("Digest").provided_params(vec!["DigestSubset"]), - DependencyKey::new("Digest").provided_params(vec!["RemovePrefix"]), - DependencyKey::new("Snapshot").provided_params(vec!["MergeDigests"]), - ], - ), - ]; - let queries = indexset![Query::new("StrippedSourceFiles", vec!["SourceFiles"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new("SourceRootConfig", "construct_scope_source", vec![]), + Rule::new( + "OptionalSourceRootsResult", + "get_optional_source_roots", + vec![ + DependencyKey::new("SourceRootsRequest"), + DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), + ], + ), + Rule::new( + "Digest", + "remove_digest_prefix", + vec![DependencyKey::new("RemovePrefix")], + ), + Rule::new( + "Snapshot", + "snapshot_from_digest", + vec![DependencyKey::new("Digest")], + ), + Rule::new( + "Digest", + "digest_subset", + vec![DependencyKey::new("DigestSubset")], + ), + Rule::new( + "Digest", + "digest_from_pathglobs", + vec![DependencyKey::new("PathGlobs")], + ), + Rule::new( + "Digest", + "merge_digests", + vec![DependencyKey::new("MergeDigests")], + ), + Rule::new( + "SourceRootsResult", + "get_source_roots", + vec![ + DependencyKey::new("SourceRootsRequest"), + DependencyKey::new("OptionalSourceRootsResult") + .provided_params(vec!["SourceRootsRequest"]), + ], + ), + Rule::new( + "OptionalSourceRoot", + "get_optional_source_root", + vec![ + DependencyKey::new("SourceRootRequest"), + DependencyKey::new("SourceRootConfig"), + 
DependencyKey::new("Snapshot").provided_params(vec!["PathGlobs"]), + DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), + ], + ), + Rule::new( + "StrippedSourceFiles", + "strip_source_roots", + vec![ + DependencyKey::new("SourceFiles"), + DependencyKey::new("Snapshot").provided_params(vec!["DigestSubset"]), + DependencyKey::new("SourceRootsResult").provided_params(vec!["SourceRootsRequest"]), + DependencyKey::new("Snapshot").provided_params(vec!["RemovePrefix"]), + DependencyKey::new("Digest").provided_params(vec!["DigestSubset"]), + DependencyKey::new("Digest").provided_params(vec!["RemovePrefix"]), + DependencyKey::new("Snapshot").provided_params(vec!["MergeDigests"]), + ], + ), + ]; + let queries = indexset![Query::new("StrippedSourceFiles", vec!["SourceFiles"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); } #[test] fn reduced_codegen_cycle() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "Process", - "setup_pex_cli_process", - vec![ - DependencyKey::new("PexCliProcess"), - DependencyKey::new("ProcessResult").provided_params(vec!["Process"]), - ], - ), - Rule::new( - "ProcessResult", - "fallible_to_exec_result_or_raise", - vec![ - DependencyKey::new("FallibleProcessResult"), - DependencyKey::new("ProductDescription"), - ], - ), - Rule::new( - "MultiPlatformProcess", - "upcast_process", - vec![DependencyKey::new("Process")], - ), - Rule::new( - "ProductDescription", - "get_multi_platform_request_description", - vec![DependencyKey::new("MultiPlatformProcess")], - ), - Rule::new( - "FallibleProcessResult", - "remove_platform_information", - vec![DependencyKey::new("MultiPlatformProcess")], - ), - ]; - let queries = indexset![Query::new("Process", vec!["PexCliProcess"])]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "Process", + "setup_pex_cli_process", + vec![ + DependencyKey::new("PexCliProcess"), + DependencyKey::new("ProcessResult").provided_params(vec!["Process"]), + ], + ), + Rule::new( + "ProcessResult", + "fallible_to_exec_result_or_raise", + vec![ + DependencyKey::new("FallibleProcessResult"), + DependencyKey::new("ProductDescription"), + ], + ), + Rule::new( + "MultiPlatformProcess", + "upcast_process", + vec![DependencyKey::new("Process")], + ), + Rule::new( + "ProductDescription", + "get_multi_platform_request_description", + vec![DependencyKey::new("MultiPlatformProcess")], + ), + Rule::new( + "FallibleProcessResult", + "remove_platform_information", + vec![DependencyKey::new("MultiPlatformProcess")], + ), + ]; + let queries = indexset![Query::new("Process", vec!["PexCliProcess"])]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); } #[test] fn full_scale_target() { - let _logger = env_logger::try_init(); - let rules = indexset![ - Rule::new( - "InferredDependencies", - "infer_python_conftest_dependencies", - vec![ - DependencyKey::new("InferConftestDependencies"), - DependencyKey::new("HydratedSources").provided_params(vec!["HydrateSourcesRequest"]), - DependencyKey::new("AncestorFiles").provided_params(vec!["AncestorFilesRequest"]), - DependencyKey::new("Owners").provided_params(vec!["OwnersRequest"]), - ], - ), - Rule::new( - "ThirdPartyModuleToAddressMapping", - "map_third_party_modules_to_addresses", - 
vec![DependencyKey::new("Targets").provided_params(vec!["AddressSpecs"])], - ), - Rule::new( - "Targets", - "resolve_targets", - vec![ - DependencyKey::new("UnexpandedTargets"), - DependencyKey::new("Subtargets").provided_params(vec!["Address"]), - ], - ), - Rule::new( - "Owners", - "find_owners", - vec![ - DependencyKey::new("OwnersRequest"), - DependencyKey::new("Targets").provided_params(vec!["AddressSpecs"]), - DependencyKey::new("UnexpandedTargets").provided_params(vec!["AddressSpecs"]), - DependencyKey::new("BuildFileAddress").provided_params(vec!["Address"]), - ], - ), - Rule::new( - "AddressesWithOrigins", - "resolve_addresses_with_origins", - vec![ - DependencyKey::new("Specs"), - DependencyKey::new("AddressesWithOrigins").provided_params(vec!["AddressSpecs"]), - DependencyKey::new("AddressesWithOrigins").provided_params(vec!["FilesystemSpecs"]), - ], - ), - Rule::new( - "InferredDependencies", - "infer_python_dependencies", - vec![ - DependencyKey::new("InferPythonDependencies"), - DependencyKey::new("StrippedSourceFiles").provided_params(vec!["SourceFilesRequest"]), - DependencyKey::for_known_rule(RuleId::new("map_module_to_address"), "PythonModuleOwner") - .provided_params(vec!["PythonModule"]), - ], - ), - Rule::new( - "InferredDependencies", - "infer_python_init_dependencies", - vec![ - DependencyKey::new("InferInitDependencies"), - DependencyKey::new("HydratedSources").provided_params(vec!["HydrateSourcesRequest"]), - DependencyKey::new("AncestorFiles").provided_params(vec!["AncestorFilesRequest"]), - DependencyKey::new("Owners").provided_params(vec!["OwnersRequest"]), - ], - ), - Rule::new( - "PythonModuleOwner", - "map_module_to_address", - vec![ - DependencyKey::new("PythonModule"), - DependencyKey::new("FirstPartyModuleToAddressMapping"), - DependencyKey::new("ThirdPartyModuleToAddressMapping"), - ], - ), - Rule::new( - "DownloadedExternalTool", - "download_external_tool", - vec![ - DependencyKey::new("ExternalToolRequest"), - DependencyKey::new("Digest").provided_params(vec!["DownloadFile"]), - DependencyKey::new("ExtractedDigest").provided_params(vec!["MaybeExtractable"]), - ], - ), - Rule::new( - "GlobalOptions", - "construct_scope_", - vec![DependencyKey::new("ScopedOptions").provided_params(vec!["Scope"])], - ), - Rule::new( - "PexEnvironment", - "find_pex_python", - vec![DependencyKey::new("BinaryPaths").provided_params(vec!["BinaryPathRequest"])], - ), - Rule::new( - "ProcessResult", - "fallible_to_exec_result_or_raise", - vec![ - DependencyKey::new("FallibleProcessResult"), - DependencyKey::new("ProductDescription"), - ], - ), - Rule::new( - "HydratedSources", - "hydrate_sources", - vec![ - DependencyKey::new("HydrateSourcesRequest"), - DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), - DependencyKey::new("GeneratedSources") - .provided_params(vec!["GeneratePythonFromProtobufRequest"],), - ], - ), - Rule::new( - "Digest", - "merge_digests", - vec![DependencyKey::new("MergeDigests")], - ), - Rule::new( - "SourceFiles", - "determine_source_files", - vec![ - DependencyKey::new("SourceFilesRequest"), - DependencyKey::new("HydratedSources").provided_params(vec!["HydrateSourcesRequest"]), - ], - ), - Rule::new( - "UnexpandedTargets", - "resolve_unexpanded_targets", - vec![ - DependencyKey::new("Addresses"), - DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), - ], - ), - Rule::new( - "ExtractedDigest", - "maybe_extract", - vec![ - DependencyKey::new("MaybeExtractable"), - 
DependencyKey::new("ProcessResult").provided_params(vec!["Process"]), - ], - ), - Rule::new( - "AddressesWithOrigins", - "addresses_with_origins_from_address_specs", - vec![ - DependencyKey::new("AddressSpecs"), - DependencyKey::new("Address").provided_params(vec!["AddressInput"]), - DependencyKey::new("TargetAdaptor").provided_params(vec!["Address"]), - DependencyKey::new("UnexpandedTargets").provided_params(vec!["Addresses"]), - DependencyKey::new("AddressFamily").provided_params(vec!["Dir"]), - ], - ), - Rule::new( - "BuildFileAddress", - "find_build_file", - vec![ - DependencyKey::new("Address"), - DependencyKey::new("AddressFamily").provided_params(vec!["Dir"]), - ], - ), - Rule::new( - "AncestorFiles", - "find_missing_ancestor_files", - vec![DependencyKey::new("AncestorFilesRequest")], - ), - Rule::new( - "Digest", - "download_file", - vec![DependencyKey::new("DownloadFile")], - ), - Rule::new( - "BinaryPaths", - "find_binary", - vec![ - DependencyKey::new("BinaryPathRequest"), - DependencyKey::new("FallibleProcessResult").provided_params(vec!["Process"]), - ], - ), - Rule::new( - "OptionalSourceRootsResult", - "get_optional_source_roots", - vec![ - DependencyKey::new("SourceRootsRequest"), - DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), - ], - ), - Rule::new( - "AddressesWithOrigins", - "addresses_with_origins_from_filesystem_specs", - vec![ - DependencyKey::new("FilesystemSpecs"), - DependencyKey::new("Owners").provided_params(vec!["OwnersRequest"]), - ], - ), - Rule::new( - "RegisteredTargetTypes", - "registered_target_types_singleton", - vec![], - ), - Rule::new( - "Addresses", - "strip_address_origins", - vec![DependencyKey::new("AddressesWithOrigins")], - ), - Rule::new( - "FallibleProcessResult", - "remove_platform_information", - vec![DependencyKey::new("FallibleProcessResultWithPlatform")], - ), - Rule::new( - "ScopedOptions", - "scope_options", - vec![DependencyKey::new("Scope"), DependencyKey::new("_Options"),], - ), - Rule::new( - "TransitiveTargets", - "transitive_targets", - vec![ - DependencyKey::new("Targets"), - DependencyKey::new("Targets").provided_params(vec!["DependenciesRequest"]), - ], - ), - Rule::new( - "FirstPartyModuleToAddressMapping", - "map_first_party_modules_to_addresses", - vec![ - DependencyKey::new("Targets").provided_params(vec!["AddressSpecs"]), - DependencyKey::new("StrippedSourceFiles").provided_params(vec!["SourceFilesRequest"]), - ], - ), - Rule::new( - "Digest", - "digest_subset", - vec![DependencyKey::new("DigestSubset")], - ), - Rule::new( - "_Options", - "parse_options", - vec![DependencyKey::new("OptionsBootstrapper")], - ), - Rule::new( - "GeneratedSources", - "generate_python_from_protobuf", - vec![ - DependencyKey::new("GeneratePythonFromProtobufRequest"), - DependencyKey::new("DownloadedExternalTool").provided_params(vec!["ExternalToolRequest"]), - DependencyKey::new("ProcessResult").provided_params(vec!["Process"]), - DependencyKey::new("TransitiveTargets").provided_params(vec!["Addresses"]), - DependencyKey::new("StrippedSourceFiles").provided_params(vec!["SourceFilesRequest"]), - DependencyKey::new("Digest").provided_params(vec!["MergeDigests"]), - DependencyKey::new("SourceRoot").provided_params(vec!["SourceRootRequest"]), - ], - ), - Rule::new( - "SourceRoot", - "get_source_root", - vec![ - DependencyKey::new("SourceRootRequest"), - DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), - ], - ), - Rule::new( - "MultiPlatformProcess", - "upcast_process", - 
vec![DependencyKey::new("Process")], - ), - Rule::new( - "TargetAdaptor", - "find_target_adaptor", - vec![ - DependencyKey::new("Address"), - DependencyKey::new("AddressFamily").provided_params(vec!["Dir"]), - ], - ), - Rule::new( - "Address", - "resolve_address", - vec![DependencyKey::new("AddressInput")], - ), - Rule::new( - "FallibleProcessResultWithPlatform", - "run_multiplaform_process", - vec![DependencyKey::new("MultiPlatformProcess")], - ), - Rule::new( - "SourceRootsResult", - "get_source_roots", - vec![ - DependencyKey::new("SourceRootsRequest"), - DependencyKey::new("OptionalSourceRootsResult").provided_params(vec!["SourceRootsRequest"]), - ], - ), - Rule::new( - "OptionalSourceRoot", - "get_optional_source_root", - vec![ - DependencyKey::new("SourceRootRequest"), - DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), - ], - ), - Rule::new( - "StrippedSourceFiles", - "strip_source_roots", - vec![ - DependencyKey::new("SourceFiles"), - DependencyKey::new("SourceRootsResult").provided_params(vec!["SourceRootsRequest"]), - DependencyKey::new("Digest").provided_params(vec!["DigestSubset"]), - ], - ), - Rule::new( - "Subtargets", - "generate_subtargets", - vec![ - DependencyKey::new("Address"), - DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), - ], - ), - Rule::new( - "ProductDescription", - "get_multi_platform_request_description", - vec![DependencyKey::new("MultiPlatformProcess")], - ), - Rule::new( - "Addresses", - "resolve_dependencies", - vec![ - DependencyKey::new("DependenciesRequest"), - DependencyKey::new("RegisteredTargetTypes"), - DependencyKey::new("Address").provided_params(vec!["AddressInput"]), - DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), - DependencyKey::new("InferredDependencies").provided_params(vec!["InferPythonDependencies"]), - DependencyKey::new("InferredDependencies").provided_params(vec!["InferInitDependencies"]), - DependencyKey::new("InferredDependencies") - .provided_params(vec!["InferConftestDependencies"]), - DependencyKey::new("Subtargets").provided_params(vec!["Address"]), - ], - ), - Rule::new( - "WrappedTarget", - "resolve_target", - vec![ - DependencyKey::new("Address"), - DependencyKey::new("RegisteredTargetTypes"), - DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), - DependencyKey::new("TargetAdaptor").provided_params(vec!["Address"]), - ], - ), - Rule::new( - "AddressFamily", - "parse_address_family", - vec![ - DependencyKey::new("GlobalOptions"), - DependencyKey::new("Dir"), - ], - ), - ]; - let queries = indexset![ - Query::new("AddressesWithOrigins", vec!["OptionsBootstrapper", "Specs"]), - Query::new("UnexpandedTargets", vec!["OptionsBootstrapper", "Specs"]), - Query::new("Addresses", vec!["OptionsBootstrapper", "Specs"]), - Query::new("Owners", vec!["OptionsBootstrapper", "OwnersRequest"]), - Query::new("Targets", vec!["OptionsBootstrapper", "Specs"]), - Query::new("TransitiveTargets", vec!["OptionsBootstrapper", "Specs"]), - Query::new( - "HydratedSources", - vec!["OptionsBootstrapper", "HydrateSourcesRequest"], - ), - Query::new( - "SourceFiles", - vec!["OptionsBootstrapper", "SourceFilesRequest"], - ), - Query::new( - "StrippedSourceFiles", - vec!["OptionsBootstrapper", "SourceFiles"], - ), - ]; - let graph = RuleGraph::new(rules, queries).unwrap(); - - graph.validate_reachability().unwrap(); + let _logger = env_logger::try_init(); + let rules = indexset![ + Rule::new( + "InferredDependencies", + "infer_python_conftest_dependencies", + 
vec![ + DependencyKey::new("InferConftestDependencies"), + DependencyKey::new("HydratedSources") + .provided_params(vec!["HydrateSourcesRequest"]), + DependencyKey::new("AncestorFiles").provided_params(vec!["AncestorFilesRequest"]), + DependencyKey::new("Owners").provided_params(vec!["OwnersRequest"]), + ], + ), + Rule::new( + "ThirdPartyModuleToAddressMapping", + "map_third_party_modules_to_addresses", + vec![DependencyKey::new("Targets").provided_params(vec!["AddressSpecs"])], + ), + Rule::new( + "Targets", + "resolve_targets", + vec![ + DependencyKey::new("UnexpandedTargets"), + DependencyKey::new("Subtargets").provided_params(vec!["Address"]), + ], + ), + Rule::new( + "Owners", + "find_owners", + vec![ + DependencyKey::new("OwnersRequest"), + DependencyKey::new("Targets").provided_params(vec!["AddressSpecs"]), + DependencyKey::new("UnexpandedTargets").provided_params(vec!["AddressSpecs"]), + DependencyKey::new("BuildFileAddress").provided_params(vec!["Address"]), + ], + ), + Rule::new( + "AddressesWithOrigins", + "resolve_addresses_with_origins", + vec![ + DependencyKey::new("Specs"), + DependencyKey::new("AddressesWithOrigins").provided_params(vec!["AddressSpecs"]), + DependencyKey::new("AddressesWithOrigins").provided_params(vec!["FilesystemSpecs"]), + ], + ), + Rule::new( + "InferredDependencies", + "infer_python_dependencies", + vec![ + DependencyKey::new("InferPythonDependencies"), + DependencyKey::new("StrippedSourceFiles") + .provided_params(vec!["SourceFilesRequest"]), + DependencyKey::for_known_rule( + RuleId::new("map_module_to_address"), + "PythonModuleOwner" + ) + .provided_params(vec!["PythonModule"]), + ], + ), + Rule::new( + "InferredDependencies", + "infer_python_init_dependencies", + vec![ + DependencyKey::new("InferInitDependencies"), + DependencyKey::new("HydratedSources") + .provided_params(vec!["HydrateSourcesRequest"]), + DependencyKey::new("AncestorFiles").provided_params(vec!["AncestorFilesRequest"]), + DependencyKey::new("Owners").provided_params(vec!["OwnersRequest"]), + ], + ), + Rule::new( + "PythonModuleOwner", + "map_module_to_address", + vec![ + DependencyKey::new("PythonModule"), + DependencyKey::new("FirstPartyModuleToAddressMapping"), + DependencyKey::new("ThirdPartyModuleToAddressMapping"), + ], + ), + Rule::new( + "DownloadedExternalTool", + "download_external_tool", + vec![ + DependencyKey::new("ExternalToolRequest"), + DependencyKey::new("Digest").provided_params(vec!["DownloadFile"]), + DependencyKey::new("ExtractedDigest").provided_params(vec!["MaybeExtractable"]), + ], + ), + Rule::new( + "GlobalOptions", + "construct_scope_", + vec![DependencyKey::new("ScopedOptions").provided_params(vec!["Scope"])], + ), + Rule::new( + "PexEnvironment", + "find_pex_python", + vec![DependencyKey::new("BinaryPaths").provided_params(vec!["BinaryPathRequest"])], + ), + Rule::new( + "ProcessResult", + "fallible_to_exec_result_or_raise", + vec![ + DependencyKey::new("FallibleProcessResult"), + DependencyKey::new("ProductDescription"), + ], + ), + Rule::new( + "HydratedSources", + "hydrate_sources", + vec![ + DependencyKey::new("HydrateSourcesRequest"), + DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), + DependencyKey::new("GeneratedSources") + .provided_params(vec!["GeneratePythonFromProtobufRequest"],), + ], + ), + Rule::new( + "Digest", + "merge_digests", + vec![DependencyKey::new("MergeDigests")], + ), + Rule::new( + "SourceFiles", + "determine_source_files", + vec![ + DependencyKey::new("SourceFilesRequest"), + 
DependencyKey::new("HydratedSources") + .provided_params(vec!["HydrateSourcesRequest"]), + ], + ), + Rule::new( + "UnexpandedTargets", + "resolve_unexpanded_targets", + vec![ + DependencyKey::new("Addresses"), + DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), + ], + ), + Rule::new( + "ExtractedDigest", + "maybe_extract", + vec![ + DependencyKey::new("MaybeExtractable"), + DependencyKey::new("ProcessResult").provided_params(vec!["Process"]), + ], + ), + Rule::new( + "AddressesWithOrigins", + "addresses_with_origins_from_address_specs", + vec![ + DependencyKey::new("AddressSpecs"), + DependencyKey::new("Address").provided_params(vec!["AddressInput"]), + DependencyKey::new("TargetAdaptor").provided_params(vec!["Address"]), + DependencyKey::new("UnexpandedTargets").provided_params(vec!["Addresses"]), + DependencyKey::new("AddressFamily").provided_params(vec!["Dir"]), + ], + ), + Rule::new( + "BuildFileAddress", + "find_build_file", + vec![ + DependencyKey::new("Address"), + DependencyKey::new("AddressFamily").provided_params(vec!["Dir"]), + ], + ), + Rule::new( + "AncestorFiles", + "find_missing_ancestor_files", + vec![DependencyKey::new("AncestorFilesRequest")], + ), + Rule::new( + "Digest", + "download_file", + vec![DependencyKey::new("DownloadFile")], + ), + Rule::new( + "BinaryPaths", + "find_binary", + vec![ + DependencyKey::new("BinaryPathRequest"), + DependencyKey::new("FallibleProcessResult").provided_params(vec!["Process"]), + ], + ), + Rule::new( + "OptionalSourceRootsResult", + "get_optional_source_roots", + vec![ + DependencyKey::new("SourceRootsRequest"), + DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), + ], + ), + Rule::new( + "AddressesWithOrigins", + "addresses_with_origins_from_filesystem_specs", + vec![ + DependencyKey::new("FilesystemSpecs"), + DependencyKey::new("Owners").provided_params(vec!["OwnersRequest"]), + ], + ), + Rule::new( + "RegisteredTargetTypes", + "registered_target_types_singleton", + vec![], + ), + Rule::new( + "Addresses", + "strip_address_origins", + vec![DependencyKey::new("AddressesWithOrigins")], + ), + Rule::new( + "FallibleProcessResult", + "remove_platform_information", + vec![DependencyKey::new("FallibleProcessResultWithPlatform")], + ), + Rule::new( + "ScopedOptions", + "scope_options", + vec![DependencyKey::new("Scope"), DependencyKey::new("_Options"),], + ), + Rule::new( + "TransitiveTargets", + "transitive_targets", + vec![ + DependencyKey::new("Targets"), + DependencyKey::new("Targets").provided_params(vec!["DependenciesRequest"]), + ], + ), + Rule::new( + "FirstPartyModuleToAddressMapping", + "map_first_party_modules_to_addresses", + vec![ + DependencyKey::new("Targets").provided_params(vec!["AddressSpecs"]), + DependencyKey::new("StrippedSourceFiles") + .provided_params(vec!["SourceFilesRequest"]), + ], + ), + Rule::new( + "Digest", + "digest_subset", + vec![DependencyKey::new("DigestSubset")], + ), + Rule::new( + "_Options", + "parse_options", + vec![DependencyKey::new("OptionsBootstrapper")], + ), + Rule::new( + "GeneratedSources", + "generate_python_from_protobuf", + vec![ + DependencyKey::new("GeneratePythonFromProtobufRequest"), + DependencyKey::new("DownloadedExternalTool") + .provided_params(vec!["ExternalToolRequest"]), + DependencyKey::new("ProcessResult").provided_params(vec!["Process"]), + DependencyKey::new("TransitiveTargets").provided_params(vec!["Addresses"]), + DependencyKey::new("StrippedSourceFiles") + .provided_params(vec!["SourceFilesRequest"]), + 
DependencyKey::new("Digest").provided_params(vec!["MergeDigests"]), + DependencyKey::new("SourceRoot").provided_params(vec!["SourceRootRequest"]), + ], + ), + Rule::new( + "SourceRoot", + "get_source_root", + vec![ + DependencyKey::new("SourceRootRequest"), + DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), + ], + ), + Rule::new( + "MultiPlatformProcess", + "upcast_process", + vec![DependencyKey::new("Process")], + ), + Rule::new( + "TargetAdaptor", + "find_target_adaptor", + vec![ + DependencyKey::new("Address"), + DependencyKey::new("AddressFamily").provided_params(vec!["Dir"]), + ], + ), + Rule::new( + "Address", + "resolve_address", + vec![DependencyKey::new("AddressInput")], + ), + Rule::new( + "FallibleProcessResultWithPlatform", + "run_multiplaform_process", + vec![DependencyKey::new("MultiPlatformProcess")], + ), + Rule::new( + "SourceRootsResult", + "get_source_roots", + vec![ + DependencyKey::new("SourceRootsRequest"), + DependencyKey::new("OptionalSourceRootsResult") + .provided_params(vec!["SourceRootsRequest"]), + ], + ), + Rule::new( + "OptionalSourceRoot", + "get_optional_source_root", + vec![ + DependencyKey::new("SourceRootRequest"), + DependencyKey::new("OptionalSourceRoot").provided_params(vec!["SourceRootRequest"]), + ], + ), + Rule::new( + "StrippedSourceFiles", + "strip_source_roots", + vec![ + DependencyKey::new("SourceFiles"), + DependencyKey::new("SourceRootsResult").provided_params(vec!["SourceRootsRequest"]), + DependencyKey::new("Digest").provided_params(vec!["DigestSubset"]), + ], + ), + Rule::new( + "Subtargets", + "generate_subtargets", + vec![ + DependencyKey::new("Address"), + DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), + ], + ), + Rule::new( + "ProductDescription", + "get_multi_platform_request_description", + vec![DependencyKey::new("MultiPlatformProcess")], + ), + Rule::new( + "Addresses", + "resolve_dependencies", + vec![ + DependencyKey::new("DependenciesRequest"), + DependencyKey::new("RegisteredTargetTypes"), + DependencyKey::new("Address").provided_params(vec!["AddressInput"]), + DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), + DependencyKey::new("InferredDependencies") + .provided_params(vec!["InferPythonDependencies"]), + DependencyKey::new("InferredDependencies") + .provided_params(vec!["InferInitDependencies"]), + DependencyKey::new("InferredDependencies") + .provided_params(vec!["InferConftestDependencies"]), + DependencyKey::new("Subtargets").provided_params(vec!["Address"]), + ], + ), + Rule::new( + "WrappedTarget", + "resolve_target", + vec![ + DependencyKey::new("Address"), + DependencyKey::new("RegisteredTargetTypes"), + DependencyKey::new("WrappedTarget").provided_params(vec!["Address"]), + DependencyKey::new("TargetAdaptor").provided_params(vec!["Address"]), + ], + ), + Rule::new( + "AddressFamily", + "parse_address_family", + vec![ + DependencyKey::new("GlobalOptions"), + DependencyKey::new("Dir"), + ], + ), + ]; + let queries = indexset![ + Query::new("AddressesWithOrigins", vec!["OptionsBootstrapper", "Specs"]), + Query::new("UnexpandedTargets", vec!["OptionsBootstrapper", "Specs"]), + Query::new("Addresses", vec!["OptionsBootstrapper", "Specs"]), + Query::new("Owners", vec!["OptionsBootstrapper", "OwnersRequest"]), + Query::new("Targets", vec!["OptionsBootstrapper", "Specs"]), + Query::new("TransitiveTargets", vec!["OptionsBootstrapper", "Specs"]), + Query::new( + "HydratedSources", + vec!["OptionsBootstrapper", "HydrateSourcesRequest"], + ), + 
Query::new( + "SourceFiles", + vec!["OptionsBootstrapper", "SourceFilesRequest"], + ), + Query::new( + "StrippedSourceFiles", + vec!["OptionsBootstrapper", "SourceFiles"], + ), + ]; + let graph = RuleGraph::new(rules, queries).unwrap(); + + graph.validate_reachability().unwrap(); } impl super::TypeId for &'static str { - fn display(type_ids: I) -> String - where - I: Iterator, - { - type_ids.collect::>().join("+") - } + fn display(type_ids: I) -> String + where + I: Iterator, + { + type_ids.collect::>().join("+") + } } #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct Rule { - id: RuleId, - product: &'static str, - name: &'static str, - dependency_keys: Vec>, - masked_params: Vec<&'static str>, -} - -impl Rule { - fn new( + id: RuleId, product: &'static str, name: &'static str, dependency_keys: Vec>, - ) -> Self { - Self { - id: RuleId::new(name), - product, - name, - dependency_keys, - masked_params: vec![], + masked_params: Vec<&'static str>, +} + +impl Rule { + fn new( + product: &'static str, + name: &'static str, + dependency_keys: Vec>, + ) -> Self { + Self { + id: RuleId::new(name), + product, + name, + dependency_keys, + masked_params: vec![], + } } - } - fn masked_params(mut self, masked_params: Vec<&'static str>) -> Self { - self.masked_params = masked_params; - self - } + fn masked_params(mut self, masked_params: Vec<&'static str>) -> Self { + self.masked_params = masked_params; + self + } } impl super::Rule for Rule { - type TypeId = &'static str; + type TypeId = &'static str; - fn id(&self) -> &RuleId { - &self.id - } + fn id(&self) -> &RuleId { + &self.id + } - fn product(&self) -> Self::TypeId { - self.product - } + fn product(&self) -> Self::TypeId { + self.product + } - fn dependency_keys(&self) -> Vec<&DependencyKey> { - self.dependency_keys.iter().collect() - } + fn dependency_keys(&self) -> Vec<&DependencyKey> { + self.dependency_keys.iter().collect() + } - fn masked_params(&self) -> Vec { - self.masked_params.clone() - } + fn masked_params(&self) -> Vec { + self.masked_params.clone() + } - fn require_reachable(&self) -> bool { - !self.name.ends_with("_unreachable") - } + fn require_reachable(&self) -> bool { + !self.name.ends_with("_unreachable") + } - fn color(&self) -> Option { - None - } + fn color(&self) -> Option { + None + } } impl super::DisplayForGraph for Rule { - fn fmt_for_graph(&self, _: super::DisplayForGraphArgs) -> String { - self.to_string() - } + fn fmt_for_graph(&self, _: super::DisplayForGraphArgs) -> String { + self.to_string() + } } impl fmt::Display for Rule { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!( - f, - "{}({}) -> {}", - self.name, - self.dependency_keys.len(), - self.product - ) - } + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!( + f, + "{}({}) -> {}", + self.name, + self.dependency_keys.len(), + self.product + ) + } } diff --git a/src/rust/engine/sharded_lmdb/src/lib.rs b/src/rust/engine/sharded_lmdb/src/lib.rs index b944674cd68..ea25a6626ee 100644 --- a/src/rust/engine/sharded_lmdb/src/lib.rs +++ b/src/rust/engine/sharded_lmdb/src/lib.rs @@ -4,21 +4,21 @@ #![deny(warnings)] // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source. 
#![deny( - clippy::all, - clippy::default_trait_access, - clippy::expl_impl_clone_on_copy, - clippy::if_not_else, - clippy::needless_continue, - clippy::unseparated_literal_suffix, - clippy::used_underscore_binding + clippy::all, + clippy::default_trait_access, + clippy::expl_impl_clone_on_copy, + clippy::if_not_else, + clippy::needless_continue, + clippy::unseparated_literal_suffix, + clippy::used_underscore_binding )] // It is often more clear to show that nothing is being moved. #![allow(clippy::match_ref_pats)] // Subjective style. #![allow( - clippy::len_without_is_empty, - clippy::redundant_field_names, - clippy::too_many_arguments + clippy::len_without_is_empty, + clippy::redundant_field_names, + clippy::too_many_arguments )] // Default isn't as big a deal as people seem to think it is. #![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -36,8 +36,8 @@ use std::time::{self, Duration}; use bytes::{BufMut, Bytes}; use hashing::{sync_verified_copy, AgedFingerprint, Digest, Fingerprint, FINGERPRINT_SIZE}; use lmdb::{ - self, Cursor, Database, DatabaseFlags, Environment, EnvironmentCopyFlags, EnvironmentFlags, - RwTransaction, Transaction, WriteFlags, + self, Cursor, Database, DatabaseFlags, Environment, EnvironmentCopyFlags, EnvironmentFlags, + RwTransaction, Transaction, WriteFlags, }; use log::trace; use tempfile::TempDir; @@ -60,45 +60,45 @@ const VERSIONED_FINGERPRINT_SIZE: usize = FINGERPRINT_SIZE + 1; pub struct VersionedFingerprint([u8; VERSIONED_FINGERPRINT_SIZE]); impl VersionedFingerprint { - pub fn new(fingerprint: Fingerprint, version: u8) -> VersionedFingerprint { - let mut buf = [0; VERSIONED_FINGERPRINT_SIZE]; - buf[0..FINGERPRINT_SIZE].copy_from_slice(&fingerprint.0[..]); - buf[FINGERPRINT_SIZE] = version; - VersionedFingerprint(buf) - } - - pub fn get_fingerprint(&self) -> Fingerprint { - let mut buf = [0; FINGERPRINT_SIZE]; - buf.copy_from_slice(&self.0[0..FINGERPRINT_SIZE]); - Fingerprint(buf) - } - - pub fn from_bytes_unsafe(bytes: &[u8]) -> VersionedFingerprint { - if bytes.len() != VERSIONED_FINGERPRINT_SIZE { - panic!( - "Input value was not a versioned fingerprint; had length: {}", - bytes.len() - ); + pub fn new(fingerprint: Fingerprint, version: u8) -> VersionedFingerprint { + let mut buf = [0; VERSIONED_FINGERPRINT_SIZE]; + buf[0..FINGERPRINT_SIZE].copy_from_slice(&fingerprint.0[..]); + buf[FINGERPRINT_SIZE] = version; + VersionedFingerprint(buf) } - let mut buf = [0; VERSIONED_FINGERPRINT_SIZE]; - buf.clone_from_slice(&bytes[0..VERSIONED_FINGERPRINT_SIZE]); - VersionedFingerprint(buf) - } + pub fn get_fingerprint(&self) -> Fingerprint { + let mut buf = [0; FINGERPRINT_SIZE]; + buf.copy_from_slice(&self.0[0..FINGERPRINT_SIZE]); + Fingerprint(buf) + } + + pub fn from_bytes_unsafe(bytes: &[u8]) -> VersionedFingerprint { + if bytes.len() != VERSIONED_FINGERPRINT_SIZE { + panic!( + "Input value was not a versioned fingerprint; had length: {}", + bytes.len() + ); + } + + let mut buf = [0; VERSIONED_FINGERPRINT_SIZE]; + buf.clone_from_slice(&bytes[0..VERSIONED_FINGERPRINT_SIZE]); + VersionedFingerprint(buf) + } - pub fn to_hex(&self) -> String { - let mut s = String::new(); - for byte in 0..VERSIONED_FINGERPRINT_SIZE { - fmt::Write::write_fmt(&mut s, format_args!("{byte:02x}")).unwrap(); + pub fn to_hex(&self) -> String { + let mut s = String::new(); + for byte in 0..VERSIONED_FINGERPRINT_SIZE { + fmt::Write::write_fmt(&mut s, format_args!("{byte:02x}")).unwrap(); + } + s } - s - } } impl AsRef<[u8]> for VersionedFingerprint { - fn as_ref(&self) -> 
&[u8] { - &self.0[..] - } + fn as_ref(&self) -> &[u8] { + &self.0[..] + } } #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] @@ -111,668 +111,671 @@ struct EnvironmentId(u8); // TODO: This should likely use an Arc around an inner struct, because it is frequently cloned. #[derive(Debug, Clone)] pub struct ShardedLmdb { - // First Database is content, second is leases. - lmdbs: HashMap, Database, Database)>, - root_path: PathBuf, - max_size_per_shard: usize, - executor: task_executor::Executor, - lease_time: Duration, - shard_count: u8, - shard_fingerprint_mask: u8, -} - -impl ShardedLmdb { - // Whenever we change the byte format of data stored in lmdb, we will - // need to increment this schema version. This schema version will - // be appended to the Fingerprint-derived keys to create the key - // we actually store in the database. This way, data stored with a version - // of pants on one schema version will not conflict with data stored - // with a different version of pants on a different schema version. - pub const SCHEMA_VERSION: u8 = 2; - - // max_size is the maximum size the databases together will be allowed to grow to. - // When calling this function, we will attempt to allocate that much virtual (not resident) memory - // for the mmap; in theory it should be possible not to bound this, but in practice we see travis - // occasionally fail tests because it's unable to allocate virtual memory if we set this too high, - // and we have too many tests running concurrently or close together. - pub fn new( + // First Database is content, second is leases. + lmdbs: HashMap, Database, Database)>, root_path: PathBuf, - max_size: usize, + max_size_per_shard: usize, executor: task_executor::Executor, lease_time: Duration, shard_count: u8, - ) -> Result { - if shard_count.count_ones() != 1 { - return Err(format!( - "The shard_count must be a power of two: got {shard_count}." - )); + shard_fingerprint_mask: u8, +} + +impl ShardedLmdb { + // Whenever we change the byte format of data stored in lmdb, we will + // need to increment this schema version. This schema version will + // be appended to the Fingerprint-derived keys to create the key + // we actually store in the database. This way, data stored with a version + // of pants on one schema version will not conflict with data stored + // with a different version of pants on a different schema version. + pub const SCHEMA_VERSION: u8 = 2; + + // max_size is the maximum size the databases together will be allowed to grow to. + // When calling this function, we will attempt to allocate that much virtual (not resident) memory + // for the mmap; in theory it should be possible not to bound this, but in practice we see travis + // occasionally fail tests because it's unable to allocate virtual memory if we set this too high, + // and we have too many tests running concurrently or close together. + pub fn new( + root_path: PathBuf, + max_size: usize, + executor: task_executor::Executor, + lease_time: Duration, + shard_count: u8, + ) -> Result { + if shard_count.count_ones() != 1 { + return Err(format!( + "The shard_count must be a power of two: got {shard_count}." + )); + } + + let max_size_per_shard = max_size / (shard_count as usize); + // We select which shard to use by masking to select only the relevant number of high order bits + // from the high order byte of each stored key. + let shard_fingerprint_mask = { + // Create a mask of the appropriate width. 
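
(Aside, not part of the patch: a minimal sketch of the key layout described by the SCHEMA_VERSION comment above. It assumes the 32-byte fingerprints produced by the `hashing` crate; the constant names here are standalone stand-ins for `FINGERPRINT_SIZE` and `VERSIONED_FINGERPRINT_SIZE` defined earlier in this file.)

    // Sketch only: mirrors VersionedFingerprint::new with local constants.
    const FP_SIZE: usize = 32; // assumption: matches hashing::FINGERPRINT_SIZE
    const VERSIONED_FP_SIZE: usize = FP_SIZE + 1;

    fn versioned_key(fingerprint: [u8; FP_SIZE], version: u8) -> [u8; VERSIONED_FP_SIZE] {
        let mut key = [0u8; VERSIONED_FP_SIZE];
        key[..FP_SIZE].copy_from_slice(&fingerprint);
        // The trailing byte is the schema version, so bumping SCHEMA_VERSION changes
        // every stored key and cleanly segregates data written under older versions.
        key[FP_SIZE] = version;
        key
    }
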
+ let mask_width = shard_count.trailing_zeros(); + let mut mask = 0_u8; + for _ in 0..mask_width { + mask <<= 1; + mask |= 1; + } + // Then move it into the high order bits. + mask.rotate_left(Self::shard_shift(shard_count) as u32) + }; + + trace!("Initializing ShardedLmdb at root {:?}", root_path); + let mut lmdbs = HashMap::new(); + + for (env, dir, environment_id) in + ShardedLmdb::envs(&root_path, max_size_per_shard, shard_count)? + { + let content_database = env + .create_db(Some("content-versioned"), DatabaseFlags::empty()) + .map_err(|e| format!("Error creating/opening content database at {dir:?}: {e}"))?; + + let lease_database = env + .create_db(Some("leases-versioned"), DatabaseFlags::empty()) + .map_err(|e| format!("Error creating/opening content database at {dir:?}: {e}"))?; + + lmdbs.insert( + environment_id, + ( + environment_id, + dir, + Arc::new(env), + content_database, + lease_database, + ), + ); + } + + Ok(ShardedLmdb { + lmdbs, + root_path, + max_size_per_shard, + executor, + lease_time, + shard_count, + shard_fingerprint_mask, + }) } - let max_size_per_shard = max_size / (shard_count as usize); - // We select which shard to use by masking to select only the relevant number of high order bits - // from the high order byte of each stored key. - let shard_fingerprint_mask = { - // Create a mask of the appropriate width. - let mask_width = shard_count.trailing_zeros(); - let mut mask = 0_u8; - for _ in 0..mask_width { - mask <<= 1; - mask |= 1; - } - // Then move it into the high order bits. - mask.rotate_left(Self::shard_shift(shard_count) as u32) - }; - - trace!("Initializing ShardedLmdb at root {:?}", root_path); - let mut lmdbs = HashMap::new(); - - for (env, dir, environment_id) in - ShardedLmdb::envs(&root_path, max_size_per_shard, shard_count)? - { - let content_database = env - .create_db(Some("content-versioned"), DatabaseFlags::empty()) - .map_err(|e| format!("Error creating/opening content database at {dir:?}: {e}"))?; - - let lease_database = env - .create_db(Some("leases-versioned"), DatabaseFlags::empty()) - .map_err(|e| format!("Error creating/opening content database at {dir:?}: {e}"))?; - - lmdbs.insert( - environment_id, - ( - environment_id, - dir, - Arc::new(env), - content_database, - lease_database, - ), - ); + /// + /// Return the left shift value that will place the relevant portion of a byte (for the given + /// shard count, which is asserted in the constructor to be a power of two) into the high order + /// bits of a byte. + /// + fn shard_shift(shard_count: u8) -> u8 { + let mask_width = shard_count.trailing_zeros() as u8; + 8 - mask_width } - Ok(ShardedLmdb { - lmdbs, - root_path, - max_size_per_shard, - executor, - lease_time, - shard_count, - shard_fingerprint_mask, - }) - } - - /// - /// Return the left shift value that will place the relevant portion of a byte (for the given - /// shard count, which is asserted in the constructor to be a power of two) into the high order - /// bits of a byte. 
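
(Aside, not part of the patch: a worked example of the shard-selection mask built above, for a hypothetical shard_count of 16.)

    // shard_count = 16 => trailing_zeros = 4, shard_shift = 8 - 4 = 4.
    // The loop above builds 0b0000_1111, which rotate_left(4) moves into the high
    // bits: 0b1111_0000. A key whose first byte is 0xAB therefore selects
    // EnvironmentId(0xA0), i.e. the environment created for b = 0x0A in `envs`
    // (0x0A.rotate_left(4) == 0xA0).
    fn shard_for(first_key_byte: u8) -> u8 {
        let shard_count: u8 = 16; // assumption for the example; must be a power of two
        let shard_shift = 8 - shard_count.trailing_zeros() as u8;
        // For a power-of-two count this is equivalent to the mask the loop produces.
        let mask = (shard_count - 1).rotate_left(shard_shift as u32);
        first_key_byte & mask
    }
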
- /// - fn shard_shift(shard_count: u8) -> u8 { - let mask_width = shard_count.trailing_zeros() as u8; - 8 - mask_width - } - - fn envs( - root_path: &Path, - max_size_per_shard: usize, - shard_count: u8, - ) -> Result, String> { - let shard_shift = Self::shard_shift(shard_count); - - let mut envs = Vec::with_capacity(shard_count as usize); - for b in 0..shard_count { - let dir = root_path.join(format!("{b:x}")); - std::fs::create_dir_all(&dir) - .map_err(|err| format!("Error making directory for store at {dir:?}: {err:?}"))?; - let fingerprint_prefix = b.rotate_left(shard_shift as u32); - envs.push(( - ShardedLmdb::make_env(&dir, max_size_per_shard)?, - dir, - EnvironmentId(fingerprint_prefix), - )); + fn envs( + root_path: &Path, + max_size_per_shard: usize, + shard_count: u8, + ) -> Result, String> { + let shard_shift = Self::shard_shift(shard_count); + + let mut envs = Vec::with_capacity(shard_count as usize); + for b in 0..shard_count { + let dir = root_path.join(format!("{b:x}")); + std::fs::create_dir_all(&dir) + .map_err(|err| format!("Error making directory for store at {dir:?}: {err:?}"))?; + let fingerprint_prefix = b.rotate_left(shard_shift as u32); + envs.push(( + ShardedLmdb::make_env(&dir, max_size_per_shard)?, + dir, + EnvironmentId(fingerprint_prefix), + )); + } + Ok(envs) + } + + fn make_env(dir: &Path, max_size_per_shard: usize) -> Result { + Environment::new() + // NO_SYNC + // ======= + // + // Don't force fsync on every lmdb write transaction + // + // This significantly improves performance on slow or contended disks. + // + // On filesystems which preserve order of writes, on system crash this may lead to some + // transactions being rolled back. This is fine because this is just a write-once + // content-addressed cache. There is no risk of corruption, just compromised durability. + // + // On filesystems which don't preserve the order of writes, this may lead to lmdb + // corruption on system crash (but in no other circumstances, such as process crash). + // + // ------------------------------------------------------------------------------------ + // + // NO_TLS + // ====== + // + // Without this flag, each time a read transaction is started, it eats into our + // transaction limit (default: 126) until that thread dies. + // + // This flag makes transactions be removed from that limit when they are dropped, rather + // than when their thread dies. This is important, because we perform reads from a + // thread pool, so our threads never die. Without this flag, all read requests will fail + // after the first 126. + // + // The only down-side is that you need to make sure that any individual OS thread must + // not try to perform multiple write transactions concurrently. Fortunately, this + // property holds for us. + .set_flags(EnvironmentFlags::NO_SYNC | EnvironmentFlags::NO_TLS) + // 2 DBs; one for file contents, one for leases. + .set_max_dbs(2) + .set_map_size(max_size_per_shard) + .open(dir) + .map_err(|e| format!("Error making env for store at {dir:?}: {e}")) + } + + // First Database is content, second is leases. 
+ pub fn get(&self, fingerprint: &Fingerprint) -> (Arc, Database, Database) { + let (_, _, env, db1, db2) = self.get_raw(&fingerprint.0); + (env.clone(), *db1, *db2) + } + + pub(crate) fn get_raw( + &self, + fingerprint: &[u8], + ) -> &(EnvironmentId, PathBuf, Arc, Database, Database) { + &self.lmdbs[&EnvironmentId(fingerprint[0] & self.shard_fingerprint_mask)] } - Ok(envs) - } - - fn make_env(dir: &Path, max_size_per_shard: usize) -> Result { - Environment::new() - // NO_SYNC - // ======= - // - // Don't force fsync on every lmdb write transaction - // - // This significantly improves performance on slow or contended disks. - // - // On filesystems which preserve order of writes, on system crash this may lead to some - // transactions being rolled back. This is fine because this is just a write-once - // content-addressed cache. There is no risk of corruption, just compromised durability. - // - // On filesystems which don't preserve the order of writes, this may lead to lmdb - // corruption on system crash (but in no other circumstances, such as process crash). - // - // ------------------------------------------------------------------------------------ - // - // NO_TLS - // ====== - // - // Without this flag, each time a read transaction is started, it eats into our - // transaction limit (default: 126) until that thread dies. - // - // This flag makes transactions be removed from that limit when they are dropped, rather - // than when their thread dies. This is important, because we perform reads from a - // thread pool, so our threads never die. Without this flag, all read requests will fail - // after the first 126. - // - // The only down-side is that you need to make sure that any individual OS thread must - // not try to perform multiple write transactions concurrently. Fortunately, this - // property holds for us. - .set_flags(EnvironmentFlags::NO_SYNC | EnvironmentFlags::NO_TLS) - // 2 DBs; one for file contents, one for leases. - .set_max_dbs(2) - .set_map_size(max_size_per_shard) - .open(dir) - .map_err(|e| format!("Error making env for store at {dir:?}: {e}")) - } - - // First Database is content, second is leases. 
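
(Aside, not part of the patch: a hypothetical usage sketch of the constructor and accessor shown above. The root path, map size, lease time, and the `executor` and `fingerprint` values are illustrative assumptions, not taken from the patch.)

    // Sketch only: open a sharded store and look up the shard for one fingerprint.
    fn example(
        executor: task_executor::Executor,
        fingerprint: hashing::Fingerprint,
    ) -> Result<(), String> {
        let store = ShardedLmdb::new(
            std::path::PathBuf::from("/tmp/lmdb-store"), // root; one subdirectory per shard
            16 * 1024 * 1024 * 1024,                     // total map size across all shards
            executor,
            std::time::Duration::from_secs(2 * 60 * 60), // lease_time
            16,                                          // shard_count: must be a power of two
        )?;
        // The shard is chosen from the high bits of the fingerprint's first byte.
        let (_env, _content_db, _lease_db) = store.get(&fingerprint);
        Ok(())
    }
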
- pub fn get(&self, fingerprint: &Fingerprint) -> (Arc, Database, Database) { - let (_, _, env, db1, db2) = self.get_raw(&fingerprint.0); - (env.clone(), *db1, *db2) - } - - pub(crate) fn get_raw( - &self, - fingerprint: &[u8], - ) -> &(EnvironmentId, PathBuf, Arc, Database, Database) { - &self.lmdbs[&EnvironmentId(fingerprint[0] & self.shard_fingerprint_mask)] - } - - fn all_lmdbs(&self) -> Vec<(Arc, Database, Database)> { - self - .lmdbs - .values() - .map(|(_, _, env, db1, db2)| (env.clone(), *db1, *db2)) - .collect() - } - - pub async fn remove(&self, fingerprint: Fingerprint) -> Result { - let store = self.clone(); - self - .executor - .spawn_blocking( - move || { - let effective_key = VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION); - let (env, db, lease_database) = store.get(&fingerprint); - let del_res = env.begin_rw_txn().and_then(|mut txn| { - txn.del(db, &effective_key, None)?; - txn - .del(lease_database, &effective_key, None) - .or_else(|err| match err { - lmdb::Error::NotFound => Ok(()), - err => Err(err), - })?; - txn.commit() - }); - - match del_res { - Ok(()) => Ok(true), - Err(lmdb::Error::NotFound) => Ok(false), - Err(err) => Err(format!( - "Error removing versioned key {:?}: {}", - effective_key.to_hex(), - err - )), - } - }, - |e| Err(format!("`remove` task failed: {e}")), - ) - .await - } - - /// - /// Singular form of `Self::exists_batch`. When checking the existence of more than one item, - /// prefer `Self::exists_batch`. - /// - pub async fn exists(&self, fingerprint: Fingerprint) -> Result { - let missing = self.exists_batch(vec![fingerprint]).await?; - Ok(missing.contains(&fingerprint)) - } - - /// - /// Determine which of the given Fingerprints are already present in the store, - /// returning them as a set. - /// - pub async fn exists_batch( - &self, - fingerprints: Vec, - ) -> Result, String> { - let store = self.clone(); - self - .executor - .spawn_blocking( - move || { - // Group the items by the Environment that they will be applied to. - let mut items_by_env = HashMap::new(); - let mut exists = HashSet::new(); - - for fingerprint in &fingerprints { - let effective_key = - VersionedFingerprint::new(*fingerprint, ShardedLmdb::SCHEMA_VERSION); - let (env_id, _, env, db, _) = store.get_raw(&fingerprint.0); - - let (_, _, batch) = items_by_env - .entry(*env_id) - .or_insert_with(|| (env.clone(), *db, vec![])); - batch.push(effective_key); - } - - // Open and commit a Transaction per Environment. Since we never have more than one - // Transaction open at a time, we don't have to worry about ordering. 
- for (_, (env, db, batch)) in items_by_env { - env - .begin_ro_txn() - .and_then(|txn| { - for effective_key in &batch { - let get_res = txn.get(db, &effective_key); - match get_res { - Ok(_) => { - exists.insert(effective_key.get_fingerprint()); + + fn all_lmdbs(&self) -> Vec<(Arc, Database, Database)> { + self.lmdbs + .values() + .map(|(_, _, env, db1, db2)| (env.clone(), *db1, *db2)) + .collect() + } + + pub async fn remove(&self, fingerprint: Fingerprint) -> Result { + let store = self.clone(); + self.executor + .spawn_blocking( + move || { + let effective_key = + VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION); + let (env, db, lease_database) = store.get(&fingerprint); + let del_res = env.begin_rw_txn().and_then(|mut txn| { + txn.del(db, &effective_key, None)?; + txn.del(lease_database, &effective_key, None) + .or_else(|err| match err { + lmdb::Error::NotFound => Ok(()), + err => Err(err), + })?; + txn.commit() + }); + + match del_res { + Ok(()) => Ok(true), + Err(lmdb::Error::NotFound) => Ok(false), + Err(err) => Err(format!( + "Error removing versioned key {:?}: {}", + effective_key.to_hex(), + err + )), } - Err(lmdb::Error::NotFound) => (), - Err(err) => return Err(err), - }; - } - txn.commit() - }) - .map_err(|e| { - format!( - "Error checking existence of fingerprints {:?}: {}", - batch - .iter() - .map(|key| key.get_fingerprint()) - .collect::>(), - e - ) - })?; - } - Ok(exists) - }, - |e| Err(format!("`exists_batch` task failed: {e}")), - ) - .await - } - - /// - /// Returns all fingerprints and their ages. - /// - pub async fn all_fingerprints(&self) -> Result, String> { - let store = self.clone(); - self - .executor - .spawn_blocking( - move || { - let mut fingerprints = Vec::new(); - for (env, database, lease_database) in &store.all_lmdbs() { - let txn = env - .begin_ro_txn() - .map_err(|err| format!("Error beginning transaction to garbage collect: {err}"))?; - let mut cursor = txn - .open_ro_cursor(*database) - .map_err(|err| format!("Failed to open lmdb read cursor: {err}"))?; - for key_res in cursor.iter() { - let (key, bytes) = - key_res.map_err(|err| format!("Failed to advance lmdb read cursor: {err}"))?; - - // Random access into the lease_database is slower than iterating, but hopefully garbage - // collection is rare enough that we can get away with this, rather than do two passes - // here (either to populate leases into pre-populated AgedFingerprints, or to read sizes - // when we delete from lmdb to track how much we've freed). - let lease_until_unix_timestamp = txn - .get(*lease_database, &key) - .map(|b| { - let mut array = [0_u8; 8]; - array.copy_from_slice(b); - u64::from_le_bytes(array) - }) - .unwrap_or_else(|e| match e { - lmdb::Error::NotFound => 0, - e => panic!("Error reading lease, probable lmdb corruption: {e:?}"), - }); - - let leased_until = time::UNIX_EPOCH + Duration::from_secs(lease_until_unix_timestamp); - - let expired_seconds_ago = time::SystemTime::now() - .duration_since(leased_until) - .map(|t| t.as_secs()) - // 0 indicates unexpired. - .unwrap_or(0); - - let v = VersionedFingerprint::from_bytes_unsafe(key); - let fingerprint = v.get_fingerprint(); - fingerprints.push(AgedFingerprint { - expired_seconds_ago, - fingerprint, - size_bytes: bytes.len(), - }); - } - } - Ok(fingerprints) - }, - |e| Err(format!("`all_fingerprints` task failed: {e}")), - ) - .await - } - - /// - /// Singular form of `Self::store_bytes_batch`. When storing more than one item in parallel, - /// prefer `Self::store_bytes_batch`. 
- /// - pub async fn store_bytes( - &self, - fingerprint: Fingerprint, - bytes: Bytes, - initial_lease: bool, - ) -> Result { - self - .store_bytes_batch(vec![(fingerprint, bytes)], initial_lease) - .await?; - Ok(fingerprint) - } - - /// - /// Store the given Bytes instances under the given Fingerprints, or under their computed - /// Fingerprints. For large/streaming usecases, prefer `Self::store`. - /// - /// See also: `Self::store_bytes`. - /// - pub async fn store_bytes_batch( - &self, - items: Vec<(Fingerprint, Bytes)>, - initial_lease: bool, - ) -> Result<(), String> { - let store = self.clone(); - self - .executor - .spawn_blocking( - move || { - // Group the items by the Environment that they will be applied to. - let mut items_by_env = HashMap::new(); - let mut fingerprints = Vec::new(); - for (fingerprint, bytes) in items { - let effective_key = VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION); - let (env_id, _, env, db, lease_database) = store.get_raw(&fingerprint.0); - - let (_, _, _, batch) = items_by_env - .entry(*env_id) - .or_insert_with(|| (env.clone(), *db, *lease_database, vec![])); - batch.push((effective_key, bytes)); - fingerprints.push(fingerprint); - } - - // Open and commit a Transaction per Environment. Since we never have more than one - // Transaction open at a time, we don't have to worry about ordering. - for (_, (env, db, lease_database, batch)) in items_by_env { - env - .begin_rw_txn() - .and_then(|mut txn| { - for (effective_key, bytes) in &batch { - let put_res = txn.put(db, &effective_key, &bytes, WriteFlags::NO_OVERWRITE); - match put_res { - Ok(()) => (), - Err(lmdb::Error::KeyExist) => continue, - Err(err) => return Err(err), - } - if initial_lease { - store.lease_inner( - lease_database, - effective_key, - store.lease_until_secs_since_epoch(), - &mut txn, - )?; - } - } - txn.commit() - }) - .map_err(|e| { - format!( - "Error storing fingerprints {:?}: {}", - batch - .iter() - .map(|(key, _)| key.to_hex()) - .collect::>(), - e - ) - })?; - } - - Ok(()) - }, - |e| Err(format!("`store_bytes_batch` task failed: {e}")), - ) - .await - } - - /// - /// Stores the given Read instance under its computed digest in two passes. If !data_is_immutable, - /// we will re-hash the data to confirm that it hasn't changed. - /// - /// If the Read instance gets longer between Reads, we will not detect that here, but any - /// captured data will still be valid. - /// - pub async fn store( - &self, - initial_lease: bool, - data_is_immutable: bool, - expected_digest: Digest, - data_provider: F, - ) -> Result<(), String> - where - R: Read + Debug, - F: Fn() -> Result + Send + 'static, - { - let store = self.clone(); - self - .executor - .spawn_blocking( - move || { - let mut attempts = 0; - loop { - let effective_key = - VersionedFingerprint::new(expected_digest.hash, ShardedLmdb::SCHEMA_VERSION); - let (env, db, lease_database) = store.get(&expected_digest.hash); - let put_res: Result<(), StoreError> = env - .begin_rw_txn() - .map_err(StoreError::Lmdb) - .and_then(|mut txn| { - // Second pass: copy into the reserved memory. - let mut writer = txn - .reserve( - db, - &effective_key, - expected_digest.size_bytes, - WriteFlags::NO_OVERWRITE, - )? - .writer(); - let mut read = data_provider().map_err(|e| format!("Failed to read: {e}"))?; - let should_retry = + }, + |e| Err(format!("`remove` task failed: {e}")), + ) + .await + } + + /// + /// Singular form of `Self::exists_batch`. 
When checking the existence of more than one item, + /// prefer `Self::exists_batch`. + /// + pub async fn exists(&self, fingerprint: Fingerprint) -> Result { + let missing = self.exists_batch(vec![fingerprint]).await?; + Ok(missing.contains(&fingerprint)) + } + + /// + /// Determine which of the given Fingerprints are already present in the store, + /// returning them as a set. + /// + pub async fn exists_batch( + &self, + fingerprints: Vec, + ) -> Result, String> { + let store = self.clone(); + self.executor + .spawn_blocking( + move || { + // Group the items by the Environment that they will be applied to. + let mut items_by_env = HashMap::new(); + let mut exists = HashSet::new(); + + for fingerprint in &fingerprints { + let effective_key = + VersionedFingerprint::new(*fingerprint, ShardedLmdb::SCHEMA_VERSION); + let (env_id, _, env, db, _) = store.get_raw(&fingerprint.0); + + let (_, _, batch) = items_by_env + .entry(*env_id) + .or_insert_with(|| (env.clone(), *db, vec![])); + batch.push(effective_key); + } + + // Open and commit a Transaction per Environment. Since we never have more than one + // Transaction open at a time, we don't have to worry about ordering. + for (_, (env, db, batch)) in items_by_env { + env.begin_ro_txn() + .and_then(|txn| { + for effective_key in &batch { + let get_res = txn.get(db, &effective_key); + match get_res { + Ok(_) => { + exists.insert(effective_key.get_fingerprint()); + } + Err(lmdb::Error::NotFound) => (), + Err(err) => return Err(err), + }; + } + txn.commit() + }) + .map_err(|e| { + format!( + "Error checking existence of fingerprints {:?}: {}", + batch + .iter() + .map(|key| key.get_fingerprint()) + .collect::>(), + e + ) + })?; + } + Ok(exists) + }, + |e| Err(format!("`exists_batch` task failed: {e}")), + ) + .await + } + + /// + /// Returns all fingerprints and their ages. + /// + pub async fn all_fingerprints(&self) -> Result, String> { + let store = self.clone(); + self.executor + .spawn_blocking( + move || { + let mut fingerprints = Vec::new(); + for (env, database, lease_database) in &store.all_lmdbs() { + let txn = env.begin_ro_txn().map_err(|err| { + format!("Error beginning transaction to garbage collect: {err}") + })?; + let mut cursor = txn + .open_ro_cursor(*database) + .map_err(|err| format!("Failed to open lmdb read cursor: {err}"))?; + for key_res in cursor.iter() { + let (key, bytes) = key_res.map_err(|err| { + format!("Failed to advance lmdb read cursor: {err}") + })?; + + // Random access into the lease_database is slower than iterating, but hopefully garbage + // collection is rare enough that we can get away with this, rather than do two passes + // here (either to populate leases into pre-populated AgedFingerprints, or to read sizes + // when we delete from lmdb to track how much we've freed). + let lease_until_unix_timestamp = txn + .get(*lease_database, &key) + .map(|b| { + let mut array = [0_u8; 8]; + array.copy_from_slice(b); + u64::from_le_bytes(array) + }) + .unwrap_or_else(|e| match e { + lmdb::Error::NotFound => 0, + e => panic!( + "Error reading lease, probable lmdb corruption: {e:?}" + ), + }); + + let leased_until = + time::UNIX_EPOCH + Duration::from_secs(lease_until_unix_timestamp); + + let expired_seconds_ago = time::SystemTime::now() + .duration_since(leased_until) + .map(|t| t.as_secs()) + // 0 indicates unexpired. 
+ .unwrap_or(0); + + let v = VersionedFingerprint::from_bytes_unsafe(key); + let fingerprint = v.get_fingerprint(); + fingerprints.push(AgedFingerprint { + expired_seconds_ago, + fingerprint, + size_bytes: bytes.len(), + }); + } + } + Ok(fingerprints) + }, + |e| Err(format!("`all_fingerprints` task failed: {e}")), + ) + .await + } + + /// + /// Singular form of `Self::store_bytes_batch`. When storing more than one item in parallel, + /// prefer `Self::store_bytes_batch`. + /// + pub async fn store_bytes( + &self, + fingerprint: Fingerprint, + bytes: Bytes, + initial_lease: bool, + ) -> Result { + self.store_bytes_batch(vec![(fingerprint, bytes)], initial_lease) + .await?; + Ok(fingerprint) + } + + /// + /// Store the given Bytes instances under the given Fingerprints, or under their computed + /// Fingerprints. For large/streaming usecases, prefer `Self::store`. + /// + /// See also: `Self::store_bytes`. + /// + pub async fn store_bytes_batch( + &self, + items: Vec<(Fingerprint, Bytes)>, + initial_lease: bool, + ) -> Result<(), String> { + let store = self.clone(); + self.executor + .spawn_blocking( + move || { + // Group the items by the Environment that they will be applied to. + let mut items_by_env = HashMap::new(); + let mut fingerprints = Vec::new(); + for (fingerprint, bytes) in items { + let effective_key = + VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION); + let (env_id, _, env, db, lease_database) = store.get_raw(&fingerprint.0); + + let (_, _, _, batch) = items_by_env + .entry(*env_id) + .or_insert_with(|| (env.clone(), *db, *lease_database, vec![])); + batch.push((effective_key, bytes)); + fingerprints.push(fingerprint); + } + + // Open and commit a Transaction per Environment. Since we never have more than one + // Transaction open at a time, we don't have to worry about ordering. + for (_, (env, db, lease_database, batch)) in items_by_env { + env.begin_rw_txn() + .and_then(|mut txn| { + for (effective_key, bytes) in &batch { + let put_res = txn.put( + db, + &effective_key, + &bytes, + WriteFlags::NO_OVERWRITE, + ); + match put_res { + Ok(()) => (), + Err(lmdb::Error::KeyExist) => continue, + Err(err) => return Err(err), + } + if initial_lease { + store.lease_inner( + lease_database, + effective_key, + store.lease_until_secs_since_epoch(), + &mut txn, + )?; + } + } + txn.commit() + }) + .map_err(|e| { + format!( + "Error storing fingerprints {:?}: {}", + batch + .iter() + .map(|(key, _)| key.to_hex()) + .collect::>(), + e + ) + })?; + } + + Ok(()) + }, + |e| Err(format!("`store_bytes_batch` task failed: {e}")), + ) + .await + } + + /// + /// Stores the given Read instance under its computed digest in two passes. If !data_is_immutable, + /// we will re-hash the data to confirm that it hasn't changed. + /// + /// If the Read instance gets longer between Reads, we will not detect that here, but any + /// captured data will still be valid. 
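// A std-only sketch of the batching pattern shared by `exists_batch` and `store_bytes_batch`
// above: keys are grouped by the shard (Environment) that owns them, so exactly one LMDB
// transaction is opened per shard no matter how many keys are in the batch. The mask value is
// an assumption standing in for `shard_fingerprint_mask` (0xC0 corresponds to four shards).
use std::collections::HashMap;

fn group_by_shard(first_bytes: &[u8], shard_mask: u8) -> HashMap<u8, Vec<u8>> {
    let mut by_shard: HashMap<u8, Vec<u8>> = HashMap::new();
    for &b in first_bytes {
        by_shard.entry(b & shard_mask).or_default().push(b);
    }
    by_shard
}

fn main() {
    // Five keys collapse into four per-shard batches; 0x01 and 0x3F share the 0x00 shard.
    let groups = group_by_shard(&[0x01, 0x3F, 0x45, 0x90, 0xFF], 0xC0);
    assert_eq!(groups.len(), 4);
    assert_eq!(groups[&0x00], vec![0x01, 0x3F]);
}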
+ /// + pub async fn store( + &self, + initial_lease: bool, + data_is_immutable: bool, + expected_digest: Digest, + data_provider: F, + ) -> Result<(), String> + where + R: Read + Debug, + F: Fn() -> Result + Send + 'static, + { + let store = self.clone(); + self.executor + .spawn_blocking( + move || { + let mut attempts = 0; + loop { + let effective_key = VersionedFingerprint::new( + expected_digest.hash, + ShardedLmdb::SCHEMA_VERSION, + ); + let (env, db, lease_database) = store.get(&expected_digest.hash); + let put_res: Result<(), StoreError> = + env.begin_rw_txn() + .map_err(StoreError::Lmdb) + .and_then(|mut txn| { + // Second pass: copy into the reserved memory. + let mut writer = txn + .reserve( + db, + &effective_key, + expected_digest.size_bytes, + WriteFlags::NO_OVERWRITE, + )? + .writer(); + let mut read = data_provider() + .map_err(|e| format!("Failed to read: {e}"))?; + let should_retry = !sync_verified_copy(expected_digest, data_is_immutable, &mut read, &mut writer) .map_err(|e| { format!("Failed to copy from {read:?} or store in {env:?}: {e:?}") })?; - if should_retry { - let msg = format!("Input {read:?} changed while reading."); - log::debug!("{}", msg); - return Err(StoreError::Retry(msg)); - } + if should_retry { + let msg = format!("Input {read:?} changed while reading."); + log::debug!("{}", msg); + return Err(StoreError::Retry(msg)); + } + + if initial_lease { + store.lease_inner( + lease_database, + &effective_key, + store.lease_until_secs_since_epoch(), + &mut txn, + )?; + } + txn.commit()?; + Ok(()) + }); + + match put_res { + Ok(()) => return Ok(()), + Err(StoreError::Retry(msg)) => { + // Input changed during reading: maybe retry. + if attempts > 10 { + return Err(msg); + } else { + attempts += 1; + continue; + } + } + Err(StoreError::Lmdb(lmdb::Error::KeyExist)) => return Ok(()), + Err(StoreError::Lmdb(err)) => { + return Err(format!("Error storing {expected_digest:?}: {err}")) + } + Err(StoreError::Io(err)) => { + return Err(format!("Error storing {expected_digest:?}: {err}")) + } + }; + } + }, + |e| Err(format!("`store` task failed: {e}")), + ) + .await + } - if initial_lease { - store.lease_inner( - lease_database, - &effective_key, - store.lease_until_secs_since_epoch(), - &mut txn, - )?; - } - txn.commit()?; - Ok(()) - }); - - match put_res { - Ok(()) => return Ok(()), - Err(StoreError::Retry(msg)) => { - // Input changed during reading: maybe retry. 
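// A simplified sketch of the "verified copy" idea that `store` above delegates to
// `sync_verified_copy`: read the provided data, check that it still hashes to the expected
// digest, and only then hand it to the reserved LMDB writer. Returning `false` mirrors the
// convention above, where `!sync_verified_copy(..)` means "the input changed, maybe retry".
// This version buffers the whole payload for clarity; the real helper streams, and only
// re-hashes when `data_is_immutable` is false.
use std::io::{Read, Write};

use hashing::Digest;

fn verified_copy<R: Read, W: Write>(
    expected: Digest,
    mut read: R,
    mut write: W,
) -> Result<bool, String> {
    let mut buf = Vec::new();
    read.read_to_end(&mut buf)
        .map_err(|e| format!("Failed to read: {e}"))?;
    if Digest::of_bytes(&buf) != expected {
        // Input changed while reading: the caller may retry with a fresh reader.
        return Ok(false);
    }
    write
        .write_all(&buf)
        .map_err(|e| format!("Failed to write: {e}"))?;
    Ok(true)
}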
- if attempts > 10 { - return Err(msg); - } else { - attempts += 1; - continue; - } - } - Err(StoreError::Lmdb(lmdb::Error::KeyExist)) => return Ok(()), - Err(StoreError::Lmdb(err)) => { - return Err(format!("Error storing {expected_digest:?}: {err}")) - } - Err(StoreError::Io(err)) => { - return Err(format!("Error storing {expected_digest:?}: {err}")) - } - }; - } - }, - |e| Err(format!("`store` task failed: {e}")), - ) - .await - } - - pub async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String> { - let store = self.clone(); - self - .executor - .spawn_blocking( - move || { - let until_secs_since_epoch: u64 = store.lease_until_secs_since_epoch(); - let (env, _, lease_database) = store.get(&fingerprint); - env - .begin_rw_txn() - .and_then(|mut txn| { - store.lease_inner( - lease_database, - &VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION), - until_secs_since_epoch, - &mut txn, - )?; - txn.commit() - }) - .map_err(|e| format!("Error leasing {fingerprint:?}: {e}")) - }, - |e| Err(format!("`lease` task failed: {e}")), - ) - .await - } - - fn lease_inner( - &self, - database: Database, - versioned_fingerprint: &VersionedFingerprint, - until_secs_since_epoch: u64, - txn: &mut RwTransaction<'_>, - ) -> Result<(), lmdb::Error> { - txn.put( - database, - &versioned_fingerprint.as_ref(), - &until_secs_since_epoch.to_le_bytes(), - WriteFlags::empty(), - ) - } - - fn lease_until_secs_since_epoch(&self) -> u64 { - let now_since_epoch = time::SystemTime::now() - .duration_since(time::UNIX_EPOCH) - .expect("Surely you're not before the unix epoch?"); - (now_since_epoch + self.lease_time).as_secs() - } - - pub async fn load_bytes_with< - T: Send + 'static, - F: FnMut(&[u8]) -> Result + Send + Sync + 'static, - >( - &self, - fingerprint: Fingerprint, - mut f: F, - ) -> Result, String> { - let store = self.clone(); - let effective_key = VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION); - self - .executor - .spawn_blocking( - move || { - let (env, db, _) = store.get(&fingerprint); - let ro_txn = env - .begin_ro_txn() - .map_err(|err| format!("Failed to begin read transaction: {err}"))?; - match ro_txn.get(db, &effective_key) { - Ok(bytes) => f(bytes).map(Some), - Err(lmdb::Error::NotFound) => Ok(None), - Err(err) => Err(format!( - "Error loading versioned key {:?}: {}", - effective_key.to_hex(), - err, - )), - } - }, - |e| Err(format!("`load_bytes_with` task failed: {e}")), - ) - .await - } - - #[allow(clippy::useless_conversion)] // False positive: https://github.com/rust-lang/rust-clippy/issues/3913 - pub fn compact(&self) -> Result<(), String> { - for (env, old_dir, _) in - ShardedLmdb::envs(&self.root_path, self.max_size_per_shard, self.shard_count)? 
- { - let new_dir = TempDir::new_in(old_dir.parent().unwrap()).expect("TODO"); - env - .copy(new_dir.path(), EnvironmentCopyFlags::COMPACT) - .map_err(|e| { - format!( - "Error copying store from {:?} to {:?}: {}", - old_dir, - new_dir.path(), - e - ) - })?; - std::fs::remove_dir_all(&old_dir) - .map_err(|e| format!("Error removing old store at {old_dir:?}: {e}"))?; - std::fs::rename(new_dir.path(), &old_dir).map_err(|e| { - format!( - "Error replacing {:?} with {:?}: {}", - old_dir, - new_dir.path(), - e + pub async fn lease(&self, fingerprint: Fingerprint) -> Result<(), String> { + let store = self.clone(); + self.executor + .spawn_blocking( + move || { + let until_secs_since_epoch: u64 = store.lease_until_secs_since_epoch(); + let (env, _, lease_database) = store.get(&fingerprint); + env.begin_rw_txn() + .and_then(|mut txn| { + store.lease_inner( + lease_database, + &VersionedFingerprint::new( + fingerprint, + ShardedLmdb::SCHEMA_VERSION, + ), + until_secs_since_epoch, + &mut txn, + )?; + txn.commit() + }) + .map_err(|e| format!("Error leasing {fingerprint:?}: {e}")) + }, + |e| Err(format!("`lease` task failed: {e}")), + ) + .await + } + + fn lease_inner( + &self, + database: Database, + versioned_fingerprint: &VersionedFingerprint, + until_secs_since_epoch: u64, + txn: &mut RwTransaction<'_>, + ) -> Result<(), lmdb::Error> { + txn.put( + database, + &versioned_fingerprint.as_ref(), + &until_secs_since_epoch.to_le_bytes(), + WriteFlags::empty(), ) - })?; + } - // Prevent the tempdir from being deleted on drop. - std::mem::drop(new_dir); + fn lease_until_secs_since_epoch(&self) -> u64 { + let now_since_epoch = time::SystemTime::now() + .duration_since(time::UNIX_EPOCH) + .expect("Surely you're not before the unix epoch?"); + (now_since_epoch + self.lease_time).as_secs() + } + + pub async fn load_bytes_with< + T: Send + 'static, + F: FnMut(&[u8]) -> Result + Send + Sync + 'static, + >( + &self, + fingerprint: Fingerprint, + mut f: F, + ) -> Result, String> { + let store = self.clone(); + let effective_key = VersionedFingerprint::new(fingerprint, ShardedLmdb::SCHEMA_VERSION); + self.executor + .spawn_blocking( + move || { + let (env, db, _) = store.get(&fingerprint); + let ro_txn = env + .begin_ro_txn() + .map_err(|err| format!("Failed to begin read transaction: {err}"))?; + match ro_txn.get(db, &effective_key) { + Ok(bytes) => f(bytes).map(Some), + Err(lmdb::Error::NotFound) => Ok(None), + Err(err) => Err(format!( + "Error loading versioned key {:?}: {}", + effective_key.to_hex(), + err, + )), + } + }, + |e| Err(format!("`load_bytes_with` task failed: {e}")), + ) + .await + } + + #[allow(clippy::useless_conversion)] // False positive: https://github.com/rust-lang/rust-clippy/issues/3913 + pub fn compact(&self) -> Result<(), String> { + for (env, old_dir, _) in + ShardedLmdb::envs(&self.root_path, self.max_size_per_shard, self.shard_count)? + { + let new_dir = TempDir::new_in(old_dir.parent().unwrap()).expect("TODO"); + env.copy(new_dir.path(), EnvironmentCopyFlags::COMPACT) + .map_err(|e| { + format!( + "Error copying store from {:?} to {:?}: {}", + old_dir, + new_dir.path(), + e + ) + })?; + std::fs::remove_dir_all(&old_dir) + .map_err(|e| format!("Error removing old store at {old_dir:?}: {e}"))?; + std::fs::rename(new_dir.path(), &old_dir).map_err(|e| { + format!( + "Error replacing {:?} with {:?}: {}", + old_dir, + new_dir.path(), + e + ) + })?; + + // Prevent the tempdir from being deleted on drop. 
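// A std-only sketch of the lease bookkeeping used by `lease_inner`,
// `lease_until_secs_since_epoch` and `all_fingerprints` above: a lease is stored as the
// little-endian u64 of the epoch second at which it expires, and an entry's age is how long
// ago that deadline passed (0 meaning "still leased / unexpired").
use std::time::{Duration, SystemTime, UNIX_EPOCH};

fn encode_lease(until_secs_since_epoch: u64) -> [u8; 8] {
    until_secs_since_epoch.to_le_bytes()
}

fn expired_seconds_ago(lease_bytes: &[u8], now: SystemTime) -> u64 {
    let mut array = [0_u8; 8];
    array.copy_from_slice(lease_bytes);
    let leased_until = UNIX_EPOCH + Duration::from_secs(u64::from_le_bytes(array));
    now.duration_since(leased_until)
        .map(|t| t.as_secs())
        .unwrap_or(0) // Deadline is still in the future: report 0 rather than failing.
}

fn main() {
    let now = SystemTime::now();
    let now_secs = now.duration_since(UNIX_EPOCH).unwrap().as_secs();
    // A lease that lapsed an hour ago reads back as ~3600 seconds of age...
    assert!(expired_seconds_ago(&encode_lease(now_secs - 3600), now) >= 3599);
    // ...while an unexpired lease reads back as 0.
    assert_eq!(expired_seconds_ago(&encode_lease(now_secs + 3600), now), 0);
}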
+ std::mem::drop(new_dir); + } + Ok(()) } - Ok(()) - } } enum StoreError { - Lmdb(lmdb::Error), - Io(String), - Retry(String), + Lmdb(lmdb::Error), + Io(String), + Retry(String), } impl From for StoreError { - fn from(err: lmdb::Error) -> Self { - Self::Lmdb(err) - } + fn from(err: lmdb::Error) -> Self { + Self::Lmdb(err) + } } impl From for StoreError { - fn from(err: String) -> Self { - Self::Io(err) - } + fn from(err: String) -> Self { + Self::Io(err) + } } #[cfg(test)] diff --git a/src/rust/engine/sharded_lmdb/src/tests.rs b/src/rust/engine/sharded_lmdb/src/tests.rs index 38ade8d82c6..1cf564e756b 100644 --- a/src/rust/engine/sharded_lmdb/src/tests.rs +++ b/src/rust/engine/sharded_lmdb/src/tests.rs @@ -11,90 +11,90 @@ use tempfile::TempDir; use crate::{ShardedLmdb, DEFAULT_LEASE_TIME}; fn new_store(shard_count: u8) -> (ShardedLmdb, TempDir) { - let tempdir = TempDir::new().unwrap(); - let s = ShardedLmdb::new( - tempdir.path().to_owned(), - 15_000_000, - Executor::new(), - DEFAULT_LEASE_TIME, - shard_count, - ) - .unwrap(); - (s, tempdir) + let tempdir = TempDir::new().unwrap(); + let s = ShardedLmdb::new( + tempdir.path().to_owned(), + 15_000_000, + Executor::new(), + DEFAULT_LEASE_TIME, + shard_count, + ) + .unwrap(); + (s, tempdir) } #[tokio::test] async fn shard_counts() { - let shard_counts = vec![1, 2, 4, 8, 16, 32, 64, 128]; - for shard_count in shard_counts { - let (s, _tempdir) = new_store(shard_count); - assert_eq!(s.all_lmdbs().len(), shard_count as usize); + let shard_counts = vec![1, 2, 4, 8, 16, 32, 64, 128]; + for shard_count in shard_counts { + let (s, _tempdir) = new_store(shard_count); + assert_eq!(s.all_lmdbs().len(), shard_count as usize); - // Confirm that each database gets an even share. - let mut databases = HashMap::new(); - for prefix_byte in 0_u8..=255_u8 { - *databases.entry(s.get_raw(&[prefix_byte]).0).or_insert(0) += 1; - } - assert_eq!(databases.len(), shard_count as usize); - for (_, count) in databases { - assert_eq!(count, 256 / shard_count as usize); + // Confirm that each database gets an even share. + let mut databases = HashMap::new(); + for prefix_byte in 0_u8..=255_u8 { + *databases.entry(s.get_raw(&[prefix_byte]).0).or_insert(0) += 1; + } + assert_eq!(databases.len(), shard_count as usize); + for (_, count) in databases { + assert_eq!(count, 256 / shard_count as usize); + } } - } } #[tokio::test] async fn store_immutable() { - let (s, _tempdir) = new_store(1); - let _ = s - .store(true, true, Digest::of_bytes(&bytes(0)), || { - Ok(bytes(0).reader()) - }) - .await - .unwrap(); + let (s, _tempdir) = new_store(1); + let _ = s + .store(true, true, Digest::of_bytes(&bytes(0)), || { + Ok(bytes(0).reader()) + }) + .await + .unwrap(); } #[tokio::test] async fn store_stable() { - let (s, _tempdir) = new_store(1); - let _ = s - .store(true, false, Digest::of_bytes(&bytes(0)), || { - Ok(bytes(0).reader()) - }) - .await - .unwrap(); + let (s, _tempdir) = new_store(1); + let _ = s + .store(true, false, Digest::of_bytes(&bytes(0)), || { + Ok(bytes(0).reader()) + }) + .await + .unwrap(); } #[tokio::test] async fn store_changing() { - let (s, _tempdir) = new_store(1); + let (s, _tempdir) = new_store(1); - // Produces Readers that change during the first two reads, but stabilize on the third and - // fourth. - let contents = Mutex::new(vec![bytes(0), bytes(1), bytes(2), bytes(2)].into_iter()); + // Produces Readers that change during the first two reads, but stabilize on the third and + // fourth. 
+ let contents = Mutex::new(vec![bytes(0), bytes(1), bytes(2), bytes(2)].into_iter()); - let _ = s - .store(true, false, Digest::of_bytes(&bytes(2)), move || { - Ok(contents.lock().next().unwrap().reader()) - }) - .await - .unwrap(); + let _ = s + .store(true, false, Digest::of_bytes(&bytes(2)), move || { + Ok(contents.lock().next().unwrap().reader()) + }) + .await + .unwrap(); } #[tokio::test] async fn store_failure() { - let (s, _tempdir) = new_store(1); + let (s, _tempdir) = new_store(1); - // Produces Readers that never stabilize. - let contents = Mutex::new((0..100).map(bytes)); + // Produces Readers that never stabilize. + let contents = Mutex::new((0..100).map(bytes)); - let result = s - .store(true, false, Digest::of_bytes(&bytes(101)), move || { - Ok(contents.lock().next().unwrap().reader()) - }) - .await; - assert!(result.is_err()); + let result = s + .store(true, false, Digest::of_bytes(&bytes(101)), move || { + Ok(contents.lock().next().unwrap().reader()) + }) + .await; + assert!(result.is_err()); } fn bytes(content: u8) -> Bytes { - Bytes::from(vec![content; 100]) + Bytes::from(vec![content; 100]) } diff --git a/src/rust/engine/src/context.rs b/src/rust/engine/src/context.rs index 2fc187111cb..8e195bb5258 100644 --- a/src/rust/engine/src/context.rs +++ b/src/rust/engine/src/context.rs @@ -29,12 +29,13 @@ use parking_lot::Mutex; use docker::docker; use process_execution::switched::SwitchedCommandRunner; use process_execution::{ - self, bounded, local, CacheContentBehavior, CommandRunner, NamedCaches, ProcessExecutionStrategy, + self, bounded, local, CacheContentBehavior, CommandRunner, NamedCaches, + ProcessExecutionStrategy, }; use protos::gen::build::bazel::remote::execution::v2::ServerCapabilities; use regex::Regex; use remote::remote_cache::{ - RemoteCacheProviderOptions, RemoteCacheRunnerOptions, RemoteCacheWarningsBehavior, + RemoteCacheProviderOptions, RemoteCacheRunnerOptions, RemoteCacheWarningsBehavior, }; use remote::{self, remote_cache}; use rule_graph::RuleGraph; @@ -53,488 +54,492 @@ use workunit_store::{Metric, RunningWorkunit}; // - The leading (?s) which sets the flag to allow . to match \n. // - The use of ungreedy repetition via .*?, so we get shortest instead of longest matches. const PEM_RE_STR: &str = - r"(?s)-----BEGIN (?P.*?)-----\s*(?P.*?)-----END (?P.*?)-----\s*"; + r"(?s)-----BEGIN (?P.*?)-----\s*(?P.*?)-----END (?P.*?)-----\s*"; /// /// The core context shared (via Arc) between the Scheduler and the Context objects of /// all running Nodes. /// pub struct Core { - pub graph: Arc, - pub tasks: Tasks, - pub rule_graph: RuleGraph, - pub types: Types, - pub intrinsics: Intrinsics, - pub executor: Executor, - store: Store, - /// The CommandRunners to use for execution, in ascending order of reliability (for the purposes - /// of backtracking). For performance reasons, caching `CommandRunners` might skip validation of - /// their outputs, and so should be listed before uncached `CommandRunners`. 
- pub command_runners: Vec>, - pub http_client: reqwest::Client, - pub local_cache: PersistentCache, - pub vfs: PosixFS, - pub watcher: Option>, - pub build_root: PathBuf, - pub local_parallelism: usize, - pub graceful_shutdown_timeout: Duration, - pub sessions: Sessions, - pub named_caches: NamedCaches, - pub immutable_inputs: ImmutableInputs, - pub local_execution_root_dir: PathBuf, + pub graph: Arc, + pub tasks: Tasks, + pub rule_graph: RuleGraph, + pub types: Types, + pub intrinsics: Intrinsics, + pub executor: Executor, + store: Store, + /// The CommandRunners to use for execution, in ascending order of reliability (for the purposes + /// of backtracking). For performance reasons, caching `CommandRunners` might skip validation of + /// their outputs, and so should be listed before uncached `CommandRunners`. + pub command_runners: Vec>, + pub http_client: reqwest::Client, + pub local_cache: PersistentCache, + pub vfs: PosixFS, + pub watcher: Option>, + pub build_root: PathBuf, + pub local_parallelism: usize, + pub graceful_shutdown_timeout: Duration, + pub sessions: Sessions, + pub named_caches: NamedCaches, + pub immutable_inputs: ImmutableInputs, + pub local_execution_root_dir: PathBuf, } #[derive(Clone, Debug)] pub struct RemotingOptions { - pub execution_enable: bool, - pub store_address: Option, - pub execution_address: Option, - pub execution_process_cache_namespace: Option, - pub instance_name: Option, - pub root_ca_certs_path: Option, - pub client_certs_path: Option, - pub client_key_path: Option, - pub store_headers: BTreeMap, - pub store_chunk_bytes: usize, - pub store_rpc_retries: usize, - pub store_rpc_concurrency: usize, - pub store_rpc_timeout: Duration, - pub store_batch_api_size_limit: usize, - pub cache_warnings_behavior: RemoteCacheWarningsBehavior, - pub cache_content_behavior: CacheContentBehavior, - pub cache_rpc_concurrency: usize, - pub cache_rpc_timeout: Duration, - pub execution_headers: BTreeMap, - pub execution_overall_deadline: Duration, - pub execution_rpc_concurrency: usize, - pub append_only_caches_base_path: Option, + pub execution_enable: bool, + pub store_address: Option, + pub execution_address: Option, + pub execution_process_cache_namespace: Option, + pub instance_name: Option, + pub root_ca_certs_path: Option, + pub client_certs_path: Option, + pub client_key_path: Option, + pub store_headers: BTreeMap, + pub store_chunk_bytes: usize, + pub store_rpc_retries: usize, + pub store_rpc_concurrency: usize, + pub store_rpc_timeout: Duration, + pub store_batch_api_size_limit: usize, + pub cache_warnings_behavior: RemoteCacheWarningsBehavior, + pub cache_content_behavior: CacheContentBehavior, + pub cache_rpc_concurrency: usize, + pub cache_rpc_timeout: Duration, + pub execution_headers: BTreeMap, + pub execution_overall_deadline: Duration, + pub execution_rpc_concurrency: usize, + pub append_only_caches_base_path: Option, } #[derive(Clone, Debug)] pub struct ExecutionStrategyOptions { - pub local_parallelism: usize, - pub remote_parallelism: usize, - pub local_keep_sandboxes: local::KeepSandboxes, - pub local_cache: bool, - pub local_enable_nailgun: bool, - pub remote_cache_read: bool, - pub remote_cache_write: bool, - pub child_max_memory: usize, - pub child_default_memory: usize, - pub graceful_shutdown_timeout: Duration, + pub local_parallelism: usize, + pub remote_parallelism: usize, + pub local_keep_sandboxes: local::KeepSandboxes, + pub local_cache: bool, + pub local_enable_nailgun: bool, + pub remote_cache_read: bool, + pub remote_cache_write: 
bool, + pub child_max_memory: usize, + pub child_default_memory: usize, + pub graceful_shutdown_timeout: Duration, } #[derive(Clone, Debug)] pub struct LocalStoreOptions { - pub store_dir: PathBuf, - pub process_cache_max_size_bytes: usize, - pub files_max_size_bytes: usize, - pub directories_max_size_bytes: usize, - pub lease_time: Duration, - pub shard_count: u8, + pub store_dir: PathBuf, + pub process_cache_max_size_bytes: usize, + pub files_max_size_bytes: usize, + pub directories_max_size_bytes: usize, + pub lease_time: Duration, + pub shard_count: u8, } impl From<&LocalStoreOptions> for store::LocalOptions { - fn from(lso: &LocalStoreOptions) -> Self { - Self { - files_max_size_bytes: lso.files_max_size_bytes, - directories_max_size_bytes: lso.directories_max_size_bytes, - lease_time: lso.lease_time, - shard_count: lso.shard_count, + fn from(lso: &LocalStoreOptions) -> Self { + Self { + files_max_size_bytes: lso.files_max_size_bytes, + directories_max_size_bytes: lso.directories_max_size_bytes, + lease_time: lso.lease_time, + shard_count: lso.shard_count, + } } - } } impl Core { - async fn make_store( - executor: &Executor, - local_store_options: &LocalStoreOptions, - local_execution_root_dir: &Path, - enable_remote: bool, - remoting_opts: &RemotingOptions, - remote_store_address: &Option, - tls_config: grpc_util::tls::Config, - capabilities_cell_opt: Option>>, - ) -> Result { - let local_only = Store::local_only_with_options( - executor.clone(), - local_store_options.store_dir.clone(), - local_execution_root_dir, - local_store_options.into(), - )?; - if enable_remote { - let cas_address = remote_store_address - .as_ref() - .ok_or("Remote store required, but none configured")? - .clone(); - - local_only - .into_with_remote(RemoteOptions { - cas_address, - instance_name: remoting_opts.instance_name.clone(), - tls_config, - headers: remoting_opts.store_headers.clone(), - chunk_size_bytes: remoting_opts.store_chunk_bytes, - rpc_timeout: remoting_opts.store_rpc_timeout, - rpc_retries: remoting_opts.store_rpc_retries, - rpc_concurrency_limit: remoting_opts.store_rpc_concurrency, - capabilities_cell_opt, - batch_api_size_limit: remoting_opts.store_batch_api_size_limit, - }) - .await - } else { - Ok(local_only) + async fn make_store( + executor: &Executor, + local_store_options: &LocalStoreOptions, + local_execution_root_dir: &Path, + enable_remote: bool, + remoting_opts: &RemotingOptions, + remote_store_address: &Option, + tls_config: grpc_util::tls::Config, + capabilities_cell_opt: Option>>, + ) -> Result { + let local_only = Store::local_only_with_options( + executor.clone(), + local_store_options.store_dir.clone(), + local_execution_root_dir, + local_store_options.into(), + )?; + if enable_remote { + let cas_address = remote_store_address + .as_ref() + .ok_or("Remote store required, but none configured")? + .clone(); + + local_only + .into_with_remote(RemoteOptions { + cas_address, + instance_name: remoting_opts.instance_name.clone(), + tls_config, + headers: remoting_opts.store_headers.clone(), + chunk_size_bytes: remoting_opts.store_chunk_bytes, + rpc_timeout: remoting_opts.store_rpc_timeout, + rpc_retries: remoting_opts.store_rpc_retries, + rpc_concurrency_limit: remoting_opts.store_rpc_concurrency, + capabilities_cell_opt, + batch_api_size_limit: remoting_opts.store_batch_api_size_limit, + }) + .await + } else { + Ok(local_only) + } } - } - - /// - /// Make the innermost / leaf runner. Will have concurrency control and process pooling, but - /// will not have caching. 
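// A small sketch of how the PEM_RE_STR constant near the top of this file is consumed by
// `load_certificates` further down: with the (?s) flag, `.` also matches newlines, and the
// ungreedy `.*?` keeps each match to a single BEGIN/END block, so `find_iter` yields one match
// per certificate in a bundle. The named capture groups in PEM_RE_STR appear to have been
// stripped in transit (along with other angle-bracketed text); the group names used here are
// illustrative.
use regex::Regex;

fn count_pem_blocks(bundle: &str) -> usize {
    let pem_re = Regex::new(
        r"(?s)-----BEGIN (?P<begin>.*?)-----\s*(?P<key>.*?)-----END (?P<end>.*?)-----\s*",
    )
    .unwrap();
    pem_re.find_iter(bundle).count()
}

fn main() {
    let bundle = "-----BEGIN CERTIFICATE-----\nAAAA\n-----END CERTIFICATE-----\n\
                  -----BEGIN CERTIFICATE-----\nBBBB\n-----END CERTIFICATE-----\n";
    assert_eq!(count_pem_blocks(bundle), 2);
}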
- /// - async fn make_leaf_runner( - full_store: &Store, - local_runner_store: &Store, - executor: &Executor, - local_execution_root_dir: &Path, - immutable_inputs: &ImmutableInputs, - named_caches: &NamedCaches, - instance_name: Option, - process_cache_namespace: Option, - tls_config: grpc_util::tls::Config, - exec_strategy_opts: &ExecutionStrategyOptions, - remoting_opts: &RemotingOptions, - capabilities_cell_opt: Option>>, - ) -> Result, String> { - let local_command_runner = local::CommandRunner::new( - local_runner_store.clone(), - executor.clone(), - local_execution_root_dir.to_path_buf(), - named_caches.clone(), - immutable_inputs.clone(), - exec_strategy_opts.local_keep_sandboxes, - ); - - let runner: Box = if exec_strategy_opts.local_enable_nailgun { - // We set the nailgun pool size to the number of instances that fit within the memory - // parameters configured when a max child process memory has been given. - // Otherwise, pool size will be double of the local parallelism so we can always keep - // a jvm warmed up. - let pool_size: usize = if exec_strategy_opts.child_max_memory > 0 { - max( - 1, - exec_strategy_opts.child_max_memory / exec_strategy_opts.child_default_memory, - ) - } else { - exec_strategy_opts.local_parallelism * 2 - }; - - let nailgun_runner = pe_nailgun::CommandRunner::new( - local_execution_root_dir.to_path_buf(), - local_runner_store.clone(), - executor.clone(), - named_caches.clone(), - immutable_inputs.clone(), - pool_size, - ); - - Box::new(SwitchedCommandRunner::new( - nailgun_runner, - local_command_runner, - |req| !req.input_digests.use_nailgun.is_empty(), - )) - } else { - Box::new(local_command_runner) - }; - // Note that the Docker command runner is only used if the Process sets docker_image. So, - // it's safe to always create this command runner. - let docker_runner = Box::new(docker::CommandRunner::new( - local_runner_store.clone(), - executor.clone(), - &docker::DOCKER, - &docker::IMAGE_PULL_CACHE, - local_execution_root_dir.to_path_buf(), - immutable_inputs.clone(), - exec_strategy_opts.local_keep_sandboxes, - )?); - let runner = Box::new(SwitchedCommandRunner::new(docker_runner, runner, |req| { - matches!( - req.execution_environment.strategy, - ProcessExecutionStrategy::Docker(_) - ) - })); - - let mut runner: Box = Box::new(bounded::CommandRunner::new( - executor, - runner, - exec_strategy_opts.local_parallelism, - )); - - if remoting_opts.execution_enable { - // We always create the remote execution runner if it is globally enabled, but it may not - // actually be used thanks to the `SwitchedCommandRunner` below. Only one of local execution - // or remote execution will be used for any particular process. - let remote_execution_runner = Box::new( - remote::remote::CommandRunner::new( - // We unwrap because global_options.py will have already validated this is defined. 
- remoting_opts.execution_address.as_ref().unwrap(), - instance_name, - process_cache_namespace, - remoting_opts.append_only_caches_base_path.clone(), - tls_config.clone(), - remoting_opts.execution_headers.clone(), - full_store.clone(), - executor.clone(), - remoting_opts.execution_overall_deadline, - Duration::from_millis(100), - remoting_opts.execution_rpc_concurrency, - capabilities_cell_opt, - ) - .await?, - ); - let remote_execution_runner = Box::new(bounded::CommandRunner::new( - executor, - remote_execution_runner, - exec_strategy_opts.remote_parallelism, - )); - runner = Box::new(SwitchedCommandRunner::new( - remote_execution_runner, - runner, - |req| { - matches!( - req.execution_environment.strategy, - ProcessExecutionStrategy::RemoteExecution(_) - ) - }, - )); + /// + /// Make the innermost / leaf runner. Will have concurrency control and process pooling, but + /// will not have caching. + /// + async fn make_leaf_runner( + full_store: &Store, + local_runner_store: &Store, + executor: &Executor, + local_execution_root_dir: &Path, + immutable_inputs: &ImmutableInputs, + named_caches: &NamedCaches, + instance_name: Option, + process_cache_namespace: Option, + tls_config: grpc_util::tls::Config, + exec_strategy_opts: &ExecutionStrategyOptions, + remoting_opts: &RemotingOptions, + capabilities_cell_opt: Option>>, + ) -> Result, String> { + let local_command_runner = local::CommandRunner::new( + local_runner_store.clone(), + executor.clone(), + local_execution_root_dir.to_path_buf(), + named_caches.clone(), + immutable_inputs.clone(), + exec_strategy_opts.local_keep_sandboxes, + ); + + let runner: Box = if exec_strategy_opts.local_enable_nailgun { + // We set the nailgun pool size to the number of instances that fit within the memory + // parameters configured when a max child process memory has been given. + // Otherwise, pool size will be double of the local parallelism so we can always keep + // a jvm warmed up. + let pool_size: usize = if exec_strategy_opts.child_max_memory > 0 { + max( + 1, + exec_strategy_opts.child_max_memory / exec_strategy_opts.child_default_memory, + ) + } else { + exec_strategy_opts.local_parallelism * 2 + }; + + let nailgun_runner = pe_nailgun::CommandRunner::new( + local_execution_root_dir.to_path_buf(), + local_runner_store.clone(), + executor.clone(), + named_caches.clone(), + immutable_inputs.clone(), + pool_size, + ); + + Box::new(SwitchedCommandRunner::new( + nailgun_runner, + local_command_runner, + |req| !req.input_digests.use_nailgun.is_empty(), + )) + } else { + Box::new(local_command_runner) + }; + + // Note that the Docker command runner is only used if the Process sets docker_image. So, + // it's safe to always create this command runner. + let docker_runner = Box::new(docker::CommandRunner::new( + local_runner_store.clone(), + executor.clone(), + &docker::DOCKER, + &docker::IMAGE_PULL_CACHE, + local_execution_root_dir.to_path_buf(), + immutable_inputs.clone(), + exec_strategy_opts.local_keep_sandboxes, + )?); + let runner = Box::new(SwitchedCommandRunner::new(docker_runner, runner, |req| { + matches!( + req.execution_environment.strategy, + ProcessExecutionStrategy::Docker(_) + ) + })); + + let mut runner: Box = Box::new(bounded::CommandRunner::new( + executor, + runner, + exec_strategy_opts.local_parallelism, + )); + + if remoting_opts.execution_enable { + // We always create the remote execution runner if it is globally enabled, but it may not + // actually be used thanks to the `SwitchedCommandRunner` below. 
Only one of local execution + // or remote execution will be used for any particular process. + let remote_execution_runner = Box::new( + remote::remote::CommandRunner::new( + // We unwrap because global_options.py will have already validated this is defined. + remoting_opts.execution_address.as_ref().unwrap(), + instance_name, + process_cache_namespace, + remoting_opts.append_only_caches_base_path.clone(), + tls_config.clone(), + remoting_opts.execution_headers.clone(), + full_store.clone(), + executor.clone(), + remoting_opts.execution_overall_deadline, + Duration::from_millis(100), + remoting_opts.execution_rpc_concurrency, + capabilities_cell_opt, + ) + .await?, + ); + let remote_execution_runner = Box::new(bounded::CommandRunner::new( + executor, + remote_execution_runner, + exec_strategy_opts.remote_parallelism, + )); + runner = Box::new(SwitchedCommandRunner::new( + remote_execution_runner, + runner, + |req| { + matches!( + req.execution_environment.strategy, + ProcessExecutionStrategy::RemoteExecution(_) + ) + }, + )); + } + + Ok(Arc::new(runner)) } - Ok(Arc::new(runner)) - } - - /// - /// Creates a single stack of cached runners around the given "leaf" CommandRunner. - /// - /// The given cache read/write flags override the relevant cache flags to allow this method - /// to be called with all cache reads disabled, regardless of their configured values. - /// - async fn make_cached_runner( - mut runner: Arc, - full_store: &Store, - executor: &Executor, - local_cache: &PersistentCache, - instance_name: Option, - process_cache_namespace: Option, - tls_config: grpc_util::tls::Config, - remoting_opts: &RemotingOptions, - remote_cache_read: bool, - remote_cache_write: bool, - local_cache_read: bool, - local_cache_write: bool, - ) -> Result, String> { - if remote_cache_read || remote_cache_write { - runner = Arc::new( - remote_cache::CommandRunner::from_provider_options( - RemoteCacheRunnerOptions { - inner: runner, - instance_name: instance_name.clone(), - process_cache_namespace: process_cache_namespace.clone(), - executor: executor.clone(), - store: full_store.clone(), - cache_read: remote_cache_read, - cache_write: remote_cache_write, - warnings_behavior: remoting_opts.cache_warnings_behavior, - cache_content_behavior: remoting_opts.cache_content_behavior, - append_only_caches_base_path: remoting_opts.append_only_caches_base_path.clone(), - }, - RemoteCacheProviderOptions { - instance_name, - action_cache_address: remoting_opts.store_address.clone().unwrap(), - tls_config, - headers: remoting_opts.store_headers.clone(), - concurrency_limit: remoting_opts.cache_rpc_concurrency, - rpc_timeout: remoting_opts.cache_rpc_timeout, - }, - ) - .await?, - ); + /// + /// Creates a single stack of cached runners around the given "leaf" CommandRunner. + /// + /// The given cache read/write flags override the relevant cache flags to allow this method + /// to be called with all cache reads disabled, regardless of their configured values. 
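// A std-only sketch of the nailgun pool sizing rule in `make_leaf_runner` above: when a
// maximum child-process memory budget is configured, fit as many default-sized JVMs into it as
// possible (but at least one); otherwise keep 2 * local_parallelism JVMs warm. Units just need
// to agree; MiB is used below for illustration.
use std::cmp::max;

fn nailgun_pool_size(
    child_max_memory: usize,
    child_default_memory: usize,
    local_parallelism: usize,
) -> usize {
    if child_max_memory > 0 {
        max(1, child_max_memory / child_default_memory)
    } else {
        local_parallelism * 2
    }
}

fn main() {
    // A 4096 MiB budget with 512 MiB JVMs -> a pool of 8.
    assert_eq!(nailgun_pool_size(4096, 512, 16), 8);
    // No budget configured -> double the local parallelism.
    assert_eq!(nailgun_pool_size(0, 512, 16), 32);
}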
+ /// + async fn make_cached_runner( + mut runner: Arc, + full_store: &Store, + executor: &Executor, + local_cache: &PersistentCache, + instance_name: Option, + process_cache_namespace: Option, + tls_config: grpc_util::tls::Config, + remoting_opts: &RemotingOptions, + remote_cache_read: bool, + remote_cache_write: bool, + local_cache_read: bool, + local_cache_write: bool, + ) -> Result, String> { + if remote_cache_read || remote_cache_write { + runner = Arc::new( + remote_cache::CommandRunner::from_provider_options( + RemoteCacheRunnerOptions { + inner: runner, + instance_name: instance_name.clone(), + process_cache_namespace: process_cache_namespace.clone(), + executor: executor.clone(), + store: full_store.clone(), + cache_read: remote_cache_read, + cache_write: remote_cache_write, + warnings_behavior: remoting_opts.cache_warnings_behavior, + cache_content_behavior: remoting_opts.cache_content_behavior, + append_only_caches_base_path: remoting_opts + .append_only_caches_base_path + .clone(), + }, + RemoteCacheProviderOptions { + instance_name, + action_cache_address: remoting_opts.store_address.clone().unwrap(), + tls_config, + headers: remoting_opts.store_headers.clone(), + concurrency_limit: remoting_opts.cache_rpc_concurrency, + rpc_timeout: remoting_opts.cache_rpc_timeout, + }, + ) + .await?, + ); + } + + if local_cache_read || local_cache_write { + runner = Arc::new(process_execution::cache::CommandRunner::new( + runner, + local_cache.clone(), + full_store.clone(), + local_cache_read, + remoting_opts.cache_content_behavior, + process_cache_namespace, + )); + } + + Ok(runner) } - if local_cache_read || local_cache_write { - runner = Arc::new(process_execution::cache::CommandRunner::new( - runner, - local_cache.clone(), - full_store.clone(), - local_cache_read, - remoting_opts.cache_content_behavior, - process_cache_namespace, - )); + /// + /// Creates the stack of CommandRunners for the purposes of backtracking. + /// + async fn make_command_runners( + full_store: &Store, + local_runner_store: &Store, + executor: &Executor, + local_cache: &PersistentCache, + local_execution_root_dir: &Path, + immutable_inputs: &ImmutableInputs, + named_caches: &NamedCaches, + instance_name: Option, + process_cache_namespace: Option, + tls_config: grpc_util::tls::Config, + exec_strategy_opts: &ExecutionStrategyOptions, + remoting_opts: &RemotingOptions, + capabilities_cell_opt: Option>>, + ) -> Result>, String> { + let leaf_runner = Self::make_leaf_runner( + full_store, + local_runner_store, + executor, + local_execution_root_dir, + immutable_inputs, + named_caches, + instance_name.clone(), + process_cache_namespace.clone(), + tls_config.clone(), + exec_strategy_opts, + remoting_opts, + capabilities_cell_opt, + ) + .await?; + + let remote_cache_read = exec_strategy_opts.remote_cache_read; + let remote_cache_write = exec_strategy_opts.remote_cache_write; + let local_cache_read_write = exec_strategy_opts.local_cache; + + // The first attempt is always with all caches. + let mut runners = { + let cached_runner = Self::make_cached_runner( + leaf_runner.clone(), + full_store, + executor, + local_cache, + instance_name.clone(), + process_cache_namespace.clone(), + tls_config.clone(), + remoting_opts, + remote_cache_read, + remote_cache_write, + local_cache_read_write, + local_cache_read_write, + ) + .await?; + + vec![cached_runner] + }; + // If any cache is both readable and writable, we additionally add a backtracking attempt which + // disables all cache reads. 
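// A std-only sketch of the attempt list that `make_command_runners` builds here: the first
// attempt keeps whatever caches are configured; if any cache is both readable and writable, a
// second attempt is appended with all cache *reads* disabled, so a poisoned cache hit can be
// retried (by backtracking) without consulting the caches again. Writes stay enabled so the
// retried result can repopulate the caches.
fn cache_read_flags_per_attempt(
    remote_cache_read: bool,
    remote_cache_write: bool,
    local_cache_read_write: bool,
) -> Vec<(bool, bool)> {
    // Each entry is (remote reads enabled, local reads enabled) for one runner in the stack.
    let mut attempts = vec![(remote_cache_read, local_cache_read_write)];
    if (remote_cache_read && remote_cache_write) || local_cache_read_write {
        attempts.push((false, false));
    }
    attempts
}

fn main() {
    // Local cache only: a fully-cached attempt plus a reads-disabled fallback.
    assert_eq!(
        cache_read_flags_per_attempt(false, false, true),
        vec![(false, true), (false, false)]
    );
    // Read-only remote cache and no local cache: nothing to backtrack to.
    assert_eq!(cache_read_flags_per_attempt(true, false, false), vec![(true, false)]);
}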
+ if (remote_cache_read && remote_cache_write) || local_cache_read_write { + let disabled_cached_runner = Self::make_cached_runner( + leaf_runner.clone(), + full_store, + executor, + local_cache, + instance_name.clone(), + process_cache_namespace.clone(), + tls_config, + remoting_opts, + false, + remote_cache_write, + false, + local_cache_read_write, + ) + .await?; + + runners.push(disabled_cached_runner); + } + + Ok(runners) } - Ok(runner) - } - - /// - /// Creates the stack of CommandRunners for the purposes of backtracking. - /// - async fn make_command_runners( - full_store: &Store, - local_runner_store: &Store, - executor: &Executor, - local_cache: &PersistentCache, - local_execution_root_dir: &Path, - immutable_inputs: &ImmutableInputs, - named_caches: &NamedCaches, - instance_name: Option, - process_cache_namespace: Option, - tls_config: grpc_util::tls::Config, - exec_strategy_opts: &ExecutionStrategyOptions, - remoting_opts: &RemotingOptions, - capabilities_cell_opt: Option>>, - ) -> Result>, String> { - let leaf_runner = Self::make_leaf_runner( - full_store, - local_runner_store, - executor, - local_execution_root_dir, - immutable_inputs, - named_caches, - instance_name.clone(), - process_cache_namespace.clone(), - tls_config.clone(), - exec_strategy_opts, - remoting_opts, - capabilities_cell_opt, - ) - .await?; - - let remote_cache_read = exec_strategy_opts.remote_cache_read; - let remote_cache_write = exec_strategy_opts.remote_cache_write; - let local_cache_read_write = exec_strategy_opts.local_cache; - - // The first attempt is always with all caches. - let mut runners = { - let cached_runner = Self::make_cached_runner( - leaf_runner.clone(), - full_store, - executor, - local_cache, - instance_name.clone(), - process_cache_namespace.clone(), - tls_config.clone(), - remoting_opts, - remote_cache_read, - remote_cache_write, - local_cache_read_write, - local_cache_read_write, - ) - .await?; - - vec![cached_runner] - }; - // If any cache is both readable and writable, we additionally add a backtracking attempt which - // disables all cache reads. - if (remote_cache_read && remote_cache_write) || local_cache_read_write { - let disabled_cached_runner = Self::make_cached_runner( - leaf_runner.clone(), - full_store, - executor, - local_cache, - instance_name.clone(), - process_cache_namespace.clone(), - tls_config, - remoting_opts, - false, - remote_cache_write, - false, - local_cache_read_write, - ) - .await?; - - runners.push(disabled_cached_runner); + fn load_certificates( + ca_certs_path: Option, + ) -> Result, String> { + let certs = match ca_certs_path { + Some(ref path) => { + let mut content = String::new(); + std::fs::File::open(path) + .and_then(|mut f| f.read_to_string(&mut content)) + .map_err(|err| { + format!( + "Error reading root CA certs file {}: {}", + path.display(), + err + ) + })?; + let pem_re = Regex::new(PEM_RE_STR).unwrap(); + let certs_res: Result, _> = pem_re + .find_iter(&content) + .map(|mat| reqwest::Certificate::from_pem(mat.as_str().as_bytes())) + .collect(); + certs_res.map_err(|err| { + format!( + "Error parsing PEM from root CA certs file {}: {}", + path.display(), + err + ) + })? 
+ } + None => Vec::new(), + }; + Ok(certs) } - Ok(runners) - } - - fn load_certificates( - ca_certs_path: Option, - ) -> Result, String> { - let certs = match ca_certs_path { - Some(ref path) => { - let mut content = String::new(); - std::fs::File::open(path) - .and_then(|mut f| f.read_to_string(&mut content)) - .map_err(|err| { - format!( - "Error reading root CA certs file {}: {}", - path.display(), - err + pub async fn new( + executor: Executor, + tasks: Tasks, + types: Types, + intrinsics: Intrinsics, + build_root: PathBuf, + ignore_patterns: Vec, + use_gitignore: bool, + watch_filesystem: bool, + local_execution_root_dir: PathBuf, + named_caches_dir: PathBuf, + ca_certs_path: Option, + local_store_options: LocalStoreOptions, + remoting_opts: RemotingOptions, + exec_strategy_opts: ExecutionStrategyOptions, + ) -> Result { + // We re-use these certs for both the execution and store service; they're generally tied together. + let root_ca_certs = if let Some(ref path) = remoting_opts.root_ca_certs_path { + Some( + std::fs::read(path) + .map_err(|err| format!("Error reading root CA certs file {path:?}: {err}"))?, ) - })?; - let pem_re = Regex::new(PEM_RE_STR).unwrap(); - let certs_res: Result, _> = pem_re - .find_iter(&content) - .map(|mat| reqwest::Certificate::from_pem(mat.as_str().as_bytes())) - .collect(); - certs_res.map_err(|err| { - format!( - "Error parsing PEM from root CA certs file {}: {}", - path.display(), - err - ) - })? - } - None => Vec::new(), - }; - Ok(certs) - } - - pub async fn new( - executor: Executor, - tasks: Tasks, - types: Types, - intrinsics: Intrinsics, - build_root: PathBuf, - ignore_patterns: Vec, - use_gitignore: bool, - watch_filesystem: bool, - local_execution_root_dir: PathBuf, - named_caches_dir: PathBuf, - ca_certs_path: Option, - local_store_options: LocalStoreOptions, - remoting_opts: RemotingOptions, - exec_strategy_opts: ExecutionStrategyOptions, - ) -> Result { - // We re-use these certs for both the execution and store service; they're generally tied together. 
- let root_ca_certs = if let Some(ref path) = remoting_opts.root_ca_certs_path { - Some( - std::fs::read(path) - .map_err(|err| format!("Error reading root CA certs file {path:?}: {err}"))?, - ) - } else { - None - }; + } else { + None + }; - let client_certs = remoting_opts - .client_certs_path - .as_ref() - .map(|path| { - std::fs::read(path) - .map_err(|err| format!("Error reading client authentication certs file {path:?}: {err}")) - }) - .transpose()?; - - let client_key = remoting_opts - .client_key_path - .as_ref() - .map(|path| { - std::fs::read(path) - .map_err(|err| format!("Error reading client authentication key file {path:?}: {err}")) - }) - .transpose()?; - - let mtls_data = match (client_certs.as_ref(), client_key.as_ref()) { + let client_certs = remoting_opts + .client_certs_path + .as_ref() + .map(|path| { + std::fs::read(path).map_err(|err| { + format!("Error reading client authentication certs file {path:?}: {err}") + }) + }) + .transpose()?; + + let client_key = remoting_opts + .client_key_path + .as_ref() + .map(|path| { + std::fs::read(path).map_err(|err| { + format!("Error reading client authentication key file {path:?}: {err}") + }) + }) + .transpose()?; + + let mtls_data = match (client_certs.as_ref(), client_key.as_ref()) { (Some(cert), Some(key)) => Some((cert.deref(), key.deref())), (None, None) => None, _ => { @@ -545,377 +550,379 @@ impl Core { } }; - let tls_config = grpc_util::tls::Config::new(root_ca_certs.as_deref(), mtls_data)?; - - let need_remote_store = remoting_opts.execution_enable - || exec_strategy_opts.remote_cache_read - || exec_strategy_opts.remote_cache_write; - - // If the remote store and remote execution server are the same (address and headers), - // then share the capabilities cache between them to avoid duplicate GetCapabilities calls. - let capabilities_cell_opt = if need_remote_store - && remoting_opts.execution_address == remoting_opts.store_address - && remoting_opts.execution_headers == remoting_opts.store_headers - { - Some(Arc::new(OnceCell::new())) - } else { - None - }; + let tls_config = grpc_util::tls::Config::new(root_ca_certs.as_deref(), mtls_data)?; - std::fs::create_dir_all(&local_store_options.store_dir).map_err(|e| { - format!( - "Error making directory {:?}: {:?}", - local_store_options.store_dir, e - ) - })?; - - let full_store = Self::make_store( - &executor, - &local_store_options, - &local_execution_root_dir, - need_remote_store, - &remoting_opts, - &remoting_opts.store_address, - tls_config.clone(), - capabilities_cell_opt.clone(), - ) - .await - .map_err(|e| format!("Could not initialize Store: {e:?}"))?; - - let local_cache = PersistentCache::new( - &local_store_options.store_dir, - // TODO: Rename. - local_store_options.process_cache_max_size_bytes, - executor.clone(), - local_store_options.lease_time, - local_store_options.shard_count, - )?; - - let store = if (exec_strategy_opts.remote_cache_read || exec_strategy_opts.remote_cache_write) - && remoting_opts.cache_content_behavior == CacheContentBehavior::Fetch - && !remoting_opts.execution_enable - { - // In remote cache mode with eager fetching, the only interaction with the remote CAS - // should be through the remote cache code paths. Thus, the store seen by the rest of the - // code base should be the local-only store. - full_store.clone().into_local_only() - } else { - // Otherwise, the remote CAS should be visible everywhere. 
- // - // With remote execution, we do not always write remote results into the local cache, so it's - // important to always have access to the remote cache or else we will get missing digests. - full_store.clone() - }; + let need_remote_store = remoting_opts.execution_enable + || exec_strategy_opts.remote_cache_read + || exec_strategy_opts.remote_cache_write; - let immutable_inputs = ImmutableInputs::new(store.clone(), &local_execution_root_dir)?; - let named_caches = NamedCaches::new_local(named_caches_dir); - let command_runners = Self::make_command_runners( - &full_store, - &store, - &executor, - &local_cache, - &local_execution_root_dir, - &immutable_inputs, - &named_caches, - remoting_opts.instance_name.clone(), - remoting_opts.execution_process_cache_namespace.clone(), - tls_config.clone(), - &exec_strategy_opts, - &remoting_opts, - capabilities_cell_opt, - ) - .await?; - log::debug!("Using {command_runners:?} for process execution."); - - let graph = Arc::new(InvalidatableGraph(Graph::new(executor.clone()))); - - // These certs are for downloads, not to be confused with the ones used for remoting. - let ca_certs = Self::load_certificates(ca_certs_path)?; - - let http_client_builder = ca_certs - .iter() - .fold(reqwest::Client::builder(), |builder, cert| { - builder.add_root_certificate(cert.clone()) - }); - let http_client = http_client_builder - .build() - .map_err(|err| format!("Error building HTTP client: {err}"))?; - let rule_graph = RuleGraph::new(tasks.rules().clone(), tasks.queries().clone())?; - - let gitignore_files = if use_gitignore { - GitignoreStyleExcludes::gitignore_file_paths(&build_root) - } else { - vec![] - }; + // If the remote store and remote execution server are the same (address and headers), + // then share the capabilities cache between them to avoid duplicate GetCapabilities calls. + let capabilities_cell_opt = if need_remote_store + && remoting_opts.execution_address == remoting_opts.store_address + && remoting_opts.execution_headers == remoting_opts.store_headers + { + Some(Arc::new(OnceCell::new())) + } else { + None + }; + + std::fs::create_dir_all(&local_store_options.store_dir).map_err(|e| { + format!( + "Error making directory {:?}: {:?}", + local_store_options.store_dir, e + ) + })?; + + let full_store = Self::make_store( + &executor, + &local_store_options, + &local_execution_root_dir, + need_remote_store, + &remoting_opts, + &remoting_opts.store_address, + tls_config.clone(), + capabilities_cell_opt.clone(), + ) + .await + .map_err(|e| format!("Could not initialize Store: {e:?}"))?; + + let local_cache = PersistentCache::new( + &local_store_options.store_dir, + // TODO: Rename. + local_store_options.process_cache_max_size_bytes, + executor.clone(), + local_store_options.lease_time, + local_store_options.shard_count, + )?; + + let store = if (exec_strategy_opts.remote_cache_read + || exec_strategy_opts.remote_cache_write) + && remoting_opts.cache_content_behavior == CacheContentBehavior::Fetch + && !remoting_opts.execution_enable + { + // In remote cache mode with eager fetching, the only interaction with the remote CAS + // should be through the remote cache code paths. Thus, the store seen by the rest of the + // code base should be the local-only store. + full_store.clone().into_local_only() + } else { + // Otherwise, the remote CAS should be visible everywhere. 
+ // + // With remote execution, we do not always write remote results into the local cache, so it's + // important to always have access to the remote cache or else we will get missing digests. + full_store.clone() + }; + + let immutable_inputs = ImmutableInputs::new(store.clone(), &local_execution_root_dir)?; + let named_caches = NamedCaches::new_local(named_caches_dir); + let command_runners = Self::make_command_runners( + &full_store, + &store, + &executor, + &local_cache, + &local_execution_root_dir, + &immutable_inputs, + &named_caches, + remoting_opts.instance_name.clone(), + remoting_opts.execution_process_cache_namespace.clone(), + tls_config.clone(), + &exec_strategy_opts, + &remoting_opts, + capabilities_cell_opt, + ) + .await?; + log::debug!("Using {command_runners:?} for process execution."); + + let graph = Arc::new(InvalidatableGraph(Graph::new(executor.clone()))); + + // These certs are for downloads, not to be confused with the ones used for remoting. + let ca_certs = Self::load_certificates(ca_certs_path)?; + + let http_client_builder = ca_certs + .iter() + .fold(reqwest::Client::builder(), |builder, cert| { + builder.add_root_certificate(cert.clone()) + }); + let http_client = http_client_builder + .build() + .map_err(|err| format!("Error building HTTP client: {err}"))?; + let rule_graph = RuleGraph::new(tasks.rules().clone(), tasks.queries().clone())?; + + let gitignore_files = if use_gitignore { + GitignoreStyleExcludes::gitignore_file_paths(&build_root) + } else { + vec![] + }; - let ignorer = - GitignoreStyleExcludes::create_with_gitignore_files(ignore_patterns, gitignore_files) - .map_err(|e| format!("Could not parse build ignore patterns: {e:?}"))?; + let ignorer = + GitignoreStyleExcludes::create_with_gitignore_files(ignore_patterns, gitignore_files) + .map_err(|e| format!("Could not parse build ignore patterns: {e:?}"))?; - let watcher = if watch_filesystem { - let w = InvalidationWatcher::new(executor.clone(), build_root.clone(), ignorer.clone())?; - w.start(&graph)?; - Some(w) - } else { - None - }; + let watcher = if watch_filesystem { + let w = + InvalidationWatcher::new(executor.clone(), build_root.clone(), ignorer.clone())?; + w.start(&graph)?; + Some(w) + } else { + None + }; + + let sessions = Sessions::new(&executor)?; - let sessions = Sessions::new(&executor)?; - - Ok(Core { - graph, - tasks, - rule_graph, - types, - intrinsics, - executor: executor.clone(), - store, - command_runners, - http_client, - local_cache, - vfs: PosixFS::new(&build_root, ignorer, executor) - .map_err(|e| format!("Could not initialize Vfs: {e:?}"))?, - build_root, - watcher, - local_parallelism: exec_strategy_opts.local_parallelism, - graceful_shutdown_timeout: exec_strategy_opts.graceful_shutdown_timeout, - sessions, - named_caches, - immutable_inputs, - local_execution_root_dir, - }) - } - - pub fn store(&self) -> Store { - self.store.clone() - } - - /// - /// Shuts down this Core. - /// - pub async fn shutdown(&self, timeout: Duration) { - // Shutdown the Sessions, which will prevent new work from starting and then await any ongoing - // work. 
- if let Err(msg) = self.sessions.shutdown(timeout).await { - log::warn!("During shutdown: {}", msg); + Ok(Core { + graph, + tasks, + rule_graph, + types, + intrinsics, + executor: executor.clone(), + store, + command_runners, + http_client, + local_cache, + vfs: PosixFS::new(&build_root, ignorer, executor) + .map_err(|e| format!("Could not initialize Vfs: {e:?}"))?, + build_root, + watcher, + local_parallelism: exec_strategy_opts.local_parallelism, + graceful_shutdown_timeout: exec_strategy_opts.graceful_shutdown_timeout, + sessions, + named_caches, + immutable_inputs, + local_execution_root_dir, + }) } - // Then clear the Graph to ensure that drop handlers run (particularly for running processes). - self.graph.clear(); - - // Allow command runners to cleanly shutdown in an async context to avoid issues with - // waiting for async code to run in a non-async drop context. - let shutdown_futures = self - .command_runners - .iter() - .map(|runner| runner.shutdown().boxed()); - let shutdown_results = futures::future::join_all(shutdown_futures).await; - for shutdown_result in shutdown_results { - if let Err(err) = shutdown_result { - log::warn!("Command runner failed to shutdown cleanly: {err}"); - } + + pub fn store(&self) -> Store { + self.store.clone() + } + + /// + /// Shuts down this Core. + /// + pub async fn shutdown(&self, timeout: Duration) { + // Shutdown the Sessions, which will prevent new work from starting and then await any ongoing + // work. + if let Err(msg) = self.sessions.shutdown(timeout).await { + log::warn!("During shutdown: {}", msg); + } + // Then clear the Graph to ensure that drop handlers run (particularly for running processes). + self.graph.clear(); + + // Allow command runners to cleanly shutdown in an async context to avoid issues with + // waiting for async code to run in a non-async drop context. + let shutdown_futures = self + .command_runners + .iter() + .map(|runner| runner.shutdown().boxed()); + let shutdown_results = futures::future::join_all(shutdown_futures).await; + for shutdown_result in shutdown_results { + if let Err(err) = shutdown_result { + log::warn!("Command runner failed to shutdown cleanly: {err}"); + } + } } - } } pub struct InvalidatableGraph(Graph); fn caller_to_logging_info(caller: InvalidateCaller) -> (Level, &'static str) { - match caller { - // An external invalidation is driven by some other pants operation, and thus isn't as - // interesting, there's likely to be output about that action already, hence this can be logged - // quieter. - InvalidateCaller::External => (Level::Debug, "external"), - // A notify invalidation may have been triggered by a user-driven action that isn't otherwise - // visible in logs (e.g. file editing, branch switching), hence log it louder. - InvalidateCaller::Notify => (Level::Info, "notify"), - } + match caller { + // An external invalidation is driven by some other pants operation, and thus isn't as + // interesting, there's likely to be output about that action already, hence this can be logged + // quieter. + InvalidateCaller::External => (Level::Debug, "external"), + // A notify invalidation may have been triggered by a user-driven action that isn't otherwise + // visible in logs (e.g. file editing, branch switching), hence log it louder. 
+ InvalidateCaller::Notify => (Level::Info, "notify"),
+ }
}

impl Invalidatable for InvalidatableGraph {
- fn invalidate(&self, paths: &HashSet<PathBuf>, caller: InvalidateCaller) -> usize {
- let InvalidationResult { cleared, dirtied } = self.invalidate_from_roots(false, move |node| {
- if let Some(fs_subject) = node.fs_subject() {
- paths.contains(fs_subject)
- } else {
- false
- }
- });
- let (level, caller) = caller_to_logging_info(caller);
- log!(
- level,
- "{} invalidation: cleared {} and dirtied {} nodes for: {:?}",
- caller,
- cleared,
- dirtied,
- paths
- );
- cleared + dirtied
- }
-
- fn invalidate_all(&self, caller: InvalidateCaller) -> usize {
- let InvalidationResult { cleared, dirtied } =
- self.invalidate_from_roots(false, |node| node.fs_subject().is_some());
- let (level, caller) = caller_to_logging_info(caller);
- log!(
- level,
- "{} invalidation: cleared {} and dirtied {} nodes for all paths",
- caller,
- cleared,
- dirtied
- );
- cleared + dirtied
- }
+ fn invalidate(&self, paths: &HashSet<PathBuf>, caller: InvalidateCaller) -> usize {
+ let InvalidationResult { cleared, dirtied } =
+ self.invalidate_from_roots(false, move |node| {
+ if let Some(fs_subject) = node.fs_subject() {
+ paths.contains(fs_subject)
+ } else {
+ false
+ }
+ });
+ let (level, caller) = caller_to_logging_info(caller);
+ log!(
+ level,
+ "{} invalidation: cleared {} and dirtied {} nodes for: {:?}",
+ caller,
+ cleared,
+ dirtied,
+ paths
+ );
+ cleared + dirtied
+ }
+
+ fn invalidate_all(&self, caller: InvalidateCaller) -> usize {
+ let InvalidationResult { cleared, dirtied } =
+ self.invalidate_from_roots(false, |node| node.fs_subject().is_some());
+ let (level, caller) = caller_to_logging_info(caller);
+ log!(
+ level,
+ "{} invalidation: cleared {} and dirtied {} nodes for all paths",
+ caller,
+ cleared,
+ dirtied
+ );
+ cleared + dirtied
+ }
}

impl Deref for InvalidatableGraph {
- type Target = Graph<NodeKey>;
+ type Target = Graph<NodeKey>;

- fn deref(&self) -> &Graph<NodeKey> {
- &self.0
- }
+ fn deref(&self) -> &Graph<NodeKey> {
+ &self.0
+ }
}

pub type Context = graph::Context<NodeKey>;

pub struct SessionCore {
- // TODO: This field is also accessible via the Session: move to an accessor.
- pub core: Arc<Core>,
- pub session: Session,
- /// The number of attempts which have been made to backtrack to a particular ExecuteProcess node.
- ///
- /// Presence in this map at process runtime indicates that the process is being retried, and that
- /// there was something invalid or unusable about previous attempts. Successive attempts should
- /// run in a different mode (skipping caches, etc) to attempt to produce a valid result.
- backtrack_levels: Arc<Mutex<HashMap<ExecuteProcess, usize>>>,
- /// The Digests that we have successfully invalidated a Node for.
- backtrack_digests: Arc<Mutex<HashSet<Digest>>>,
+ // TODO: This field is also accessible via the Session: move to an accessor.
+ pub core: Arc<Core>,
+ pub session: Session,
+ /// The number of attempts which have been made to backtrack to a particular ExecuteProcess node.
+ ///
+ /// Presence in this map at process runtime indicates that the process is being retried, and that
+ /// there was something invalid or unusable about previous attempts. Successive attempts should
+ /// run in a different mode (skipping caches, etc) to attempt to produce a valid result.
+ backtrack_levels: Arc<Mutex<HashMap<ExecuteProcess, usize>>>,
+ /// The Digests that we have successfully invalidated a Node for.
+ backtrack_digests: Arc>>, } impl SessionCore { - pub fn new(session: Session) -> Self { - Self { - core: session.core().clone(), - session, - backtrack_levels: Arc::default(), - backtrack_digests: Arc::default(), + pub fn new(session: Session) -> Self { + Self { + core: session.core().clone(), + session, + backtrack_levels: Arc::default(), + backtrack_digests: Arc::default(), + } } - } - - /// - /// If the given Result is a Failure::MissingDigest, attempts to invalidate the Node which was - /// the source of the Digest, potentially causing indirect retry of the Result. - /// - /// If we successfully locate and restart the source of the Digest, converts the Result into a - /// `Failure::Invalidated`, which will cause retry at some level above us. - /// - /// TODO: This takes both `self` and `context: Context`, but could take `self: Context` after - /// the `arbitrary_self_types` feature has stabilized. - /// - pub fn maybe_backtrack( - &self, - context: &Context, - result: NodeResult, - workunit: &mut RunningWorkunit, - ) -> NodeResult { - let digest = if let Err(Failure::MissingDigest(_, d)) = result.as_ref() { - *d - } else { - return result; - }; - // Locate live source(s) of this Digest and their backtracking levels. - // TODO: Currently needs a combination of `visit_live` and `invalidate_from_roots` because - // `invalidate_from_roots` cannot view `Node` results. Would be more efficient as a merged - // method. - let mut candidate_roots = Vec::new(); - self.core.graph.visit_live(context, |k, v| match k { - NodeKey::ExecuteProcess(p) if v.digests().contains(&digest) => { - if let NodeOutput::ProcessResult(pr) = v { - candidate_roots.push((p.clone(), pr.backtrack_level)); - } - } - _ => (), - }); - - if candidate_roots.is_empty() { - // If there are no live sources of the Digest, see whether any have already been invalidated - // by other consumers. - if self.backtrack_digests.lock().get(&digest).is_some() { - // Some other consumer has already identified and invalidated the source of this Digest: we - // can wait for the attempt to complete. - return Err(Failure::Invalidated); - } else { - // There are no live or invalidated sources of this Digest. Directly fail. - return result.map_err(|e| { - throw(format!( - "Could not identify a process to backtrack to for: {e}" - )) + /// + /// If the given Result is a Failure::MissingDigest, attempts to invalidate the Node which was + /// the source of the Digest, potentially causing indirect retry of the Result. + /// + /// If we successfully locate and restart the source of the Digest, converts the Result into a + /// `Failure::Invalidated`, which will cause retry at some level above us. + /// + /// TODO: This takes both `self` and `context: Context`, but could take `self: Context` after + /// the `arbitrary_self_types` feature has stabilized. + /// + pub fn maybe_backtrack( + &self, + context: &Context, + result: NodeResult, + workunit: &mut RunningWorkunit, + ) -> NodeResult { + let digest = if let Err(Failure::MissingDigest(_, d)) = result.as_ref() { + *d + } else { + return result; + }; + + // Locate live source(s) of this Digest and their backtracking levels. + // TODO: Currently needs a combination of `visit_live` and `invalidate_from_roots` because + // `invalidate_from_roots` cannot view `Node` results. Would be more efficient as a merged + // method. 
+ let mut candidate_roots = Vec::new(); + self.core.graph.visit_live(context, |k, v| match k { + NodeKey::ExecuteProcess(p) if v.digests().contains(&digest) => { + if let NodeOutput::ProcessResult(pr) = v { + candidate_roots.push((p.clone(), pr.backtrack_level)); + } + } + _ => (), }); - } - } else { - // We have identified a Node to backtrack on. Record it. - self.backtrack_digests.lock().insert(digest); - } - // Attempt to trigger backtrack attempts for the matched Nodes. It's possible that we are not - // the first consumer to notice that this Node needs to backtrack, so we only actually report - // that we're backtracking if the new level is an increase from the old level. - let roots = candidate_roots - .into_iter() - .filter_map(|(root, invalid_level)| { - let next_level = invalid_level + 1; - let maybe_new_level = { - let mut backtrack_levels = self.backtrack_levels.lock(); - if let Some(old_backtrack_level) = backtrack_levels.get_mut(&root) { - if next_level > *old_backtrack_level { - *old_backtrack_level = next_level; - Some(next_level) + if candidate_roots.is_empty() { + // If there are no live sources of the Digest, see whether any have already been invalidated + // by other consumers. + if self.backtrack_digests.lock().get(&digest).is_some() { + // Some other consumer has already identified and invalidated the source of this Digest: we + // can wait for the attempt to complete. + return Err(Failure::Invalidated); } else { - None + // There are no live or invalidated sources of this Digest. Directly fail. + return result.map_err(|e| { + throw(format!( + "Could not identify a process to backtrack to for: {e}" + )) + }); } - } else { - backtrack_levels.insert((*root).clone(), next_level); - Some(next_level) - } - }; - if let Some(new_level) = maybe_new_level { - workunit.increment_counter(Metric::BacktrackAttempts, 1); - let description = &root.process.description; - // TODO: This message should likely be at `info`, or eventually, debug. - // see https://github.com/pantsbuild/pants/issues/15867 - log::warn!( + } else { + // We have identified a Node to backtrack on. Record it. + self.backtrack_digests.lock().insert(digest); + } + + // Attempt to trigger backtrack attempts for the matched Nodes. It's possible that we are not + // the first consumer to notice that this Node needs to backtrack, so we only actually report + // that we're backtracking if the new level is an increase from the old level. + let roots = candidate_roots + .into_iter() + .filter_map(|(root, invalid_level)| { + let next_level = invalid_level + 1; + let maybe_new_level = { + let mut backtrack_levels = self.backtrack_levels.lock(); + if let Some(old_backtrack_level) = backtrack_levels.get_mut(&root) { + if next_level > *old_backtrack_level { + *old_backtrack_level = next_level; + Some(next_level) + } else { + None + } + } else { + backtrack_levels.insert((*root).clone(), next_level); + Some(next_level) + } + }; + if let Some(new_level) = maybe_new_level { + workunit.increment_counter(Metric::BacktrackAttempts, 1); + let description = &root.process.description; + // TODO: This message should likely be at `info`, or eventually, debug. + // see https://github.com/pantsbuild/pants/issues/15867 + log::warn!( "Making attempt {new_level} to backtrack and retry `{description}`, due to \ missing digest {digest:?}." ); - Some(root) - } else { - None - } - }) - .collect::>(); - - // Invalidate the matched roots. 
- self - .core - .graph - .invalidate_from_roots(true, move |node| match node { - NodeKey::ExecuteProcess(p) => roots.contains(p), - _ => false, - }); - - // We invalidated a Node, and the caller (at some level above us in the stack) should retry. - // Complete this node with the Invalidated state. - // TODO: Differentiate the reasons for Invalidation (filesystem changes vs missing digests) to - // improve warning messages. See https://github.com/pantsbuild/pants/issues/15867 - Err(Failure::Invalidated) - } - - /// - /// Called before executing a process to determine whether it is backtracking. - /// - /// A process which has not been marked backtracking will always return 0. - /// - pub fn maybe_start_backtracking(&self, node: &ExecuteProcess) -> usize { - self.backtrack_levels.lock().get(node).cloned().unwrap_or(0) - } + Some(root) + } else { + None + } + }) + .collect::>(); + + // Invalidate the matched roots. + self.core + .graph + .invalidate_from_roots(true, move |node| match node { + NodeKey::ExecuteProcess(p) => roots.contains(p), + _ => false, + }); + + // We invalidated a Node, and the caller (at some level above us in the stack) should retry. + // Complete this node with the Invalidated state. + // TODO: Differentiate the reasons for Invalidation (filesystem changes vs missing digests) to + // improve warning messages. See https://github.com/pantsbuild/pants/issues/15867 + Err(Failure::Invalidated) + } + + /// + /// Called before executing a process to determine whether it is backtracking. + /// + /// A process which has not been marked backtracking will always return 0. + /// + pub fn maybe_start_backtracking(&self, node: &ExecuteProcess) -> usize { + self.backtrack_levels.lock().get(node).cloned().unwrap_or(0) + } } diff --git a/src/rust/engine/src/downloads.rs b/src/rust/engine/src/downloads.rs index a685d74c970..291af0e3a37 100644 --- a/src/rust/engine/src/downloads.rs +++ b/src/rust/engine/src/downloads.rs @@ -21,43 +21,43 @@ use crate::context::Core; use workunit_store::{in_workunit, Level}; enum StreamingError { - Retryable(String), - Permanent(String), + Retryable(String), + Permanent(String), } impl From for String { - fn from(err: StreamingError) -> Self { - match err { - StreamingError::Retryable(s) | StreamingError::Permanent(s) => s, + fn from(err: StreamingError) -> Self { + match err { + StreamingError::Retryable(s) | StreamingError::Permanent(s) => s, + } } - } } #[async_trait] trait StreamingDownload: Send { - async fn next(&mut self) -> Option>; + async fn next(&mut self) -> Option>; } struct NetDownload { - stream: futures_core::stream::BoxStream<'static, Result>, + stream: futures_core::stream::BoxStream<'static, Result>, } impl NetDownload { - async fn start( - core: &Arc, - url: Url, - auth_headers: BTreeMap, - file_name: String, - ) -> Result { - let mut headers = HeaderMap::new(); - for (k, v) in &auth_headers { - headers.insert( - HeaderName::from_bytes(k.as_bytes()).unwrap(), - v.parse().unwrap(), - ); - } - - let response = core + async fn start( + core: &Arc, + url: Url, + auth_headers: BTreeMap, + file_name: String, + ) -> Result { + let mut headers = HeaderMap::new(); + for (k, v) in &auth_headers { + headers.insert( + HeaderName::from_bytes(k.as_bytes()).unwrap(), + v.parse().unwrap(), + ); + } + + let response = core .http_client .get(url.clone()) .headers(headers) @@ -84,170 +84,168 @@ impl NetDownload { Ok(res) })?; - let byte_stream = Pin::new(Box::new(response.bytes_stream())); - Ok(NetDownload { - stream: byte_stream, - }) - } + let 
byte_stream = Pin::new(Box::new(response.bytes_stream())); + Ok(NetDownload { + stream: byte_stream, + }) + } } #[async_trait] impl StreamingDownload for NetDownload { - async fn next(&mut self) -> Option> { - self - .stream - .next() - .await - .map(|result| result.map_err(|err| err.to_string())) - } + async fn next(&mut self) -> Option> { + self.stream + .next() + .await + .map(|result| result.map_err(|err| err.to_string())) + } } struct FileDownload { - stream: tokio_util::io::ReaderStream, + stream: tokio_util::io::ReaderStream, } impl FileDownload { - async fn start(path: &str, file_name: String) -> Result { - let file = tokio::fs::File::open(path).await.map_err(|e| { - let msg = format!("Error ({e}) opening file at {path} for download to {file_name}"); - // Fail quickly for non-existent files. - if e.kind() == io::ErrorKind::NotFound { - StreamingError::Permanent(msg) - } else { - StreamingError::Retryable(msg) - } - })?; - let stream = tokio_util::io::ReaderStream::new(file); - Ok(FileDownload { stream }) - } + async fn start(path: &str, file_name: String) -> Result { + let file = tokio::fs::File::open(path).await.map_err(|e| { + let msg = format!("Error ({e}) opening file at {path} for download to {file_name}"); + // Fail quickly for non-existent files. + if e.kind() == io::ErrorKind::NotFound { + StreamingError::Permanent(msg) + } else { + StreamingError::Retryable(msg) + } + })?; + let stream = tokio_util::io::ReaderStream::new(file); + Ok(FileDownload { stream }) + } } #[async_trait] impl StreamingDownload for FileDownload { - async fn next(&mut self) -> Option> { - self - .stream - .next() - .await - .map(|result| result.map_err(|err| err.to_string())) - } + async fn next(&mut self) -> Option> { + self.stream + .next() + .await + .map(|result| result.map_err(|err| err.to_string())) + } } async fn attempt_download( - core: &Arc, - url: &Url, - auth_headers: &BTreeMap, - file_name: String, - expected_digest: Digest, + core: &Arc, + url: &Url, + auth_headers: &BTreeMap, + file_name: String, + expected_digest: Digest, ) -> Result<(Digest, Bytes), StreamingError> { - let mut response_stream: Box = { - if url.scheme() == "file" { - if let Some(host) = url.host_str() { - return Err(StreamingError::Permanent(format!( - "The file Url `{}` has a host component. Instead, use `file:$path`, \ + let mut response_stream: Box = { + if url.scheme() == "file" { + if let Some(host) = url.host_str() { + return Err(StreamingError::Permanent(format!( + "The file Url `{}` has a host component. Instead, use `file:$path`, \ which in this case might be either `file:{}{}` or `file:{}`.", - url, - host, - url.path(), - url.path(), - ))); - } - Box::new(FileDownload::start(url.path(), file_name).await?) - } else { - Box::new(NetDownload::start(core, url.clone(), auth_headers.clone(), file_name).await?) + url, + host, + url.path(), + url.path(), + ))); + } + Box::new(FileDownload::start(url.path(), file_name).await?) + } else { + Box::new(NetDownload::start(core, url.clone(), auth_headers.clone(), file_name).await?) 
+ } + }; + + struct SizeLimiter { + writer: W, + written: usize, + size_limit: usize, } - }; - - struct SizeLimiter { - writer: W, - written: usize, - size_limit: usize, - } - - impl Write for SizeLimiter { - fn write(&mut self, buf: &[u8]) -> Result { - let new_size = self.written + buf.len(); - if new_size > self.size_limit { - Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Downloaded file was larger than expected digest", - )) - } else { - self.written = new_size; - self.writer.write_all(buf)?; - Ok(buf.len()) - } + + impl Write for SizeLimiter { + fn write(&mut self, buf: &[u8]) -> Result { + let new_size = self.written + buf.len(); + if new_size > self.size_limit { + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Downloaded file was larger than expected digest", + )) + } else { + self.written = new_size; + self.writer.write_all(buf)?; + Ok(buf.len()) + } + } + + fn flush(&mut self) -> Result<(), std::io::Error> { + self.writer.flush() + } } - fn flush(&mut self) -> Result<(), std::io::Error> { - self.writer.flush() + let mut hasher = hashing::WriterHasher::new(SizeLimiter { + writer: bytes::BytesMut::with_capacity(expected_digest.size_bytes).writer(), + written: 0, + size_limit: expected_digest.size_bytes, + }); + + while let Some(next_chunk) = response_stream.next().await { + let chunk = next_chunk.map_err(|err| { + StreamingError::Retryable(format!("Error reading URL fetch response: {err}")) + })?; + hasher.write_all(&chunk).map_err(|err| { + StreamingError::Retryable(format!("Error hashing/capturing URL fetch response: {err}")) + })?; } - } - - let mut hasher = hashing::WriterHasher::new(SizeLimiter { - writer: bytes::BytesMut::with_capacity(expected_digest.size_bytes).writer(), - written: 0, - size_limit: expected_digest.size_bytes, - }); - - while let Some(next_chunk) = response_stream.next().await { - let chunk = next_chunk.map_err(|err| { - StreamingError::Retryable(format!("Error reading URL fetch response: {err}")) - })?; - hasher.write_all(&chunk).map_err(|err| { - StreamingError::Retryable(format!("Error hashing/capturing URL fetch response: {err}")) - })?; - } - let (digest, bytewriter) = hasher.finish(); - Ok((digest, bytewriter.writer.into_inner().freeze())) + let (digest, bytewriter) = hasher.finish(); + Ok((digest, bytewriter.writer.into_inner().freeze())) } pub async fn download( - core: Arc, - url: Url, - auth_headers: BTreeMap, - file_name: String, - expected_digest: hashing::Digest, + core: Arc, + url: Url, + auth_headers: BTreeMap, + file_name: String, + expected_digest: hashing::Digest, ) -> Result<(), String> { - let core2 = core.clone(); - let (actual_digest, bytes) = in_workunit!( - "download_file", - Level::Debug, - desc = Some(format!( - "Downloading: {url} ({})", - expected_digest - .size_bytes - .file_size(file_size_opts::CONVENTIONAL) - .unwrap() - )), - |_workunit| async move { - // TODO: Allow the retry strategy to be configurable? - // For now we retry after 10ms, 100ms, 1s, and 10s. 
- let retry_strategy = ExponentialBackoff::from_millis(10).map(jitter).take(4); - RetryIf::spawn( - retry_strategy, - || { - attempt_download( - &core2, - &url, - &auth_headers, - file_name.clone(), - expected_digest, - ) - }, - |err: &StreamingError| matches!(err, StreamingError::Retryable(_)), - ) - .await + let core2 = core.clone(); + let (actual_digest, bytes) = in_workunit!( + "download_file", + Level::Debug, + desc = Some(format!( + "Downloading: {url} ({})", + expected_digest + .size_bytes + .file_size(file_size_opts::CONVENTIONAL) + .unwrap() + )), + |_workunit| async move { + // TODO: Allow the retry strategy to be configurable? + // For now we retry after 10ms, 100ms, 1s, and 10s. + let retry_strategy = ExponentialBackoff::from_millis(10).map(jitter).take(4); + RetryIf::spawn( + retry_strategy, + || { + attempt_download( + &core2, + &url, + &auth_headers, + file_name.clone(), + expected_digest, + ) + }, + |err: &StreamingError| matches!(err, StreamingError::Retryable(_)), + ) + .await + } + ) + .await?; + + if expected_digest != actual_digest { + return Err(format!( + "Wrong digest for downloaded file: want {expected_digest:?} got {actual_digest:?}" + )); } - ) - .await?; - - if expected_digest != actual_digest { - return Err(format!( - "Wrong digest for downloaded file: want {expected_digest:?} got {actual_digest:?}" - )); - } - let _ = core.store().store_file_bytes(bytes, true).await?; - Ok(()) + let _ = core.store().store_file_bytes(bytes, true).await?; + Ok(()) } diff --git a/src/rust/engine/src/externs/address.rs b/src/rust/engine/src/externs/address.rs index e93ba66c77a..57cbf1dd3c7 100644 --- a/src/rust/engine/src/externs/address.rs +++ b/src/rust/engine/src/externs/address.rs @@ -24,47 +24,47 @@ create_exception!(native_engine, InvalidParametersError, InvalidAddressError); create_exception!(native_engine, UnsupportedWildcardError, InvalidAddressError); pub fn register(py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(address_spec_parse, m)?)?; - - m.add( - "AddressParseException", - py.get_type::(), - )?; - m.add("InvalidAddressError", py.get_type::())?; - m.add( - "InvalidSpecPathError", - py.get_type::(), - )?; - m.add( - "InvalidTargetNameError", - py.get_type::(), - )?; - m.add( - "InvalidParametersError", - py.get_type::(), - )?; - m.add( - "UnsupportedWildcardError", - py.get_type::(), - )?; - - m.add_class::()?; - m.add_class::
()?; - - m.add( - "BANNED_CHARS_IN_TARGET_NAME", - PyFrozenSet::new(py, BANNED_CHARS_IN_TARGET_NAME.iter())?, - )?; - m.add( - "BANNED_CHARS_IN_GENERATED_NAME", - PyFrozenSet::new(py, BANNED_CHARS_IN_GENERATED_NAME.iter())?, - )?; - m.add( - "BANNED_CHARS_IN_PARAMETERS", - PyFrozenSet::new(py, BANNED_CHARS_IN_PARAMETERS.iter())?, - )?; - - Ok(()) + m.add_function(wrap_pyfunction!(address_spec_parse, m)?)?; + + m.add( + "AddressParseException", + py.get_type::(), + )?; + m.add("InvalidAddressError", py.get_type::())?; + m.add( + "InvalidSpecPathError", + py.get_type::(), + )?; + m.add( + "InvalidTargetNameError", + py.get_type::(), + )?; + m.add( + "InvalidParametersError", + py.get_type::(), + )?; + m.add( + "UnsupportedWildcardError", + py.get_type::(), + )?; + + m.add_class::()?; + m.add_class::
()?; + + m.add( + "BANNED_CHARS_IN_TARGET_NAME", + PyFrozenSet::new(py, BANNED_CHARS_IN_TARGET_NAME.iter())?, + )?; + m.add( + "BANNED_CHARS_IN_GENERATED_NAME", + PyFrozenSet::new(py, BANNED_CHARS_IN_GENERATED_NAME.iter())?, + )?; + m.add( + "BANNED_CHARS_IN_PARAMETERS", + PyFrozenSet::new(py, BANNED_CHARS_IN_PARAMETERS.iter())?, + )?; + + Ok(()) } lazy_static! { @@ -80,200 +80,203 @@ lazy_static! { #[pyclass(name = "AddressInput")] #[derive(Debug, Hash, Eq, PartialEq)] pub struct AddressInput { - original_spec: String, - path_component: PathBuf, - target_component: Option, - generated_component: Option, - parameters: BTreeMap, - description_of_origin: String, -} - -#[pymethods] -impl AddressInput { - #[new] - fn __new__( original_spec: String, path_component: PathBuf, - description_of_origin: String, target_component: Option, generated_component: Option, - parameters: Option>, - ) -> PyResult { - if let Some(target) = target_component.as_ref() { - if target.is_empty() { - return Err(InvalidTargetNameError::new_err(format!( - "Address `{original_spec}` from {description_of_origin} sets \ + parameters: BTreeMap, + description_of_origin: String, +} + +#[pymethods] +impl AddressInput { + #[new] + fn __new__( + original_spec: String, + path_component: PathBuf, + description_of_origin: String, + target_component: Option, + generated_component: Option, + parameters: Option>, + ) -> PyResult { + if let Some(target) = target_component.as_ref() { + if target.is_empty() { + return Err(InvalidTargetNameError::new_err(format!( + "Address `{original_spec}` from {description_of_origin} sets \ the name component to the empty string, which is not legal." - ))); - } - } else if path_component.components().next().is_none() { - return Err(InvalidTargetNameError::new_err(format!( - "Address `{original_spec}` from {description_of_origin} has no name part, \ + ))); + } + } else if path_component.components().next().is_none() { + return Err(InvalidTargetNameError::new_err(format!( + "Address `{original_spec}` from {description_of_origin} has no name part, \ but it's necessary because the path is the build root." - ))); - } + ))); + } - if path_component.components().next().is_some() && path_component.is_absolute() { - return Err(InvalidSpecPathError::new_err(format!( - "Invalid address {original_spec} from {description_of_origin}. Cannot use \ + if path_component.components().next().is_some() && path_component.is_absolute() { + return Err(InvalidSpecPathError::new_err(format!( + "Invalid address {original_spec} from {description_of_origin}. Cannot use \ absolute paths." - ))); - } - - if let Some(parameters) = parameters.as_ref() { - for (k, v) in parameters { - let banned = k - .chars() - .chain(v.chars()) - .filter(|c| BANNED_CHARS_IN_PARAMETERS.contains(c)) - .map(|c| c.to_string()) - .collect::>(); - if !banned.is_empty() { - return Err(InvalidParametersError::new_err(format!( - "Invalid address `{original_spec}` from {description_of_origin}. It has + ))); + } + + if let Some(parameters) = parameters.as_ref() { + for (k, v) in parameters { + let banned = k + .chars() + .chain(v.chars()) + .filter(|c| BANNED_CHARS_IN_PARAMETERS.contains(c)) + .map(|c| c.to_string()) + .collect::>(); + if !banned.is_empty() { + return Err(InvalidParametersError::new_err(format!( + "Invalid address `{original_spec}` from {description_of_origin}. 
It has illegal characters in parameter entries: `{}` in `{k}={v}`.", - banned.join(","), - ))); + banned.join(","), + ))); + } + } } - } - } - - Ok(Self { - original_spec, - path_component, - target_component, - generated_component, - parameters: parameters.unwrap_or_default(), - description_of_origin, - }) - } - - #[classmethod] - fn parse( - _cls: &PyType, - spec: &str, - description_of_origin: &str, - relative_to: Option<&str>, - subproject_roots: Option>, - ) -> PyResult { - let subproject_info = subproject_roots - .zip(relative_to) - .and_then(|(roots, relative_to)| split_on_longest_dir_prefix(relative_to, &roots)); - - let parsed_spec = address::parse_address_spec(spec).map_err(AddressParseException::new_err)?; - if let Some(wildcard) = parsed_spec.wildcard { - return Err(UnsupportedWildcardError::new_err(format!( - "The address `{spec}` from {description_of_origin} ended in a wildcard \ + + Ok(Self { + original_spec, + path_component, + target_component, + generated_component, + parameters: parameters.unwrap_or_default(), + description_of_origin, + }) + } + + #[classmethod] + fn parse( + _cls: &PyType, + spec: &str, + description_of_origin: &str, + relative_to: Option<&str>, + subproject_roots: Option>, + ) -> PyResult { + let subproject_info = subproject_roots + .zip(relative_to) + .and_then(|(roots, relative_to)| split_on_longest_dir_prefix(relative_to, &roots)); + + let parsed_spec = + address::parse_address_spec(spec).map_err(AddressParseException::new_err)?; + if let Some(wildcard) = parsed_spec.wildcard { + return Err(UnsupportedWildcardError::new_err(format!( + "The address `{spec}` from {description_of_origin} ended in a wildcard \ (`{wildcard}`), which is not supported." - ))); - } - let address = parsed_spec.address; - - let normalized_relative_to = if let Some((_, normalized_relative_to)) = subproject_info { - Some(normalized_relative_to) - } else { - relative_to - }; - - let mut path_component: Cow = address.path.into(); - if let Some(normalized_relative_to) = normalized_relative_to { - if let Some(stripped) = path_component.strip_prefix("./") { - path_component = format!( - "{normalized_relative_to}{}{stripped}", - std::path::MAIN_SEPARATOR - ) - .into(); - } - if path_component.is_empty() { - path_component = normalized_relative_to.into(); - } - } - if let Some(stripped) = path_component.strip_prefix("//") { - path_component = stripped.to_owned().into(); - } - - // NB: We confirm that the path_component is normalized while still in `str` form because - // `Path` hides many of the components we're attempting to validate. - if !path_component.is_empty() { - for component in path_component.split(std::path::MAIN_SEPARATOR) { - if matches!(component, "." | ".." | "") { - return Err(InvalidSpecPathError::new_err(format!( - "Invalid address `{spec}` from {description_of_origin}. It has an \ - un-normalized path part: `{component:?}`." 
- ))); + ))); } - } - } - - let path_component = if let Some((subproject, _)) = subproject_info { - Path::new(subproject).join(Path::new(&*path_component)) - } else { - PathBuf::from(path_component.into_owned()) - }; - - Self::__new__( - spec.to_owned(), - path_component, - description_of_origin.to_owned(), - address.target.map(|t| t.to_owned()), - address.generated.map(|t| t.to_owned()), - Some( - address - .parameters - .into_iter() - .map(|(k, v)| (k.to_owned(), v.to_owned())) - .collect(), - ), - ) - } - - #[getter] - fn path_component(&self) -> &Path { - &self.path_component - } - - #[getter] - fn target_component(&self) -> Option<&str> { - self.target_component.as_deref() - } - - #[getter] - fn generated_component(&self) -> Option<&str> { - self.generated_component.as_deref() - } - - #[getter] - fn parameters(&self) -> BTreeMap { - // TODO: For some reason, `IntoPy` is not implemented for `&BTreeMap<_, _>`. - self.parameters.clone() - } - - #[getter] - fn description_of_origin(&self) -> &str { - &self.description_of_origin - } - - fn file_to_address(&self) -> PyResult
{ - let Some(target_component) = self.target_component.as_ref() else { - // Use the default target in the same directory as the file. - match ( - self.path_component.parent(), - self.path_component.file_name(), - ) { - (Some(spec_path), Some(relative_file_path)) if !spec_path.as_os_str().is_empty() => { - return Address::__new__( - spec_path.to_owned(), - None, - Some(self.parameters.clone()), - None, - Some(relative_file_path.into()), - ); + let address = parsed_spec.address; + + let normalized_relative_to = if let Some((_, normalized_relative_to)) = subproject_info { + Some(normalized_relative_to) + } else { + relative_to + }; + + let mut path_component: Cow = address.path.into(); + if let Some(normalized_relative_to) = normalized_relative_to { + if let Some(stripped) = path_component.strip_prefix("./") { + path_component = format!( + "{normalized_relative_to}{}{stripped}", + std::path::MAIN_SEPARATOR + ) + .into(); + } + if path_component.is_empty() { + path_component = normalized_relative_to.into(); + } } - _ => { - // We validate that this is not a top-level file. We couldn't do this earlier in the - // AddressSpec constructor because we weren't sure if the path_spec referred to a file - // vs. a directory. - return Err(InvalidTargetNameError::new_err(format!( + if let Some(stripped) = path_component.strip_prefix("//") { + path_component = stripped.to_owned().into(); + } + + // NB: We confirm that the path_component is normalized while still in `str` form because + // `Path` hides many of the components we're attempting to validate. + if !path_component.is_empty() { + for component in path_component.split(std::path::MAIN_SEPARATOR) { + if matches!(component, "." | ".." | "") { + return Err(InvalidSpecPathError::new_err(format!( + "Invalid address `{spec}` from {description_of_origin}. It has an \ + un-normalized path part: `{component:?}`." + ))); + } + } + } + + let path_component = if let Some((subproject, _)) = subproject_info { + Path::new(subproject).join(Path::new(&*path_component)) + } else { + PathBuf::from(path_component.into_owned()) + }; + + Self::__new__( + spec.to_owned(), + path_component, + description_of_origin.to_owned(), + address.target.map(|t| t.to_owned()), + address.generated.map(|t| t.to_owned()), + Some( + address + .parameters + .into_iter() + .map(|(k, v)| (k.to_owned(), v.to_owned())) + .collect(), + ), + ) + } + + #[getter] + fn path_component(&self) -> &Path { + &self.path_component + } + + #[getter] + fn target_component(&self) -> Option<&str> { + self.target_component.as_deref() + } + + #[getter] + fn generated_component(&self) -> Option<&str> { + self.generated_component.as_deref() + } + + #[getter] + fn parameters(&self) -> BTreeMap { + // TODO: For some reason, `IntoPy` is not implemented for `&BTreeMap<_, _>`. + self.parameters.clone() + } + + #[getter] + fn description_of_origin(&self) -> &str { + &self.description_of_origin + } + + fn file_to_address(&self) -> PyResult
{ + let Some(target_component) = self.target_component.as_ref() else { + // Use the default target in the same directory as the file. + match ( + self.path_component.parent(), + self.path_component.file_name(), + ) { + (Some(spec_path), Some(relative_file_path)) + if !spec_path.as_os_str().is_empty() => + { + return Address::__new__( + spec_path.to_owned(), + None, + Some(self.parameters.clone()), + None, + Some(relative_file_path.into()), + ); + } + _ => { + // We validate that this is not a top-level file. We couldn't do this earlier in the + // AddressSpec constructor because we weren't sure if the path_spec referred to a file + // vs. a directory. + return Err(InvalidTargetNameError::new_err(format!( "Addresses for generated first-party targets in the build root must include \ which target generator they come from, such as `{}:original_target`. However, \ `{}` from {} did not have a target name.", @@ -281,505 +284,504 @@ impl AddressInput { self.original_spec, self.description_of_origin, ))); + } + } + }; + + // The target component may be "above" (but not below) the file in the filesystem. + // Determine how many levels above the file it is, and validate that the path is relative. + let parent_count = Path::new(&target_component).components().count() - 1; + if parent_count == 0 { + return Address::__new__( + self.path_component + .parent() + .unwrap_or_else(|| Path::new("")) + .to_owned(), + Some(target_component.clone()), + Some(self.parameters.clone()), + None, + self.path_component.file_name().map(|f| f.into()), + ); } - } - }; - - // The target component may be "above" (but not below) the file in the filesystem. - // Determine how many levels above the file it is, and validate that the path is relative. - let parent_count = Path::new(&target_component).components().count() - 1; - if parent_count == 0 { - return Address::__new__( - self - .path_component - .parent() - .unwrap_or_else(|| Path::new("")) - .to_owned(), - Some(target_component.clone()), - Some(self.parameters.clone()), - None, - self.path_component.file_name().map(|f| f.into()), - ); - } - - if parent_count - != Path::new(&target_component) - .components() - .take_while(|c| c.as_os_str() == OsStr::new("..")) - .count() - { - return Err(InvalidTargetNameError::new_err(format!( - "Invalid address `{}` from {}. The target name portion of the address must refer \ + + if parent_count + != Path::new(&target_component) + .components() + .take_while(|c| c.as_os_str() == OsStr::new("..")) + .count() + { + return Err(InvalidTargetNameError::new_err(format!( + "Invalid address `{}` from {}. The target name portion of the address must refer \ to a target defined in the same directory or a parent directory of the file path \ `{}`, but the value `{target_component}` is a subdirectory.", - self.original_spec, - self.description_of_origin, - self.path_component.display(), - ))); - } - - // Split the path_component into a spec_path and relative_file_path at the appropriate - // position. - let path_components = self - .path_component - .components() - .map(|c| c.as_os_str().to_str().unwrap()) - .collect::>(); - if path_components.len() <= parent_count { - return Err(InvalidTargetNameError::new_err(format!( - "Invalid address `{}` from {}. The target name portion of the address \ + self.original_spec, + self.description_of_origin, + self.path_component.display(), + ))); + } + + // Split the path_component into a spec_path and relative_file_path at the appropriate + // position. 
+ let path_components = self + .path_component + .components() + .map(|c| c.as_os_str().to_str().unwrap()) + .collect::>(); + if path_components.len() <= parent_count { + return Err(InvalidTargetNameError::new_err(format!( + "Invalid address `{}` from {}. The target name portion of the address \ `{target_component}` has too many `../`, which means it refers to a directory \ above the file path `{}`. Expected no more than {} instances of `../` in `{target_component}`, but found {parent_count} instances.", - self.original_spec, - self.description_of_origin, - self.path_component.display(), - path_components.len(), - ))); - } - - let offset = path_components.len() - (parent_count + 1); - let spec_path = path_components[..offset].join(std::path::MAIN_SEPARATOR_STR); - let relative_file_path = path_components[offset..].join(std::path::MAIN_SEPARATOR_STR); - let target_name = Path::new(&target_component).file_name(); - Address::__new__( - spec_path.into(), - target_name.and_then(|t| t.to_str()).map(|t| t.to_owned()), - Some(self.parameters.clone()), - None, - Some(relative_file_path.into()), - ) - } - - fn dir_to_address(&self) -> PyResult
{ - Address::__new__( - self.path_component.clone(), - self.target_component.clone(), - Some(self.parameters.clone()), - self.generated_component.clone(), - None, - ) - } - - #[getter] - fn spec(&self) -> &str { - &self.original_spec - } - - fn __hash__(&self) -> u64 { - let mut s = FnvHasher::default(); - self.hash(&mut s); - s.finish() - } - - fn __str__(&self) -> String { - format!("{self:?}") - } - - fn __repr__(&self) -> String { - format!("{self:?}") - } - - fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), - } - } + self.original_spec, + self.description_of_origin, + self.path_component.display(), + path_components.len(), + ))); + } + + let offset = path_components.len() - (parent_count + 1); + let spec_path = path_components[..offset].join(std::path::MAIN_SEPARATOR_STR); + let relative_file_path = path_components[offset..].join(std::path::MAIN_SEPARATOR_STR); + let target_name = Path::new(&target_component).file_name(); + Address::__new__( + spec_path.into(), + target_name.and_then(|t| t.to_str()).map(|t| t.to_owned()), + Some(self.parameters.clone()), + None, + Some(relative_file_path.into()), + ) + } + + fn dir_to_address(&self) -> PyResult
{ + Address::__new__( + self.path_component.clone(), + self.target_component.clone(), + Some(self.parameters.clone()), + self.generated_component.clone(), + None, + ) + } + + #[getter] + fn spec(&self) -> &str { + &self.original_spec + } + + fn __hash__(&self) -> u64 { + let mut s = FnvHasher::default(); + self.hash(&mut s); + s.finish() + } + + fn __str__(&self) -> String { + format!("{self:?}") + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } } fn split_on_longest_dir_prefix<'a, 'b>( - path: &'a str, - prefixes: &[&'b str], + path: &'a str, + prefixes: &[&'b str], ) -> Option<(&'b str, &'a str)> { - let mut longest_match = 0; - let mut matched = None; - for prefix in prefixes { - if prefix.len() > longest_match { - if let Ok(stripped) = Path::new(path).strip_prefix(prefix) { - longest_match = prefix.len(); - matched = Some((*prefix, stripped.to_str().unwrap())); - } - } - } - matched + let mut longest_match = 0; + let mut matched = None; + for prefix in prefixes { + if prefix.len() > longest_match { + if let Ok(stripped) = Path::new(path).strip_prefix(prefix) { + longest_match = prefix.len(); + matched = Some((*prefix, stripped.to_str().unwrap())); + } + } + } + matched } #[pyclass(name = "Address")] #[derive(Clone, Hash, Eq, PartialEq, Ord, PartialOrd)] pub struct Address { - // NB: Field ordering is deliberate, so that Ord will roughly match `self.spec`. - spec_path: PathBuf, - relative_file_path: Option, - target_name: Option, - parameters: BTreeMap, - generated_name: Option, + // NB: Field ordering is deliberate, so that Ord will roughly match `self.spec`. + spec_path: PathBuf, + relative_file_path: Option, + target_name: Option, + parameters: BTreeMap, + generated_name: Option, } #[pymethods] impl Address { - #[new] - fn __new__( - spec_path: PathBuf, - target_name: Option, - parameters: Option>, - generated_name: Option, - relative_file_path: Option, - ) -> PyResult { - if let Some(generated_name) = generated_name.as_ref() { - if let Some(relative_file_path) = relative_file_path { - return Err(PyAssertionError::new_err(format!( - "Do not use both `generated_name` ({generated_name}) and \ + #[new] + fn __new__( + spec_path: PathBuf, + target_name: Option, + parameters: Option>, + generated_name: Option, + relative_file_path: Option, + ) -> PyResult { + if let Some(generated_name) = generated_name.as_ref() { + if let Some(relative_file_path) = relative_file_path { + return Err(PyAssertionError::new_err(format!( + "Do not use both `generated_name` ({generated_name}) and \ `relative_file_path` ({}).", - relative_file_path.display() - ))); - } - let banned = generated_name - .chars() - .filter(|c| BANNED_CHARS_IN_GENERATED_NAME.contains(c)) - .map(|c| c.to_string()) - .collect::>(); - if !banned.is_empty() { - return Err(InvalidTargetNameError::new_err(format!( + relative_file_path.display() + ))); + } + let banned = generated_name + .chars() + .filter(|c| BANNED_CHARS_IN_GENERATED_NAME.contains(c)) + .map(|c| c.to_string()) + .collect::>(); + if !banned.is_empty() { + return Err(InvalidTargetNameError::new_err(format!( "The generated name `{generated_name}` (defined in directory {}, the part after \ `#`) contains banned characters (`{}`). 
Please replace \ these characters with another separator character like `_`, `-`, or `/`.", spec_path.display(), banned.join(","), ))); - } - } - - let target_name = if let Some(target_name) = target_name { - if Some(OsStr::new(&target_name)) == spec_path.file_name() { - // If the target_name is the same as the default name would be, we normalize to None. - None - } else { - let banned = target_name - .chars() - .filter(|c| BANNED_CHARS_IN_TARGET_NAME.contains(c)) - .map(|c| c.to_string()) - .collect::>(); - if !banned.is_empty() { - return Err(InvalidTargetNameError::new_err(format!( - "The target name {target_name} (defined in directory {}) \ + } + } + + let target_name = if let Some(target_name) = target_name { + if Some(OsStr::new(&target_name)) == spec_path.file_name() { + // If the target_name is the same as the default name would be, we normalize to None. + None + } else { + let banned = target_name + .chars() + .filter(|c| BANNED_CHARS_IN_TARGET_NAME.contains(c)) + .map(|c| c.to_string()) + .collect::>(); + if !banned.is_empty() { + return Err(InvalidTargetNameError::new_err(format!( + "The target name {target_name} (defined in directory {}) \ contains banned characters (`{}`). Please replace \ these characters with another separator character like `_` or `-`.", - spec_path.display(), - banned.join(","), - ))); - } - Some(target_name) - } - } else { - None - }; - - let address = Self { - spec_path, - target_name, - parameters: parameters.unwrap_or_default(), - generated_name, - relative_file_path, - }; - - if let Some(file_name) = address.spec_path.file_name().and_then(|n| n.to_str()) { - if file_name.starts_with("BUILD") { - return Err(InvalidSpecPathError::new_err(format!( - "The address {address} has {} as the last part of its \ + spec_path.display(), + banned.join(","), + ))); + } + Some(target_name) + } + } else { + None + }; + + let address = Self { + spec_path, + target_name, + parameters: parameters.unwrap_or_default(), + generated_name, + relative_file_path, + }; + + if let Some(file_name) = address.spec_path.file_name().and_then(|n| n.to_str()) { + if file_name.starts_with("BUILD") { + return Err(InvalidSpecPathError::new_err(format!( + "The address {address} has {} as the last part of its \ path, but BUILD is a reserved name. Please make sure that you did not name any \ directories BUILD.", - Path::new(file_name).display(), - ))); - } - } - - Ok(address) - } - - #[getter] - fn spec_path(&self) -> &Path { - &self.spec_path - } - - #[getter] - fn generated_name(&self) -> Option<&str> { - self.generated_name.as_deref() - } - - #[getter] - fn relative_file_path(&self) -> Option<&Path> { - self.relative_file_path.as_deref() - } - - #[getter] - fn parameters(&self) -> BTreeMap { - // TODO: For some reason, `IntoPy` is not implemented for `&BTreeMap<_, _>`. 
- self.parameters.clone() - } - - #[getter] - pub fn is_generated_target(&self) -> bool { - self.generated_name.is_some() || self.is_file_target() - } - - #[getter] - fn is_file_target(&self) -> bool { - self.relative_file_path.is_some() - } - - #[getter] - fn is_parametrized(&self) -> bool { - !self.parameters.is_empty() - } - - fn is_parametrized_subset_of(&self, other: &Address) -> bool { - self.equal_without_parameters(other) - && self - .parameters - .iter() - .all(|(k, v)| other.parameters.get(k) == Some(v)) - } - - #[getter] - fn filename(&self) -> PyResult { - if let Some(relative_file_path) = self.relative_file_path.as_ref() { - Ok(self.spec_path.join(relative_file_path)) - } else { - Err(PyException::new_err(format!( - "Only a file Address (`self.is_file_target`) has a filename: {self}", - ))) - } - } - - #[getter] - fn target_name(&self) -> &str { - if let Some(target_name) = self.target_name.as_ref() { - target_name - } else if let Some(file_name) = self.spec_path.file_name() { - file_name - .to_str() - .unwrap_or_else(|| panic!("{} could not be viewed as UTF8.", self.spec_path.display())) - } else { - // TODO: This case is preserved from the original implementation (because `os.path.basename` - // returns the empty output for an empty input), but should likely be ruled out in the - // constructor. - "" - } - } - - #[getter] - fn parameters_repr(&self) -> Cow { - if self.parameters.is_empty() { - return Cow::from(""); - } - - let rhs = self - .parameters - .iter() - .map(|(k, v)| format!("{k}={v}")) - .collect::>() - .join(","); - Cow::from(format!("@{rhs}")) - } - - #[getter] - fn spec(&self) -> String { - let prefix = if self.spec_path.as_os_str().is_empty() { - "//" - } else { - "" - }; - - let (path, target): (Cow, Cow) = - if let Some(relative_file_path) = self.relative_file_path.as_ref() { - let parent_prefix = "../".repeat(relative_file_path.components().count() - 1); - let target = if self.target_name.is_none() && parent_prefix.is_empty() { - "" + Path::new(file_name).display(), + ))); + } + } + + Ok(address) + } + + #[getter] + fn spec_path(&self) -> &Path { + &self.spec_path + } + + #[getter] + fn generated_name(&self) -> Option<&str> { + self.generated_name.as_deref() + } + + #[getter] + fn relative_file_path(&self) -> Option<&Path> { + self.relative_file_path.as_deref() + } + + #[getter] + fn parameters(&self) -> BTreeMap { + // TODO: For some reason, `IntoPy` is not implemented for `&BTreeMap<_, _>`. 
+ self.parameters.clone() + } + + #[getter] + pub fn is_generated_target(&self) -> bool { + self.generated_name.is_some() || self.is_file_target() + } + + #[getter] + fn is_file_target(&self) -> bool { + self.relative_file_path.is_some() + } + + #[getter] + fn is_parametrized(&self) -> bool { + !self.parameters.is_empty() + } + + fn is_parametrized_subset_of(&self, other: &Address) -> bool { + self.equal_without_parameters(other) + && self + .parameters + .iter() + .all(|(k, v)| other.parameters.get(k) == Some(v)) + } + + #[getter] + fn filename(&self) -> PyResult { + if let Some(relative_file_path) = self.relative_file_path.as_ref() { + Ok(self.spec_path.join(relative_file_path)) + } else { + Err(PyException::new_err(format!( + "Only a file Address (`self.is_file_target`) has a filename: {self}", + ))) + } + } + + #[getter] + fn target_name(&self) -> &str { + if let Some(target_name) = self.target_name.as_ref() { + target_name + } else if let Some(file_name) = self.spec_path.file_name() { + file_name.to_str().unwrap_or_else(|| { + panic!("{} could not be viewed as UTF8.", self.spec_path.display()) + }) } else { - self.target_name() + // TODO: This case is preserved from the original implementation (because `os.path.basename` + // returns the empty output for an empty input), but should likely be ruled out in the + // constructor. + "" + } + } + + #[getter] + fn parameters_repr(&self) -> Cow { + if self.parameters.is_empty() { + return Cow::from(""); + } + + let rhs = self + .parameters + .iter() + .map(|(k, v)| format!("{k}={v}")) + .collect::>() + .join(","); + Cow::from(format!("@{rhs}")) + } + + #[getter] + fn spec(&self) -> String { + let prefix = if self.spec_path.as_os_str().is_empty() { + "//" + } else { + "" }; - ( - self.spec_path.join(relative_file_path).into(), - format!("{parent_prefix}{target}").into(), + + let (path, target): (Cow, Cow) = + if let Some(relative_file_path) = self.relative_file_path.as_ref() { + let parent_prefix = "../".repeat(relative_file_path.components().count() - 1); + let target = if self.target_name.is_none() && parent_prefix.is_empty() { + "" + } else { + self.target_name() + }; + ( + self.spec_path.join(relative_file_path).into(), + format!("{parent_prefix}{target}").into(), + ) + } else { + let target_name = if self.target_name.is_none() + && (self.generated_name.is_some() || !self.parameters.is_empty()) + { + "".into() + } else { + self.target_name().into() + }; + ((&self.spec_path).into(), target_name) + }; + + let target_sep = if target.is_empty() { "" } else { ":" }; + let generated: Cow = if let Some(generated_name) = self.generated_name.as_ref() { + format!("#{generated_name}").into() + } else { + "".into() + }; + + format!( + "{prefix}{}{target_sep}{target}{generated}{}", + path.display(), + self.parameters_repr() ) - } else { - let target_name = if self.target_name.is_none() - && (self.generated_name.is_some() || !self.parameters.is_empty()) - { - "".into() + } + + #[getter] + fn path_safe_spec(&self) -> PyResult { + fn sanitize(s: D) -> String { + s.to_string().replace(std::path::MAIN_SEPARATOR, ".") + } + + let (parent_prefix, path): (Cow, Cow) = match self.relative_file_path.as_ref() { + Some(relative_file_path) if !relative_file_path.as_os_str().is_empty() => { + let parent_count = relative_file_path.components().count() - 1; + let parent_prefix = if parent_count > 0 { + "@".repeat(parent_count).into() + } else { + ".".into() + }; + ( + parent_prefix, + format!(".{}", sanitize(relative_file_path.display())).into(), + ) + } + _ => 
(".".into(), "".into()), + }; + + let target: Cow = if parent_prefix == "." { + if let Some(target_name) = self.target_name.as_ref() { + format!("{parent_prefix}{target_name}").into() + } else { + "".into() + } + } else { + format!("{parent_prefix}{}", self.target_name()).into() + }; + + let params: Cow = if self.parameters.is_empty() { + "".into() } else { - self.target_name().into() + format!("@{}", sanitize(self.parameters_repr())).into() }; - ((&self.spec_path).into(), target_name) - }; - - let target_sep = if target.is_empty() { "" } else { ":" }; - let generated: Cow = if let Some(generated_name) = self.generated_name.as_ref() { - format!("#{generated_name}").into() - } else { - "".into() - }; - - format!( - "{prefix}{}{target_sep}{target}{generated}{}", - path.display(), - self.parameters_repr() - ) - } - - #[getter] - fn path_safe_spec(&self) -> PyResult { - fn sanitize(s: D) -> String { - s.to_string().replace(std::path::MAIN_SEPARATOR, ".") - } - - let (parent_prefix, path): (Cow, Cow) = match self.relative_file_path.as_ref() { - Some(relative_file_path) if !relative_file_path.as_os_str().is_empty() => { - let parent_count = relative_file_path.components().count() - 1; - let parent_prefix = if parent_count > 0 { - "@".repeat(parent_count).into() + + let generated: Cow = if let Some(generated_name) = self.generated_name.as_ref() { + format!("@{}", sanitize(generated_name)).into() } else { - ".".into() + "".into() }; - ( - parent_prefix, - format!(".{}", sanitize(relative_file_path.display())).into(), - ) - } - _ => (".".into(), "".into()), - }; - - let target: Cow = if parent_prefix == "." { - if let Some(target_name) = self.target_name.as_ref() { - format!("{parent_prefix}{target_name}").into() - } else { - "".into() - } - } else { - format!("{parent_prefix}{}", self.target_name()).into() - }; - - let params: Cow = if self.parameters.is_empty() { - "".into() - } else { - format!("@{}", sanitize(self.parameters_repr())).into() - }; - - let generated: Cow = if let Some(generated_name) = self.generated_name.as_ref() { - format!("@{}", sanitize(generated_name)).into() - } else { - "".into() - }; - - let prefix = sanitize(self.spec_path.display()); - - Ok(format!("{prefix}{path}{target}{generated}{params}")) - } - - fn parametrize(&self, parameters: BTreeMap) -> Self { - let mut merged_parameters = self.parameters.clone(); - merged_parameters.extend(parameters); - - Self { - spec_path: self.spec_path.clone(), - target_name: self.target_name.clone(), - parameters: merged_parameters, - generated_name: self.generated_name.clone(), - relative_file_path: self.relative_file_path.clone(), - } - } - - fn maybe_convert_to_target_generator(self_: PyRef, py: Python) -> PyObject { - if !self_.is_generated_target() && !self_.is_parametrized() { - return self_.into_py(py); - } - - Self { - spec_path: self_.spec_path.clone(), - target_name: self_.target_name.clone(), - parameters: BTreeMap::default(), - generated_name: None, - relative_file_path: None, - } - .into_py(py) - } - - fn create_generated(&self, generated_name: String) -> PyResult { - if self.is_generated_target() { - return Err(PyAssertionError::new_err(format!( - "Cannot call `create_generated` on `{self}`." 
- ))); - } - - Ok(Self { - spec_path: self.spec_path.clone(), - target_name: self.target_name.clone(), - parameters: self.parameters.clone(), - generated_name: Some(generated_name), - relative_file_path: None, - }) - } - - fn create_file(&self, relative_file_path: PathBuf) -> PyResult { - if self.is_generated_target() { - return Err(PyAssertionError::new_err(format!( - "Cannot call `create_file` on `{self}`." - ))); - } - - Ok(Self { - spec_path: self.spec_path.clone(), - target_name: self.target_name.clone(), - parameters: self.parameters.clone(), - generated_name: None, - relative_file_path: Some(relative_file_path), - }) - } - - fn debug_hint(&self) -> String { - self.spec() - } - - fn metadata<'p>(&self, py: Python<'p>) -> PyResult<&'p PyDict> { - let dict = PyDict::new(py); - dict.set_item(pyo3::intern!(py, "address"), self.spec())?; - Ok(dict) - } - - fn __hash__(&self) -> u64 { - let mut s = FnvHasher::default(); - self.hash(&mut s); - s.finish() - } - - fn __str__(&self) -> String { - format!("{self}") - } - - fn __repr__(&self) -> String { - format!("Address({self})") - } - - fn __richcmp__(&self, other: &Self, op: CompareOp) -> bool { - op.matches(self.cmp(other)) - } + + let prefix = sanitize(self.spec_path.display()); + + Ok(format!("{prefix}{path}{target}{generated}{params}")) + } + + fn parametrize(&self, parameters: BTreeMap) -> Self { + let mut merged_parameters = self.parameters.clone(); + merged_parameters.extend(parameters); + + Self { + spec_path: self.spec_path.clone(), + target_name: self.target_name.clone(), + parameters: merged_parameters, + generated_name: self.generated_name.clone(), + relative_file_path: self.relative_file_path.clone(), + } + } + + fn maybe_convert_to_target_generator(self_: PyRef, py: Python) -> PyObject { + if !self_.is_generated_target() && !self_.is_parametrized() { + return self_.into_py(py); + } + + Self { + spec_path: self_.spec_path.clone(), + target_name: self_.target_name.clone(), + parameters: BTreeMap::default(), + generated_name: None, + relative_file_path: None, + } + .into_py(py) + } + + fn create_generated(&self, generated_name: String) -> PyResult { + if self.is_generated_target() { + return Err(PyAssertionError::new_err(format!( + "Cannot call `create_generated` on `{self}`." + ))); + } + + Ok(Self { + spec_path: self.spec_path.clone(), + target_name: self.target_name.clone(), + parameters: self.parameters.clone(), + generated_name: Some(generated_name), + relative_file_path: None, + }) + } + + fn create_file(&self, relative_file_path: PathBuf) -> PyResult { + if self.is_generated_target() { + return Err(PyAssertionError::new_err(format!( + "Cannot call `create_file` on `{self}`." 
+ ))); + } + + Ok(Self { + spec_path: self.spec_path.clone(), + target_name: self.target_name.clone(), + parameters: self.parameters.clone(), + generated_name: None, + relative_file_path: Some(relative_file_path), + }) + } + + fn debug_hint(&self) -> String { + self.spec() + } + + fn metadata<'p>(&self, py: Python<'p>) -> PyResult<&'p PyDict> { + let dict = PyDict::new(py); + dict.set_item(pyo3::intern!(py, "address"), self.spec())?; + Ok(dict) + } + + fn __hash__(&self) -> u64 { + let mut s = FnvHasher::default(); + self.hash(&mut s); + s.finish() + } + + fn __str__(&self) -> String { + format!("{self}") + } + + fn __repr__(&self) -> String { + format!("Address({self})") + } + + fn __richcmp__(&self, other: &Self, op: CompareOp) -> bool { + op.matches(self.cmp(other)) + } } impl Address { - fn equal_without_parameters(&self, other: &Address) -> bool { - self.spec_path == other.spec_path - && self.target_name == other.target_name - && self.generated_name == other.generated_name - && self.relative_file_path == other.relative_file_path - } + fn equal_without_parameters(&self, other: &Address) -> bool { + self.spec_path == other.spec_path + && self.target_name == other.target_name + && self.generated_name == other.generated_name + && self.relative_file_path == other.relative_file_path + } } impl std::fmt::Display for Address { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.spec()) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.spec()) + } } /// 1. a path component @@ -787,10 +789,10 @@ impl std::fmt::Display for Address { /// 3. a generated component /// 4. a sequence of key/value parameters type ParsedAddress<'a> = ( - &'a str, - Option<&'a str>, - Option<&'a str>, - Vec<(&'a str, &'a str)>, + &'a str, + Option<&'a str>, + Option<&'a str>, + Vec<(&'a str, &'a str)>, ); /// 1. an address @@ -800,14 +802,14 @@ type ParsedSpec<'a> = (ParsedAddress<'a>, Option<&'a str>); /// Parses an "address spec" from the CLI. 
#[pyfunction] fn address_spec_parse(spec_str: &str) -> PyResult { - let spec = address::parse_address_spec(spec_str).map_err(AddressParseException::new_err)?; - Ok(( - ( - spec.address.path, - spec.address.target, - spec.address.generated, - spec.address.parameters, - ), - spec.wildcard, - )) + let spec = address::parse_address_spec(spec_str).map_err(AddressParseException::new_err)?; + Ok(( + ( + spec.address.path, + spec.address.target, + spec.address.generated, + spec.address.parameters, + ), + spec.wildcard, + )) } diff --git a/src/rust/engine/src/externs/dep_inference.rs b/src/rust/engine/src/externs/dep_inference.rs index 2a18853c9d0..1f76f5b138d 100644 --- a/src/rust/engine/src/externs/dep_inference.rs +++ b/src/rust/engine/src/externs/dep_inference.rs @@ -10,14 +10,14 @@ use pyo3::{IntoPy, PyObject, Python}; use fs::DirectoryDigest; use protos::gen::pants::cache::{ - dependency_inference_request, javascript_inference_metadata, JavascriptInferenceMetadata, + dependency_inference_request, javascript_inference_metadata, JavascriptInferenceMetadata, }; use crate::externs::fs::PyDigest; pub(crate) fn register(m: &PyModule) -> PyResult<()> { - m.add_class::()?; - m.add_class::() + m.add_class::()?; + m.add_class::() } #[pyclass(name = "InferenceMetadata")] @@ -26,82 +26,82 @@ pub struct PyInferenceMetadata(pub dependency_inference_request::Metadata); #[pymethods] impl PyInferenceMetadata { - #[staticmethod] - fn javascript(package_root: String, import_patterns: &PyDict) -> PyResult { - use javascript_inference_metadata::ImportPattern; - let import_patterns: PyResult> = import_patterns - .iter() - .map(|(key, value)| { - Ok(ImportPattern { - pattern: key.extract()?, - replacements: value.extract()?, - }) - }) - .collect(); - Ok(Self(dependency_inference_request::Metadata::Js( - JavascriptInferenceMetadata { - package_root, - import_patterns: import_patterns?, - }, - ))) - } + #[staticmethod] + fn javascript(package_root: String, import_patterns: &PyDict) -> PyResult { + use javascript_inference_metadata::ImportPattern; + let import_patterns: PyResult> = import_patterns + .iter() + .map(|(key, value)| { + Ok(ImportPattern { + pattern: key.extract()?, + replacements: value.extract()?, + }) + }) + .collect(); + Ok(Self(dependency_inference_request::Metadata::Js( + JavascriptInferenceMetadata { + package_root, + import_patterns: import_patterns?, + }, + ))) + } - fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } } - } - fn __repr__(&self) -> String { - format!("InferenceMetadata({:?})", self.0) - } + fn __repr__(&self) -> String { + format!("InferenceMetadata({:?})", self.0) + } - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.0.hash(&mut s); - s.finish() - } + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.0.hash(&mut s); + s.finish() + } } #[pyclass(name = "NativeDependenciesRequest")] #[derive(Clone, Debug, PartialEq)] pub struct PyNativeDependenciesRequest { - pub directory_digest: DirectoryDigest, - pub metadata: Option, + pub directory_digest: DirectoryDigest, + pub metadata: Option, } #[pymethods] impl PyNativeDependenciesRequest { - #[new] - fn __new__(digest: 
PyDigest, metadata: Option) -> Self { - Self { - directory_digest: digest.0, - metadata: metadata.map(|inner| inner.0), + #[new] + fn __new__(digest: PyDigest, metadata: Option) -> Self { + Self { + directory_digest: digest.0, + metadata: metadata.map(|inner| inner.0), + } } - } - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.directory_digest.hash(&mut s); - self.metadata.hash(&mut s); - s.finish() - } + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.directory_digest.hash(&mut s); + self.metadata.hash(&mut s); + s.finish() + } - fn __repr__(&self) -> String { - format!( - "NativeDependenciesRequest('{}', {:?})", - PyDigest(self.directory_digest.clone()), - self.metadata - ) - } + fn __repr__(&self) -> String { + format!( + "NativeDependenciesRequest('{}', {:?})", + PyDigest(self.directory_digest.clone()), + self.metadata + ) + } - fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } } - } } diff --git a/src/rust/engine/src/externs/engine_aware.rs b/src/rust/engine/src/externs/engine_aware.rs index 3d16a2ba998..892e64918ac 100644 --- a/src/rust/engine/src/externs/engine_aware.rs +++ b/src/rust/engine/src/externs/engine_aware.rs @@ -21,103 +21,103 @@ use workunit_store::{ArtifactOutput, Level, RunningWorkunit, UserMetadataItem, W pub(crate) struct EngineAwareReturnType; impl EngineAwareReturnType { - pub(crate) fn update_workunit(workunit: &mut RunningWorkunit, task_result: &PyAny) { - workunit.update_metadata(|old| { - let new_level = Self::level(task_result); - - // If the metadata already existed, or if its level changed, we need to update it. - let (mut metadata, level) = if let Some((metadata, old_level)) = old { - (metadata, new_level.unwrap_or(old_level)) - } else if let Some(level) = new_level { - (WorkunitMetadata::default(), level) - } else { - return None; - }; - - metadata.message = Self::message(task_result); - metadata - .artifacts - .extend(Self::artifacts(task_result).unwrap_or_default()); - metadata - .user_metadata - .extend(metadata_for(task_result).unwrap_or_default()); - Some((metadata, level)) - }); - } - - fn level(obj: &PyAny) -> Option { - let level_val = obj.call_method0("level").ok()?; - if level_val.is_none() { - return None; + pub(crate) fn update_workunit(workunit: &mut RunningWorkunit, task_result: &PyAny) { + workunit.update_metadata(|old| { + let new_level = Self::level(task_result); + + // If the metadata already existed, or if its level changed, we need to update it. 
+ let (mut metadata, level) = if let Some((metadata, old_level)) = old { + (metadata, new_level.unwrap_or(old_level)) + } else if let Some(level) = new_level { + (WorkunitMetadata::default(), level) + } else { + return None; + }; + + metadata.message = Self::message(task_result); + metadata + .artifacts + .extend(Self::artifacts(task_result).unwrap_or_default()); + metadata + .user_metadata + .extend(metadata_for(task_result).unwrap_or_default()); + Some((metadata, level)) + }); } - externs::val_to_log_level(level_val).ok() - } - fn message(obj: &PyAny) -> Option { - let msg_val = obj.call_method0("message").ok()?; - if msg_val.is_none() { - return None; + fn level(obj: &PyAny) -> Option { + let level_val = obj.call_method0("level").ok()?; + if level_val.is_none() { + return None; + } + externs::val_to_log_level(level_val).ok() } - msg_val.extract().ok() - } - fn artifacts(obj: &PyAny) -> Option> { - let artifacts_val = obj.call_method0("artifacts").ok()?; - if artifacts_val.is_none() { - return None; + fn message(obj: &PyAny) -> Option { + let msg_val = obj.call_method0("message").ok()?; + if msg_val.is_none() { + return None; + } + msg_val.extract().ok() } - let artifacts_dict = artifacts_val.downcast::().ok()?; - let mut output = Vec::new(); - - for kv_pair in artifacts_dict.items().into_iter() { - let (key, value): (String, &PyAny) = kv_pair.extract().ok()?; - let artifact_output = if value.is_instance_of::() { - lift_file_digest(value).map(ArtifactOutput::FileDigest) - } else { - let digest_value = value.getattr("digest").ok()?; - lift_directory_digest(digest_value).map(|dd| ArtifactOutput::Snapshot(Arc::new(dd))) - } - .ok()?; - output.push((key, artifact_output)); + fn artifacts(obj: &PyAny) -> Option> { + let artifacts_val = obj.call_method0("artifacts").ok()?; + if artifacts_val.is_none() { + return None; + } + + let artifacts_dict = artifacts_val.downcast::().ok()?; + let mut output = Vec::new(); + + for kv_pair in artifacts_dict.items().into_iter() { + let (key, value): (String, &PyAny) = kv_pair.extract().ok()?; + let artifact_output = if value.is_instance_of::() { + lift_file_digest(value).map(ArtifactOutput::FileDigest) + } else { + let digest_value = value.getattr("digest").ok()?; + lift_directory_digest(digest_value).map(|dd| ArtifactOutput::Snapshot(Arc::new(dd))) + } + .ok()?; + output.push((key, artifact_output)); + } + Some(output) } - Some(output) - } - pub(crate) fn is_cacheable(obj: &PyAny) -> Option { - obj.call_method0("cacheable").ok()?.extract().ok() - } + pub(crate) fn is_cacheable(obj: &PyAny) -> Option { + obj.call_method0("cacheable").ok()?.extract().ok() + } } pub struct EngineAwareParameter; impl EngineAwareParameter { - pub fn debug_hint(obj: &PyAny) -> Option { - let hint = obj.call_method0("debug_hint").ok()?; - if hint.is_none() { - return None; + pub fn debug_hint(obj: &PyAny) -> Option { + let hint = obj.call_method0("debug_hint").ok()?; + if hint.is_none() { + return None; + } + hint.extract().ok() } - hint.extract().ok() - } - pub fn metadata(obj: &PyAny) -> Vec<(String, UserMetadataItem)> { - metadata_for(obj).unwrap_or_default() - } + pub fn metadata(obj: &PyAny) -> Vec<(String, UserMetadataItem)> { + metadata_for(obj).unwrap_or_default() + } } fn metadata_for(obj: &PyAny) -> Option> { - let metadata_val = obj.call_method0("metadata").ok()?; - if metadata_val.is_none() { - return None; - } - - let mut output = Vec::new(); - let metadata_dict = metadata_val.downcast::().ok()?; - - for kv_pair in metadata_dict.items().into_iter() { - let (key, 
py_any): (String, &PyAny) = kv_pair.extract().ok()?; - let value: Value = Value::new(py_any.into()); - output.push((key, UserMetadataItem::PyValue(Arc::new(value)))); - } - Some(output) + let metadata_val = obj.call_method0("metadata").ok()?; + if metadata_val.is_none() { + return None; + } + + let mut output = Vec::new(); + let metadata_dict = metadata_val.downcast::().ok()?; + + for kv_pair in metadata_dict.items().into_iter() { + let (key, py_any): (String, &PyAny) = kv_pair.extract().ok()?; + let value: Value = Value::new(py_any.into()); + output.push((key, UserMetadataItem::PyValue(Arc::new(value)))); + } + Some(output) } diff --git a/src/rust/engine/src/externs/fs.rs b/src/rust/engine/src/externs/fs.rs index fb5f8dc3441..97a921efdfb 100644 --- a/src/rust/engine/src/externs/fs.rs +++ b/src/rust/engine/src/externs/fs.rs @@ -13,8 +13,8 @@ use pyo3::prelude::*; use pyo3::types::{PyIterator, PyString, PyTuple, PyType}; use fs::{ - DirectoryDigest, FilespecMatcher, GlobExpansionConjunction, PathGlobs, StrictGlobMatching, - EMPTY_DIRECTORY_DIGEST, + DirectoryDigest, FilespecMatcher, GlobExpansionConjunction, PathGlobs, StrictGlobMatching, + EMPTY_DIRECTORY_DIGEST, }; use hashing::{Digest, Fingerprint, EMPTY_DIGEST}; use store::Snapshot; @@ -22,20 +22,20 @@ use store::Snapshot; use crate::Failure; pub(crate) fn register(m: &PyModule) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - m.add("EMPTY_DIGEST", PyDigest(EMPTY_DIRECTORY_DIGEST.clone()))?; - m.add("EMPTY_FILE_DIGEST", PyFileDigest(EMPTY_DIGEST))?; - m.add("EMPTY_SNAPSHOT", PySnapshot(Snapshot::empty()))?; - - m.add_function(wrap_pyfunction!(default_cache_path, m)?)?; - Ok(()) + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + m.add("EMPTY_DIGEST", PyDigest(EMPTY_DIRECTORY_DIGEST.clone()))?; + m.add("EMPTY_FILE_DIGEST", PyFileDigest(EMPTY_DIGEST))?; + m.add("EMPTY_SNAPSHOT", PySnapshot(Snapshot::empty()))?; + + m.add_function(wrap_pyfunction!(default_cache_path, m)?)?; + Ok(()) } /// @@ -46,8 +46,8 @@ pub(crate) fn register(m: &PyModule) -> PyResult<()> { /// so this method can eventually be replaced with direct conversion via `?`. /// pub fn possible_store_missing_digest(e: store::StoreError) -> PyErr { - let failure: Failure = e.into(); - failure.into() + let failure: Failure = e.into(); + failure.into() } #[pyclass(name = "Digest")] @@ -55,56 +55,56 @@ pub fn possible_store_missing_digest(e: store::StoreError) -> PyErr { pub struct PyDigest(pub DirectoryDigest); impl fmt::Display for PyDigest { - fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result { - let digest = self.0.as_digest(); - write!( - f, - "Digest('{}', {})", - digest.hash.to_hex(), - digest.size_bytes, - ) - } + fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result { + let digest = self.0.as_digest(); + write!( + f, + "Digest('{}', {})", + digest.hash.to_hex(), + digest.size_bytes, + ) + } } #[pymethods] impl PyDigest { - /// NB: This constructor is only safe for use in testing, or when there is some other guarantee - /// that the Digest has been persisted. 
- #[new] - fn __new__(fingerprint: &str, serialized_bytes_length: usize) -> PyResult { - let fingerprint = Fingerprint::from_hex_string(fingerprint) - .map_err(|e| PyValueError::new_err(format!("Invalid digest hex: {e}")))?; - Ok(Self(DirectoryDigest::from_persisted_digest(Digest::new( - fingerprint, - serialized_bytes_length, - )))) - } - - fn __hash__(&self) -> u64 { - self.0.as_digest().hash.prefix_hash() - } - - fn __repr__(&self) -> String { - format!("{self}") - } - - fn __richcmp__(&self, other: &PyDigest, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), - } - } - - #[getter] - fn fingerprint(&self) -> String { - self.0.as_digest().hash.to_hex() - } - - #[getter] - fn serialized_bytes_length(&self) -> usize { - self.0.as_digest().size_bytes - } + /// NB: This constructor is only safe for use in testing, or when there is some other guarantee + /// that the Digest has been persisted. + #[new] + fn __new__(fingerprint: &str, serialized_bytes_length: usize) -> PyResult { + let fingerprint = Fingerprint::from_hex_string(fingerprint) + .map_err(|e| PyValueError::new_err(format!("Invalid digest hex: {e}")))?; + Ok(Self(DirectoryDigest::from_persisted_digest(Digest::new( + fingerprint, + serialized_bytes_length, + )))) + } + + fn __hash__(&self) -> u64 { + self.0.as_digest().hash.prefix_hash() + } + + fn __repr__(&self) -> String { + format!("{self}") + } + + fn __richcmp__(&self, other: &PyDigest, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } + + #[getter] + fn fingerprint(&self) -> String { + self.0.as_digest().hash.to_hex() + } + + #[getter] + fn serialized_bytes_length(&self) -> usize { + self.0.as_digest().size_bytes + } } #[pyclass(name = "FileDigest")] @@ -113,42 +113,42 @@ pub struct PyFileDigest(pub Digest); #[pymethods] impl PyFileDigest { - #[new] - fn __new__(fingerprint: &str, serialized_bytes_length: usize) -> PyResult { - let fingerprint = Fingerprint::from_hex_string(fingerprint) - .map_err(|e| PyValueError::new_err(format!("Invalid file digest hex: {e}")))?; - Ok(Self(Digest::new(fingerprint, serialized_bytes_length))) - } - - fn __hash__(&self) -> u64 { - self.0.hash.prefix_hash() - } - - fn __repr__(&self) -> String { - format!( - "FileDigest('{}', {})", - self.0.hash.to_hex(), - self.0.size_bytes - ) - } - - fn __richcmp__(&self, other: &PyFileDigest, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), - } - } - - #[getter] - fn fingerprint(&self) -> String { - self.0.hash.to_hex() - } - - #[getter] - fn serialized_bytes_length(&self) -> usize { - self.0.size_bytes - } + #[new] + fn __new__(fingerprint: &str, serialized_bytes_length: usize) -> PyResult { + let fingerprint = Fingerprint::from_hex_string(fingerprint) + .map_err(|e| PyValueError::new_err(format!("Invalid file digest hex: {e}")))?; + Ok(Self(Digest::new(fingerprint, serialized_bytes_length))) + } + + fn __hash__(&self) -> u64 { + self.0.hash.prefix_hash() + } + + fn __repr__(&self) -> String { + format!( + "FileDigest('{}', {})", + self.0.hash.to_hex(), + self.0.size_bytes + ) + } + + fn __richcmp__(&self, other: &PyFileDigest, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == 
other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } + + #[getter] + fn fingerprint(&self) -> String { + self.0.hash.to_hex() + } + + #[getter] + fn serialized_bytes_length(&self) -> usize { + self.0.size_bytes + } } #[pyclass(name = "Snapshot")] @@ -156,101 +156,98 @@ pub struct PySnapshot(pub Snapshot); #[pymethods] impl PySnapshot { - #[classmethod] - fn create_for_testing(_cls: &PyType, files: Vec, dirs: Vec) -> PyResult { - Ok(Self( - Snapshot::create_for_testing(files, dirs).map_err(PyException::new_err)?, - )) - } - - fn __hash__(&self) -> u64 { - self.0.digest.hash.prefix_hash() - } - - fn __repr__(&self) -> PyResult { - Ok(format!( - "Snapshot(digest=({}, {}), dirs=({}), files=({}))", - self.0.digest.hash.to_hex(), - self.0.digest.size_bytes, - self - .0 - .directories() - .into_iter() - .map(|d| d.display().to_string()) - .collect::>() - .join(","), - self - .0 - .files() - .into_iter() - .map(|d| d.display().to_string()) - .collect::>() - .join(","), - )) - } - - fn __richcmp__(&self, other: &PySnapshot, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self.0.digest == other.0.digest).into_py(py), - CompareOp::Ne => (self.0.digest != other.0.digest).into_py(py), - _ => py.NotImplemented(), - } - } - - #[getter] - fn digest(&self) -> PyDigest { - PyDigest(self.0.clone().into()) - } - - #[getter] - fn files<'py>(&self, py: Python<'py>) -> &'py PyTuple { - let files = self.0.files(); - PyTuple::new( - py, - files - .into_iter() - .map(|path| PyString::new(py, &path.to_string_lossy())) - .collect::>(), - ) - } - - #[getter] - fn dirs<'py>(&self, py: Python<'py>) -> &'py PyTuple { - let dirs = self.0.directories(); - PyTuple::new( - py, - dirs - .into_iter() - .map(|path| PyString::new(py, &path.to_string_lossy())) - .collect::>(), - ) - } - - // NB: Prefix with underscore. The Python call will be hidden behind a helper which returns a much - // richer type. 
- fn _diff<'py>(&self, other: &PySnapshot, py: Python<'py>) -> &'py PyTuple { - let result = self.0.tree.diff(&other.0.tree); - - let into_tuple = |x: &Vec| -> &'py PyTuple { - PyTuple::new( - py, - x.iter() - .map(|path| PyString::new(py, &path.to_string_lossy())) - .collect::>(), - ) - }; - - PyTuple::new( - py, - vec![ - into_tuple(&result.our_unique_files), - into_tuple(&result.our_unique_dirs), - into_tuple(&result.their_unique_files), - into_tuple(&result.their_unique_dirs), - into_tuple(&result.changed_files), - ], - ) - } + #[classmethod] + fn create_for_testing(_cls: &PyType, files: Vec, dirs: Vec) -> PyResult { + Ok(Self( + Snapshot::create_for_testing(files, dirs).map_err(PyException::new_err)?, + )) + } + + fn __hash__(&self) -> u64 { + self.0.digest.hash.prefix_hash() + } + + fn __repr__(&self) -> PyResult { + Ok(format!( + "Snapshot(digest=({}, {}), dirs=({}), files=({}))", + self.0.digest.hash.to_hex(), + self.0.digest.size_bytes, + self.0 + .directories() + .into_iter() + .map(|d| d.display().to_string()) + .collect::>() + .join(","), + self.0 + .files() + .into_iter() + .map(|d| d.display().to_string()) + .collect::>() + .join(","), + )) + } + + fn __richcmp__(&self, other: &PySnapshot, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self.0.digest == other.0.digest).into_py(py), + CompareOp::Ne => (self.0.digest != other.0.digest).into_py(py), + _ => py.NotImplemented(), + } + } + + #[getter] + fn digest(&self) -> PyDigest { + PyDigest(self.0.clone().into()) + } + + #[getter] + fn files<'py>(&self, py: Python<'py>) -> &'py PyTuple { + let files = self.0.files(); + PyTuple::new( + py, + files + .into_iter() + .map(|path| PyString::new(py, &path.to_string_lossy())) + .collect::>(), + ) + } + + #[getter] + fn dirs<'py>(&self, py: Python<'py>) -> &'py PyTuple { + let dirs = self.0.directories(); + PyTuple::new( + py, + dirs.into_iter() + .map(|path| PyString::new(py, &path.to_string_lossy())) + .collect::>(), + ) + } + + // NB: Prefix with underscore. The Python call will be hidden behind a helper which returns a much + // richer type. + fn _diff<'py>(&self, other: &PySnapshot, py: Python<'py>) -> &'py PyTuple { + let result = self.0.tree.diff(&other.0.tree); + + let into_tuple = |x: &Vec| -> &'py PyTuple { + PyTuple::new( + py, + x.iter() + .map(|path| PyString::new(py, &path.to_string_lossy())) + .collect::>(), + ) + }; + + PyTuple::new( + py, + vec![ + into_tuple(&result.our_unique_files), + into_tuple(&result.our_unique_dirs), + into_tuple(&result.their_unique_files), + into_tuple(&result.their_unique_dirs), + into_tuple(&result.changed_files), + ], + ) + } } #[pyclass(name = "MergeDigests")] @@ -259,121 +256,121 @@ pub struct PyMergeDigests(pub Vec); #[pymethods] impl PyMergeDigests { - #[new] - fn __new__(digests: &PyAny, py: Python) -> PyResult { - let digests: PyResult> = PyIterator::from_object(py, digests)? 
- .map(|v| { - let py_digest = v?.extract::()?; - Ok(py_digest.0) - }) - .collect(); - Ok(Self(digests?)) - } - - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.0.hash(&mut s); - s.finish() - } - - fn __repr__(&self) -> String { - let digests = self - .0 - .iter() - .map(|d| format!("{}", PyDigest(d.clone()))) - .join(", "); - format!("MergeDigests([{digests}])") - } - - fn __richcmp__(&self, other: &PyMergeDigests, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), - } - } + #[new] + fn __new__(digests: &PyAny, py: Python) -> PyResult { + let digests: PyResult> = PyIterator::from_object(py, digests)? + .map(|v| { + let py_digest = v?.extract::()?; + Ok(py_digest.0) + }) + .collect(); + Ok(Self(digests?)) + } + + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.0.hash(&mut s); + s.finish() + } + + fn __repr__(&self) -> String { + let digests = self + .0 + .iter() + .map(|d| format!("{}", PyDigest(d.clone()))) + .join(", "); + format!("MergeDigests([{digests}])") + } + + fn __richcmp__(&self, other: &PyMergeDigests, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } } #[pyclass(name = "AddPrefix")] #[derive(Debug, PartialEq, Eq)] pub struct PyAddPrefix { - pub digest: DirectoryDigest, - pub prefix: PathBuf, + pub digest: DirectoryDigest, + pub prefix: PathBuf, } #[pymethods] impl PyAddPrefix { - #[new] - fn __new__(digest: PyDigest, prefix: PathBuf) -> Self { - Self { - digest: digest.0, - prefix, - } - } - - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.digest.as_digest().hash.prefix_hash().hash(&mut s); - self.prefix.hash(&mut s); - s.finish() - } - - fn __repr__(&self) -> String { - format!( - "AddPrefix('{}', {})", - PyDigest(self.digest.clone()), - self.prefix.display() - ) - } - - fn __richcmp__(&self, other: &PyAddPrefix, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), - } - } + #[new] + fn __new__(digest: PyDigest, prefix: PathBuf) -> Self { + Self { + digest: digest.0, + prefix, + } + } + + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.digest.as_digest().hash.prefix_hash().hash(&mut s); + self.prefix.hash(&mut s); + s.finish() + } + + fn __repr__(&self) -> String { + format!( + "AddPrefix('{}', {})", + PyDigest(self.digest.clone()), + self.prefix.display() + ) + } + + fn __richcmp__(&self, other: &PyAddPrefix, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } } #[pyclass(name = "RemovePrefix")] #[derive(Debug, PartialEq, Eq)] pub struct PyRemovePrefix { - pub digest: DirectoryDigest, - pub prefix: PathBuf, + pub digest: DirectoryDigest, + pub prefix: PathBuf, } #[pymethods] impl PyRemovePrefix { - #[new] - fn __new__(digest: PyDigest, prefix: PathBuf) -> Self { - Self { - digest: digest.0, - prefix, - } - } - - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.digest.as_digest().hash.prefix_hash().hash(&mut s); - self.prefix.hash(&mut s); - s.finish() - } - - fn __repr__(&self) -> String { - format!( - "RemovePrefix('{}', {})", - 
PyDigest(self.digest.clone()), - self.prefix.display() - ) - } - - fn __richcmp__(&self, other: &PyRemovePrefix, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - _ => py.NotImplemented(), - } - } + #[new] + fn __new__(digest: PyDigest, prefix: PathBuf) -> Self { + Self { + digest: digest.0, + prefix, + } + } + + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.digest.as_digest().hash.prefix_hash().hash(&mut s); + self.prefix.hash(&mut s); + s.finish() + } + + fn __repr__(&self) -> String { + format!( + "RemovePrefix('{}', {})", + PyDigest(self.digest.clone()), + self.prefix.display() + ) + } + + fn __richcmp__(&self, other: &PyRemovePrefix, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } } // ----------------------------------------------------------------------------- @@ -383,33 +380,33 @@ impl PyRemovePrefix { struct PyPathGlobs(PathGlobs); impl<'source> FromPyObject<'source> for PyPathGlobs { - fn extract(obj: &'source PyAny) -> PyResult { - let globs: Vec = obj.getattr("globs")?.extract()?; - - let description_of_origin_field = obj.getattr("description_of_origin")?; - let description_of_origin = if description_of_origin_field.is_none() { - None - } else { - Some(description_of_origin_field.extract()?) - }; - - let match_behavior_str: &str = obj - .getattr("glob_match_error_behavior")? - .getattr("value")? - .extract()?; - let match_behavior = StrictGlobMatching::create(match_behavior_str, description_of_origin) - .map_err(PyValueError::new_err)?; - - let conjunction_str: &str = obj.getattr("conjunction")?.getattr("value")?.extract()?; - let conjunction = - GlobExpansionConjunction::create(conjunction_str).map_err(PyValueError::new_err)?; - - Ok(PyPathGlobs(PathGlobs::new( - globs, - match_behavior, - conjunction, - ))) - } + fn extract(obj: &'source PyAny) -> PyResult { + let globs: Vec = obj.getattr("globs")?.extract()?; + + let description_of_origin_field = obj.getattr("description_of_origin")?; + let description_of_origin = if description_of_origin_field.is_none() { + None + } else { + Some(description_of_origin_field.extract()?) + }; + + let match_behavior_str: &str = obj + .getattr("glob_match_error_behavior")? + .getattr("value")? + .extract()?; + let match_behavior = StrictGlobMatching::create(match_behavior_str, description_of_origin) + .map_err(PyValueError::new_err)?; + + let conjunction_str: &str = obj.getattr("conjunction")?.getattr("value")?.extract()?; + let conjunction = + GlobExpansionConjunction::create(conjunction_str).map_err(PyValueError::new_err)?; + + Ok(PyPathGlobs(PathGlobs::new( + globs, + match_behavior, + conjunction, + ))) + } } // ----------------------------------------------------------------------------- @@ -422,55 +419,54 @@ pub struct PyFilespecMatcher(FilespecMatcher); #[pymethods] impl PyFilespecMatcher { - #[new] - fn __new__(includes: Vec, excludes: Vec, py: Python) -> PyResult { - // Parsing the globs has shown up in benchmarks - // (https://github.com/pantsbuild/pants/issues/16122), so we use py.allow_threads(). 
- let matcher = - py.allow_threads(|| FilespecMatcher::new(includes, excludes).map_err(PyValueError::new_err))?; - Ok(Self(matcher)) - } - - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.0.include_globs().hash(&mut s); - self.0.exclude_globs().hash(&mut s); - s.finish() - } - - fn __repr__(&self) -> String { - let includes = self - .0 - .include_globs() - .iter() - .map(|pattern| pattern.to_string()) - .join(", "); - let excludes = self.0.exclude_globs().join(", "); - format!("FilespecMatcher(includes=['{includes}'], excludes=[{excludes}])",) - } - - fn __richcmp__(&self, other: &PyFilespecMatcher, op: CompareOp, py: Python) -> PyObject { - match op { - CompareOp::Eq => (self.0.include_globs() == other.0.include_globs() - && self.0.exclude_globs() == other.0.exclude_globs()) - .into_py(py), - CompareOp::Ne => (self.0.include_globs() != other.0.include_globs() - || self.0.exclude_globs() != other.0.exclude_globs()) - .into_py(py), - _ => py.NotImplemented(), - } - } - - fn matches(&self, paths: Vec, py: Python) -> PyResult> { - py.allow_threads(|| { - Ok( - paths - .into_iter() - .filter(|p| self.0.matches(Path::new(p))) - .collect(), - ) - }) - } + #[new] + fn __new__(includes: Vec, excludes: Vec, py: Python) -> PyResult { + // Parsing the globs has shown up in benchmarks + // (https://github.com/pantsbuild/pants/issues/16122), so we use py.allow_threads(). + let matcher = py.allow_threads(|| { + FilespecMatcher::new(includes, excludes).map_err(PyValueError::new_err) + })?; + Ok(Self(matcher)) + } + + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.0.include_globs().hash(&mut s); + self.0.exclude_globs().hash(&mut s); + s.finish() + } + + fn __repr__(&self) -> String { + let includes = self + .0 + .include_globs() + .iter() + .map(|pattern| pattern.to_string()) + .join(", "); + let excludes = self.0.exclude_globs().join(", "); + format!("FilespecMatcher(includes=['{includes}'], excludes=[{excludes}])",) + } + + fn __richcmp__(&self, other: &PyFilespecMatcher, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self.0.include_globs() == other.0.include_globs() + && self.0.exclude_globs() == other.0.exclude_globs()) + .into_py(py), + CompareOp::Ne => (self.0.include_globs() != other.0.include_globs() + || self.0.exclude_globs() != other.0.exclude_globs()) + .into_py(py), + _ => py.NotImplemented(), + } + } + + fn matches(&self, paths: Vec, py: Python) -> PyResult> { + py.allow_threads(|| { + Ok(paths + .into_iter() + .filter(|p| self.0.matches(Path::new(p))) + .collect()) + }) + } } // ----------------------------------------------------------------------------- @@ -479,12 +475,12 @@ impl PyFilespecMatcher { #[pyfunction] fn default_cache_path() -> PyResult { - fs::default_cache_path() - .into_os_string() - .into_string() - .map_err(|s| { - PyTypeError::new_err(format!( - "Default cache path {s:?} could not be converted to a string." - )) - }) + fs::default_cache_path() + .into_os_string() + .into_string() + .map_err(|s| { + PyTypeError::new_err(format!( + "Default cache path {s:?} could not be converted to a string." 
+ )) + }) } diff --git a/src/rust/engine/src/externs/interface.rs b/src/rust/engine/src/externs/interface.rs index cb44d0aa131..196cec419b9 100644 --- a/src/rust/engine/src/externs/interface.rs +++ b/src/rust/engine/src/externs/interface.rs @@ -32,8 +32,8 @@ use petgraph::graph::{DiGraph, Graph}; use process_execution::CacheContentBehavior; use pyo3::exceptions::{PyException, PyIOError, PyKeyboardInterrupt, PyValueError}; use pyo3::prelude::{ - pyclass, pyfunction, pymethods, pymodule, wrap_pyfunction, PyModule, PyObject, - PyResult as PyO3Result, Python, ToPyObject, + pyclass, pyfunction, pymethods, pymodule, wrap_pyfunction, PyModule, PyObject, + PyResult as PyO3Result, Python, ToPyObject, }; use pyo3::types::{PyBytes, PyDict, PyList, PyTuple, PyType}; use pyo3::{create_exception, IntoPy, PyAny, PyRef}; @@ -42,115 +42,115 @@ use remote::remote_cache::RemoteCacheWarningsBehavior; use rule_graph::{self, DependencyKey, RuleGraph, RuleId}; use task_executor::Executor; use workunit_store::{ - ArtifactOutput, ObservationMetric, UserMetadataItem, Workunit, WorkunitState, WorkunitStore, - WorkunitStoreHandle, + ArtifactOutput, ObservationMetric, UserMetadataItem, Workunit, WorkunitState, WorkunitStore, + WorkunitStoreHandle, }; use crate::externs::fs::{possible_store_missing_digest, PyFileDigest}; use crate::externs::process::PyProcessExecutionEnvironment; use crate::{ - externs, nodes, Core, ExecutionRequest, ExecutionStrategyOptions, ExecutionTermination, Failure, - Function, Intrinsic, Intrinsics, Key, LocalStoreOptions, Params, RemotingOptions, Rule, - Scheduler, Session, SessionCore, Tasks, TypeId, Types, Value, + externs, nodes, Core, ExecutionRequest, ExecutionStrategyOptions, ExecutionTermination, + Failure, Function, Intrinsic, Intrinsics, Key, LocalStoreOptions, Params, RemotingOptions, + Rule, Scheduler, Session, SessionCore, Tasks, TypeId, Types, Value, }; #[pymodule] fn native_engine(py: Python, m: &PyModule) -> PyO3Result<()> { - externs::register(py, m)?; - externs::address::register(py, m)?; - externs::fs::register(m)?; - externs::nailgun::register(py, m)?; - externs::process::register(m)?; - externs::pantsd::register(py, m)?; - externs::scheduler::register(m)?; - externs::target::register(m)?; - externs::testutil::register(m)?; - externs::workunits::register(m)?; - externs::dep_inference::register(m)?; - - m.add("PollTimeout", py.get_type::())?; - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - m.add_function(wrap_pyfunction!(stdio_initialize, m)?)?; - m.add_function(wrap_pyfunction!(stdio_thread_console_set, m)?)?; - m.add_function(wrap_pyfunction!(stdio_thread_console_color_mode_set, m)?)?; - m.add_function(wrap_pyfunction!(stdio_thread_console_clear, m)?)?; - m.add_function(wrap_pyfunction!(stdio_thread_get_destination, m)?)?; - m.add_function(wrap_pyfunction!(stdio_thread_set_destination, m)?)?; - - m.add_function(wrap_pyfunction!(flush_log, m)?)?; - m.add_function(wrap_pyfunction!(write_log, m)?)?; - m.add_function(wrap_pyfunction!(set_per_run_log_path, m)?)?; - m.add_function(wrap_pyfunction!(teardown_dynamic_ui, m)?)?; - m.add_function(wrap_pyfunction!(maybe_set_panic_handler, m)?)?; - - m.add_function(wrap_pyfunction!(task_side_effected, m)?)?; - - m.add_function(wrap_pyfunction!(tasks_task_begin, m)?)?; - m.add_function(wrap_pyfunction!(tasks_task_end, 
m)?)?; - m.add_function(wrap_pyfunction!(tasks_add_get, m)?)?; - m.add_function(wrap_pyfunction!(tasks_add_get_union, m)?)?; - m.add_function(wrap_pyfunction!(tasks_add_query, m)?)?; - - m.add_function(wrap_pyfunction!(write_digest, m)?)?; - m.add_function(wrap_pyfunction!(capture_snapshots, m)?)?; - - m.add_function(wrap_pyfunction!(graph_invalidate_paths, m)?)?; - m.add_function(wrap_pyfunction!(graph_invalidate_all_paths, m)?)?; - m.add_function(wrap_pyfunction!(graph_invalidate_all, m)?)?; - m.add_function(wrap_pyfunction!(graph_len, m)?)?; - m.add_function(wrap_pyfunction!(graph_visualize, m)?)?; - - m.add_function(wrap_pyfunction!(nailgun_server_create, m)?)?; - m.add_function(wrap_pyfunction!(nailgun_server_await_shutdown, m)?)?; - - m.add_function(wrap_pyfunction!(garbage_collect_store, m)?)?; - m.add_function(wrap_pyfunction!(lease_files_in_graph, m)?)?; - m.add_function(wrap_pyfunction!(check_invalidation_watcher_liveness, m)?)?; - - m.add_function(wrap_pyfunction!(validate_reachability, m)?)?; - m.add_function(wrap_pyfunction!(rule_graph_consumed_types, m)?)?; - m.add_function(wrap_pyfunction!(rule_graph_visualize, m)?)?; - m.add_function(wrap_pyfunction!(rule_subgraph_visualize, m)?)?; - - m.add_function(wrap_pyfunction!(execution_add_root_select, m)?)?; - - m.add_function(wrap_pyfunction!(session_new_run_id, m)?)?; - m.add_function(wrap_pyfunction!(session_poll_workunits, m)?)?; - m.add_function(wrap_pyfunction!(session_run_interactive_process, m)?)?; - m.add_function(wrap_pyfunction!(session_get_metrics, m)?)?; - m.add_function(wrap_pyfunction!(session_get_observation_histograms, m)?)?; - m.add_function(wrap_pyfunction!(session_record_test_observation, m)?)?; - m.add_function(wrap_pyfunction!(session_isolated_shallow_clone, m)?)?; - m.add_function(wrap_pyfunction!(session_wait_for_tail_tasks, m)?)?; - - m.add_function(wrap_pyfunction!(single_file_digests_to_bytes, m)?)?; - m.add_function(wrap_pyfunction!(ensure_remote_has_recursive, m)?)?; - m.add_function(wrap_pyfunction!(ensure_directory_digest_persisted, m)?)?; - - m.add_function(wrap_pyfunction!(scheduler_execute, m)?)?; - m.add_function(wrap_pyfunction!(scheduler_metrics, m)?)?; - m.add_function(wrap_pyfunction!(scheduler_live_items, m)?)?; - m.add_function(wrap_pyfunction!(scheduler_create, m)?)?; - m.add_function(wrap_pyfunction!(scheduler_shutdown, m)?)?; - - m.add_function(wrap_pyfunction!(strongly_connected_components, m)?)?; - m.add_function(wrap_pyfunction!(hash_prefix_zero_bits, m)?)?; - - Ok(()) + externs::register(py, m)?; + externs::address::register(py, m)?; + externs::fs::register(m)?; + externs::nailgun::register(py, m)?; + externs::process::register(m)?; + externs::pantsd::register(py, m)?; + externs::scheduler::register(m)?; + externs::target::register(m)?; + externs::testutil::register(m)?; + externs::workunits::register(m)?; + externs::dep_inference::register(m)?; + + m.add("PollTimeout", py.get_type::())?; + + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + m.add_function(wrap_pyfunction!(stdio_initialize, m)?)?; + m.add_function(wrap_pyfunction!(stdio_thread_console_set, m)?)?; + m.add_function(wrap_pyfunction!(stdio_thread_console_color_mode_set, m)?)?; + m.add_function(wrap_pyfunction!(stdio_thread_console_clear, m)?)?; + m.add_function(wrap_pyfunction!(stdio_thread_get_destination, m)?)?; 
+ m.add_function(wrap_pyfunction!(stdio_thread_set_destination, m)?)?; + + m.add_function(wrap_pyfunction!(flush_log, m)?)?; + m.add_function(wrap_pyfunction!(write_log, m)?)?; + m.add_function(wrap_pyfunction!(set_per_run_log_path, m)?)?; + m.add_function(wrap_pyfunction!(teardown_dynamic_ui, m)?)?; + m.add_function(wrap_pyfunction!(maybe_set_panic_handler, m)?)?; + + m.add_function(wrap_pyfunction!(task_side_effected, m)?)?; + + m.add_function(wrap_pyfunction!(tasks_task_begin, m)?)?; + m.add_function(wrap_pyfunction!(tasks_task_end, m)?)?; + m.add_function(wrap_pyfunction!(tasks_add_get, m)?)?; + m.add_function(wrap_pyfunction!(tasks_add_get_union, m)?)?; + m.add_function(wrap_pyfunction!(tasks_add_query, m)?)?; + + m.add_function(wrap_pyfunction!(write_digest, m)?)?; + m.add_function(wrap_pyfunction!(capture_snapshots, m)?)?; + + m.add_function(wrap_pyfunction!(graph_invalidate_paths, m)?)?; + m.add_function(wrap_pyfunction!(graph_invalidate_all_paths, m)?)?; + m.add_function(wrap_pyfunction!(graph_invalidate_all, m)?)?; + m.add_function(wrap_pyfunction!(graph_len, m)?)?; + m.add_function(wrap_pyfunction!(graph_visualize, m)?)?; + + m.add_function(wrap_pyfunction!(nailgun_server_create, m)?)?; + m.add_function(wrap_pyfunction!(nailgun_server_await_shutdown, m)?)?; + + m.add_function(wrap_pyfunction!(garbage_collect_store, m)?)?; + m.add_function(wrap_pyfunction!(lease_files_in_graph, m)?)?; + m.add_function(wrap_pyfunction!(check_invalidation_watcher_liveness, m)?)?; + + m.add_function(wrap_pyfunction!(validate_reachability, m)?)?; + m.add_function(wrap_pyfunction!(rule_graph_consumed_types, m)?)?; + m.add_function(wrap_pyfunction!(rule_graph_visualize, m)?)?; + m.add_function(wrap_pyfunction!(rule_subgraph_visualize, m)?)?; + + m.add_function(wrap_pyfunction!(execution_add_root_select, m)?)?; + + m.add_function(wrap_pyfunction!(session_new_run_id, m)?)?; + m.add_function(wrap_pyfunction!(session_poll_workunits, m)?)?; + m.add_function(wrap_pyfunction!(session_run_interactive_process, m)?)?; + m.add_function(wrap_pyfunction!(session_get_metrics, m)?)?; + m.add_function(wrap_pyfunction!(session_get_observation_histograms, m)?)?; + m.add_function(wrap_pyfunction!(session_record_test_observation, m)?)?; + m.add_function(wrap_pyfunction!(session_isolated_shallow_clone, m)?)?; + m.add_function(wrap_pyfunction!(session_wait_for_tail_tasks, m)?)?; + + m.add_function(wrap_pyfunction!(single_file_digests_to_bytes, m)?)?; + m.add_function(wrap_pyfunction!(ensure_remote_has_recursive, m)?)?; + m.add_function(wrap_pyfunction!(ensure_directory_digest_persisted, m)?)?; + + m.add_function(wrap_pyfunction!(scheduler_execute, m)?)?; + m.add_function(wrap_pyfunction!(scheduler_metrics, m)?)?; + m.add_function(wrap_pyfunction!(scheduler_live_items, m)?)?; + m.add_function(wrap_pyfunction!(scheduler_create, m)?)?; + m.add_function(wrap_pyfunction!(scheduler_shutdown, m)?)?; + + m.add_function(wrap_pyfunction!(strongly_connected_components, m)?)?; + m.add_function(wrap_pyfunction!(hash_prefix_zero_bits, m)?)?; + + Ok(()) } create_exception!(native_engine, PollTimeout, PyException); @@ -160,10 +160,10 @@ struct PyTasks(RefCell); #[pymethods] impl PyTasks { - #[new] - fn __new__() -> Self { - Self(RefCell::new(Tasks::new())) - } + #[new] + fn __new__() -> Self { + Self(RefCell::new(Tasks::new())) + } } #[pyclass] @@ -171,75 +171,75 @@ struct PyTypes(RefCell>); #[pymethods] impl PyTypes { - #[new] - fn __new__( - paths: &PyType, - file_content: &PyType, - file_entry: &PyType, - symlink_entry: &PyType, - 
directory: &PyType, - digest_contents: &PyType, - digest_entries: &PyType, - path_globs: &PyType, - create_digest: &PyType, - digest_subset: &PyType, - native_download_file: &PyType, - platform: &PyType, - process: &PyType, - process_result: &PyType, - process_result_metadata: &PyType, - coroutine: &PyType, - session_values: &PyType, - run_id: &PyType, - interactive_process: &PyType, - interactive_process_result: &PyType, - engine_aware_parameter: &PyType, - docker_resolve_image_request: &PyType, - docker_resolve_image_result: &PyType, - parsed_python_deps_result: &PyType, - parsed_javascript_deps_result: &PyType, - py: Python, - ) -> Self { - Self(RefCell::new(Some(Types { - directory_digest: TypeId::new(py.get_type::()), - file_digest: TypeId::new(py.get_type::()), - snapshot: TypeId::new(py.get_type::()), - paths: TypeId::new(paths), - file_content: TypeId::new(file_content), - file_entry: TypeId::new(file_entry), - symlink_entry: TypeId::new(symlink_entry), - directory: TypeId::new(directory), - digest_contents: TypeId::new(digest_contents), - digest_entries: TypeId::new(digest_entries), - path_globs: TypeId::new(path_globs), - merge_digests: TypeId::new(py.get_type::()), - add_prefix: TypeId::new(py.get_type::()), - remove_prefix: TypeId::new(py.get_type::()), - create_digest: TypeId::new(create_digest), - digest_subset: TypeId::new(digest_subset), - native_download_file: TypeId::new(native_download_file), - platform: TypeId::new(platform), - process: TypeId::new(process), - process_result: TypeId::new(process_result), - process_config_from_environment: TypeId::new( - py.get_type::(), - ), - process_result_metadata: TypeId::new(process_result_metadata), - coroutine: TypeId::new(coroutine), - session_values: TypeId::new(session_values), - run_id: TypeId::new(run_id), - interactive_process: TypeId::new(interactive_process), - interactive_process_result: TypeId::new(interactive_process_result), - engine_aware_parameter: TypeId::new(engine_aware_parameter), - docker_resolve_image_request: TypeId::new(docker_resolve_image_request), - docker_resolve_image_result: TypeId::new(docker_resolve_image_result), - parsed_python_deps_result: TypeId::new(parsed_python_deps_result), - parsed_javascript_deps_result: TypeId::new(parsed_javascript_deps_result), - deps_request: TypeId::new( - py.get_type::(), - ), - }))) - } + #[new] + fn __new__( + paths: &PyType, + file_content: &PyType, + file_entry: &PyType, + symlink_entry: &PyType, + directory: &PyType, + digest_contents: &PyType, + digest_entries: &PyType, + path_globs: &PyType, + create_digest: &PyType, + digest_subset: &PyType, + native_download_file: &PyType, + platform: &PyType, + process: &PyType, + process_result: &PyType, + process_result_metadata: &PyType, + coroutine: &PyType, + session_values: &PyType, + run_id: &PyType, + interactive_process: &PyType, + interactive_process_result: &PyType, + engine_aware_parameter: &PyType, + docker_resolve_image_request: &PyType, + docker_resolve_image_result: &PyType, + parsed_python_deps_result: &PyType, + parsed_javascript_deps_result: &PyType, + py: Python, + ) -> Self { + Self(RefCell::new(Some(Types { + directory_digest: TypeId::new(py.get_type::()), + file_digest: TypeId::new(py.get_type::()), + snapshot: TypeId::new(py.get_type::()), + paths: TypeId::new(paths), + file_content: TypeId::new(file_content), + file_entry: TypeId::new(file_entry), + symlink_entry: TypeId::new(symlink_entry), + directory: TypeId::new(directory), + digest_contents: TypeId::new(digest_contents), + digest_entries: 
TypeId::new(digest_entries), + path_globs: TypeId::new(path_globs), + merge_digests: TypeId::new(py.get_type::()), + add_prefix: TypeId::new(py.get_type::()), + remove_prefix: TypeId::new(py.get_type::()), + create_digest: TypeId::new(create_digest), + digest_subset: TypeId::new(digest_subset), + native_download_file: TypeId::new(native_download_file), + platform: TypeId::new(platform), + process: TypeId::new(process), + process_result: TypeId::new(process_result), + process_config_from_environment: TypeId::new( + py.get_type::(), + ), + process_result_metadata: TypeId::new(process_result_metadata), + coroutine: TypeId::new(coroutine), + session_values: TypeId::new(session_values), + run_id: TypeId::new(run_id), + interactive_process: TypeId::new(interactive_process), + interactive_process_result: TypeId::new(interactive_process_result), + engine_aware_parameter: TypeId::new(engine_aware_parameter), + docker_resolve_image_request: TypeId::new(docker_resolve_image_request), + docker_resolve_image_result: TypeId::new(docker_resolve_image_result), + parsed_python_deps_result: TypeId::new(parsed_python_deps_result), + parsed_javascript_deps_result: TypeId::new(parsed_javascript_deps_result), + deps_request: TypeId::new( + py.get_type::(), + ), + }))) + } } #[pyclass] @@ -258,35 +258,37 @@ struct PyExecutionStrategyOptions(ExecutionStrategyOptions); #[pymethods] impl PyExecutionStrategyOptions { - #[new] - fn __new__( - local_parallelism: usize, - remote_parallelism: usize, - local_keep_sandboxes: String, - local_cache: bool, - local_enable_nailgun: bool, - remote_cache_read: bool, - remote_cache_write: bool, - child_default_memory: usize, - child_max_memory: usize, - graceful_shutdown_timeout: usize, - ) -> Self { - Self(ExecutionStrategyOptions { - local_parallelism, - remote_parallelism, - local_keep_sandboxes: process_execution::local::KeepSandboxes::from_str( - &local_keep_sandboxes, - ) - .unwrap(), - local_cache, - local_enable_nailgun, - remote_cache_read, - remote_cache_write, - child_default_memory, - child_max_memory, - graceful_shutdown_timeout: Duration::from_secs(graceful_shutdown_timeout.try_into().unwrap()), - }) - } + #[new] + fn __new__( + local_parallelism: usize, + remote_parallelism: usize, + local_keep_sandboxes: String, + local_cache: bool, + local_enable_nailgun: bool, + remote_cache_read: bool, + remote_cache_write: bool, + child_default_memory: usize, + child_max_memory: usize, + graceful_shutdown_timeout: usize, + ) -> Self { + Self(ExecutionStrategyOptions { + local_parallelism, + remote_parallelism, + local_keep_sandboxes: process_execution::local::KeepSandboxes::from_str( + &local_keep_sandboxes, + ) + .unwrap(), + local_cache, + local_enable_nailgun, + remote_cache_read, + remote_cache_write, + child_default_memory, + child_max_memory, + graceful_shutdown_timeout: Duration::from_secs( + graceful_shutdown_timeout.try_into().unwrap(), + ), + }) + } } /// Represents configuration related to remote execution and caching. 
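As a side note on the `PyExecutionStrategyOptions::__new__` hunk above: it parses the `local_keep_sandboxes` string with `FromStr` and turns the integer shutdown timeout into a `Duration`. The following is a minimal, self-contained sketch of that same pattern; the `KeepSandboxes` enum here is a hypothetical stand-in, not the real `process_execution::local::KeepSandboxes` type, and its variants are illustrative only.

    use std::str::FromStr;
    use std::time::Duration;

    // Hypothetical stand-in for process_execution::local::KeepSandboxes;
    // the real enum's variants may differ.
    #[derive(Debug)]
    enum KeepSandboxes {
        Always,
        Never,
        OnFailure,
    }

    impl FromStr for KeepSandboxes {
        type Err = String;
        fn from_str(s: &str) -> Result<Self, Self::Err> {
            match s {
                "always" => Ok(Self::Always),
                "never" => Ok(Self::Never),
                "on_failure" => Ok(Self::OnFailure),
                other => Err(format!("unrecognized value: {other}")),
            }
        }
    }

    fn main() {
        // Same shape as the constructor: parse the string option,
        // then convert an integer seconds value into a Duration.
        let keep = KeepSandboxes::from_str("on_failure").unwrap();
        let graceful_shutdown_timeout: usize = 3;
        let timeout = Duration::from_secs(graceful_shutdown_timeout.try_into().unwrap());
        println!("{keep:?}, {timeout:?}");
    }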
@@ -295,57 +297,60 @@ struct PyRemotingOptions(RemotingOptions); #[pymethods] impl PyRemotingOptions { - #[new] - fn __new__( - execution_enable: bool, - store_headers: BTreeMap, - store_chunk_bytes: usize, - store_rpc_retries: usize, - store_rpc_concurrency: usize, - store_rpc_timeout_millis: u64, - store_batch_api_size_limit: usize, - cache_warnings_behavior: String, - cache_content_behavior: String, - cache_rpc_concurrency: usize, - cache_rpc_timeout_millis: u64, - execution_headers: BTreeMap, - execution_overall_deadline_secs: u64, - execution_rpc_concurrency: usize, - store_address: Option, - execution_address: Option, - execution_process_cache_namespace: Option, - instance_name: Option, - root_ca_certs_path: Option, - client_certs_path: Option, - client_key_path: Option, - append_only_caches_base_path: Option, - ) -> Self { - Self(RemotingOptions { - execution_enable, - store_address, - execution_address, - execution_process_cache_namespace, - instance_name, - root_ca_certs_path, - client_certs_path, - client_key_path, - store_headers, - store_chunk_bytes, - store_rpc_retries, - store_rpc_concurrency, - store_rpc_timeout: Duration::from_millis(store_rpc_timeout_millis), - store_batch_api_size_limit, - cache_warnings_behavior: RemoteCacheWarningsBehavior::from_str(&cache_warnings_behavior) - .unwrap(), - cache_content_behavior: CacheContentBehavior::from_str(&cache_content_behavior).unwrap(), - cache_rpc_concurrency, - cache_rpc_timeout: Duration::from_millis(cache_rpc_timeout_millis), - execution_headers, - execution_overall_deadline: Duration::from_secs(execution_overall_deadline_secs), - execution_rpc_concurrency, - append_only_caches_base_path, - }) - } + #[new] + fn __new__( + execution_enable: bool, + store_headers: BTreeMap, + store_chunk_bytes: usize, + store_rpc_retries: usize, + store_rpc_concurrency: usize, + store_rpc_timeout_millis: u64, + store_batch_api_size_limit: usize, + cache_warnings_behavior: String, + cache_content_behavior: String, + cache_rpc_concurrency: usize, + cache_rpc_timeout_millis: u64, + execution_headers: BTreeMap, + execution_overall_deadline_secs: u64, + execution_rpc_concurrency: usize, + store_address: Option, + execution_address: Option, + execution_process_cache_namespace: Option, + instance_name: Option, + root_ca_certs_path: Option, + client_certs_path: Option, + client_key_path: Option, + append_only_caches_base_path: Option, + ) -> Self { + Self(RemotingOptions { + execution_enable, + store_address, + execution_address, + execution_process_cache_namespace, + instance_name, + root_ca_certs_path, + client_certs_path, + client_key_path, + store_headers, + store_chunk_bytes, + store_rpc_retries, + store_rpc_concurrency, + store_rpc_timeout: Duration::from_millis(store_rpc_timeout_millis), + store_batch_api_size_limit, + cache_warnings_behavior: RemoteCacheWarningsBehavior::from_str( + &cache_warnings_behavior, + ) + .unwrap(), + cache_content_behavior: CacheContentBehavior::from_str(&cache_content_behavior) + .unwrap(), + cache_rpc_concurrency, + cache_rpc_timeout: Duration::from_millis(cache_rpc_timeout_millis), + execution_headers, + execution_overall_deadline: Duration::from_secs(execution_overall_deadline_secs), + execution_rpc_concurrency, + append_only_caches_base_path, + }) + } } #[pyclass] @@ -353,29 +358,29 @@ struct PyLocalStoreOptions(LocalStoreOptions); #[pymethods] impl PyLocalStoreOptions { - #[new] - fn __new__( - store_dir: PathBuf, - process_cache_max_size_bytes: usize, - files_max_size_bytes: usize, - 
directories_max_size_bytes: usize, - lease_time_millis: u64, - shard_count: u8, - ) -> PyO3Result { - if shard_count.count_ones() != 1 { - return Err(PyValueError::new_err(format!( - "The local store shard count must be a power of two: got {shard_count}" - ))); + #[new] + fn __new__( + store_dir: PathBuf, + process_cache_max_size_bytes: usize, + files_max_size_bytes: usize, + directories_max_size_bytes: usize, + lease_time_millis: u64, + shard_count: u8, + ) -> PyO3Result { + if shard_count.count_ones() != 1 { + return Err(PyValueError::new_err(format!( + "The local store shard count must be a power of two: got {shard_count}" + ))); + } + Ok(Self(LocalStoreOptions { + store_dir, + process_cache_max_size_bytes, + files_max_size_bytes, + directories_max_size_bytes, + lease_time: Duration::from_millis(lease_time_millis), + shard_count, + })) } - Ok(Self(LocalStoreOptions { - store_dir, - process_cache_max_size_bytes, - files_max_size_bytes, - directories_max_size_bytes, - lease_time: Duration::from_millis(lease_time_millis), - shard_count, - })) - } } #[pyclass] @@ -383,52 +388,52 @@ struct PySession(Session); #[pymethods] impl PySession { - #[new] - fn __new__( - scheduler: &PyScheduler, - dynamic_ui: bool, - ui_use_prodash: bool, - max_workunit_level: u64, - build_id: String, - session_values: PyObject, - cancellation_latch: &PySessionCancellationLatch, - py: Python, - ) -> PyO3Result { - let core = scheduler.0.core.clone(); - let cancellation_latch = cancellation_latch.0.clone(); - let py_level: PythonLogLevel = max_workunit_level - .try_into() - .map_err(|e| PyException::new_err(format!("{e}")))?; - // NB: Session creation interacts with the Graph, which must not be accessed while the GIL is - // held. - let session = py - .allow_threads(|| { - Session::new( - core, - dynamic_ui, - ui_use_prodash, - py_level.into(), - build_id, - session_values, - cancellation_latch, - ) - }) - .map_err(PyException::new_err)?; - Ok(Self(session)) - } + #[new] + fn __new__( + scheduler: &PyScheduler, + dynamic_ui: bool, + ui_use_prodash: bool, + max_workunit_level: u64, + build_id: String, + session_values: PyObject, + cancellation_latch: &PySessionCancellationLatch, + py: Python, + ) -> PyO3Result { + let core = scheduler.0.core.clone(); + let cancellation_latch = cancellation_latch.0.clone(); + let py_level: PythonLogLevel = max_workunit_level + .try_into() + .map_err(|e| PyException::new_err(format!("{e}")))?; + // NB: Session creation interacts with the Graph, which must not be accessed while the GIL is + // held. 
+ let session = py + .allow_threads(|| { + Session::new( + core, + dynamic_ui, + ui_use_prodash, + py_level.into(), + build_id, + session_values, + cancellation_latch, + ) + }) + .map_err(PyException::new_err)?; + Ok(Self(session)) + } - fn cancel(&self) { - self.0.cancel() - } + fn cancel(&self) { + self.0.cancel() + } - fn is_cancelled(&self) -> bool { - self.0.is_cancelled() - } + fn is_cancelled(&self) -> bool { + self.0.is_cancelled() + } - #[getter] - fn session_values(&self) -> PyObject { - self.0.session_values() - } + #[getter] + fn session_values(&self) -> PyObject { + self.0.session_values() + } } #[pyclass] @@ -436,31 +441,31 @@ struct PySessionCancellationLatch(AsyncLatch); #[pymethods] impl PySessionCancellationLatch { - #[new] - fn __new__() -> Self { - Self(AsyncLatch::new()) - } + #[new] + fn __new__() -> Self { + Self(AsyncLatch::new()) + } - fn is_cancelled(&self) -> bool { - self.0.poll_triggered() - } + fn is_cancelled(&self) -> bool { + self.0.poll_triggered() + } } #[pyclass] struct PyNailgunServer { - server: RefCell>, - executor: Executor, + server: RefCell>, + executor: Executor, } #[pymethods] impl PyNailgunServer { - fn port(&self) -> PyO3Result { - let borrowed_server = self.server.borrow(); - let server = borrowed_server.as_ref().ok_or_else(|| { - PyException::new_err("Cannot get the port of a server that has already shut down.") - })?; - Ok(server.port()) - } + fn port(&self) -> PyO3Result { + let borrowed_server = self.server.borrow(); + let server = borrowed_server.as_ref().ok_or_else(|| { + PyException::new_err("Cannot get the port of a server that has already shut down.") + })?; + Ok(server.port()) + } } #[pyclass] @@ -468,186 +473,184 @@ struct PyExecutionRequest(RefCell); #[pymethods] impl PyExecutionRequest { - #[new] - fn __new__(poll: bool, poll_delay_in_ms: Option, timeout_in_ms: Option) -> Self { - let request = ExecutionRequest { - poll, - poll_delay: poll_delay_in_ms.map(Duration::from_millis), - timeout: timeout_in_ms.map(Duration::from_millis), - ..ExecutionRequest::default() - }; - Self(RefCell::new(request)) - } + #[new] + fn __new__(poll: bool, poll_delay_in_ms: Option, timeout_in_ms: Option) -> Self { + let request = ExecutionRequest { + poll, + poll_delay: poll_delay_in_ms.map(Duration::from_millis), + timeout: timeout_in_ms.map(Duration::from_millis), + ..ExecutionRequest::default() + }; + Self(RefCell::new(request)) + } } #[pyclass] struct PyResult { - #[pyo3(get)] - is_throw: bool, - #[pyo3(get)] - result: PyObject, - #[pyo3(get)] - python_traceback: Option, - #[pyo3(get)] - engine_traceback: Vec<(String, Option)>, + #[pyo3(get)] + is_throw: bool, + #[pyo3(get)] + result: PyObject, + #[pyo3(get)] + python_traceback: Option, + #[pyo3(get)] + engine_traceback: Vec<(String, Option)>, } fn py_result_from_root(py: Python, result: Result) -> PyResult { - match result { - Ok(val) => PyResult { - is_throw: false, - result: val.into(), - python_traceback: None, - engine_traceback: vec![], - }, - Err(f) => { - let (val, python_traceback, engine_traceback) = match f { - f @ (Failure::Invalidated | Failure::MissingDigest { .. 
}) => { - let msg = format!("{f}"); - let python_traceback = Failure::native_traceback(&msg); - ( - externs::create_exception(py, msg), - python_traceback, - Vec::new(), - ) + match result { + Ok(val) => PyResult { + is_throw: false, + result: val.into(), + python_traceback: None, + engine_traceback: vec![], + }, + Err(f) => { + let (val, python_traceback, engine_traceback) = match f { + f @ (Failure::Invalidated | Failure::MissingDigest { .. }) => { + let msg = format!("{f}"); + let python_traceback = Failure::native_traceback(&msg); + ( + externs::create_exception(py, msg), + python_traceback, + Vec::new(), + ) + } + Failure::Throw { + val, + python_traceback, + engine_traceback, + } => (val, python_traceback, engine_traceback), + }; + PyResult { + is_throw: true, + result: val.into(), + python_traceback: Some(python_traceback), + engine_traceback: engine_traceback + .into_iter() + .map(|ff| (ff.name, ff.desc)) + .collect(), + } } - Failure::Throw { - val, - python_traceback, - engine_traceback, - } => (val, python_traceback, engine_traceback), - }; - PyResult { - is_throw: true, - result: val.into(), - python_traceback: Some(python_traceback), - engine_traceback: engine_traceback - .into_iter() - .map(|ff| (ff.name, ff.desc)) - .collect(), - } } - } } #[pyclass] struct PyThreadLocals(Arc, Option); impl PyThreadLocals { - fn get() -> Self { - let stdio_dest = stdio::get_destination(); - let workunit_store_handle = workunit_store::get_workunit_store_handle(); - Self(stdio_dest, workunit_store_handle) - } + fn get() -> Self { + let stdio_dest = stdio::get_destination(); + let workunit_store_handle = workunit_store::get_workunit_store_handle(); + Self(stdio_dest, workunit_store_handle) + } } #[pymethods] impl PyThreadLocals { - #[classmethod] - fn get_for_current_thread(_cls: &PyType) -> Self { - Self::get() - } + #[classmethod] + fn get_for_current_thread(_cls: &PyType) -> Self { + Self::get() + } - fn set_for_current_thread(&self) { - stdio::set_thread_destination(self.0.clone()); - workunit_store::set_thread_workunit_store_handle(self.1.clone()); - } + fn set_for_current_thread(&self) { + stdio::set_thread_destination(self.0.clone()); + workunit_store::set_thread_workunit_store_handle(self.1.clone()); + } } #[pyfunction] fn nailgun_server_create( - py_executor: &externs::scheduler::PyExecutor, - port: u16, - runner: PyObject, + py_executor: &externs::scheduler::PyExecutor, + port: u16, + runner: PyObject, ) -> PyO3Result { - let server_future = { - let executor = py_executor.0.clone(); - nailgun::Server::new(executor, port, move |exe: nailgun::RawFdExecution| { - Python::with_gil(|py| { - let result = runner.as_ref(py).call1(( - exe.cmd.command, - PyTuple::new(py, exe.cmd.args), - exe.cmd.env.into_iter().collect::>(), - exe.cmd.working_dir, - PySessionCancellationLatch(exe.cancelled), - exe.stdin_fd as i64, - exe.stdout_fd as i64, - exe.stderr_fd as i64, - )); - match result { - Ok(exit_code) => { - let code: i32 = exit_code.extract().unwrap(); - nailgun::ExitCode(code) - } - Err(e) => { - error!( - "Uncaught exception in nailgun handler: {:#?}", - Failure::from_py_err_with_gil(py, e) - ); - nailgun::ExitCode(1) - } - } - }) - }) - }; + let server_future = { + let executor = py_executor.0.clone(); + nailgun::Server::new(executor, port, move |exe: nailgun::RawFdExecution| { + Python::with_gil(|py| { + let result = runner.as_ref(py).call1(( + exe.cmd.command, + PyTuple::new(py, exe.cmd.args), + exe.cmd.env.into_iter().collect::>(), + exe.cmd.working_dir, + 
PySessionCancellationLatch(exe.cancelled), + exe.stdin_fd as i64, + exe.stdout_fd as i64, + exe.stderr_fd as i64, + )); + match result { + Ok(exit_code) => { + let code: i32 = exit_code.extract().unwrap(); + nailgun::ExitCode(code) + } + Err(e) => { + error!( + "Uncaught exception in nailgun handler: {:#?}", + Failure::from_py_err_with_gil(py, e) + ); + nailgun::ExitCode(1) + } + } + }) + }) + }; - let server = py_executor - .0 - .block_on(server_future) - .map_err(PyException::new_err)?; - Ok(PyNailgunServer { - server: RefCell::new(Some(server)), - executor: py_executor.0.clone(), - }) + let server = py_executor + .0 + .block_on(server_future) + .map_err(PyException::new_err)?; + Ok(PyNailgunServer { + server: RefCell::new(Some(server)), + executor: py_executor.0.clone(), + }) } #[pyfunction] fn nailgun_server_await_shutdown( - py: Python, - nailgun_server_ptr: &PyNailgunServer, + py: Python, + nailgun_server_ptr: &PyNailgunServer, ) -> PyO3Result<()> { - if let Some(server) = nailgun_server_ptr.server.borrow_mut().take() { - let executor = nailgun_server_ptr.executor.clone(); - py.allow_threads(|| executor.block_on(server.shutdown())) - .map_err(PyException::new_err) - } else { - Ok(()) - } + if let Some(server) = nailgun_server_ptr.server.borrow_mut().take() { + let executor = nailgun_server_ptr.executor.clone(); + py.allow_threads(|| executor.block_on(server.shutdown())) + .map_err(PyException::new_err) + } else { + Ok(()) + } } #[pyfunction] fn strongly_connected_components( - py: Python, - adjacency_lists: Vec<(PyObject, Vec)>, + py: Python, + adjacency_lists: Vec<(PyObject, Vec)>, ) -> PyO3Result>> { - let mut graph: DiGraph = Graph::new(); - let mut node_ids: HashMap = HashMap::new(); - - for (node, adjacency_list) in adjacency_lists { - let node_key = Key::from_value(node.into())?; - let node_id = *node_ids - .entry(node_key.clone()) - .or_insert_with(|| graph.add_node(node_key)); - for dependency in adjacency_list { - let dependency_key = Key::from_value(dependency.into())?; - let dependency_id = node_ids - .entry(dependency_key.clone()) - .or_insert_with(|| graph.add_node(dependency_key)); - graph.add_edge(node_id, *dependency_id, ()); + let mut graph: DiGraph = Graph::new(); + let mut node_ids: HashMap = HashMap::new(); + + for (node, adjacency_list) in adjacency_lists { + let node_key = Key::from_value(node.into())?; + let node_id = *node_ids + .entry(node_key.clone()) + .or_insert_with(|| graph.add_node(node_key)); + for dependency in adjacency_list { + let dependency_key = Key::from_value(dependency.into())?; + let dependency_id = node_ids + .entry(dependency_key.clone()) + .or_insert_with(|| graph.add_node(dependency_key)); + graph.add_edge(node_id, *dependency_id, ()); + } } - } - - Ok( - petgraph::algo::tarjan_scc(&graph) - .into_iter() - .map(|component| { - component - .into_iter() - .map(|node_id| graph[node_id].to_value().consume_into_py_object(py)) - .collect::>() - }) - .collect(), - ) + + Ok(petgraph::algo::tarjan_scc(&graph) + .into_iter() + .map(|component| { + component + .into_iter() + .map(|node_id| graph[node_id].to_value().consume_into_py_object(py)) + .collect::>() + }) + .collect()) } /// Return the number of zero bits prefixed on an (undefined, but well balanced) hash of the given @@ -656,9 +659,9 @@ fn strongly_connected_components( /// This is mostly in rust because of the convenience of the `leading_zeros` builtin method. 
#[pyfunction] fn hash_prefix_zero_bits(item: &str) -> u32 { - let mut hasher = FnvHasher::default(); - hasher.write(item.as_bytes()); - hasher.finish().leading_zeros() + let mut hasher = FnvHasher::default(); + hasher.write(item.as_bytes()); + hasher.finish().leading_zeros() } /// @@ -669,1066 +672,1059 @@ fn hash_prefix_zero_bits(item: &str) -> u32 { /// #[pyfunction] fn scheduler_create( - py_executor: &externs::scheduler::PyExecutor, - py_tasks: &PyTasks, - types_ptr: &PyTypes, - build_root: PathBuf, - local_execution_root_dir: PathBuf, - named_caches_dir: PathBuf, - ignore_patterns: Vec, - use_gitignore: bool, - watch_filesystem: bool, - remoting_options: &PyRemotingOptions, - local_store_options: &PyLocalStoreOptions, - exec_strategy_opts: &PyExecutionStrategyOptions, - ca_certs_path: Option, + py_executor: &externs::scheduler::PyExecutor, + py_tasks: &PyTasks, + types_ptr: &PyTypes, + build_root: PathBuf, + local_execution_root_dir: PathBuf, + named_caches_dir: PathBuf, + ignore_patterns: Vec, + use_gitignore: bool, + watch_filesystem: bool, + remoting_options: &PyRemotingOptions, + local_store_options: &PyLocalStoreOptions, + exec_strategy_opts: &PyExecutionStrategyOptions, + ca_certs_path: Option, ) -> PyO3Result { - match fs::increase_limits() { - Ok(msg) => debug!("{}", msg), - Err(e) => warn!("{}", e), - } - let types = types_ptr - .0 - .borrow_mut() - .take() - .ok_or_else(|| PyException::new_err("An instance of PyTypes may only be used once."))?; - let intrinsics = Intrinsics::new(&types); - let mut tasks = py_tasks.0.replace(Tasks::new()); - tasks.intrinsics_set(&intrinsics); - - // NOTE: Enter the Tokio runtime so that libraries like Tonic (for gRPC) are able to - // use `tokio::spawn` since Python does not setup Tokio for the main thread. This also - // ensures that the correct executor is used by those libraries. - let core = py_executor - .0 - .enter(|| { - py_executor.0.block_on(async { - Core::new( - py_executor.0.clone(), - tasks, - types, - intrinsics, - build_root, - ignore_patterns, - use_gitignore, - watch_filesystem, - local_execution_root_dir, - named_caches_dir, - ca_certs_path, - local_store_options.0.clone(), - remoting_options.0.clone(), - exec_strategy_opts.0.clone(), - ) - .await - }) - }) - .map_err(PyValueError::new_err)?; - Ok(PyScheduler(Scheduler::new(core))) + match fs::increase_limits() { + Ok(msg) => debug!("{}", msg), + Err(e) => warn!("{}", e), + } + let types = types_ptr + .0 + .borrow_mut() + .take() + .ok_or_else(|| PyException::new_err("An instance of PyTypes may only be used once."))?; + let intrinsics = Intrinsics::new(&types); + let mut tasks = py_tasks.0.replace(Tasks::new()); + tasks.intrinsics_set(&intrinsics); + + // NOTE: Enter the Tokio runtime so that libraries like Tonic (for gRPC) are able to + // use `tokio::spawn` since Python does not setup Tokio for the main thread. This also + // ensures that the correct executor is used by those libraries. 
+ let core = py_executor + .0 + .enter(|| { + py_executor.0.block_on(async { + Core::new( + py_executor.0.clone(), + tasks, + types, + intrinsics, + build_root, + ignore_patterns, + use_gitignore, + watch_filesystem, + local_execution_root_dir, + named_caches_dir, + ca_certs_path, + local_store_options.0.clone(), + remoting_options.0.clone(), + exec_strategy_opts.0.clone(), + ) + .await + }) + }) + .map_err(PyValueError::new_err)?; + Ok(PyScheduler(Scheduler::new(core))) } async fn workunit_to_py_value( - workunit_store: &WorkunitStore, - workunit: Workunit, - core: &Arc, + workunit_store: &WorkunitStore, + workunit: Workunit, + core: &Arc, ) -> PyO3Result { - let metadata = workunit.metadata.ok_or_else(|| { - PyException::new_err(format!( - // TODO: It would be better for this to be typesafe, but it isn't currently worth it to - // split the Workunit struct. - "Workunit for {} was disabled. Please file an issue at \ + let metadata = workunit.metadata.ok_or_else(|| { + PyException::new_err(format!( + // TODO: It would be better for this to be typesafe, but it isn't currently worth it to + // split the Workunit struct. + "Workunit for {} was disabled. Please file an issue at \ [https://github.com/pantsbuild/pants/issues/new].", - workunit.span_id - )) - })?; - let has_parent_ids = !workunit.parent_ids.is_empty(); - let mut dict_entries = Python::with_gil(|py| { - let mut dict_entries = vec![ - ( - externs::store_utf8(py, "name"), - externs::store_utf8(py, workunit.name), - ), - ( - externs::store_utf8(py, "span_id"), - externs::store_utf8(py, &format!("{}", workunit.span_id)), - ), - ( - externs::store_utf8(py, "level"), - externs::store_utf8(py, &workunit.level.to_string()), - ), - ]; - - let parent_ids = workunit - .parent_ids - .into_iter() - .map(|parent_id| externs::store_utf8(py, &parent_id.to_string())) - .collect::>(); - - if has_parent_ids { - // TODO: Remove the single-valued `parent_id` field around version 2.16.0.dev0. - dict_entries.push((externs::store_utf8(py, "parent_id"), parent_ids[0].clone())); - } - dict_entries.push(( - externs::store_utf8(py, "parent_ids"), - externs::store_tuple(py, parent_ids), - )); - - match workunit.state { - WorkunitState::Started { start_time, .. 
} => { - let duration = start_time - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_else(|_| Duration::default()); - dict_entries.extend_from_slice(&[ - ( - externs::store_utf8(py, "start_secs"), - externs::store_u64(py, duration.as_secs()), - ), - ( - externs::store_utf8(py, "start_nanos"), - externs::store_u64(py, duration.subsec_nanos() as u64), - ), - ]) - } - WorkunitState::Completed { time_span } => { - dict_entries.extend_from_slice(&[ - ( - externs::store_utf8(py, "start_secs"), - externs::store_u64(py, time_span.start.secs), - ), - ( - externs::store_utf8(py, "start_nanos"), - externs::store_u64(py, u64::from(time_span.start.nanos)), - ), - ( - externs::store_utf8(py, "duration_secs"), - externs::store_u64(py, time_span.duration.secs), - ), - ( - externs::store_utf8(py, "duration_nanos"), - externs::store_u64(py, u64::from(time_span.duration.nanos)), - ), - ]); - } - }; + workunit.span_id + )) + })?; + let has_parent_ids = !workunit.parent_ids.is_empty(); + let mut dict_entries = Python::with_gil(|py| { + let mut dict_entries = vec![ + ( + externs::store_utf8(py, "name"), + externs::store_utf8(py, workunit.name), + ), + ( + externs::store_utf8(py, "span_id"), + externs::store_utf8(py, &format!("{}", workunit.span_id)), + ), + ( + externs::store_utf8(py, "level"), + externs::store_utf8(py, &workunit.level.to_string()), + ), + ]; + + let parent_ids = workunit + .parent_ids + .into_iter() + .map(|parent_id| externs::store_utf8(py, &parent_id.to_string())) + .collect::>(); + + if has_parent_ids { + // TODO: Remove the single-valued `parent_id` field around version 2.16.0.dev0. + dict_entries.push((externs::store_utf8(py, "parent_id"), parent_ids[0].clone())); + } + dict_entries.push(( + externs::store_utf8(py, "parent_ids"), + externs::store_tuple(py, parent_ids), + )); - if let Some(desc) = &metadata.desc.as_ref() { - dict_entries.push(( - externs::store_utf8(py, "description"), - externs::store_utf8(py, desc), - )); - } - dict_entries - }); - - let mut artifact_entries = Vec::new(); - - for (artifact_name, digest) in metadata.artifacts.iter() { - let store = core.store(); - let py_val = match digest { - ArtifactOutput::FileDigest(digest) => Python::with_gil(|py| { - crate::nodes::Snapshot::store_file_digest(py, *digest).map_err(PyException::new_err) - })?, - ArtifactOutput::Snapshot(digest_handle) => { - let digest = (**digest_handle) - .as_any() - .downcast_ref::() - .ok_or_else(|| { - PyException::new_err(format!( - "Failed to convert {digest_handle:?} to a DirectoryDigest." - )) - })?; - let snapshot = store::Snapshot::from_digest(store, digest.clone()) - .await - .map_err(possible_store_missing_digest)?; + match workunit.state { + WorkunitState::Started { start_time, .. 
} => { + let duration = start_time + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_else(|_| Duration::default()); + dict_entries.extend_from_slice(&[ + ( + externs::store_utf8(py, "start_secs"), + externs::store_u64(py, duration.as_secs()), + ), + ( + externs::store_utf8(py, "start_nanos"), + externs::store_u64(py, duration.subsec_nanos() as u64), + ), + ]) + } + WorkunitState::Completed { time_span } => { + dict_entries.extend_from_slice(&[ + ( + externs::store_utf8(py, "start_secs"), + externs::store_u64(py, time_span.start.secs), + ), + ( + externs::store_utf8(py, "start_nanos"), + externs::store_u64(py, u64::from(time_span.start.nanos)), + ), + ( + externs::store_utf8(py, "duration_secs"), + externs::store_u64(py, time_span.duration.secs), + ), + ( + externs::store_utf8(py, "duration_nanos"), + externs::store_u64(py, u64::from(time_span.duration.nanos)), + ), + ]); + } + }; - Python::with_gil(|py| { - crate::nodes::Snapshot::store_snapshot(py, snapshot).map_err(PyException::new_err) - })? - } - }; + if let Some(desc) = &metadata.desc.as_ref() { + dict_entries.push(( + externs::store_utf8(py, "description"), + externs::store_utf8(py, desc), + )); + } + dict_entries + }); - Python::with_gil(|py| { - artifact_entries.push((externs::store_utf8(py, artifact_name.as_str()), py_val)) - }) - } - - Python::with_gil(|py| { - let mut user_metadata_entries = Vec::with_capacity(metadata.user_metadata.len()); - for (user_metadata_key, user_metadata_item) in metadata.user_metadata.iter() { - let value = match user_metadata_item { - UserMetadataItem::String(v) => v.into_py(py), - UserMetadataItem::Int(n) => n.into_py(py), - UserMetadataItem::PyValue(py_val_handle) => (**py_val_handle) - .as_any() - .downcast_ref::() - .ok_or_else(|| { - PyException::new_err(format!("Failed to convert {py_val_handle:?} to a Value.")) - })? - .to_object(py), - }; - user_metadata_entries.push(( - externs::store_utf8(py, user_metadata_key.as_str()), - Value::new(value), - )); - } + let mut artifact_entries = Vec::new(); + + for (artifact_name, digest) in metadata.artifacts.iter() { + let store = core.store(); + let py_val = match digest { + ArtifactOutput::FileDigest(digest) => Python::with_gil(|py| { + crate::nodes::Snapshot::store_file_digest(py, *digest).map_err(PyException::new_err) + })?, + ArtifactOutput::Snapshot(digest_handle) => { + let digest = (**digest_handle) + .as_any() + .downcast_ref::() + .ok_or_else(|| { + PyException::new_err(format!( + "Failed to convert {digest_handle:?} to a DirectoryDigest." + )) + })?; + let snapshot = store::Snapshot::from_digest(store, digest.clone()) + .await + .map_err(possible_store_missing_digest)?; + + Python::with_gil(|py| { + crate::nodes::Snapshot::store_snapshot(py, snapshot) + .map_err(PyException::new_err) + })? 
+ } + }; - dict_entries.push(( - externs::store_utf8(py, "metadata"), - externs::store_dict(py, user_metadata_entries)?, - )); - - if let Some(stdout_digest) = metadata.stdout { - artifact_entries.push(( - externs::store_utf8(py, "stdout_digest"), - crate::nodes::Snapshot::store_file_digest(py, stdout_digest) - .map_err(PyException::new_err)?, - )); + Python::with_gil(|py| { + artifact_entries.push((externs::store_utf8(py, artifact_name.as_str()), py_val)) + }) } - if let Some(stderr_digest) = metadata.stderr { - artifact_entries.push(( - externs::store_utf8(py, "stderr_digest"), - crate::nodes::Snapshot::store_file_digest(py, stderr_digest) - .map_err(PyException::new_err)?, - )); - } + Python::with_gil(|py| { + let mut user_metadata_entries = Vec::with_capacity(metadata.user_metadata.len()); + for (user_metadata_key, user_metadata_item) in metadata.user_metadata.iter() { + let value = match user_metadata_item { + UserMetadataItem::String(v) => v.into_py(py), + UserMetadataItem::Int(n) => n.into_py(py), + UserMetadataItem::PyValue(py_val_handle) => (**py_val_handle) + .as_any() + .downcast_ref::() + .ok_or_else(|| { + PyException::new_err(format!( + "Failed to convert {py_val_handle:?} to a Value." + )) + })? + .to_object(py), + }; + user_metadata_entries.push(( + externs::store_utf8(py, user_metadata_key.as_str()), + Value::new(value), + )); + } - dict_entries.push(( - externs::store_utf8(py, "artifacts"), - externs::store_dict(py, artifact_entries)?, - )); + dict_entries.push(( + externs::store_utf8(py, "metadata"), + externs::store_dict(py, user_metadata_entries)?, + )); - // TODO: Temporarily attaching the global counters to the "root" workunit. Callers should - // switch to consuming `StreamingWorkunitContext.get_metrics`. - // Remove this deprecation after 2.14.0.dev0. - if !has_parent_ids { - let mut metrics = workunit_store.get_metrics(); + if let Some(stdout_digest) = metadata.stdout { + artifact_entries.push(( + externs::store_utf8(py, "stdout_digest"), + crate::nodes::Snapshot::store_file_digest(py, stdout_digest) + .map_err(PyException::new_err)?, + )); + } - metrics.insert("DEPRECATED_ConsumeGlobalCountersInstead", 0); - let counters_entries = metrics - .into_iter() - .map(|(counter_name, counter_value)| { - ( - externs::store_utf8(py, counter_name), - externs::store_u64(py, counter_value), - ) - }) - .collect(); + if let Some(stderr_digest) = metadata.stderr { + artifact_entries.push(( + externs::store_utf8(py, "stderr_digest"), + crate::nodes::Snapshot::store_file_digest(py, stderr_digest) + .map_err(PyException::new_err)?, + )); + } - dict_entries.push(( - externs::store_utf8(py, "counters"), - externs::store_dict(py, counters_entries)?, - )); - } - externs::store_dict(py, dict_entries) - }) + dict_entries.push(( + externs::store_utf8(py, "artifacts"), + externs::store_dict(py, artifact_entries)?, + )); + + // TODO: Temporarily attaching the global counters to the "root" workunit. Callers should + // switch to consuming `StreamingWorkunitContext.get_metrics`. + // Remove this deprecation after 2.14.0.dev0. 
+ if !has_parent_ids { + let mut metrics = workunit_store.get_metrics(); + + metrics.insert("DEPRECATED_ConsumeGlobalCountersInstead", 0); + let counters_entries = metrics + .into_iter() + .map(|(counter_name, counter_value)| { + ( + externs::store_utf8(py, counter_name), + externs::store_u64(py, counter_value), + ) + }) + .collect(); + + dict_entries.push(( + externs::store_utf8(py, "counters"), + externs::store_dict(py, counters_entries)?, + )); + } + externs::store_dict(py, dict_entries) + }) } async fn workunits_to_py_tuple_value( - py: Python<'_>, - workunit_store: &WorkunitStore, - workunits: Vec, - core: &Arc, + py: Python<'_>, + workunit_store: &WorkunitStore, + workunits: Vec, + core: &Arc, ) -> PyO3Result { - let mut workunit_values = Vec::new(); - for workunit in workunits { - let py_value = workunit_to_py_value(workunit_store, workunit, core).await?; - workunit_values.push(py_value); - } + let mut workunit_values = Vec::new(); + for workunit in workunits { + let py_value = workunit_to_py_value(workunit_store, workunit, core).await?; + workunit_values.push(py_value); + } - Ok(externs::store_tuple(py, workunit_values)) + Ok(externs::store_tuple(py, workunit_values)) } #[pyfunction] fn session_poll_workunits( - py_scheduler: PyObject, - py_session: PyObject, - max_log_verbosity_level: u64, + py_scheduler: PyObject, + py_session: PyObject, + max_log_verbosity_level: u64, ) -> PyO3Result { - // TODO: Black magic. PyObject is not marked UnwindSafe, and contains an UnsafeCell. Since PyO3 - // only allows us to receive `pyfunction` arguments as `PyObject` (or references under a held - // GIL), we cannot do what it does to use `catch_unwind` which would be interacting with - // `catch_unwind` while the object is still a raw pointer, and unchecked. - // - // Instead, we wrap the call, and assert that it is safe. It really might not be though. So this - // code should only live long enough to shake out the current issue, and an upstream issue with - // PyO3 will be the long term solution. - // - // see https://github.com/PyO3/pyo3/issues/2102 for more info. - let py_scheduler = std::panic::AssertUnwindSafe(py_scheduler); - let py_session = std::panic::AssertUnwindSafe(py_session); - std::panic::catch_unwind(|| { - let (core, session, py_level) = { - Python::with_gil(|py| -> PyO3Result<_> { - let py_scheduler = py_scheduler.extract::>(py)?; - let py_session = py_session.extract::>(py)?; - let py_level: PythonLogLevel = max_log_verbosity_level - .try_into() - .map_err(|e| PyException::new_err(format!("{e}")))?; - Ok((py_scheduler.0.core.clone(), py_session.0.clone(), py_level)) - })? - }; - core.executor.enter(|| { - let workunit_store = session.workunit_store(); - let (started, completed) = workunit_store.latest_workunits(py_level.into()); - - Python::with_gil(|py| -> PyO3Result<_> { - let started_val = core.executor.block_on(workunits_to_py_tuple_value( - py, - &workunit_store, - started, - &core, - ))?; - let completed_val = core.executor.block_on(workunits_to_py_tuple_value( - py, - &workunit_store, - completed, - &core, - ))?; - Ok(externs::store_tuple(py, vec![started_val, completed_val]).into()) - }) + // TODO: Black magic. PyObject is not marked UnwindSafe, and contains an UnsafeCell. Since PyO3 + // only allows us to receive `pyfunction` arguments as `PyObject` (or references under a held + // GIL), we cannot do what it does to use `catch_unwind` which would be interacting with + // `catch_unwind` while the object is still a raw pointer, and unchecked. 
+ // + // Instead, we wrap the call, and assert that it is safe. It really might not be though. So this + // code should only live long enough to shake out the current issue, and an upstream issue with + // PyO3 will be the long term solution. + // + // see https://github.com/PyO3/pyo3/issues/2102 for more info. + let py_scheduler = std::panic::AssertUnwindSafe(py_scheduler); + let py_session = std::panic::AssertUnwindSafe(py_session); + std::panic::catch_unwind(|| { + let (core, session, py_level) = { + Python::with_gil(|py| -> PyO3Result<_> { + let py_scheduler = py_scheduler.extract::>(py)?; + let py_session = py_session.extract::>(py)?; + let py_level: PythonLogLevel = max_log_verbosity_level + .try_into() + .map_err(|e| PyException::new_err(format!("{e}")))?; + Ok((py_scheduler.0.core.clone(), py_session.0.clone(), py_level)) + })? + }; + core.executor.enter(|| { + let workunit_store = session.workunit_store(); + let (started, completed) = workunit_store.latest_workunits(py_level.into()); + + Python::with_gil(|py| -> PyO3Result<_> { + let started_val = core.executor.block_on(workunits_to_py_tuple_value( + py, + &workunit_store, + started, + &core, + ))?; + let completed_val = core.executor.block_on(workunits_to_py_tuple_value( + py, + &workunit_store, + completed, + &core, + ))?; + Ok(externs::store_tuple(py, vec![started_val, completed_val]).into()) + }) + }) + }) + .unwrap_or_else(|e| { + log::warn!("Panic in `session_poll_workunits`: {:?}", e); + std::panic::resume_unwind(e); }) - }) - .unwrap_or_else(|e| { - log::warn!("Panic in `session_poll_workunits`: {:?}", e); - std::panic::resume_unwind(e); - }) } #[pyfunction] fn session_run_interactive_process( - py: Python, - py_session: &PySession, - interactive_process: PyObject, - process_config_from_environment: PyProcessExecutionEnvironment, + py: Python, + py_session: &PySession, + interactive_process: PyObject, + process_config_from_environment: PyProcessExecutionEnvironment, ) -> PyO3Result { - let core = py_session.0.core(); - let context = py_session - .0 - .core() - .graph - .context(SessionCore::new(py_session.0.clone())); - let interactive_process: Value = interactive_process.into(); - let process_config = Value::new(process_config_from_environment.into_py(py)); - py.allow_threads(|| { - core.executor.clone().block_on(nodes::maybe_side_effecting( - true, - &Arc::new(std::sync::atomic::AtomicBool::new(true)), - core.intrinsics.run( - &Intrinsic { - id: RuleId::new("interactive_process"), - product: core.types.interactive_process_result, - inputs: vec![ - DependencyKey::new(core.types.interactive_process), - DependencyKey::new(core.types.process_config_from_environment), - ], - }, - context, - vec![interactive_process, process_config], - ), - )) - }) - .map(|v| v.into()) - .map_err(|e| PyException::new_err(e.to_string())) + let core = py_session.0.core(); + let context = py_session + .0 + .core() + .graph + .context(SessionCore::new(py_session.0.clone())); + let interactive_process: Value = interactive_process.into(); + let process_config = Value::new(process_config_from_environment.into_py(py)); + py.allow_threads(|| { + core.executor.clone().block_on(nodes::maybe_side_effecting( + true, + &Arc::new(std::sync::atomic::AtomicBool::new(true)), + core.intrinsics.run( + &Intrinsic { + id: RuleId::new("interactive_process"), + product: core.types.interactive_process_result, + inputs: vec![ + DependencyKey::new(core.types.interactive_process), + DependencyKey::new(core.types.process_config_from_environment), + ], + }, + context, + 
vec![interactive_process, process_config], + ), + )) + }) + .map(|v| v.into()) + .map_err(|e| PyException::new_err(e.to_string())) } #[pyfunction] fn scheduler_metrics<'py>( - py: Python<'py>, - py_scheduler: &'py PyScheduler, - py_session: &'py PySession, + py: Python<'py>, + py_scheduler: &'py PyScheduler, + py_session: &'py PySession, ) -> HashMap<&'py str, i64> { - py_scheduler - .0 - .core - .executor - .enter(|| py.allow_threads(|| py_scheduler.0.metrics(&py_session.0))) + py_scheduler + .0 + .core + .executor + .enter(|| py.allow_threads(|| py_scheduler.0.metrics(&py_session.0))) } #[pyfunction] fn scheduler_live_items<'py>( - py: Python<'py>, - py_scheduler: &'py PyScheduler, - py_session: &'py PySession, + py: Python<'py>, + py_scheduler: &'py PyScheduler, + py_session: &'py PySession, ) -> (Vec, HashMap<&'static str, (usize, usize)>) { - let (items, sizes) = py_scheduler - .0 - .core - .executor - .enter(|| py.allow_threads(|| py_scheduler.0.live_items(&py_session.0))); - let py_items = items.into_iter().map(|value| value.to_object(py)).collect(); - (py_items, sizes) + let (items, sizes) = py_scheduler + .0 + .core + .executor + .enter(|| py.allow_threads(|| py_scheduler.0.live_items(&py_session.0))); + let py_items = items.into_iter().map(|value| value.to_object(py)).collect(); + (py_items, sizes) } #[pyfunction] fn scheduler_shutdown(py: Python, py_scheduler: &PyScheduler, timeout_secs: u64) { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - py.allow_threads(|| { - core - .executor - .block_on(core.shutdown(Duration::from_secs(timeout_secs))); + let core = &py_scheduler.0.core; + core.executor.enter(|| { + py.allow_threads(|| { + core.executor + .block_on(core.shutdown(Duration::from_secs(timeout_secs))); + }) }) - }) } #[pyfunction] fn scheduler_execute( - py: Python, - py_scheduler: &PyScheduler, - py_session: &PySession, - py_execution_request: &PyExecutionRequest, + py: Python, + py_scheduler: &PyScheduler, + py_session: &PySession, + py_execution_request: &PyExecutionRequest, ) -> PyO3Result> { - py_scheduler.0.core.executor.enter(|| { - // TODO: A parent_id should be an explicit argument. - py_session.0.workunit_store().init_thread_state(None); - - let execution_request: &mut ExecutionRequest = &mut py_execution_request.0.borrow_mut(); - Ok( - py.allow_threads(|| { - py_scheduler - .0 - .execute(execution_request, &py_session.0) - .map_err(|e| match e { - ExecutionTermination::KeyboardInterrupt => PyKeyboardInterrupt::new_err(()), - ExecutionTermination::PollTimeout => PollTimeout::new_err(()), - ExecutionTermination::Fatal(msg) => PyException::new_err(msg), - }) - })? - .into_iter() - .map(|root_result| py_result_from_root(py, root_result)) - .collect(), - ) - }) + py_scheduler.0.core.executor.enter(|| { + // TODO: A parent_id should be an explicit argument. + py_session.0.workunit_store().init_thread_state(None); + + let execution_request: &mut ExecutionRequest = &mut py_execution_request.0.borrow_mut(); + Ok(py + .allow_threads(|| { + py_scheduler + .0 + .execute(execution_request, &py_session.0) + .map_err(|e| match e { + ExecutionTermination::KeyboardInterrupt => PyKeyboardInterrupt::new_err(()), + ExecutionTermination::PollTimeout => PollTimeout::new_err(()), + ExecutionTermination::Fatal(msg) => PyException::new_err(msg), + }) + })? 
+ .into_iter() + .map(|root_result| py_result_from_root(py, root_result)) + .collect()) + }) } #[pyfunction] fn execution_add_root_select( - py_scheduler: &PyScheduler, - py_execution_request: &PyExecutionRequest, - param_vals: Vec, - product: &PyType, + py_scheduler: &PyScheduler, + py_execution_request: &PyExecutionRequest, + param_vals: Vec, + product: &PyType, ) -> PyO3Result<()> { - py_scheduler.0.core.executor.enter(|| { - let product = TypeId::new(product); - let keys = param_vals - .into_iter() - .map(|p| Key::from_value(p.into())) - .collect::, _>>()?; - Params::new(keys) - .and_then(|params| { - let mut execution_request = py_execution_request.0.borrow_mut(); - py_scheduler - .0 - .add_root_select(&mut execution_request, params, product) - }) - .map_err(PyException::new_err) - }) + py_scheduler.0.core.executor.enter(|| { + let product = TypeId::new(product); + let keys = param_vals + .into_iter() + .map(|p| Key::from_value(p.into())) + .collect::, _>>()?; + Params::new(keys) + .and_then(|params| { + let mut execution_request = py_execution_request.0.borrow_mut(); + py_scheduler + .0 + .add_root_select(&mut execution_request, params, product) + }) + .map_err(PyException::new_err) + }) } #[pyfunction] fn tasks_task_begin( - py_tasks: &PyTasks, - func: PyObject, - output_type: &PyType, - arg_types: Vec<&PyType>, - masked_types: Vec<&PyType>, - side_effecting: bool, - engine_aware_return_type: bool, - cacheable: bool, - name: String, - desc: String, - level: u64, + py_tasks: &PyTasks, + func: PyObject, + output_type: &PyType, + arg_types: Vec<&PyType>, + masked_types: Vec<&PyType>, + side_effecting: bool, + engine_aware_return_type: bool, + cacheable: bool, + name: String, + desc: String, + level: u64, ) -> PyO3Result<()> { - let py_level: PythonLogLevel = level - .try_into() - .map_err(|e| PyException::new_err(format!("{e}")))?; - let func = Function(Key::from_value(func.into())?); - let output_type = TypeId::new(output_type); - let arg_types = arg_types.into_iter().map(TypeId::new).collect(); - let masked_types = masked_types.into_iter().map(TypeId::new).collect(); - let mut tasks = py_tasks.0.borrow_mut(); - tasks.task_begin( - func, - output_type, - side_effecting, - engine_aware_return_type, - arg_types, - masked_types, - cacheable, - name, - if desc.is_empty() { None } else { Some(desc) }, - py_level.into(), - ); - Ok(()) + let py_level: PythonLogLevel = level + .try_into() + .map_err(|e| PyException::new_err(format!("{e}")))?; + let func = Function(Key::from_value(func.into())?); + let output_type = TypeId::new(output_type); + let arg_types = arg_types.into_iter().map(TypeId::new).collect(); + let masked_types = masked_types.into_iter().map(TypeId::new).collect(); + let mut tasks = py_tasks.0.borrow_mut(); + tasks.task_begin( + func, + output_type, + side_effecting, + engine_aware_return_type, + arg_types, + masked_types, + cacheable, + name, + if desc.is_empty() { None } else { Some(desc) }, + py_level.into(), + ); + Ok(()) } #[pyfunction] fn tasks_task_end(py_tasks: &PyTasks) { - let mut tasks = py_tasks.0.borrow_mut(); - tasks.task_end(); + let mut tasks = py_tasks.0.borrow_mut(); + tasks.task_end(); } #[pyfunction] fn tasks_add_get( - py_tasks: &PyTasks, - output: &PyType, - inputs: Vec<&PyType>, - rule_id: Option, + py_tasks: &PyTasks, + output: &PyType, + inputs: Vec<&PyType>, + rule_id: Option, ) { - let output = TypeId::new(output); - let inputs = inputs.into_iter().map(TypeId::new).collect(); - let mut tasks = py_tasks.0.borrow_mut(); - tasks.add_get(output, inputs, 
rule_id); + let output = TypeId::new(output); + let inputs = inputs.into_iter().map(TypeId::new).collect(); + let mut tasks = py_tasks.0.borrow_mut(); + tasks.add_get(output, inputs, rule_id); } #[pyfunction] fn tasks_add_get_union( - py_tasks: &PyTasks, - output_type: &PyType, - input_types: Vec<&PyType>, - in_scope_types: Vec<&PyType>, + py_tasks: &PyTasks, + output_type: &PyType, + input_types: Vec<&PyType>, + in_scope_types: Vec<&PyType>, ) { - let product = TypeId::new(output_type); - let input_types = input_types.into_iter().map(TypeId::new).collect(); - let in_scope_types = in_scope_types.into_iter().map(TypeId::new).collect(); - let mut tasks = py_tasks.0.borrow_mut(); - tasks.add_get_union(product, input_types, in_scope_types); + let product = TypeId::new(output_type); + let input_types = input_types.into_iter().map(TypeId::new).collect(); + let in_scope_types = in_scope_types.into_iter().map(TypeId::new).collect(); + let mut tasks = py_tasks.0.borrow_mut(); + tasks.add_get_union(product, input_types, in_scope_types); } #[pyfunction] fn tasks_add_query(py_tasks: &PyTasks, output_type: &PyType, input_types: Vec<&PyType>) { - let product = TypeId::new(output_type); - let params = input_types.into_iter().map(TypeId::new).collect(); - let mut tasks = py_tasks.0.borrow_mut(); - tasks.query_add(product, params); + let product = TypeId::new(output_type); + let params = input_types.into_iter().map(TypeId::new).collect(); + let mut tasks = py_tasks.0.borrow_mut(); + tasks.query_add(product, params); } #[pyfunction] fn graph_invalidate_paths(py: Python, py_scheduler: &PyScheduler, paths: HashSet) -> u64 { - py_scheduler - .0 - .core - .executor - .enter(|| py.allow_threads(|| py_scheduler.0.invalidate_paths(&paths) as u64)) + py_scheduler + .0 + .core + .executor + .enter(|| py.allow_threads(|| py_scheduler.0.invalidate_paths(&paths) as u64)) } #[pyfunction] fn graph_invalidate_all_paths(py: Python, py_scheduler: &PyScheduler) -> u64 { - py_scheduler - .0 - .core - .executor - .enter(|| py.allow_threads(|| py_scheduler.0.invalidate_all_paths() as u64)) + py_scheduler + .0 + .core + .executor + .enter(|| py.allow_threads(|| py_scheduler.0.invalidate_all_paths() as u64)) } #[pyfunction] fn graph_invalidate_all(py: Python, py_scheduler: &PyScheduler) { - py_scheduler - .0 - .core - .executor - .enter(|| py.allow_threads(|| py_scheduler.0.invalidate_all())) + py_scheduler + .0 + .core + .executor + .enter(|| py.allow_threads(|| py_scheduler.0.invalidate_all())) } #[pyfunction] fn check_invalidation_watcher_liveness(py_scheduler: &PyScheduler) -> PyO3Result<()> { - py_scheduler - .0 - .core - .executor - .enter(|| py_scheduler.0.is_valid().map_err(PyException::new_err)) + py_scheduler + .0 + .core + .executor + .enter(|| py_scheduler.0.is_valid().map_err(PyException::new_err)) } #[pyfunction] fn graph_len(py: Python, py_scheduler: &PyScheduler) -> u64 { - let core = &py_scheduler.0.core; - core - .executor - .enter(|| py.allow_threads(|| core.graph.len() as u64)) + let core = &py_scheduler.0.core; + core.executor + .enter(|| py.allow_threads(|| core.graph.len() as u64)) } #[pyfunction] fn graph_visualize( - py: Python, - py_scheduler: &PyScheduler, - py_session: &PySession, - path: PathBuf, + py: Python, + py_scheduler: &PyScheduler, + py_session: &PySession, + path: PathBuf, ) -> PyO3Result<()> { - py_scheduler.0.core.executor.enter(|| { - py.allow_threads(|| py_scheduler.0.visualize(&py_session.0, path.as_path())) - .map_err(|e| { - PyException::new_err(format!( - "Failed to visualize to {}: 
{:?}", - path.display(), - e - )) - }) - }) + py_scheduler.0.core.executor.enter(|| { + py.allow_threads(|| py_scheduler.0.visualize(&py_session.0, path.as_path())) + .map_err(|e| { + PyException::new_err(format!( + "Failed to visualize to {}: {:?}", + path.display(), + e + )) + }) + }) } #[pyfunction] fn session_new_run_id(py_session: &PySession) { - py_session.0.new_run_id(); + py_session.0.new_run_id(); } #[pyfunction] fn session_get_metrics(py: Python<'_>, py_session: &PySession) -> HashMap<&'static str, u64> { - py.allow_threads(|| py_session.0.workunit_store().get_metrics()) + py.allow_threads(|| py_session.0.workunit_store().get_metrics()) } #[pyfunction] fn session_get_observation_histograms<'py>( - py: Python<'py>, - py_scheduler: &PyScheduler, - py_session: &PySession, + py: Python<'py>, + py_scheduler: &PyScheduler, + py_session: &PySession, ) -> PyO3Result<&'py PyDict> { - // Encoding version to return to callers. This should be bumped when the encoded histograms - // are encoded in a backwards-incompatible manner. - const OBSERVATIONS_VERSION: u64 = 0; - - py_scheduler.0.core.executor.enter(|| { - let observations = py.allow_threads(|| { - py_session - .0 - .workunit_store() - .encode_observations() - .map_err(PyException::new_err) - })?; - - let encoded_observations = PyDict::new(py); - for (metric, encoded_histogram) in &observations { - encoded_observations.set_item(metric, PyBytes::new(py, &encoded_histogram[..]))?; - } + // Encoding version to return to callers. This should be bumped when the encoded histograms + // are encoded in a backwards-incompatible manner. + const OBSERVATIONS_VERSION: u64 = 0; + + py_scheduler.0.core.executor.enter(|| { + let observations = py.allow_threads(|| { + py_session + .0 + .workunit_store() + .encode_observations() + .map_err(PyException::new_err) + })?; + + let encoded_observations = PyDict::new(py); + for (metric, encoded_histogram) in &observations { + encoded_observations.set_item(metric, PyBytes::new(py, &encoded_histogram[..]))?; + } - let result = PyDict::new(py); - result.set_item("version", OBSERVATIONS_VERSION)?; - result.set_item("histograms", encoded_observations)?; - Ok(result) - }) + let result = PyDict::new(py); + result.set_item("version", OBSERVATIONS_VERSION)?; + result.set_item("histograms", encoded_observations)?; + Ok(result) + }) } #[pyfunction] fn session_record_test_observation(py_scheduler: &PyScheduler, py_session: &PySession, value: u64) { - py_scheduler.0.core.executor.enter(|| { - py_session - .0 - .workunit_store() - .record_observation(ObservationMetric::TestObservation, value); - }) + py_scheduler.0.core.executor.enter(|| { + py_session + .0 + .workunit_store() + .record_observation(ObservationMetric::TestObservation, value); + }) } #[pyfunction] fn session_isolated_shallow_clone( - py_session: &PySession, - build_id: String, + py_session: &PySession, + build_id: String, ) -> PyO3Result { - let session_clone = py_session - .0 - .isolated_shallow_clone(build_id) - .map_err(PyException::new_err)?; - Ok(PySession(session_clone)) + let session_clone = py_session + .0 + .isolated_shallow_clone(build_id) + .map_err(PyException::new_err)?; + Ok(PySession(session_clone)) } #[pyfunction] fn session_wait_for_tail_tasks( - py: Python, - py_scheduler: &PyScheduler, - py_session: &PySession, - timeout: f64, + py: Python, + py_scheduler: &PyScheduler, + py_session: &PySession, + timeout: f64, ) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - let timeout = Duration::from_secs_f64(timeout); - 
core.executor.enter(|| { - py.allow_threads(|| { - core - .executor - .block_on(py_session.0.tail_tasks().wait(timeout)); - }) - }); - Ok(()) + let core = &py_scheduler.0.core; + let timeout = Duration::from_secs_f64(timeout); + core.executor.enter(|| { + py.allow_threads(|| { + core.executor + .block_on(py_session.0.tail_tasks().wait(timeout)); + }) + }); + Ok(()) } #[pyfunction] fn validate_reachability(py_scheduler: &PyScheduler) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - core - .rule_graph - .validate_reachability() - .map_err(PyException::new_err) - }) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + core.rule_graph + .validate_reachability() + .map_err(PyException::new_err) + }) } #[pyfunction] fn rule_graph_consumed_types<'py>( - py: Python<'py>, - py_scheduler: &PyScheduler, - param_types: Vec<&PyType>, - product_type: &PyType, + py: Python<'py>, + py_scheduler: &PyScheduler, + param_types: Vec<&PyType>, + product_type: &PyType, ) -> PyO3Result> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - let param_types = param_types.into_iter().map(TypeId::new).collect::>(); - let subgraph = core - .rule_graph - .subgraph(param_types, TypeId::new(product_type)) - .map_err(PyValueError::new_err)?; - - Ok( - subgraph - .consumed_types() - .into_iter() - .map(|type_id| type_id.as_py_type(py)) - .collect(), - ) - }) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + let param_types = param_types.into_iter().map(TypeId::new).collect::>(); + let subgraph = core + .rule_graph + .subgraph(param_types, TypeId::new(product_type)) + .map_err(PyValueError::new_err)?; + + Ok(subgraph + .consumed_types() + .into_iter() + .map(|type_id| type_id.as_py_type(py)) + .collect()) + }) } #[pyfunction] fn rule_graph_visualize(py_scheduler: &PyScheduler, path: PathBuf) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - // TODO(#7117): we want to represent union types in the graph visualizer somehow!!! - write_to_file(path.as_path(), &core.rule_graph).map_err(|e| { - PyIOError::new_err(format!( - "Failed to visualize to {}: {:?}", - path.display(), - e - )) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + // TODO(#7117): we want to represent union types in the graph visualizer somehow!!! + write_to_file(path.as_path(), &core.rule_graph).map_err(|e| { + PyIOError::new_err(format!( + "Failed to visualize to {}: {:?}", + path.display(), + e + )) + }) }) - }) } #[pyfunction] fn rule_subgraph_visualize( - py_scheduler: &PyScheduler, - param_types: Vec<&PyType>, - product_type: &PyType, - path: PathBuf, + py_scheduler: &PyScheduler, + param_types: Vec<&PyType>, + product_type: &PyType, + path: PathBuf, ) -> PyO3Result<()> { - py_scheduler.0.core.executor.enter(|| { - let param_types = param_types.into_iter().map(TypeId::new).collect::>(); - let product_type = TypeId::new(product_type); - - // TODO(#7117): we want to represent union types in the graph visualizer somehow!!! 
- let subgraph = py_scheduler - .0 - .core - .rule_graph - .subgraph(param_types, product_type) - .map_err(PyValueError::new_err)?; - - write_to_file(path.as_path(), &subgraph).map_err(|e| { - PyIOError::new_err(format!( - "Failed to visualize to {}: {:?}", - path.display(), - e - )) + py_scheduler.0.core.executor.enter(|| { + let param_types = param_types.into_iter().map(TypeId::new).collect::>(); + let product_type = TypeId::new(product_type); + + // TODO(#7117): we want to represent union types in the graph visualizer somehow!!! + let subgraph = py_scheduler + .0 + .core + .rule_graph + .subgraph(param_types, product_type) + .map_err(PyValueError::new_err)?; + + write_to_file(path.as_path(), &subgraph).map_err(|e| { + PyIOError::new_err(format!( + "Failed to visualize to {}: {:?}", + path.display(), + e + )) + }) }) - }) } pub(crate) fn generate_panic_string(payload: &(dyn Any + Send)) -> String { - match payload - .downcast_ref::() - .cloned() - .or_else(|| payload.downcast_ref::<&str>().map(|&s| s.to_string())) - { - Some(ref s) => format!("panic at '{s}'"), - None => format!("Non-string panic payload at {payload:p}"), - } + match payload + .downcast_ref::() + .cloned() + .or_else(|| payload.downcast_ref::<&str>().map(|&s| s.to_string())) + { + Some(ref s) => format!("panic at '{s}'"), + None => format!("Non-string panic payload at {payload:p}"), + } } /// Set up a panic handler, unless RUST_BACKTRACE is set. #[pyfunction] fn maybe_set_panic_handler() { - if std::env::var("RUST_BACKTRACE").unwrap_or_else(|_| "0".to_owned()) != "0" { - return; - } - panic::set_hook(Box::new(|panic_info| { - let payload = panic_info.payload(); - let mut panic_str = generate_panic_string(payload); - - if let Some(location) = panic_info.location() { - let panic_location_str = format!(", {}:{}", location.file(), location.line()); - panic_str.push_str(&panic_location_str); + if std::env::var("RUST_BACKTRACE").unwrap_or_else(|_| "0".to_owned()) != "0" { + return; } + panic::set_hook(Box::new(|panic_info| { + let payload = panic_info.payload(); + let mut panic_str = generate_panic_string(payload); - error!("{}", panic_str); + if let Some(location) = panic_info.location() { + let panic_location_str = format!(", {}:{}", location.file(), location.line()); + panic_str.push_str(&panic_location_str); + } - let panic_file_bug_str = "Please set RUST_BACKTRACE=1, re-run, and then file a bug at https://github.com/pantsbuild/pants/issues."; - error!("{}", panic_file_bug_str); - })); + error!("{}", panic_str); + + let panic_file_bug_str = "Please set RUST_BACKTRACE=1, re-run, and then file a bug at https://github.com/pantsbuild/pants/issues."; + error!("{}", panic_file_bug_str); + })); } #[pyfunction] fn garbage_collect_store( - py: Python, - py_scheduler: &PyScheduler, - target_size_bytes: usize, + py: Python, + py_scheduler: &PyScheduler, + target_size_bytes: usize, ) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - py.allow_threads(|| { - core.executor.block_on( - core - .store() - .garbage_collect(target_size_bytes, store::ShrinkBehavior::Fast), - ) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + py.allow_threads(|| { + core.executor.block_on( + core.store() + .garbage_collect(target_size_bytes, store::ShrinkBehavior::Fast), + ) + }) + .map_err(PyException::new_err) }) - .map_err(PyException::new_err) - }) } #[pyfunction] fn lease_files_in_graph( - py: Python, - py_scheduler: &PyScheduler, - py_session: &PySession, + py: Python, + py_scheduler: &PyScheduler, + 
py_session: &PySession, ) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - py.allow_threads(|| { - let digests = py_scheduler.0.all_digests(&py_session.0); - core - .executor - .block_on(core.store().lease_all_recursively(digests.iter())) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + py.allow_threads(|| { + let digests = py_scheduler.0.all_digests(&py_session.0); + core.executor + .block_on(core.store().lease_all_recursively(digests.iter())) + }) + .map_err(possible_store_missing_digest) }) - .map_err(possible_store_missing_digest) - }) } #[pyfunction] fn capture_snapshots( - py: Python, - py_scheduler: &PyScheduler, - py_session: &PySession, - path_globs_and_root_tuple_wrapper: &PyAny, + py: Python, + py_scheduler: &PyScheduler, + py_session: &PySession, + path_globs_and_root_tuple_wrapper: &PyAny, ) -> PyO3Result> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - // TODO: A parent_id should be an explicit argument. - py_session.0.workunit_store().init_thread_state(None); - - let values = externs::collect_iterable(path_globs_and_root_tuple_wrapper).unwrap(); - let path_globs_and_roots = values - .into_iter() - .map(|value| { - let root: PathBuf = externs::getattr(value, "root")?; - let path_globs = - nodes::Snapshot::lift_prepared_path_globs(externs::getattr(value, "path_globs")?); - let digest_hint = { - let maybe_digest: &PyAny = externs::getattr(value, "digest_hint")?; - if maybe_digest.is_none() { - None - } else { - Some(nodes::lift_directory_digest(maybe_digest)?) - } - }; - path_globs.map(|path_globs| (path_globs, root, digest_hint)) - }) - .collect::, _>>() - .map_err(PyValueError::new_err)?; - - py.allow_threads(|| { - let snapshot_futures = path_globs_and_roots - .into_iter() - .map(|(path_globs, root, digest_hint)| { - store::Snapshot::capture_snapshot_from_arbitrary_root( - core.store(), - core.executor.clone(), - root, - path_globs, - digest_hint, - ) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + // TODO: A parent_id should be an explicit argument. + py_session.0.workunit_store().init_thread_state(None); + + let values = externs::collect_iterable(path_globs_and_root_tuple_wrapper).unwrap(); + let path_globs_and_roots = values + .into_iter() + .map(|value| { + let root: PathBuf = externs::getattr(value, "root")?; + let path_globs = nodes::Snapshot::lift_prepared_path_globs(externs::getattr( + value, + "path_globs", + )?); + let digest_hint = { + let maybe_digest: &PyAny = externs::getattr(value, "digest_hint")?; + if maybe_digest.is_none() { + None + } else { + Some(nodes::lift_directory_digest(maybe_digest)?) + } + }; + path_globs.map(|path_globs| (path_globs, root, digest_hint)) + }) + .collect::, _>>() + .map_err(PyValueError::new_err)?; + + py.allow_threads(|| { + let snapshot_futures = path_globs_and_roots + .into_iter() + .map(|(path_globs, root, digest_hint)| { + store::Snapshot::capture_snapshot_from_arbitrary_root( + core.store(), + core.executor.clone(), + root, + path_globs, + digest_hint, + ) + }) + .collect::>(); + + Ok(core + .executor + .block_on(future::try_join_all(snapshot_futures)) + .map_err(PyException::new_err)? + .into_iter() + .map(externs::fs::PySnapshot) + .collect()) }) - .collect::>(); - - Ok( - core - .executor - .block_on(future::try_join_all(snapshot_futures)) - .map_err(PyException::new_err)? 
- .into_iter() - .map(externs::fs::PySnapshot) - .collect(), - ) }) - }) } #[pyfunction] fn ensure_remote_has_recursive( - py: Python, - py_scheduler: &PyScheduler, - py_digests: &PyList, + py: Python, + py_scheduler: &PyScheduler, + py_digests: &PyList, ) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - // NB: Supports either a PyFileDigest or PyDigest as input. - let digests: Vec = py_digests - .iter() - .map(|value| { - crate::nodes::lift_directory_digest(value) - .map(|dd| dd.as_digest()) - .or_else(|_| crate::nodes::lift_file_digest(value)) - }) - .collect::, _>>() - .map_err(PyException::new_err)?; - - py.allow_threads(|| { - core - .executor - .block_on(core.store().ensure_remote_has_recursive(digests)) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + // NB: Supports either a PyFileDigest or PyDigest as input. + let digests: Vec = py_digests + .iter() + .map(|value| { + crate::nodes::lift_directory_digest(value) + .map(|dd| dd.as_digest()) + .or_else(|_| crate::nodes::lift_file_digest(value)) + }) + .collect::, _>>() + .map_err(PyException::new_err)?; + + py.allow_threads(|| { + core.executor + .block_on(core.store().ensure_remote_has_recursive(digests)) + }) + .map_err(possible_store_missing_digest)?; + Ok(()) }) - .map_err(possible_store_missing_digest)?; - Ok(()) - }) } #[pyfunction] fn ensure_directory_digest_persisted( - py: Python, - py_scheduler: &PyScheduler, - py_digest: &PyAny, + py: Python, + py_scheduler: &PyScheduler, + py_digest: &PyAny, ) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - let digest = crate::nodes::lift_directory_digest(py_digest).map_err(PyException::new_err)?; + let core = &py_scheduler.0.core; + core.executor.enter(|| { + let digest = + crate::nodes::lift_directory_digest(py_digest).map_err(PyException::new_err)?; - py.allow_threads(|| { - core - .executor - .block_on(core.store().ensure_directory_digest_persisted(digest)) + py.allow_threads(|| { + core.executor + .block_on(core.store().ensure_directory_digest_persisted(digest)) + }) + .map_err(possible_store_missing_digest)?; + Ok(()) }) - .map_err(possible_store_missing_digest)?; - Ok(()) - }) } #[pyfunction] fn single_file_digests_to_bytes<'py>( - py: Python<'py>, - py_scheduler: &PyScheduler, - py_file_digests: Vec, + py: Python<'py>, + py_scheduler: &PyScheduler, + py_file_digests: Vec, ) -> PyO3Result<&'py PyList> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - let digest_futures = py_file_digests.into_iter().map(|py_file_digest| { - let store = core.store(); - async move { - store - .load_file_bytes_with(py_file_digest.0, |bytes| { - Python::with_gil(|py| externs::store_bytes(py, bytes)) - }) - .await - } - }); - - let bytes_values: Vec = py - .allow_threads(|| core.executor.block_on(future::try_join_all(digest_futures))) - .map(|values| values.into_iter().map(|val| val.into()).collect()) - .map_err(possible_store_missing_digest)?; - - let output_list = PyList::new(py, &bytes_values); - Ok(output_list) - }) + let core = &py_scheduler.0.core; + core.executor.enter(|| { + let digest_futures = py_file_digests.into_iter().map(|py_file_digest| { + let store = core.store(); + async move { + store + .load_file_bytes_with(py_file_digest.0, |bytes| { + Python::with_gil(|py| externs::store_bytes(py, bytes)) + }) + .await + } + }); + + let bytes_values: Vec = py + .allow_threads(|| core.executor.block_on(future::try_join_all(digest_futures))) + .map(|values| values.into_iter().map(|val| 
val.into()).collect()) + .map_err(possible_store_missing_digest)?; + + let output_list = PyList::new(py, &bytes_values); + Ok(output_list) + }) } fn ensure_path_doesnt_exist(path: &Path) -> io::Result<()> { - match std::fs::remove_file(path) { - Ok(()) => Ok(()), - Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()), - // Always fall through to remove_dir_all unless the path definitely doesn't exist, because - // std::io::ErrorKind::IsADirectory is unstable https://github.com/rust-lang/rust/issues/86442 - // - // NB. we don't need to check this returning NotFound because remove_file will identify that - // above (except if there's a concurrent removal, which is out of scope) - Err(_) => std::fs::remove_dir_all(path), - } + match std::fs::remove_file(path) { + Ok(()) => Ok(()), + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()), + // Always fall through to remove_dir_all unless the path definitely doesn't exist, because + // std::io::ErrorKind::IsADirectory is unstable https://github.com/rust-lang/rust/issues/86442 + // + // NB. we don't need to check this returning NotFound because remove_file will identify that + // above (except if there's a concurrent removal, which is out of scope) + Err(_) => std::fs::remove_dir_all(path), + } } #[pyfunction] fn write_digest( - py: Python, - py_scheduler: &PyScheduler, - py_session: &PySession, - digest: &PyAny, - path_prefix: String, - clear_paths: Vec, + py: Python, + py_scheduler: &PyScheduler, + py_session: &PySession, + digest: &PyAny, + path_prefix: String, + clear_paths: Vec, ) -> PyO3Result<()> { - let core = &py_scheduler.0.core; - core.executor.enter(|| { - // TODO: A parent_id should be an explicit argument. - py_session.0.workunit_store().init_thread_state(None); - - let lifted_digest = nodes::lift_directory_digest(digest).map_err(PyValueError::new_err)?; - - // Python will have already validated that path_prefix is a relative path. - let path_prefix = Path::new(&path_prefix); - let mut destination = PathBuf::new(); - destination.push(&core.build_root); - destination.push(path_prefix); - - for subpath in &clear_paths { - let resolved = destination.join(subpath); - ensure_path_doesnt_exist(&resolved).map_err(|e| { - PyIOError::new_err(format!( - "Failed to clear {} when writing digest: {e}", - resolved.display() - )) - })?; - } - - block_in_place_and_wait(py, || async move { - let store = core.store(); - store - .materialize_directory( - destination.clone(), - &core.build_root, - lifted_digest.clone(), - true, // Force everything we write to be mutable - &BTreeSet::new(), - fs::Permissions::Writable, - ) - .await?; - - // Invalidate all the paths we've changed within `path_prefix`: both the paths we cleared and - // the files we've just written to. - let snapshot = store::Snapshot::from_digest(store, lifted_digest).await?; - let written_paths = snapshot.tree.leaf_paths(); - let written_paths = written_paths.iter().map(|p| p as &Path); - - let cleared_paths = clear_paths.iter().map(Path::new); - - let changed_paths = written_paths - .chain(cleared_paths) - .map(|p| path_prefix.join(p)) - .collect(); - - py_scheduler.0.invalidate_paths(&changed_paths); + let core = &py_scheduler.0.core; + core.executor.enter(|| { + // TODO: A parent_id should be an explicit argument. + py_session.0.workunit_store().init_thread_state(None); + + let lifted_digest = nodes::lift_directory_digest(digest).map_err(PyValueError::new_err)?; + + // Python will have already validated that path_prefix is a relative path. 
+ let path_prefix = Path::new(&path_prefix); + let mut destination = PathBuf::new(); + destination.push(&core.build_root); + destination.push(path_prefix); + + for subpath in &clear_paths { + let resolved = destination.join(subpath); + ensure_path_doesnt_exist(&resolved).map_err(|e| { + PyIOError::new_err(format!( + "Failed to clear {} when writing digest: {e}", + resolved.display() + )) + })?; + } - Ok(()) + block_in_place_and_wait(py, || async move { + let store = core.store(); + store + .materialize_directory( + destination.clone(), + &core.build_root, + lifted_digest.clone(), + true, // Force everything we write to be mutable + &BTreeSet::new(), + fs::Permissions::Writable, + ) + .await?; + + // Invalidate all the paths we've changed within `path_prefix`: both the paths we cleared and + // the files we've just written to. + let snapshot = store::Snapshot::from_digest(store, lifted_digest).await?; + let written_paths = snapshot.tree.leaf_paths(); + let written_paths = written_paths.iter().map(|p| p as &Path); + + let cleared_paths = clear_paths.iter().map(Path::new); + + let changed_paths = written_paths + .chain(cleared_paths) + .map(|p| path_prefix.join(p)) + .collect(); + + py_scheduler.0.invalidate_paths(&changed_paths); + + Ok(()) + }) + .map_err(possible_store_missing_digest) }) - .map_err(possible_store_missing_digest) - }) } #[pyfunction] fn stdio_initialize( - level: u64, - show_rust_3rdparty_logs: bool, - show_target: bool, - log_levels_by_target: HashMap, - literal_filters: Vec, - regex_filters: Vec, - log_file_path: PathBuf, + level: u64, + show_rust_3rdparty_logs: bool, + show_target: bool, + log_levels_by_target: HashMap, + literal_filters: Vec, + regex_filters: Vec, + log_file_path: PathBuf, ) -> PyO3Result<( - externs::stdio::PyStdioRead, - externs::stdio::PyStdioWrite, - externs::stdio::PyStdioWrite, + externs::stdio::PyStdioRead, + externs::stdio::PyStdioWrite, + externs::stdio::PyStdioWrite, )> { - let regex_filters = regex_filters + let regex_filters = regex_filters .iter() .map(|re| { Regex::new(re).map_err(|e| { @@ -1741,98 +1737,98 @@ fn stdio_initialize( }) .collect::, _>>()?; - Logger::init( - level, - show_rust_3rdparty_logs, - show_target, - log_levels_by_target, - literal_filters, - regex_filters, - log_file_path, - ) - .map_err(|s| PyException::new_err(format!("Could not initialize logging: {s}")))?; - - Ok(( - externs::stdio::PyStdioRead, - externs::stdio::PyStdioWrite { is_stdout: true }, - externs::stdio::PyStdioWrite { is_stdout: false }, - )) + Logger::init( + level, + show_rust_3rdparty_logs, + show_target, + log_levels_by_target, + literal_filters, + regex_filters, + log_file_path, + ) + .map_err(|s| PyException::new_err(format!("Could not initialize logging: {s}")))?; + + Ok(( + externs::stdio::PyStdioRead, + externs::stdio::PyStdioWrite { is_stdout: true }, + externs::stdio::PyStdioWrite { is_stdout: false }, + )) } #[pyfunction] fn stdio_thread_console_set(stdin_fileno: i32, stdout_fileno: i32, stderr_fileno: i32) { - let destination = stdio::new_console_destination(stdin_fileno, stdout_fileno, stderr_fileno); - stdio::set_thread_destination(destination); + let destination = stdio::new_console_destination(stdin_fileno, stdout_fileno, stderr_fileno); + stdio::set_thread_destination(destination); } #[pyfunction] fn stdio_thread_console_color_mode_set(use_color: bool) { - stdio::get_destination().stderr_set_use_color(use_color); + stdio::get_destination().stderr_set_use_color(use_color); } #[pyfunction] fn stdio_thread_console_clear() { - 
stdio::get_destination().console_clear(); + stdio::get_destination().console_clear(); } // TODO: Deprecated, but without easy access to the decorator. Use // `PyThreadLocals::get_for_current_thread` instead. Remove in Pants 2.17.0.dev0. #[pyfunction] fn stdio_thread_get_destination() -> PyStdioDestination { - PyStdioDestination(PyThreadLocals::get()) + PyStdioDestination(PyThreadLocals::get()) } // TODO: Deprecated, but without easy access to the decorator. Use // `PyThreadLocals::set_for_current_thread` instead. Remove in Pants 2.17.0.dev0. #[pyfunction] fn stdio_thread_set_destination(stdio_destination: &PyStdioDestination) { - stdio_destination.0.set_for_current_thread(); + stdio_destination.0.set_for_current_thread(); } // TODO: Needs to be thread-local / associated with the Console. #[pyfunction] fn set_per_run_log_path(py: Python, log_path: Option) { - py.allow_threads(|| { - PANTS_LOGGER.set_per_run_logs(log_path); - }) + py.allow_threads(|| { + PANTS_LOGGER.set_per_run_logs(log_path); + }) } #[pyfunction] fn write_log(py: Python, msg: String, level: u64, target: String) { - py.allow_threads(|| { - Logger::log_from_python(&msg, level, &target).expect("Error logging message"); - }) + py.allow_threads(|| { + Logger::log_from_python(&msg, level, &target).expect("Error logging message"); + }) } #[pyfunction] fn task_side_effected() -> PyO3Result<()> { - nodes::task_side_effected().map_err(PyException::new_err) + nodes::task_side_effected().map_err(PyException::new_err) } #[pyfunction] fn teardown_dynamic_ui(py: Python, py_scheduler: &PyScheduler, py_session: &PySession) { - py_scheduler.0.core.executor.enter(|| { - let _ = block_in_place_and_wait(py, || { - py_session - .0 - .maybe_display_teardown() - .unit_error() - .boxed_local() - }); - }) + py_scheduler.0.core.executor.enter(|| { + let _ = block_in_place_and_wait(py, || { + py_session + .0 + .maybe_display_teardown() + .unit_error() + .boxed_local() + }); + }) } #[pyfunction] fn flush_log(py: Python) { - py.allow_threads(|| { - PANTS_LOGGER.flush(); - }) + py.allow_threads(|| { + PANTS_LOGGER.flush(); + }) } fn write_to_file(path: &Path, graph: &RuleGraph) -> io::Result<()> { - let file = File::create(path)?; - let mut f = io::BufWriter::new(file); - graph.visualize(&mut f) + let file = File::create(path)?; + let mut f = io::BufWriter::new(file); + graph.visualize(&mut f) } /// @@ -1845,12 +1841,12 @@ fn write_to_file(path: &Path, graph: &RuleGraph) -> io::Result<()> { /// fn block_in_place_and_wait(py: Python, f: impl FnOnce() -> F + Sync + Send) -> Result where - F: Future>, - T: Send, - E: Send, + F: Future>, + T: Send, + E: Send, { - py.allow_threads(|| { - let future = f(); - tokio::task::block_in_place(|| futures::executor::block_on(future)) - }) + py.allow_threads(|| { + let future = f(); + tokio::task::block_in_place(|| futures::executor::block_on(future)) + }) } diff --git a/src/rust/engine/src/externs/interface_tests.rs b/src/rust/engine/src/externs/interface_tests.rs index a55bfe36c79..cc3f2f09f9c 100644 --- a/src/rust/engine/src/externs/interface_tests.rs +++ b/src/rust/engine/src/externs/interface_tests.rs @@ -6,19 +6,19 @@ use std::any::Any; #[test] fn test_panic_string() { - let a: &str = "a str panic payload"; - assert_eq!( - generate_panic_string(&a as &(dyn Any + Send)), - "panic at 'a str panic payload'" - ); + let a: &str = "a str panic payload"; + assert_eq!( + generate_panic_string(&a as &(dyn Any + Send)), + "panic at 'a str panic payload'" + ); - let b: String = "a String panic payload".to_string(); - 
assert_eq!( - generate_panic_string(&b as &(dyn Any + Send)), - "panic at 'a String panic payload'" - ); + let b: String = "a String panic payload".to_string(); + assert_eq!( + generate_panic_string(&b as &(dyn Any + Send)), + "panic at 'a String panic payload'" + ); - let c: u32 = 18; - let output = generate_panic_string(&c as &(dyn Any + Send)); - assert!(output.contains("Non-string panic payload at")); + let c: u32 = 18; + let output = generate_panic_string(&c as &(dyn Any + Send)); + assert!(output.contains("Non-string panic payload at")); } diff --git a/src/rust/engine/src/externs/mod.rs b/src/rust/engine/src/externs/mod.rs index af6107b1665..13bc78cd5c1 100644 --- a/src/rust/engine/src/externs/mod.rs +++ b/src/rust/engine/src/externs/mod.rs @@ -40,18 +40,18 @@ pub mod testutil; pub mod workunits; pub fn register(py: Python, m: &PyModule) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; - m.add("EngineError", py.get_type::())?; - m.add("IntrinsicError", py.get_type::())?; - m.add( - "IncorrectProductError", - py.get_type::(), - )?; + m.add("EngineError", py.get_type::())?; + m.add("IntrinsicError", py.get_type::())?; + m.add( + "IncorrectProductError", + py.get_type::(), + )?; - Ok(()) + Ok(()) } create_exception!(native_engine, EngineError, PyException); @@ -64,14 +64,14 @@ pub struct PyFailure(pub Failure); #[pymethods] impl PyFailure { - fn get_error(&self, py: Python) -> PyErr { - match &self.0 { - Failure::Throw { val, .. } => val.into_py(py), - f @ (Failure::Invalidated | Failure::MissingDigest { .. }) => { - EngineError::new_err(format!("{f}")) - } + fn get_error(&self, py: Python) -> PyErr { + match &self.0 { + Failure::Throw { val, .. } => val.into_py(py), + f @ (Failure::Invalidated | Failure::MissingDigest { .. }) => { + EngineError::new_err(format!("{f}")) + } + } } - } } // TODO: We import this exception type because `pyo3` doesn't support declaring exceptions with @@ -79,82 +79,82 @@ impl PyFailure { import_exception!(pants.base.exceptions, NativeEngineFailure); pub fn equals(h1: &PyAny, h2: &PyAny) -> bool { - // NB: Although it does not precisely align with Python's definition of equality, we ban matches - // between non-equal types to avoid legacy behavior like `assert True == 1`, which is very - // surprising in interning, and would likely be surprising anywhere else in the engine where we - // compare things. - if !h1.get_type().is(h2.get_type()) { - return false; - } - h1.eq(h2).unwrap() + // NB: Although it does not precisely align with Python's definition of equality, we ban matches + // between non-equal types to avoid legacy behavior like `assert True == 1`, which is very + // surprising in interning, and would likely be surprising anywhere else in the engine where we + // compare things. + if !h1.get_type().is(h2.get_type()) { + return false; + } + h1.eq(h2).unwrap() } /// Return true if the given type is a @union. /// /// This function is also implemented in Python as `pants.engine.union.is_union`. pub fn is_union(py: Python, v: &PyType) -> PyResult { - let is_union_for_attr = intern!(py, "_is_union_for"); - if !v.hasattr(is_union_for_attr)? { - return Ok(false); - } + let is_union_for_attr = intern!(py, "_is_union_for"); + if !v.hasattr(is_union_for_attr)? 
{ + return Ok(false); + } - let is_union_for = v.getattr(is_union_for_attr)?; - Ok(is_union_for.is(v)) + let is_union_for = v.getattr(is_union_for_attr)?; + Ok(is_union_for.is(v)) } /// If the given type is a @union, return its in-scope types. /// /// This function is also implemented in Python as `pants.engine.union.union_in_scope_types`. pub fn union_in_scope_types<'p>( - py: Python<'p>, - v: &'p PyType, + py: Python<'p>, + v: &'p PyType, ) -> PyResult>> { - if !is_union(py, v)? { - return Ok(None); - } + if !is_union(py, v)? { + return Ok(None); + } - let union_in_scope_types: Vec<&PyType> = - v.getattr(intern!(py, "_union_in_scope_types"))?.extract()?; - Ok(Some(union_in_scope_types)) + let union_in_scope_types: Vec<&PyType> = + v.getattr(intern!(py, "_union_in_scope_types"))?.extract()?; + Ok(Some(union_in_scope_types)) } pub fn store_tuple(py: Python, values: Vec) -> Value { - let arg_handles: Vec<_> = values - .into_iter() - .map(|v| v.consume_into_py_object(py)) - .collect(); - Value::from(PyTuple::new(py, &arg_handles).to_object(py)) + let arg_handles: Vec<_> = values + .into_iter() + .map(|v| v.consume_into_py_object(py)) + .collect(); + Value::from(PyTuple::new(py, &arg_handles).to_object(py)) } /// Store a slice containing 2-tuples of (key, value) as a Python dictionary. pub fn store_dict(py: Python, keys_and_values: Vec<(Value, Value)>) -> PyResult { - let dict = PyDict::new(py); - for (k, v) in keys_and_values { - dict.set_item(k.consume_into_py_object(py), v.consume_into_py_object(py))?; - } - Ok(Value::from(dict.to_object(py))) + let dict = PyDict::new(py); + for (k, v) in keys_and_values { + dict.set_item(k.consume_into_py_object(py), v.consume_into_py_object(py))?; + } + Ok(Value::from(dict.to_object(py))) } /// Store an opaque buffer of bytes to pass to Python. This will end up as a Python `bytes`. pub fn store_bytes(py: Python, bytes: &[u8]) -> Value { - Value::from(PyBytes::new(py, bytes).to_object(py)) + Value::from(PyBytes::new(py, bytes).to_object(py)) } /// Store a buffer of utf8 bytes to pass to Python. This will end up as a Python `str`. pub fn store_utf8(py: Python, utf8: &str) -> Value { - Value::from(utf8.to_object(py)) + Value::from(utf8.to_object(py)) } pub fn store_u64(py: Python, val: u64) -> Value { - Value::from(val.to_object(py)) + Value::from(val.to_object(py)) } pub fn store_i64(py: Python, val: i64) -> Value { - Value::from(val.to_object(py)) + Value::from(val.to_object(py)) } pub fn store_bool(py: Python, val: bool) -> Value { - Value::from(val.to_object(py)) + Value::from(val.to_object(py)) } /// @@ -162,109 +162,109 @@ pub fn store_bool(py: Python, val: bool) -> Value { /// pub fn getattr<'py, T>(value: &'py PyAny, field: &str) -> Result where - T: FromPyObject<'py>, + T: FromPyObject<'py>, { - value - .getattr(field) - .map_err(|e| format!("Could not get field `{field}`: {e:?}"))? - .extract::() - .map_err(|e| { - format!( - "Field `{}` was not convertible to type {}: {:?}", - field, - core::any::type_name::(), - e - ) - }) + value + .getattr(field) + .map_err(|e| format!("Could not get field `{field}`: {e:?}"))? + .extract::() + .map_err(|e| { + format!( + "Field `{}` was not convertible to type {}: {:?}", + field, + core::any::type_name::(), + e + ) + }) } /// /// Collect the Values contained within an outer Python Iterable PyObject. 
/// pub fn collect_iterable(value: &PyAny) -> Result, String> { - match value.iter() { - Ok(py_iter) => py_iter - .enumerate() - .map(|(i, py_res)| { - py_res.map_err(|py_err| { - format!( - "Could not iterate {}, failed to extract {}th item: {:?}", + match value.iter() { + Ok(py_iter) => py_iter + .enumerate() + .map(|(i, py_res)| { + py_res.map_err(|py_err| { + format!( + "Could not iterate {}, failed to extract {}th item: {:?}", + val_to_str(value), + i, + py_err + ) + }) + }) + .collect(), + Err(py_err) => Err(format!( + "Could not iterate {}: {:?}", val_to_str(value), - i, py_err - ) - }) - }) - .collect(), - Err(py_err) => Err(format!( - "Could not iterate {}: {:?}", - val_to_str(value), - py_err - )), - } + )), + } } /// Read a `FrozenDict[str, T]`. pub fn getattr_from_str_frozendict<'p, T: FromPyObject<'p>>( - value: &'p PyAny, - field: &str, + value: &'p PyAny, + field: &str, ) -> BTreeMap { - let frozendict = getattr(value, field).unwrap(); - let pydict: &PyDict = getattr(frozendict, "_data").unwrap(); - pydict - .items() - .into_iter() - .map(|kv_pair| kv_pair.extract().unwrap()) - .collect() + let frozendict = getattr(value, field).unwrap(); + let pydict: &PyDict = getattr(frozendict, "_data").unwrap(); + pydict + .items() + .into_iter() + .map(|kv_pair| kv_pair.extract().unwrap()) + .collect() } pub fn getattr_as_optional_string(value: &PyAny, field: &str) -> PyResult> { - // TODO: It's possible to view a python string as a `Cow`, so we could avoid actually - // cloning in some cases. - value.getattr(field)?.extract() + // TODO: It's possible to view a python string as a `Cow`, so we could avoid actually + // cloning in some cases. + value.getattr(field)?.extract() } /// Call the equivalent of `str()` on an arbitrary Python object. /// /// Converts `None` to the empty string. pub fn val_to_str(obj: &PyAny) -> String { - if obj.is_none() { - return "".to_string(); - } - obj.str().unwrap().extract().unwrap() + if obj.is_none() { + return "".to_string(); + } + obj.str().unwrap().extract().unwrap() } pub fn val_to_log_level(obj: &PyAny) -> Result { - let res: Result = getattr(obj, "_level").and_then(|n: u64| { - n.try_into() - .map_err(|e: num_enum::TryFromPrimitiveError<_>| { - format!("Could not parse {:?} as a LogLevel: {}", val_to_str(obj), e) - }) - }); - res.map(|py_level| py_level.into()) + let res: Result = getattr(obj, "_level").and_then(|n: u64| { + n.try_into() + .map_err(|e: num_enum::TryFromPrimitiveError<_>| { + format!("Could not parse {:?} as a LogLevel: {}", val_to_str(obj), e) + }) + }); + res.map(|py_level| py_level.into()) } /// Link to the Pants docs using the current version of Pants. 
pub fn doc_url(py: Python, slug: &str) -> String { - let docutil_module = py.import("pants.util.docutil").unwrap(); - let doc_url_func = docutil_module.getattr("doc_url").unwrap(); - doc_url_func.call1((slug,)).unwrap().extract().unwrap() + let docutil_module = py.import("pants.util.docutil").unwrap(); + let doc_url_func = docutil_module.getattr("doc_url").unwrap(); + doc_url_func.call1((slug,)).unwrap().extract().unwrap() } pub fn create_exception(py: Python, msg: String) -> Value { - Value::new(IntrinsicError::new_err(msg).into_py(py)) + Value::new(IntrinsicError::new_err(msg).into_py(py)) } pub fn call_function<'py>(func: &'py PyAny, args: &[Value]) -> PyResult<&'py PyAny> { - let args: Vec = args.iter().map(|v| v.clone().into()).collect(); - let args_tuple = PyTuple::new(func.py(), &args); - func.call1(args_tuple) + let args: Vec = args.iter().map(|v| v.clone().into()).collect(); + let args_tuple = PyTuple::new(func.py(), &args); + func.call1(args_tuple) } pub(crate) enum GeneratorInput { - Initial, - Arg(Value), - Err(PyErr), + Initial, + Arg(Value), + Err(PyErr), } /// @@ -277,195 +277,195 @@ pub(crate) enum GeneratorInput { /// - a coroutine will eventually return a single return value. /// pub(crate) fn generator_send( - py: Python, - generator_type: &TypeId, - generator: &Value, - input: GeneratorInput, + py: Python, + generator_type: &TypeId, + generator: &Value, + input: GeneratorInput, ) -> Result { - let (response_unhandled, maybe_thrown) = match input { - GeneratorInput::Arg(arg) => { - let response = generator - .getattr(py, intern!(py, "send"))? - .call1(py, (&*arg,)); - (response, None) - } - GeneratorInput::Err(err) => { - let throw_method = generator.getattr(py, intern!(py, "throw"))?; - if err.is_instance_of::(py) { - let throw = err - .value(py) - .getattr(intern!(py, "failure"))? - .extract::>()? - .get_error(py); - let response = throw_method.call1(py, (&throw,)); - (response, Some((throw, err))) - } else { - let response = throw_method.call1(py, (err,)); - (response, None) - } - } - GeneratorInput::Initial => { - let response = generator - .getattr(py, intern!(py, "send"))? - .call1(py, (&py.None(),)); - (response, None) - } - }; + let (response_unhandled, maybe_thrown) = match input { + GeneratorInput::Arg(arg) => { + let response = generator + .getattr(py, intern!(py, "send"))? + .call1(py, (&*arg,)); + (response, None) + } + GeneratorInput::Err(err) => { + let throw_method = generator.getattr(py, intern!(py, "throw"))?; + if err.is_instance_of::(py) { + let throw = err + .value(py) + .getattr(intern!(py, "failure"))? + .extract::>()? + .get_error(py); + let response = throw_method.call1(py, (&throw,)); + (response, Some((throw, err))) + } else { + let response = throw_method.call1(py, (err,)); + (response, None) + } + } + GeneratorInput::Initial => { + let response = generator + .getattr(py, intern!(py, "send"))? + .call1(py, (&py.None(),)); + (response, None) + } + }; - let response = match response_unhandled { - Err(e) if e.is_instance_of::(py) => { - let value = e.into_value(py).getattr(py, intern!(py, "value"))?; - let type_id = TypeId::new(value.as_ref(py).get_type()); - return Ok(GeneratorResponse::Break(Value::new(value), type_id)); - } - Err(e) => { - match (maybe_thrown, e.cause(py)) { - (Some((thrown, err)), Some(cause)) if thrown.value(py).is(cause.value(py)) => { - // Preserve the engine traceback by using the wrapped failure error as cause. 
The cause - // will be swapped back again in `Failure::from_py_err_with_gil()` to preserve the python - // traceback. - e.set_cause(py, Some(err)); + let response = match response_unhandled { + Err(e) if e.is_instance_of::(py) => { + let value = e.into_value(py).getattr(py, intern!(py, "value"))?; + let type_id = TypeId::new(value.as_ref(py).get_type()); + return Ok(GeneratorResponse::Break(Value::new(value), type_id)); } - _ => (), - }; - return Err(e.into()); - } - Ok(r) => r, - }; - - let result = if let Ok(call) = response.extract::>(py) { - Ok(GeneratorResponse::Call(call.take()?)) - } else if let Ok(get) = response.extract::>(py) { - // It isn't necessary to differentiate between `Get` and `Effect` here, as the static - // analysis of `@rule`s has already validated usage. - Ok(GeneratorResponse::Get(get.take()?)) - } else if let Ok(get_multi) = response.downcast::(py) { - // Was an `All` or `MultiGet`. - let gogs = get_multi - .iter()? - .map(|gog| { - let gog = gog?; - // TODO: Find a better way to check whether something is a coroutine... this seems - // unnecessarily awkward. - if gog.is_instance(generator_type.as_py_type(py).into())? { - Ok(GetOrGenerator::Generator(Value::new(gog.into()))) - } else if let Ok(get) = gog.extract::>() { - Ok(GetOrGenerator::Get( - get.take().map_err(PyException::new_err)?, - )) - } else { - Err(PyValueError::new_err(format!( + Err(e) => { + match (maybe_thrown, e.cause(py)) { + (Some((thrown, err)), Some(cause)) if thrown.value(py).is(cause.value(py)) => { + // Preserve the engine traceback by using the wrapped failure error as cause. The cause + // will be swapped back again in `Failure::from_py_err_with_gil()` to preserve the python + // traceback. + e.set_cause(py, Some(err)); + } + _ => (), + }; + return Err(e.into()); + } + Ok(r) => r, + }; + + let result = if let Ok(call) = response.extract::>(py) { + Ok(GeneratorResponse::Call(call.take()?)) + } else if let Ok(get) = response.extract::>(py) { + // It isn't necessary to differentiate between `Get` and `Effect` here, as the static + // analysis of `@rule`s has already validated usage. + Ok(GeneratorResponse::Get(get.take()?)) + } else if let Ok(get_multi) = response.downcast::(py) { + // Was an `All` or `MultiGet`. + let gogs = get_multi + .iter()? + .map(|gog| { + let gog = gog?; + // TODO: Find a better way to check whether something is a coroutine... this seems + // unnecessarily awkward. + if gog.is_instance(generator_type.as_py_type(py).into())? { + Ok(GetOrGenerator::Generator(Value::new(gog.into()))) + } else if let Ok(get) = gog.extract::>() { + Ok(GetOrGenerator::Get( + get.take().map_err(PyException::new_err)?, + )) + } else { + Err(PyValueError::new_err(format!( "Expected an `All` or `MultiGet` to receive either `Get`s or calls to rules, \ but got: {response}" ))) - } - }) - .collect::, _>>()?; - Ok(GeneratorResponse::All(gogs)) - } else { - Err(PyValueError::new_err(format!( + } + }) + .collect::, _>>()?; + Ok(GeneratorResponse::All(gogs)) + } else { + Err(PyValueError::new_err(format!( "Async @rule error. Expected a rule query such as `Get(..)` or similar, but got: {response}" ))) - }; + }; - Ok(result?) + Ok(result?) } /// NB: Panics on failure. Only recommended for use with built-in types, such as /// those configured in types::Types. 
pub fn unsafe_call(py: Python, type_id: TypeId, args: &[Value]) -> Value { - let py_type = type_id.as_py_type(py); - call_function(py_type, args) - .map(|obj| Value::new(obj.into_py(py))) - .unwrap_or_else(|e| { - panic!( - "Core type constructor `{}` failed: {:?}", - py_type.name().unwrap(), - e - ); - }) + let py_type = type_id.as_py_type(py); + call_function(py_type, args) + .map(|obj| Value::new(obj.into_py(py))) + .unwrap_or_else(|e| { + panic!( + "Core type constructor `{}` failed: {:?}", + py_type.name().unwrap(), + e + ); + }) } lazy_static! { - pub static ref INTERNS: Interns = Interns::new(); + pub static ref INTERNS: Interns = Interns::new(); } /// Interprets the `Get` and `implicitly(..)` syntax, which reduces to two optional positional /// arguments, and results in input types and inputs. #[allow(clippy::type_complexity)] fn interpret_get_inputs( - py: Python, - input_arg0: Option<&PyAny>, - input_arg1: Option<&PyAny>, + py: Python, + input_arg0: Option<&PyAny>, + input_arg1: Option<&PyAny>, ) -> PyResult<(SmallVec<[TypeId; 2]>, SmallVec<[Key; 2]>)> { - match (input_arg0, input_arg1) { - (None, None) => Ok((smallvec![], smallvec![])), - (None, Some(_)) => Err(PyAssertionError::new_err( - "input_arg1 set, but input_arg0 was None. This should not happen with PyO3.", - )), - (Some(input_arg0), None) => { - if input_arg0.is_instance_of::() { - return Err(PyTypeError::new_err(format!( - "Invalid Get. Because you are using the shorthand form \ + match (input_arg0, input_arg1) { + (None, None) => Ok((smallvec![], smallvec![])), + (None, Some(_)) => Err(PyAssertionError::new_err( + "input_arg1 set, but input_arg0 was None. This should not happen with PyO3.", + )), + (Some(input_arg0), None) => { + if input_arg0.is_instance_of::() { + return Err(PyTypeError::new_err(format!( + "Invalid Get. Because you are using the shorthand form \ Get(OutputType, InputType(constructor args)), the second argument should be \ a constructor call, rather than a type, but given {input_arg0}." - ))); - } - if let Ok(d) = input_arg0.downcast::() { - let mut input_types = SmallVec::new(); - let mut inputs = SmallVec::new(); - for (value, declared_type) in d.iter() { - input_types.push(TypeId::new(declared_type.downcast::().map_err( - |_| { - PyTypeError::new_err( + ))); + } + if let Ok(d) = input_arg0.downcast::() { + let mut input_types = SmallVec::new(); + let mut inputs = SmallVec::new(); + for (value, declared_type) in d.iter() { + input_types.push(TypeId::new(declared_type.downcast::().map_err( + |_| { + PyTypeError::new_err( "Invalid Get. 
Because the second argument was a dict, we expected the keys of the \ dict to be the Get inputs, and the values of the dict to be the declared \ types of those inputs.", ) - }, - )?)); - inputs.push(INTERNS.key_insert(py, value.into())?); + }, + )?)); + inputs.push(INTERNS.key_insert(py, value.into())?); + } + Ok((input_types, inputs)) + } else { + Ok(( + smallvec![TypeId::new(input_arg0.get_type())], + smallvec![INTERNS.key_insert(py, input_arg0.into())?], + )) + } } - Ok((input_types, inputs)) - } else { - Ok(( - smallvec![TypeId::new(input_arg0.get_type())], - smallvec![INTERNS.key_insert(py, input_arg0.into())?], - )) - } - } - (Some(input_arg0), Some(input_arg1)) => { - let declared_type = input_arg0.downcast::().map_err(|_| { - let input_arg0_type = input_arg0.get_type(); - PyTypeError::new_err(format!( + (Some(input_arg0), Some(input_arg1)) => { + let declared_type = input_arg0.downcast::().map_err(|_| { + let input_arg0_type = input_arg0.get_type(); + PyTypeError::new_err(format!( "Invalid Get. Because you are using the longhand form Get(OutputType, InputType, \ input), the second argument must be a type, but given `{input_arg0}` of type \ {input_arg0_type}." )) - })?; + })?; - if input_arg1.is_instance_of::() { - return Err(PyTypeError::new_err(format!( - "Invalid Get. Because you are using the longhand form \ + if input_arg1.is_instance_of::() { + return Err(PyTypeError::new_err(format!( + "Invalid Get. Because you are using the longhand form \ Get(OutputType, InputType, input), the third argument should be \ an object, rather than a type, but given {input_arg1}." - ))); - } + ))); + } - let actual_type = input_arg1.get_type(); - if !declared_type.is(actual_type) && !is_union(py, declared_type)? { - return Err(PyTypeError::new_err(format!( + let actual_type = input_arg1.get_type(); + if !declared_type.is(actual_type) && !is_union(py, declared_type)? { + return Err(PyTypeError::new_err(format!( "Invalid Get. The third argument `{input_arg1}` must have the exact same type as the \ second argument, {declared_type}, but had the type {actual_type}." 
))); - } + } - Ok(( - smallvec![TypeId::new(declared_type)], - smallvec![INTERNS.key_insert(py, input_arg1.into())?], - )) + Ok(( + smallvec![TypeId::new(declared_type)], + smallvec![INTERNS.key_insert(py, input_arg1.into())?], + )) + } } - } } #[pyclass(subclass)] @@ -473,34 +473,33 @@ pub struct PyGeneratorResponseCall(RefCell>); #[pymethods] impl PyGeneratorResponseCall { - #[new] - fn __new__( - py: Python, - rule_id: String, - output_type: &PyType, - input_arg0: Option<&PyAny>, - input_arg1: Option<&PyAny>, - ) -> PyResult { - let output_type = TypeId::new(output_type); - let (input_types, inputs) = interpret_get_inputs(py, input_arg0, input_arg1)?; - - Ok(Self(RefCell::new(Some(Call { - rule_id: RuleId::from_string(rule_id), - output_type, - input_types, - inputs, - })))) - } + #[new] + fn __new__( + py: Python, + rule_id: String, + output_type: &PyType, + input_arg0: Option<&PyAny>, + input_arg1: Option<&PyAny>, + ) -> PyResult { + let output_type = TypeId::new(output_type); + let (input_types, inputs) = interpret_get_inputs(py, input_arg0, input_arg1)?; + + Ok(Self(RefCell::new(Some(Call { + rule_id: RuleId::from_string(rule_id), + output_type, + input_types, + inputs, + })))) + } } impl PyGeneratorResponseCall { - fn take(&self) -> Result { - self - .0 - .borrow_mut() - .take() - .ok_or_else(|| "A `Call` may only be consumed once.".to_owned()) - } + fn take(&self) -> Result { + self.0 + .borrow_mut() + .take() + .ok_or_else(|| "A `Call` may only be consumed once.".to_owned()) + } } // Contains a `RefCell>` in order to allow us to `take` the content without cloning. @@ -508,186 +507,177 @@ impl PyGeneratorResponseCall { pub struct PyGeneratorResponseGet(RefCell>); impl PyGeneratorResponseGet { - fn take(&self) -> Result { - self - .0 - .borrow_mut() - .take() - .ok_or_else(|| "A `Get` may only be consumed once.".to_owned()) - } + fn take(&self) -> Result { + self.0 + .borrow_mut() + .take() + .ok_or_else(|| "A `Get` may only be consumed once.".to_owned()) + } } #[pymethods] impl PyGeneratorResponseGet { - #[new] - fn __new__( - py: Python, - product: &PyAny, - input_arg0: Option<&PyAny>, - input_arg1: Option<&PyAny>, - ) -> PyResult { - let product = product.downcast::().map_err(|_| { - let actual_type = product.get_type(); - PyTypeError::new_err(format!( - "Invalid Get. The first argument (the output type) must be a type, but given \ + #[new] + fn __new__( + py: Python, + product: &PyAny, + input_arg0: Option<&PyAny>, + input_arg1: Option<&PyAny>, + ) -> PyResult { + let product = product.downcast::().map_err(|_| { + let actual_type = product.get_type(); + PyTypeError::new_err(format!( + "Invalid Get. The first argument (the output type) must be a type, but given \ `{product}` with type {actual_type}." - )) - })?; - let output = TypeId::new(product); - - let (input_types, inputs) = interpret_get_inputs(py, input_arg0, input_arg1)?; - - Ok(Self(RefCell::new(Some(Get { - output, - input_types, - inputs, - })))) - } - - #[getter] - fn output_type<'p>(&'p self, py: Python<'p>) -> PyResult<&'p PyType> { - Ok( - self - .0 - .borrow() - .as_ref() - .ok_or_else(|| { - PyException::new_err( - "A `Get` may not be consumed after being provided to the @rule engine.", - ) - })? - .output - .as_py_type(py), - ) - } - - #[getter] - fn input_types<'p>(&'p self, py: Python<'p>) -> PyResult> { - Ok( - self - .0 - .borrow() - .as_ref() - .ok_or_else(|| { - PyException::new_err( - "A `Get` may not be consumed after being provided to the @rule engine.", - ) - })? 
- .input_types - .iter() - .map(|t| t.as_py_type(py)) - .collect(), - ) - } - - #[getter] - fn inputs(&self) -> PyResult> { - Ok( - self - .0 - .borrow() - .as_ref() - .ok_or_else(|| { - PyException::new_err( - "A `Get` may not be consumed after being provided to the @rule engine.", - ) - })? - .inputs - .iter() - .map(|k| { - let pyo: PyObject = k.value.clone().into(); - pyo - }) - .collect(), - ) - } - - fn __repr__(&self) -> PyResult { - Ok(format!( - "{}", - self.0.borrow().as_ref().ok_or_else(|| { - PyException::new_err( - "A `Get` may not be consumed after being provided to the @rule engine.", - ) - })? - )) - } + )) + })?; + let output = TypeId::new(product); + + let (input_types, inputs) = interpret_get_inputs(py, input_arg0, input_arg1)?; + + Ok(Self(RefCell::new(Some(Get { + output, + input_types, + inputs, + })))) + } + + #[getter] + fn output_type<'p>(&'p self, py: Python<'p>) -> PyResult<&'p PyType> { + Ok(self + .0 + .borrow() + .as_ref() + .ok_or_else(|| { + PyException::new_err( + "A `Get` may not be consumed after being provided to the @rule engine.", + ) + })? + .output + .as_py_type(py)) + } + + #[getter] + fn input_types<'p>(&'p self, py: Python<'p>) -> PyResult> { + Ok(self + .0 + .borrow() + .as_ref() + .ok_or_else(|| { + PyException::new_err( + "A `Get` may not be consumed after being provided to the @rule engine.", + ) + })? + .input_types + .iter() + .map(|t| t.as_py_type(py)) + .collect()) + } + + #[getter] + fn inputs(&self) -> PyResult> { + Ok(self + .0 + .borrow() + .as_ref() + .ok_or_else(|| { + PyException::new_err( + "A `Get` may not be consumed after being provided to the @rule engine.", + ) + })? + .inputs + .iter() + .map(|k| { + let pyo: PyObject = k.value.clone().into(); + pyo + }) + .collect()) + } + + fn __repr__(&self) -> PyResult { + Ok(format!( + "{}", + self.0.borrow().as_ref().ok_or_else(|| { + PyException::new_err( + "A `Get` may not be consumed after being provided to the @rule engine.", + ) + })? 
+ )) + } } #[derive(Debug)] pub struct Call { - pub rule_id: RuleId, - pub output_type: TypeId, - pub input_types: SmallVec<[TypeId; 2]>, - pub inputs: SmallVec<[Key; 2]>, + pub rule_id: RuleId, + pub output_type: TypeId, + pub input_types: SmallVec<[TypeId; 2]>, + pub inputs: SmallVec<[Key; 2]>, } impl fmt::Display for Call { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "Call({}, {}", self.rule_id, self.output_type)?; - match self.input_types.len() { - 0 => write!(f, ")"), - 1 => write!(f, ", {}, {})", self.input_types[0], self.inputs[0]), - _ => write!( - f, - ", {{{}}})", - self - .input_types - .iter() - .zip(self.inputs.iter()) - .map(|(t, k)| { format!("{k}: {t}") }) - .collect::>() - .join(", ") - ), + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "Call({}, {}", self.rule_id, self.output_type)?; + match self.input_types.len() { + 0 => write!(f, ")"), + 1 => write!(f, ", {}, {})", self.input_types[0], self.inputs[0]), + _ => write!( + f, + ", {{{}}})", + self.input_types + .iter() + .zip(self.inputs.iter()) + .map(|(t, k)| { format!("{k}: {t}") }) + .collect::>() + .join(", ") + ), + } } - } } #[derive(Debug)] pub struct Get { - pub output: TypeId, - pub input_types: SmallVec<[TypeId; 2]>, - pub inputs: SmallVec<[Key; 2]>, + pub output: TypeId, + pub input_types: SmallVec<[TypeId; 2]>, + pub inputs: SmallVec<[Key; 2]>, } impl fmt::Display for Get { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "Get({}", self.output)?; - match self.input_types.len() { - 0 => write!(f, ")"), - 1 => write!(f, ", {}, {})", self.input_types[0], self.inputs[0]), - _ => write!( - f, - ", {{{}}})", - self - .input_types - .iter() - .zip(self.inputs.iter()) - .map(|(t, k)| { format!("{k}: {t}") }) - .collect::>() - .join(", ") - ), + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "Get({}", self.output)?; + match self.input_types.len() { + 0 => write!(f, ")"), + 1 => write!(f, ", {}, {})", self.input_types[0], self.inputs[0]), + _ => write!( + f, + ", {{{}}})", + self.input_types + .iter() + .zip(self.inputs.iter()) + .map(|(t, k)| { format!("{k}: {t}") }) + .collect::>() + .join(", ") + ), + } } - } } pub enum GetOrGenerator { - Get(Get), - Generator(Value), + Get(Get), + Generator(Value), } pub enum GeneratorResponse { - /// The generator has completed with the given value of the given type. - Break(Value, TypeId), - /// The generator is awaiting a call to a known rule. - Call(Call), - /// The generator is awaiting a call to an unknown rule. - Get(Get), - /// The generator is awaiting calls to a series of generators or Gets, all of which will - /// produce `Call`s or `Get`s. - /// - /// The generators used in this position will either be call-by-name `@rule` stubs (which will - /// immediately produce a `Call`, and then return its value), or async "rule helpers", which - /// might use either the call-by-name or `Get` syntax. - All(Vec), + /// The generator has completed with the given value of the given type. + Break(Value, TypeId), + /// The generator is awaiting a call to a known rule. + Call(Call), + /// The generator is awaiting a call to an unknown rule. + Get(Get), + /// The generator is awaiting calls to a series of generators or Gets, all of which will + /// produce `Call`s or `Get`s. 
+ /// + /// The generators used in this position will either be call-by-name `@rule` stubs (which will + /// immediately produce a `Call`, and then return its value), or async "rule helpers", which + /// might use either the call-by-name or `Get` syntax. + All(Vec), } diff --git a/src/rust/engine/src/externs/nailgun.rs b/src/rust/engine/src/externs/nailgun.rs index c29d2b9869c..464e99babff 100644 --- a/src/rust/engine/src/externs/nailgun.rs +++ b/src/rust/engine/src/externs/nailgun.rs @@ -10,16 +10,16 @@ use crate::externs::scheduler::PyExecutor; use task_executor::Executor; pub fn register(py: Python, m: &PyModule) -> PyResult<()> { - m.add( - "PantsdConnectionException", - py.get_type::(), - )?; - m.add( - "PantsdClientException", - py.get_type::(), - )?; - m.add_class::()?; - Ok(()) + m.add( + "PantsdConnectionException", + py.get_type::(), + )?; + m.add( + "PantsdClientException", + py.get_type::(), + )?; + m.add_class::()?; + Ok(()) } create_exception!(native_engine, PantsdConnectionException, PyException); @@ -27,42 +27,51 @@ create_exception!(native_engine, PantsdClientException, PyException); #[pyclass] struct PyNailgunClient { - port: u16, - executor: Executor, + port: u16, + executor: Executor, } #[pymethods] impl PyNailgunClient { - #[new] - fn __new__(port: u16, py_executor: &PyExecutor) -> Self { - Self { - port, - executor: py_executor.0.clone(), + #[new] + fn __new__(port: u16, py_executor: &PyExecutor) -> Self { + Self { + port, + executor: py_executor.0.clone(), + } } - } - fn execute(&self, command: String, args: Vec, env: &PyDict, py: Python) -> PyResult { - use nailgun::NailgunClientError; + fn execute( + &self, + command: String, + args: Vec, + env: &PyDict, + py: Python, + ) -> PyResult { + use nailgun::NailgunClientError; - // NB: We assume that env var names and values are Python strs strictly convertible to UTF-8 - // (that is, with no lone surrogates representing invalid UTF-8 passed from the OS). - // The Python-side caller must ensure this. - let env_list: Vec<(String, String)> = env - .items() - .into_iter() - .map(|kv_pair| kv_pair.extract::<(String, String)>()) - .collect::, _>>()?; + // NB: We assume that env var names and values are Python strs strictly convertible to UTF-8 + // (that is, with no lone surrogates representing invalid UTF-8 passed from the OS). + // The Python-side caller must ensure this. 
+ let env_list: Vec<(String, String)> = env + .items() + .into_iter() + .map(|kv_pair| kv_pair.extract::<(String, String)>()) + .collect::, _>>()?; - py.allow_threads(|| { - self - .executor - .block_on(nailgun::client_execute(self.port, command, args, env_list)) - .map_err(|e| match e { - NailgunClientError::PreConnect(err_str) => PantsdConnectionException::new_err(err_str), - NailgunClientError::PostConnect(err_str) => PantsdClientException::new_err(err_str), - NailgunClientError::BrokenPipe => PyBrokenPipeError::new_err(""), - NailgunClientError::KeyboardInterrupt => PyKeyboardInterrupt::new_err(""), + py.allow_threads(|| { + self.executor + .block_on(nailgun::client_execute(self.port, command, args, env_list)) + .map_err(|e| match e { + NailgunClientError::PreConnect(err_str) => { + PantsdConnectionException::new_err(err_str) + } + NailgunClientError::PostConnect(err_str) => { + PantsdClientException::new_err(err_str) + } + NailgunClientError::BrokenPipe => PyBrokenPipeError::new_err(""), + NailgunClientError::KeyboardInterrupt => PyKeyboardInterrupt::new_err(""), + }) }) - }) - } + } } diff --git a/src/rust/engine/src/externs/pantsd.rs b/src/rust/engine/src/externs/pantsd.rs index 1c7723a17e2..b0d72ee5ab2 100644 --- a/src/rust/engine/src/externs/pantsd.rs +++ b/src/rust/engine/src/externs/pantsd.rs @@ -9,8 +9,8 @@ use pyo3::prelude::*; use options::{Args, BuildRoot, Env, OptionParser}; pub fn register(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(pantsd_fingerprint_compute, m)?)?; - Ok(()) + m.add_function(wrap_pyfunction!(pantsd_fingerprint_compute, m)?)?; + Ok(()) } /// Computes the current `pantsd` fingerprint. @@ -20,23 +20,23 @@ pub fn register(_py: Python, m: &PyModule) -> PyResult<()> { /// information about this redundancy). 
#[pyfunction] fn pantsd_fingerprint_compute(expected_option_names: HashSet) -> PyResult { - let build_root = BuildRoot::find().map_err(PyException::new_err)?; - let options_parser = - OptionParser::new(Env::capture_lossy().0, Args::argv()).map_err(PyException::new_err)?; - - let options = pantsd::fingerprinted_options(&build_root).map_err(PyException::new_err)?; - let actual_option_names = options - .into_iter() - .map(|o| o.id.name_underscored()) - .collect::>(); - - if expected_option_names != actual_option_names { - return Err(PyException::new_err(format!( - "The `daemon=True` options declared on the Python side did \ + let build_root = BuildRoot::find().map_err(PyException::new_err)?; + let options_parser = + OptionParser::new(Env::capture_lossy().0, Args::argv()).map_err(PyException::new_err)?; + + let options = pantsd::fingerprinted_options(&build_root).map_err(PyException::new_err)?; + let actual_option_names = options + .into_iter() + .map(|o| o.id.name_underscored()) + .collect::>(); + + if expected_option_names != actual_option_names { + return Err(PyException::new_err(format!( + "The `daemon=True` options declared on the Python side did \ not match the fingerprinted options from the Rust side: \ {expected_option_names:?} vs {actual_option_names:?}" - ))); - } + ))); + } - pantsd::fingerprint_compute(&build_root, &options_parser).map_err(PyException::new_err) + pantsd::fingerprint_compute(&build_root, &options_parser).map_err(PyException::new_err) } diff --git a/src/rust/engine/src/externs/process.rs b/src/rust/engine/src/externs/process.rs index 5ccb3e64279..30fe3439110 100644 --- a/src/rust/engine/src/externs/process.rs +++ b/src/rust/engine/src/externs/process.rs @@ -11,109 +11,109 @@ use pyo3::prelude::*; use process_execution::{Platform, ProcessExecutionEnvironment, ProcessExecutionStrategy}; pub(crate) fn register(m: &PyModule) -> PyResult<()> { - m.add_class::()?; + m.add_class::()?; - Ok(()) + Ok(()) } #[pyclass(name = "ProcessExecutionEnvironment")] #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PyProcessExecutionEnvironment { - pub environment: ProcessExecutionEnvironment, + pub environment: ProcessExecutionEnvironment, } #[pymethods] impl PyProcessExecutionEnvironment { - #[new] - fn __new__( - platform: String, - remote_execution: bool, - remote_execution_extra_platform_properties: Vec<(String, String)>, - environment_name: Option, - docker_image: Option, - ) -> PyResult { - let platform = Platform::try_from(platform).map_err(PyValueError::new_err)?; - let strategy = match (docker_image, remote_execution) { - (None, true) => Ok(ProcessExecutionStrategy::RemoteExecution( - remote_execution_extra_platform_properties, - )), - (None, false) => Ok(ProcessExecutionStrategy::Local), - (Some(image), false) => Ok(ProcessExecutionStrategy::Docker(image)), - (Some(_), true) => Err(PyAssertionError::new_err( - "docker_image cannot be set at the same time as remote_execution", - )), - }?; - Ok(Self { - environment: ProcessExecutionEnvironment { - name: environment_name, - platform, - strategy, - }, - }) - } - - fn __hash__(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.environment.hash(&mut s); - s.finish() - } - - fn __repr__(&self) -> String { - format!( - "ProcessExecutionEnvironment(environment={:?})", - self.environment, - ) - } - - fn __richcmp__( - &self, - other: &PyProcessExecutionEnvironment, - op: CompareOp, - py: Python, - ) -> PyObject { - match op { - CompareOp::Eq => (self == other).into_py(py), - CompareOp::Ne => (self != other).into_py(py), - 
_ => py.NotImplemented(), + #[new] + fn __new__( + platform: String, + remote_execution: bool, + remote_execution_extra_platform_properties: Vec<(String, String)>, + environment_name: Option, + docker_image: Option, + ) -> PyResult { + let platform = Platform::try_from(platform).map_err(PyValueError::new_err)?; + let strategy = match (docker_image, remote_execution) { + (None, true) => Ok(ProcessExecutionStrategy::RemoteExecution( + remote_execution_extra_platform_properties, + )), + (None, false) => Ok(ProcessExecutionStrategy::Local), + (Some(image), false) => Ok(ProcessExecutionStrategy::Docker(image)), + (Some(_), true) => Err(PyAssertionError::new_err( + "docker_image cannot be set at the same time as remote_execution", + )), + }?; + Ok(Self { + environment: ProcessExecutionEnvironment { + name: environment_name, + platform, + strategy, + }, + }) } - } - - #[getter] - fn name(&self) -> Option<&str> { - self.environment.name.as_deref() - } - - #[getter] - fn environment_type(&self) -> &str { - self.environment.strategy.strategy_type() - } - - #[getter] - fn platform(&self) -> String { - self.environment.platform.into() - } - - #[getter] - fn remote_execution(&self) -> bool { - matches!( - self.environment.strategy, - ProcessExecutionStrategy::RemoteExecution(_) - ) - } - - #[getter] - fn docker_image(&self) -> Option<&str> { - match &self.environment.strategy { - ProcessExecutionStrategy::Docker(image) => Some(image), - _ => None, + + fn __hash__(&self) -> u64 { + let mut s = DefaultHasher::new(); + self.environment.hash(&mut s); + s.finish() + } + + fn __repr__(&self) -> String { + format!( + "ProcessExecutionEnvironment(environment={:?})", + self.environment, + ) + } + + fn __richcmp__( + &self, + other: &PyProcessExecutionEnvironment, + op: CompareOp, + py: Python, + ) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } + + #[getter] + fn name(&self) -> Option<&str> { + self.environment.name.as_deref() + } + + #[getter] + fn environment_type(&self) -> &str { + self.environment.strategy.strategy_type() + } + + #[getter] + fn platform(&self) -> String { + self.environment.platform.into() + } + + #[getter] + fn remote_execution(&self) -> bool { + matches!( + self.environment.strategy, + ProcessExecutionStrategy::RemoteExecution(_) + ) + } + + #[getter] + fn docker_image(&self) -> Option<&str> { + match &self.environment.strategy { + ProcessExecutionStrategy::Docker(image) => Some(image), + _ => None, + } } - } - #[getter] - fn remote_execution_extra_platform_properties(&self) -> Vec<(String, String)> { - match &self.environment.strategy { - ProcessExecutionStrategy::RemoteExecution(properties) => properties.to_owned(), - _ => vec![], + #[getter] + fn remote_execution_extra_platform_properties(&self) -> Vec<(String, String)> { + match &self.environment.strategy { + ProcessExecutionStrategy::RemoteExecution(properties) => properties.to_owned(), + _ => vec![], + } } - } } diff --git a/src/rust/engine/src/externs/scheduler.rs b/src/rust/engine/src/externs/scheduler.rs index 5b5d286a4f9..017de3622bf 100644 --- a/src/rust/engine/src/externs/scheduler.rs +++ b/src/rust/engine/src/externs/scheduler.rs @@ -8,15 +8,15 @@ use pyo3::ffi; use pyo3::prelude::*; pub fn register(m: &PyModule) -> PyResult<()> { - m.add_class::()?; - Ok(()) + m.add_class::()?; + Ok(()) } // NB: This exists because we need the PyInterpreterState to pass to PyThreadState_New, // however PyInterpreterState_Get wasn't 
added until Py 3.9. They vary in implementation, but because // we don't have any sub-interpreters they should both return the same object. extern "C" { - pub fn PyInterpreterState_Main() -> *mut ffi::PyInterpreterState; + pub fn PyInterpreterState_Main() -> *mut ffi::PyInterpreterState; } #[pyclass] @@ -25,44 +25,45 @@ pub struct PyExecutor(pub task_executor::Executor); #[pymethods] impl PyExecutor { - #[new] - fn __new__(core_threads: usize, max_threads: usize) -> PyResult { - task_executor::Executor::new_owned(core_threads, max_threads, || { - // NB: We need a PyThreadState object which lives throughout the lifetime of this thread - // as the debug trace object is attached to it. Otherwise the PyThreadState is - // constructed/destroyed with each `with_gil` call (inside PyGILState_Ensure/PyGILState_Release). - // - // Constructing (and leaking) a ThreadState object allocates and associates it with the current - // thread, and the Python runtime won't wipe the trace function between calls. - // See https://github.com/PyO3/pyo3/issues/2495 - let _ = unsafe { ffi::PyThreadState_New(Python::with_gil(|_| PyInterpreterState_Main())) }; - Python::with_gil(|py| { - let _ = py.eval("__import__('debugpy').debug_this_thread()", None, None); - }); - }) - .map(PyExecutor) - .map_err(PyException::new_err) - } + #[new] + fn __new__(core_threads: usize, max_threads: usize) -> PyResult { + task_executor::Executor::new_owned(core_threads, max_threads, || { + // NB: We need a PyThreadState object which lives throughout the lifetime of this thread + // as the debug trace object is attached to it. Otherwise the PyThreadState is + // constructed/destroyed with each `with_gil` call (inside PyGILState_Ensure/PyGILState_Release). + // + // Constructing (and leaking) a ThreadState object allocates and associates it with the current + // thread, and the Python runtime won't wipe the trace function between calls. + // See https://github.com/PyO3/pyo3/issues/2495 + let _ = + unsafe { ffi::PyThreadState_New(Python::with_gil(|_| PyInterpreterState_Main())) }; + Python::with_gil(|py| { + let _ = py.eval("__import__('debugpy').debug_this_thread()", None, None); + }); + }) + .map(PyExecutor) + .map_err(PyException::new_err) + } - /// Returns a clone of the PyExecutor which is disconnected from its parent's lifecycle. Shutdown - /// of the borrowed clone will have no effect on its parent. - fn to_borrowed(&self) -> Self { - PyExecutor(self.0.to_borrowed()) - } + /// Returns a clone of the PyExecutor which is disconnected from its parent's lifecycle. Shutdown + /// of the borrowed clone will have no effect on its parent. + fn to_borrowed(&self) -> Self { + PyExecutor(self.0.to_borrowed()) + } - /// Shut down this executor, waiting for all tasks to exit. Any tasks which have not exited at - /// the end of the timeout will be leaked. - fn shutdown(&self, py: Python, duration_secs: f64) { - py.allow_threads(|| self.0.shutdown(Duration::from_secs_f64(duration_secs))) - } + /// Shut down this executor, waiting for all tasks to exit. Any tasks which have not exited at + /// the end of the timeout will be leaked. + fn shutdown(&self, py: Python, duration_secs: f64) { + py.allow_threads(|| self.0.shutdown(Duration::from_secs_f64(duration_secs))) + } } impl Drop for PyExecutor { - fn drop(&mut self) { - if !self.0.is_shutdown() { - // This can lead to hangs, since `Drop` will run on an arbitrary thread under arbitrary - // locks. See #18211. 
- log::warn!("Executor was not shut down explicitly."); + fn drop(&mut self) { + if !self.0.is_shutdown() { + // This can lead to hangs, since `Drop` will run on an arbitrary thread under arbitrary + // locks. See #18211. + log::warn!("Executor was not shut down explicitly."); + } } - } } diff --git a/src/rust/engine/src/externs/stdio.rs b/src/rust/engine/src/externs/stdio.rs index 498a8b4a26d..64f671be5e0 100644 --- a/src/rust/engine/src/externs/stdio.rs +++ b/src/rust/engine/src/externs/stdio.rs @@ -11,84 +11,84 @@ pub struct PyStdioRead; #[pymethods] impl PyStdioRead { - fn isatty(&self) -> bool { - if let Ok(fd) = self.fileno() { - unsafe { libc::isatty(fd) != 0 } - } else { - false + fn isatty(&self) -> bool { + if let Ok(fd) = self.fileno() { + unsafe { libc::isatty(fd) != 0 } + } else { + false + } } - } - fn fileno(&self) -> PyResult { - stdio::get_destination() - .stdin_as_raw_fd() - .map_err(PyException::new_err) - } + fn fileno(&self) -> PyResult { + stdio::get_destination() + .stdin_as_raw_fd() + .map_err(PyException::new_err) + } - fn readinto(&self, obj: &PyAny, py: Python) -> PyResult { - let py_buffer = PyBuffer::get(obj)?; - let mut buffer = vec![0; py_buffer.len_bytes()]; - let read = py - .allow_threads(|| stdio::get_destination().read_stdin(&mut buffer)) - .map_err(|e| PyException::new_err(e.to_string()))?; - // NB: `as_mut_slice` exposes a `&[Cell]`, which we can't use directly in `read`. We use - // `copy_from_slice` instead, which unfortunately involves some extra copying. - py_buffer.copy_from_slice(py, &buffer)?; - Ok(read) - } + fn readinto(&self, obj: &PyAny, py: Python) -> PyResult { + let py_buffer = PyBuffer::get(obj)?; + let mut buffer = vec![0; py_buffer.len_bytes()]; + let read = py + .allow_threads(|| stdio::get_destination().read_stdin(&mut buffer)) + .map_err(|e| PyException::new_err(e.to_string()))?; + // NB: `as_mut_slice` exposes a `&[Cell]`, which we can't use directly in `read`. We use + // `copy_from_slice` instead, which unfortunately involves some extra copying. + py_buffer.copy_from_slice(py, &buffer)?; + Ok(read) + } - #[getter] - fn closed(&self) -> bool { - false - } + #[getter] + fn closed(&self) -> bool { + false + } - fn readable(&self) -> bool { - true - } + fn readable(&self) -> bool { + true + } - fn seekable(&self) -> bool { - false - } + fn seekable(&self) -> bool { + false + } } /// A Python file-like that proxies to the `stdio` module, which implements thread-local output. 
#[pyclass] pub struct PyStdioWrite { - pub(crate) is_stdout: bool, + pub(crate) is_stdout: bool, } #[pymethods] impl PyStdioWrite { - fn write(&self, payload: &str, py: Python) { - py.allow_threads(|| { - let destination = stdio::get_destination(); - if self.is_stdout { - destination.write_stdout(payload.as_bytes()); - } else { - destination.write_stderr(payload.as_bytes()); - } - }); - } + fn write(&self, payload: &str, py: Python) { + py.allow_threads(|| { + let destination = stdio::get_destination(); + if self.is_stdout { + destination.write_stdout(payload.as_bytes()); + } else { + destination.write_stderr(payload.as_bytes()); + } + }); + } - fn isatty(&self) -> bool { - if let Ok(fd) = self.fileno() { - unsafe { libc::isatty(fd) != 0 } - } else { - false + fn isatty(&self) -> bool { + if let Ok(fd) = self.fileno() { + unsafe { libc::isatty(fd) != 0 } + } else { + false + } } - } - fn fileno(&self) -> PyResult { - let destination = stdio::get_destination(); - let fd = if self.is_stdout { - destination.stdout_as_raw_fd() - } else { - destination.stderr_as_raw_fd() - }; - fd.map_err(PyException::new_err) - } + fn fileno(&self) -> PyResult { + let destination = stdio::get_destination(); + let fd = if self.is_stdout { + destination.stdout_as_raw_fd() + } else { + destination.stderr_as_raw_fd() + }; + fd.map_err(PyException::new_err) + } - fn flush(&self) { - // All of our destinations are line-buffered. - } + fn flush(&self) { + // All of our destinations are line-buffered. + } } diff --git a/src/rust/engine/src/externs/target.rs b/src/rust/engine/src/externs/target.rs index dfab9c7c2b3..a817037f02a 100644 --- a/src/rust/engine/src/externs/target.rs +++ b/src/rust/engine/src/externs/target.rs @@ -12,12 +12,12 @@ use pyo3::types::PyType; use crate::externs::address::Address; pub fn register(m: &PyModule) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; - m.add("NO_VALUE", NoFieldValue)?; + m.add("NO_VALUE", NoFieldValue)?; - Ok(()) + Ok(()) } #[pyclass(name = "_NoValue")] @@ -26,227 +26,230 @@ struct NoFieldValue; #[pymethods] impl NoFieldValue { - fn __bool__(&self) -> bool { - false - } + fn __bool__(&self) -> bool { + false + } - fn __repr__(&self) -> &'static str { - "" - } + fn __repr__(&self) -> &'static str { + "" + } } #[pyclass(subclass)] pub struct Field { - value: PyObject, + value: PyObject, } #[pymethods] impl Field { - #[new] - #[classmethod] - #[pyo3(signature = (raw_value, address))] - fn __new__( - cls: &PyType, - raw_value: Option, - address: PyRef
, - py: Python, - ) -> PyResult { - // NB: The deprecation check relies on the presence of NoFieldValue to detect if - // the field was explicitly set, so this must come before we coerce the raw_value - // to None below. - Self::check_deprecated(cls, raw_value.as_ref(), &address, py)?; - - let raw_value = match raw_value { - Some(value) - if value.extract::(py).is_ok() && !Self::cls_none_is_valid_value(cls)? => - { + #[new] + #[classmethod] + #[pyo3(signature = (raw_value, address))] + fn __new__( + cls: &PyType, + raw_value: Option, + address: PyRef
, + py: Python, + ) -> PyResult { + // NB: The deprecation check relies on the presence of NoFieldValue to detect if + // the field was explicitly set, so this must come before we coerce the raw_value + // to None below. + Self::check_deprecated(cls, raw_value.as_ref(), &address, py)?; + + let raw_value = match raw_value { + Some(value) + if value.extract::(py).is_ok() + && !Self::cls_none_is_valid_value(cls)? => + { + None + } + rv => rv, + }; + + Ok(Self { + value: cls + .call_method(intern!(py, "compute_value"), (raw_value, address), None)? + .into(), + }) + } + + #[classattr] + fn none_is_valid_value() -> bool { + false + } + + #[classattr] + fn required() -> bool { + false + } + + #[classattr] + fn removal_version() -> Option<&'static str> { + None + } + + #[classattr] + fn removal_hint() -> Option<&'static str> { None - } - rv => rv, - }; - - Ok(Self { - value: cls - .call_method(intern!(py, "compute_value"), (raw_value, address), None)? - .into(), - }) - } - - #[classattr] - fn none_is_valid_value() -> bool { - false - } - - #[classattr] - fn required() -> bool { - false - } - - #[classattr] - fn removal_version() -> Option<&'static str> { - None - } - - #[classattr] - fn removal_hint() -> Option<&'static str> { - None - } - - #[classattr] - fn deprecated_alias() -> Option<&'static str> { - None - } - - #[classattr] - fn deprecated_alias_removal_version() -> Option<&'static str> { - None - } - - #[classmethod] - #[pyo3(signature = (raw_value, address))] - fn compute_value( - cls: &PyType, - raw_value: Option, - address: PyRef
, - py: Python, - ) -> PyResult { - let default = || -> PyResult { - if Self::cls_required(cls)? { - // TODO: Should be `RequiredFieldMissingException`. - Err(PyValueError::new_err(format!( - "The `{}` field in target {} must be defined.", - Self::cls_alias(cls)?, - *address, - ))) - } else { - Self::cls_default(cls) - } - }; - - let none_is_valid_value = Self::cls_none_is_valid_value(cls)?; - match raw_value { - Some(value) if none_is_valid_value && value.extract::(py).is_ok() => default(), - None if none_is_valid_value => Ok(py.None()), - None => default(), - Some(value) => Ok(value), - } - } - - #[getter] - fn value(&self) -> &PyObject { - &self.value - } - - fn __hash__(self_: &PyCell, py: Python) -> PyResult { - Ok(self_.get_type().hash()? & self_.borrow().value.as_ref(py).hash()?) - } - - fn __repr__(self_: &PyCell) -> PyResult { - let mut result = String::new(); - write!( - result, - "{}(alias={}, value={}", - self_.get_type(), - Self::cls_alias(self_)?, - self_.borrow().value - ) - .unwrap(); - if let Ok(default) = self_.getattr("default") { - write!(result, ", default={})", default).unwrap(); - } else { - write!(result, ")").unwrap(); - } - Ok(result) - } - - fn __str__(self_: &PyCell) -> PyResult { - Ok(format!( - "{}={}", - Self::cls_alias(self_)?, - self_.borrow().value - )) - } - - fn __richcmp__( - self_: &PyCell, - other: &PyAny, - op: CompareOp, - py: Python, - ) -> PyResult { - let is_eq = self_.get_type().eq(other.get_type())? - && self_ - .borrow() - .value - .as_ref(py) - .eq(&other.extract::>()?.value)?; - match op { - CompareOp::Eq => Ok(is_eq.into_py(py)), - CompareOp::Ne => Ok((!is_eq).into_py(py)), - _ => Ok(py.NotImplemented()), - } - } + } + + #[classattr] + fn deprecated_alias() -> Option<&'static str> { + None + } + + #[classattr] + fn deprecated_alias_removal_version() -> Option<&'static str> { + None + } + + #[classmethod] + #[pyo3(signature = (raw_value, address))] + fn compute_value( + cls: &PyType, + raw_value: Option, + address: PyRef
, + py: Python, + ) -> PyResult { + let default = || -> PyResult { + if Self::cls_required(cls)? { + // TODO: Should be `RequiredFieldMissingException`. + Err(PyValueError::new_err(format!( + "The `{}` field in target {} must be defined.", + Self::cls_alias(cls)?, + *address, + ))) + } else { + Self::cls_default(cls) + } + }; + + let none_is_valid_value = Self::cls_none_is_valid_value(cls)?; + match raw_value { + Some(value) if none_is_valid_value && value.extract::(py).is_ok() => { + default() + } + None if none_is_valid_value => Ok(py.None()), + None => default(), + Some(value) => Ok(value), + } + } + + #[getter] + fn value(&self) -> &PyObject { + &self.value + } + + fn __hash__(self_: &PyCell, py: Python) -> PyResult { + Ok(self_.get_type().hash()? & self_.borrow().value.as_ref(py).hash()?) + } + + fn __repr__(self_: &PyCell) -> PyResult { + let mut result = String::new(); + write!( + result, + "{}(alias={}, value={}", + self_.get_type(), + Self::cls_alias(self_)?, + self_.borrow().value + ) + .unwrap(); + if let Ok(default) = self_.getattr("default") { + write!(result, ", default={})", default).unwrap(); + } else { + write!(result, ")").unwrap(); + } + Ok(result) + } + + fn __str__(self_: &PyCell) -> PyResult { + Ok(format!( + "{}={}", + Self::cls_alias(self_)?, + self_.borrow().value + )) + } + + fn __richcmp__( + self_: &PyCell, + other: &PyAny, + op: CompareOp, + py: Python, + ) -> PyResult { + let is_eq = self_.get_type().eq(other.get_type())? + && self_ + .borrow() + .value + .as_ref(py) + .eq(&other.extract::>()?.value)?; + match op { + CompareOp::Eq => Ok(is_eq.into_py(py)), + CompareOp::Ne => Ok((!is_eq).into_py(py)), + _ => Ok(py.NotImplemented()), + } + } } impl Field { - fn cls_none_is_valid_value(cls: &PyAny) -> PyResult { - cls.getattr("none_is_valid_value")?.extract::() - } - - fn cls_default(cls: &PyAny) -> PyResult { - cls.getattr("default")?.extract() - } - - fn cls_required(cls: &PyAny) -> PyResult { - cls.getattr("required")?.extract() - } - - fn cls_alias(cls: &PyAny) -> PyResult<&str> { - // TODO: All of these methods should use interned attr names. - cls.getattr("alias")?.extract() - } - - fn cls_removal_version(cls: &PyAny) -> PyResult> { - cls.getattr("removal_version")?.extract() - } - - fn cls_removal_hint(cls: &PyAny) -> PyResult> { - cls.getattr("removal_hint")?.extract() - } - - fn check_deprecated( - cls: &PyType, - raw_value: Option<&PyObject>, - address: &Address, - py: Python, - ) -> PyResult<()> { - if address.is_generated_target() { - return Ok(()); - } - let Some(removal_version) = Self::cls_removal_version(cls)? else { - return Ok(()); - }; - match raw_value { - Some(value) if value.extract::(py).is_ok() => return Ok(()), - _ => (), - } - - let Some(removal_hint) = Self::cls_removal_hint(cls)? else { - return Err(PyValueError::new_err( - "You specified `removal_version` for {cls:?}, but not the class \ + fn cls_none_is_valid_value(cls: &PyAny) -> PyResult { + cls.getattr("none_is_valid_value")?.extract::() + } + + fn cls_default(cls: &PyAny) -> PyResult { + cls.getattr("default")?.extract() + } + + fn cls_required(cls: &PyAny) -> PyResult { + cls.getattr("required")?.extract() + } + + fn cls_alias(cls: &PyAny) -> PyResult<&str> { + // TODO: All of these methods should use interned attr names. 
+ cls.getattr("alias")?.extract() + } + + fn cls_removal_version(cls: &PyAny) -> PyResult> { + cls.getattr("removal_version")?.extract() + } + + fn cls_removal_hint(cls: &PyAny) -> PyResult> { + cls.getattr("removal_hint")?.extract() + } + + fn check_deprecated( + cls: &PyType, + raw_value: Option<&PyObject>, + address: &Address, + py: Python, + ) -> PyResult<()> { + if address.is_generated_target() { + return Ok(()); + } + let Some(removal_version) = Self::cls_removal_version(cls)? else { + return Ok(()); + }; + match raw_value { + Some(value) if value.extract::(py).is_ok() => return Ok(()), + _ => (), + } + + let Some(removal_hint) = Self::cls_removal_hint(cls)? else { + return Err(PyValueError::new_err( + "You specified `removal_version` for {cls:?}, but not the class \ property `removal_hint`.", - )); - }; - - let alias = Self::cls_alias(cls)?; - let deprecated = PyModule::import(py, "pants.base.deprecated")?; - deprecated.getattr("warn_or_error")?.call( - ( - removal_version, - format!("the {alias} field"), - format!("Using the `{alias}` field in the target {address}. {removal_hint}"), - ), - None, - )?; - Ok(()) - } + )); + }; + + let alias = Self::cls_alias(cls)?; + let deprecated = PyModule::import(py, "pants.base.deprecated")?; + deprecated.getattr("warn_or_error")?.call( + ( + removal_version, + format!("the {alias} field"), + format!("Using the `{alias}` field in the target {address}. {removal_hint}"), + ), + None, + )?; + Ok(()) + } } diff --git a/src/rust/engine/src/externs/testutil.rs b/src/rust/engine/src/externs/testutil.rs index f6b72131c9d..13126bce54b 100644 --- a/src/rust/engine/src/externs/testutil.rs +++ b/src/rust/engine/src/externs/testutil.rs @@ -14,9 +14,9 @@ use crate::externs::fs::{PyDigest, PyFileDigest}; use crate::externs::scheduler::PyExecutor; pub fn register(m: &PyModule) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - Ok(()) + m.add_class::()?; + m.add_class::()?; + Ok(()) } #[pyclass] @@ -24,31 +24,31 @@ struct PyStubCASBuilder(Arc>>); #[pymethods] impl PyStubCASBuilder { - fn ac_always_errors(&mut self) -> PyResult { - let mut builder_opt = self.0.lock(); - let builder = builder_opt - .take() - .ok_or_else(|| PyAssertionError::new_err("Unable to unwrap StubCASBuilder"))?; - *builder_opt = Some(builder.ac_always_errors()); - Ok(PyStubCASBuilder(self.0.clone())) - } - fn cas_always_errors(&mut self) -> PyResult { - let mut builder_opt = self.0.lock(); - let builder = builder_opt - .take() - .ok_or_else(|| PyAssertionError::new_err("Unable to unwrap StubCASBuilder"))?; - *builder_opt = Some(builder.cas_always_errors()); - Ok(PyStubCASBuilder(self.0.clone())) - } + fn ac_always_errors(&mut self) -> PyResult { + let mut builder_opt = self.0.lock(); + let builder = builder_opt + .take() + .ok_or_else(|| PyAssertionError::new_err("Unable to unwrap StubCASBuilder"))?; + *builder_opt = Some(builder.ac_always_errors()); + Ok(PyStubCASBuilder(self.0.clone())) + } + fn cas_always_errors(&mut self) -> PyResult { + let mut builder_opt = self.0.lock(); + let builder = builder_opt + .take() + .ok_or_else(|| PyAssertionError::new_err("Unable to unwrap StubCASBuilder"))?; + *builder_opt = Some(builder.cas_always_errors()); + Ok(PyStubCASBuilder(self.0.clone())) + } - fn build(&mut self, py_executor: &PyExecutor) -> PyResult { - let mut builder_opt = self.0.lock(); - let builder = builder_opt - .take() - .ok_or_else(|| PyAssertionError::new_err("Unable to unwrap StubCASBuilder"))?; - // NB: A Tokio runtime must be used when building StubCAS. 
- py_executor.0.enter(|| Ok(PyStubCAS(builder.build()))) - } + fn build(&mut self, py_executor: &PyExecutor) -> PyResult { + let mut builder_opt = self.0.lock(); + let builder = builder_opt + .take() + .ok_or_else(|| PyAssertionError::new_err("Unable to unwrap StubCASBuilder"))?; + // NB: A Tokio runtime must be used when building StubCAS. + py_executor.0.enter(|| Ok(PyStubCAS(builder.build()))) + } } #[pyclass] @@ -56,26 +56,26 @@ struct PyStubCAS(StubCAS); #[pymethods] impl PyStubCAS { - #[classmethod] - fn builder(_cls: &PyType) -> PyStubCASBuilder { - let builder = Arc::new(Mutex::new(Some(StubCAS::builder()))); - PyStubCASBuilder(builder) - } + #[classmethod] + fn builder(_cls: &PyType) -> PyStubCASBuilder { + let builder = Arc::new(Mutex::new(Some(StubCAS::builder()))); + PyStubCASBuilder(builder) + } - #[getter] - fn address(&self) -> String { - self.0.address() - } + #[getter] + fn address(&self) -> String { + self.0.address() + } - fn remove(&self, digest: &PyAny) -> PyResult { - let digest = digest - .extract::() - .map(|fd| fd.0) - .or_else(|_| digest.extract::().map(|d| d.0.as_digest()))?; - Ok(self.0.remove(digest.hash)) - } + fn remove(&self, digest: &PyAny) -> PyResult { + let digest = digest + .extract::() + .map(|fd| fd.0) + .or_else(|_| digest.extract::().map(|d| d.0.as_digest()))?; + Ok(self.0.remove(digest.hash)) + } - fn action_cache_len(&self) -> usize { - self.0.action_cache.len() - } + fn action_cache_len(&self) -> usize { + self.0.action_cache.len() + } } diff --git a/src/rust/engine/src/externs/workunits.rs b/src/rust/engine/src/externs/workunits.rs index 85138ab562f..22475c5b4b9 100644 --- a/src/rust/engine/src/externs/workunits.rs +++ b/src/rust/engine/src/externs/workunits.rs @@ -5,11 +5,11 @@ use pyo3::prelude::*; use workunit_store::Metric; pub fn register(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(all_counter_names, m)?)?; - Ok(()) + m.add_function(wrap_pyfunction!(all_counter_names, m)?)?; + Ok(()) } #[pyfunction] fn all_counter_names() -> Vec { - Metric::all_metrics() + Metric::all_metrics() } diff --git a/src/rust/engine/src/interning.rs b/src/rust/engine/src/interning.rs index 2a853528cff..36d21f6ed01 100644 --- a/src/rust/engine/src/interning.rs +++ b/src/rust/engine/src/interning.rs @@ -37,33 +37,33 @@ use crate::python::{Key, TypeId}; /// before the GIL (Value equality in particular might re-acquire it). /// pub struct Interns { - // A mapping between Python objects and integer ids. - keys: Py, - id_generator: atomic::AtomicU64, + // A mapping between Python objects and integer ids. + keys: Py, + id_generator: atomic::AtomicU64, } impl Interns { - pub fn new() -> Self { - Self { - keys: Python::with_gil(|py| PyDict::new(py).into()), - id_generator: atomic::AtomicU64::default(), + pub fn new() -> Self { + Self { + keys: Python::with_gil(|py| PyDict::new(py).into()), + id_generator: atomic::AtomicU64::default(), + } } - } - pub fn key_insert(&self, py: Python, v: PyObject) -> PyResult { - let (id, type_id): (u64, TypeId) = { - let v = v.as_ref(py); - let keys = self.keys.as_ref(py); - let id: u64 = if let Some(key) = keys.get_item(v) { - key.extract()? - } else { - let id = self.id_generator.fetch_add(1, atomic::Ordering::Relaxed); - keys.set_item(v, id)?; - id - }; - (id, v.get_type().into()) - }; + pub fn key_insert(&self, py: Python, v: PyObject) -> PyResult { + let (id, type_id): (u64, TypeId) = { + let v = v.as_ref(py); + let keys = self.keys.as_ref(py); + let id: u64 = if let Some(key) = keys.get_item(v) { + key.extract()? 
+ } else { + let id = self.id_generator.fetch_add(1, atomic::Ordering::Relaxed); + keys.set_item(v, id)?; + id + }; + (id, v.get_type().into()) + }; - Ok(Key::new(id, type_id, v.into())) - } + Ok(Key::new(id, type_id, v.into())) + } } diff --git a/src/rust/engine/src/intrinsics.rs b/src/rust/engine/src/intrinsics.rs index d22e2ca57a5..f1bcac79b2e 100644 --- a/src/rust/engine/src/intrinsics.rs +++ b/src/rust/engine/src/intrinsics.rs @@ -12,8 +12,8 @@ use std::time::Duration; use crate::context::Context; use crate::externs::fs::{PyAddPrefix, PyFileDigest, PyMergeDigests, PyRemovePrefix}; use crate::nodes::{ - lift_directory_digest, task_side_effected, unmatched_globs_additional_context, DownloadedFile, - ExecuteProcess, NodeResult, RunId, SessionValues, Snapshot, + lift_directory_digest, task_side_effected, unmatched_globs_additional_context, DownloadedFile, + ExecuteProcess, NodeResult, RunId, SessionValues, Snapshot, }; use crate::python::{throw, Key, Value}; use crate::tasks::Intrinsic; @@ -22,7 +22,7 @@ use crate::Failure; use crate::{externs, Core}; use dep_inference::{javascript, python}; use protos::gen::pants::cache::{ - dependency_inference_request, CacheKey, CacheKeyType, DependencyInferenceRequest, + dependency_inference_request, CacheKey, CacheKeyType, DependencyInferenceRequest, }; use bytes::Bytes; @@ -37,12 +37,12 @@ use tokio::process; use docker::docker::{ImagePullPolicy, ImagePullScope, DOCKER, IMAGE_PULL_CACHE}; use fs::{ - DigestTrie, DirectoryDigest, Entry, GlobMatching, PathStat, RelativePath, SymlinkBehavior, - TypedPath, + DigestTrie, DirectoryDigest, Entry, GlobMatching, PathStat, RelativePath, SymlinkBehavior, + TypedPath, }; use hashing::{Digest, EMPTY_DIGEST}; use process_execution::local::{ - apply_chroot, create_sandbox, prepare_workdir, setup_run_sh_script, KeepSandboxes, + apply_chroot, create_sandbox, prepare_workdir, setup_run_sh_script, KeepSandboxes, }; use process_execution::{ManagedChild, Platform, ProcessExecutionStrategy}; use rule_graph::{DependencyKey, RuleId}; @@ -55,571 +55,576 @@ use workunit_store::{in_workunit, Level}; use grpc_util::prost::MessageExt; type IntrinsicFn = - Box) -> BoxFuture<'static, NodeResult> + Send + Sync>; + Box) -> BoxFuture<'static, NodeResult> + Send + Sync>; pub struct Intrinsics { - intrinsics: IndexMap, + intrinsics: IndexMap, } impl Intrinsics { - pub fn new(types: &Types) -> Intrinsics { - let mut intrinsics: IndexMap = IndexMap::new(); - intrinsics.insert( - Intrinsic::new( - "create_digest_to_digest", - types.directory_digest, - types.create_digest, - ), - Box::new(create_digest_to_digest), - ); - intrinsics.insert( - Intrinsic::new( - "path_globs_to_digest", - types.directory_digest, - types.path_globs, - ), - Box::new(path_globs_to_digest), - ); - intrinsics.insert( - Intrinsic::new("path_globs_to_paths", types.paths, types.path_globs), - Box::new(path_globs_to_paths), - ); - intrinsics.insert( - Intrinsic::new( - "download_file_to_digest", - types.directory_digest, - types.native_download_file, - ), - Box::new(download_file_to_digest), - ); - intrinsics.insert( - Intrinsic::new("digest_to_snapshot", types.snapshot, types.directory_digest), - Box::new(digest_to_snapshot), - ); - intrinsics.insert( - Intrinsic::new( - "directory_digest_to_digest_contents", - types.digest_contents, - types.directory_digest, - ), - Box::new(directory_digest_to_digest_contents), - ); - intrinsics.insert( - Intrinsic::new( - "directory_digest_to_digest_entries", - types.digest_entries, - types.directory_digest, - ), - 
Box::new(directory_digest_to_digest_entries), - ); - intrinsics.insert( - Intrinsic::new( - "merge_digests_request_to_digest", - types.directory_digest, - types.merge_digests, - ), - Box::new(merge_digests_request_to_digest), - ); - intrinsics.insert( - Intrinsic::new( - "remove_prefix_request_to_digest", - types.directory_digest, - types.remove_prefix, - ), - Box::new(remove_prefix_request_to_digest), - ); - intrinsics.insert( - Intrinsic::new( - "add_prefix_request_to_digest", - types.directory_digest, - types.add_prefix, - ), - Box::new(add_prefix_request_to_digest), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("process_request_to_process_result"), - product: types.process_result, - inputs: vec![ - DependencyKey::new(types.process), - DependencyKey::new(types.process_config_from_environment), - ], - }, - Box::new(process_request_to_process_result), - ); - intrinsics.insert( - Intrinsic::new( - "digest_subset_to_digest", - types.directory_digest, - types.digest_subset, - ), - Box::new(digest_subset_to_digest), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("session_values"), - product: types.session_values, - inputs: vec![], - }, - Box::new(session_values), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("run_id"), - product: types.run_id, - inputs: vec![], - }, - Box::new(run_id), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("interactive_process"), - product: types.interactive_process_result, - inputs: vec![ - DependencyKey::new(types.interactive_process), - DependencyKey::new(types.process_config_from_environment), - ], - }, - Box::new(interactive_process), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("docker_resolve_image"), - product: types.docker_resolve_image_result, - inputs: vec![DependencyKey::new(types.docker_resolve_image_request)], - }, - Box::new(docker_resolve_image), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("parse_python_deps"), - product: types.parsed_python_deps_result, - inputs: vec![DependencyKey::new(types.deps_request)], - }, - Box::new(parse_python_deps), - ); - intrinsics.insert( - Intrinsic { - id: RuleId::new("parse_javascript_deps"), - product: types.parsed_javascript_deps_result, - inputs: vec![DependencyKey::new(types.deps_request)], - }, - Box::new(parse_javascript_deps), - ); - Intrinsics { intrinsics } - } - - pub fn keys(&self) -> impl Iterator { - self.intrinsics.keys() - } - - pub async fn run( - &self, - intrinsic: &Intrinsic, - context: Context, - args: Vec, - ) -> NodeResult { - let function = self - .intrinsics - .get(intrinsic) - .unwrap_or_else(|| panic!("Unrecognized intrinsic: {intrinsic:?}")); - function(context, args).await - } + pub fn new(types: &Types) -> Intrinsics { + let mut intrinsics: IndexMap = IndexMap::new(); + intrinsics.insert( + Intrinsic::new( + "create_digest_to_digest", + types.directory_digest, + types.create_digest, + ), + Box::new(create_digest_to_digest), + ); + intrinsics.insert( + Intrinsic::new( + "path_globs_to_digest", + types.directory_digest, + types.path_globs, + ), + Box::new(path_globs_to_digest), + ); + intrinsics.insert( + Intrinsic::new("path_globs_to_paths", types.paths, types.path_globs), + Box::new(path_globs_to_paths), + ); + intrinsics.insert( + Intrinsic::new( + "download_file_to_digest", + types.directory_digest, + types.native_download_file, + ), + Box::new(download_file_to_digest), + ); + intrinsics.insert( + Intrinsic::new("digest_to_snapshot", types.snapshot, types.directory_digest), + Box::new(digest_to_snapshot), + ); + 
intrinsics.insert( + Intrinsic::new( + "directory_digest_to_digest_contents", + types.digest_contents, + types.directory_digest, + ), + Box::new(directory_digest_to_digest_contents), + ); + intrinsics.insert( + Intrinsic::new( + "directory_digest_to_digest_entries", + types.digest_entries, + types.directory_digest, + ), + Box::new(directory_digest_to_digest_entries), + ); + intrinsics.insert( + Intrinsic::new( + "merge_digests_request_to_digest", + types.directory_digest, + types.merge_digests, + ), + Box::new(merge_digests_request_to_digest), + ); + intrinsics.insert( + Intrinsic::new( + "remove_prefix_request_to_digest", + types.directory_digest, + types.remove_prefix, + ), + Box::new(remove_prefix_request_to_digest), + ); + intrinsics.insert( + Intrinsic::new( + "add_prefix_request_to_digest", + types.directory_digest, + types.add_prefix, + ), + Box::new(add_prefix_request_to_digest), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("process_request_to_process_result"), + product: types.process_result, + inputs: vec![ + DependencyKey::new(types.process), + DependencyKey::new(types.process_config_from_environment), + ], + }, + Box::new(process_request_to_process_result), + ); + intrinsics.insert( + Intrinsic::new( + "digest_subset_to_digest", + types.directory_digest, + types.digest_subset, + ), + Box::new(digest_subset_to_digest), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("session_values"), + product: types.session_values, + inputs: vec![], + }, + Box::new(session_values), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("run_id"), + product: types.run_id, + inputs: vec![], + }, + Box::new(run_id), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("interactive_process"), + product: types.interactive_process_result, + inputs: vec![ + DependencyKey::new(types.interactive_process), + DependencyKey::new(types.process_config_from_environment), + ], + }, + Box::new(interactive_process), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("docker_resolve_image"), + product: types.docker_resolve_image_result, + inputs: vec![DependencyKey::new(types.docker_resolve_image_request)], + }, + Box::new(docker_resolve_image), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("parse_python_deps"), + product: types.parsed_python_deps_result, + inputs: vec![DependencyKey::new(types.deps_request)], + }, + Box::new(parse_python_deps), + ); + intrinsics.insert( + Intrinsic { + id: RuleId::new("parse_javascript_deps"), + product: types.parsed_javascript_deps_result, + inputs: vec![DependencyKey::new(types.deps_request)], + }, + Box::new(parse_javascript_deps), + ); + Intrinsics { intrinsics } + } + + pub fn keys(&self) -> impl Iterator { + self.intrinsics.keys() + } + + pub async fn run( + &self, + intrinsic: &Intrinsic, + context: Context, + args: Vec, + ) -> NodeResult { + let function = self + .intrinsics + .get(intrinsic) + .unwrap_or_else(|| panic!("Unrecognized intrinsic: {intrinsic:?}")); + function(context, args).await + } } fn process_request_to_process_result( - context: Context, - mut args: Vec, + context: Context, + mut args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let process_config: externs::process::PyProcessExecutionEnvironment = Python::with_gil(|py| { - args - .pop() - .unwrap() - .as_ref() - .extract(py) - .map_err(|e| format!("{e}")) - })?; - let process_request = - ExecuteProcess::lift(&context.core.store(), args.pop().unwrap(), process_config) - .map_err(|e| e.enrich("Error lifting Process")) - .await?; - - let 
result = context.get(process_request).await?.result; - - let store = context.core.store(); - let (stdout_bytes, stderr_bytes) = try_join!( - store - .load_file_bytes_with(result.stdout_digest, |bytes: &[u8]| bytes.to_owned()) - .map_err(|e| e.enrich("Bytes from stdout")), - store - .load_file_bytes_with(result.stderr_digest, |bytes: &[u8]| bytes.to_owned()) - .map_err(|e| e.enrich("Bytes from stderr")) - )?; - - Python::with_gil(|py| -> NodeResult { - Ok(externs::unsafe_call( - py, - context.core.types.process_result, - &[ - externs::store_bytes(py, &stdout_bytes), - Snapshot::store_file_digest(py, result.stdout_digest)?, - externs::store_bytes(py, &stderr_bytes), - Snapshot::store_file_digest(py, result.stderr_digest)?, - externs::store_i64(py, result.exit_code.into()), - Snapshot::store_directory_digest(py, result.output_directory)?, - externs::unsafe_call( - py, - context.core.types.process_result_metadata, - &[ - result - .metadata - .total_elapsed - .map(|d| externs::store_u64(py, Duration::from(d).as_millis() as u64)) - .unwrap_or_else(|| Value::from(py.None())), - Value::from( - externs::process::PyProcessExecutionEnvironment { - environment: result.metadata.environment, - } - .into_py(py), - ), - externs::store_utf8(py, result.metadata.source.into()), - externs::store_u64(py, result.metadata.source_run_id.0.into()), - ], - ), - ], - )) - }) - } - .boxed() + async move { + let process_config: externs::process::PyProcessExecutionEnvironment = + Python::with_gil(|py| { + args.pop() + .unwrap() + .as_ref() + .extract(py) + .map_err(|e| format!("{e}")) + })?; + let process_request = + ExecuteProcess::lift(&context.core.store(), args.pop().unwrap(), process_config) + .map_err(|e| e.enrich("Error lifting Process")) + .await?; + + let result = context.get(process_request).await?.result; + + let store = context.core.store(); + let (stdout_bytes, stderr_bytes) = try_join!( + store + .load_file_bytes_with(result.stdout_digest, |bytes: &[u8]| bytes.to_owned()) + .map_err(|e| e.enrich("Bytes from stdout")), + store + .load_file_bytes_with(result.stderr_digest, |bytes: &[u8]| bytes.to_owned()) + .map_err(|e| e.enrich("Bytes from stderr")) + )?; + + Python::with_gil(|py| -> NodeResult { + Ok(externs::unsafe_call( + py, + context.core.types.process_result, + &[ + externs::store_bytes(py, &stdout_bytes), + Snapshot::store_file_digest(py, result.stdout_digest)?, + externs::store_bytes(py, &stderr_bytes), + Snapshot::store_file_digest(py, result.stderr_digest)?, + externs::store_i64(py, result.exit_code.into()), + Snapshot::store_directory_digest(py, result.output_directory)?, + externs::unsafe_call( + py, + context.core.types.process_result_metadata, + &[ + result + .metadata + .total_elapsed + .map(|d| { + externs::store_u64(py, Duration::from(d).as_millis() as u64) + }) + .unwrap_or_else(|| Value::from(py.None())), + Value::from( + externs::process::PyProcessExecutionEnvironment { + environment: result.metadata.environment, + } + .into_py(py), + ), + externs::store_utf8(py, result.metadata.source.into()), + externs::store_u64(py, result.metadata.source_run_id.0.into()), + ], + ), + ], + )) + }) + } + .boxed() } fn directory_digest_to_digest_contents( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let digest = Python::with_gil(|py| { - let py_digest = (*args[0]).as_ref(py); - lift_directory_digest(py_digest) - })?; - - let digest_contents = context.core.store().contents_for_directory(digest).await?; - - 
Ok(Python::with_gil(|py| { - Snapshot::store_digest_contents(py, &context, &digest_contents) - })?) - } - .boxed() + async move { + let digest = Python::with_gil(|py| { + let py_digest = (*args[0]).as_ref(py); + lift_directory_digest(py_digest) + })?; + + let digest_contents = context.core.store().contents_for_directory(digest).await?; + + Ok(Python::with_gil(|py| { + Snapshot::store_digest_contents(py, &context, &digest_contents) + })?) + } + .boxed() } fn directory_digest_to_digest_entries( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let digest = Python::with_gil(|py| { - let py_digest = (*args[0]).as_ref(py); - lift_directory_digest(py_digest) - })?; - let digest_entries = context.core.store().entries_for_directory(digest).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_digest_entries(py, &context, &digest_entries) - })?) - } - .boxed() + async move { + let digest = Python::with_gil(|py| { + let py_digest = (*args[0]).as_ref(py); + lift_directory_digest(py_digest) + })?; + let digest_entries = context.core.store().entries_for_directory(digest).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_digest_entries(py, &context, &digest_entries) + })?) + } + .boxed() } fn remove_prefix_request_to_digest( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let (digest, prefix) = Python::with_gil(|py| { - let py_remove_prefix = (*args[0]) - .as_ref(py) - .extract::>() - .map_err(|e| throw(format!("{e}")))?; - let prefix = RelativePath::new(&py_remove_prefix.prefix) - .map_err(|e| throw(format!("The `prefix` must be relative: {e}")))?; - let res: NodeResult<_> = Ok((py_remove_prefix.digest.clone(), prefix)); - res - })?; - let digest = context.core.store().strip_prefix(digest, &prefix).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, digest) - })?) - } - .boxed() + async move { + let (digest, prefix) = Python::with_gil(|py| { + let py_remove_prefix = (*args[0]) + .as_ref(py) + .extract::>() + .map_err(|e| throw(format!("{e}")))?; + let prefix = RelativePath::new(&py_remove_prefix.prefix) + .map_err(|e| throw(format!("The `prefix` must be relative: {e}")))?; + let res: NodeResult<_> = Ok((py_remove_prefix.digest.clone(), prefix)); + res + })?; + let digest = context.core.store().strip_prefix(digest, &prefix).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, digest) + })?) + } + .boxed() } fn add_prefix_request_to_digest( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let (digest, prefix) = Python::with_gil(|py| { - let py_add_prefix = (*args[0]) - .as_ref(py) - .extract::>() - .map_err(|e| throw(format!("{e}")))?; - let prefix = RelativePath::new(&py_add_prefix.prefix) - .map_err(|e| throw(format!("The `prefix` must be relative: {e}")))?; - let res: NodeResult<(DirectoryDigest, RelativePath)> = - Ok((py_add_prefix.digest.clone(), prefix)); - res - })?; - let digest = context.core.store().add_prefix(digest, &prefix).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, digest) - })?) 
- } - .boxed() + async move { + let (digest, prefix) = Python::with_gil(|py| { + let py_add_prefix = (*args[0]) + .as_ref(py) + .extract::>() + .map_err(|e| throw(format!("{e}")))?; + let prefix = RelativePath::new(&py_add_prefix.prefix) + .map_err(|e| throw(format!("The `prefix` must be relative: {e}")))?; + let res: NodeResult<(DirectoryDigest, RelativePath)> = + Ok((py_add_prefix.digest.clone(), prefix)); + res + })?; + let digest = context.core.store().add_prefix(digest, &prefix).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, digest) + })?) + } + .boxed() } fn digest_to_snapshot(context: Context, args: Vec) -> BoxFuture<'static, NodeResult> { - let store = context.core.store(); - async move { - let digest = Python::with_gil(|py| { - let py_digest = (*args[0]).as_ref(py); - lift_directory_digest(py_digest) - })?; - let snapshot = store::Snapshot::from_digest(store, digest).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_snapshot(py, snapshot) - })?) - } - .boxed() + let store = context.core.store(); + async move { + let digest = Python::with_gil(|py| { + let py_digest = (*args[0]).as_ref(py); + lift_directory_digest(py_digest) + })?; + let snapshot = store::Snapshot::from_digest(store, digest).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_snapshot(py, snapshot) + })?) + } + .boxed() } fn merge_digests_request_to_digest( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - let core = &context.core; - let store = core.store(); - async move { - let digests = Python::with_gil(|py| { - (*args[0]) - .as_ref(py) - .extract::>() - .map(|py_merge_digests| py_merge_digests.0.clone()) - .map_err(|e| throw(format!("{e}"))) - })?; - let digest = store.merge(digests).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, digest) - })?) - } - .boxed() + let core = &context.core; + let store = core.store(); + async move { + let digests = Python::with_gil(|py| { + (*args[0]) + .as_ref(py) + .extract::>() + .map(|py_merge_digests| py_merge_digests.0.clone()) + .map_err(|e| throw(format!("{e}"))) + })?; + let digest = store.merge(digests).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, digest) + })?) + } + .boxed() } fn download_file_to_digest( - context: Context, - mut args: Vec, + context: Context, + mut args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let key = Key::from_value(args.pop().unwrap()).map_err(Failure::from)?; - let snapshot = context.get(DownloadedFile(key)).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, snapshot.into()) - })?) - } - .boxed() + async move { + let key = Key::from_value(args.pop().unwrap()).map_err(Failure::from)?; + let snapshot = context.get(DownloadedFile(key)).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, snapshot.into()) + })?) + } + .boxed() } fn path_globs_to_digest( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let path_globs = Python::with_gil(|py| { - let py_path_globs = (*args[0]).as_ref(py); - Snapshot::lift_path_globs(py_path_globs) - }) - .map_err(|e| throw(format!("Failed to parse PathGlobs: {e}")))?; - let snapshot = context.get(Snapshot::from_path_globs(path_globs)).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, snapshot.into()) - })?) 
- } - .boxed() + async move { + let path_globs = Python::with_gil(|py| { + let py_path_globs = (*args[0]).as_ref(py); + Snapshot::lift_path_globs(py_path_globs) + }) + .map_err(|e| throw(format!("Failed to parse PathGlobs: {e}")))?; + let snapshot = context.get(Snapshot::from_path_globs(path_globs)).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, snapshot.into()) + })?) + } + .boxed() } fn path_globs_to_paths( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - let core = context.core.clone(); - async move { - let path_globs = Python::with_gil(|py| { - let py_path_globs = (*args[0]).as_ref(py); - Snapshot::lift_path_globs(py_path_globs) - }) - .map_err(|e| throw(format!("Failed to parse PathGlobs: {e}")))?; - - let path_globs = path_globs.parse().map_err(throw)?; - let path_stats = context - .expand_globs( - path_globs, - SymlinkBehavior::Oblivious, - unmatched_globs_additional_context(), - ) - .await?; - - Python::with_gil(|py| { - let mut files = Vec::new(); - let mut dirs = Vec::new(); - for ps in path_stats.iter() { - match ps { - PathStat::File { path, .. } => { - files.push(Snapshot::store_path(py, path)?); - } - PathStat::Link { path, .. } => { - panic!("Paths shouldn't be symlink-aware {path:?}"); - } - PathStat::Dir { path, .. } => { - dirs.push(Snapshot::store_path(py, path)?); - } - } - } - Ok(externs::unsafe_call( - py, - core.types.paths, - &[ - externs::store_tuple(py, files), - externs::store_tuple(py, dirs), - ], - )) - }) - } - .boxed() + let core = context.core.clone(); + async move { + let path_globs = Python::with_gil(|py| { + let py_path_globs = (*args[0]).as_ref(py); + Snapshot::lift_path_globs(py_path_globs) + }) + .map_err(|e| throw(format!("Failed to parse PathGlobs: {e}")))?; + + let path_globs = path_globs.parse().map_err(throw)?; + let path_stats = context + .expand_globs( + path_globs, + SymlinkBehavior::Oblivious, + unmatched_globs_additional_context(), + ) + .await?; + + Python::with_gil(|py| { + let mut files = Vec::new(); + let mut dirs = Vec::new(); + for ps in path_stats.iter() { + match ps { + PathStat::File { path, .. } => { + files.push(Snapshot::store_path(py, path)?); + } + PathStat::Link { path, .. } => { + panic!("Paths shouldn't be symlink-aware {path:?}"); + } + PathStat::Dir { path, .. 
} => { + dirs.push(Snapshot::store_path(py, path)?); + } + } + } + Ok(externs::unsafe_call( + py, + core.types.paths, + &[ + externs::store_tuple(py, files), + externs::store_tuple(py, dirs), + ], + )) + }) + } + .boxed() } enum CreateDigestItem { - FileContent(RelativePath, bytes::Bytes, bool), - FileEntry(RelativePath, Digest, bool), - SymlinkEntry(RelativePath, PathBuf), - Dir(RelativePath), + FileContent(RelativePath, bytes::Bytes, bool), + FileEntry(RelativePath, Digest, bool), + SymlinkEntry(RelativePath, PathBuf), + Dir(RelativePath), } fn create_digest_to_digest( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - let mut new_file_count = 0; - - let items: Vec = { - Python::with_gil(|py| { - let py_create_digest = (*args[0]).as_ref(py); - externs::collect_iterable(py_create_digest) - .unwrap() - .into_iter() - .map(|obj| { - let raw_path: String = externs::getattr(obj, "path").unwrap(); - let path = RelativePath::new(PathBuf::from(raw_path)).unwrap(); - if obj.hasattr("content").unwrap() { - let bytes = bytes::Bytes::from(externs::getattr::>(obj, "content").unwrap()); - let is_executable: bool = externs::getattr(obj, "is_executable").unwrap(); - new_file_count += 1; - CreateDigestItem::FileContent(path, bytes, is_executable) - } else if obj.hasattr("file_digest").unwrap() { - let py_file_digest: PyFileDigest = externs::getattr(obj, "file_digest").unwrap(); - let is_executable: bool = externs::getattr(obj, "is_executable").unwrap(); - CreateDigestItem::FileEntry(path, py_file_digest.0, is_executable) - } else if obj.hasattr("target").unwrap() { - let target: String = externs::getattr(obj, "target").unwrap(); - CreateDigestItem::SymlinkEntry(path, PathBuf::from(target)) - } else { - CreateDigestItem::Dir(path) - } + let mut new_file_count = 0; + + let items: Vec = { + Python::with_gil(|py| { + let py_create_digest = (*args[0]).as_ref(py); + externs::collect_iterable(py_create_digest) + .unwrap() + .into_iter() + .map(|obj| { + let raw_path: String = externs::getattr(obj, "path").unwrap(); + let path = RelativePath::new(PathBuf::from(raw_path)).unwrap(); + if obj.hasattr("content").unwrap() { + let bytes = bytes::Bytes::from( + externs::getattr::>(obj, "content").unwrap(), + ); + let is_executable: bool = externs::getattr(obj, "is_executable").unwrap(); + new_file_count += 1; + CreateDigestItem::FileContent(path, bytes, is_executable) + } else if obj.hasattr("file_digest").unwrap() { + let py_file_digest: PyFileDigest = + externs::getattr(obj, "file_digest").unwrap(); + let is_executable: bool = externs::getattr(obj, "is_executable").unwrap(); + CreateDigestItem::FileEntry(path, py_file_digest.0, is_executable) + } else if obj.hasattr("target").unwrap() { + let target: String = externs::getattr(obj, "target").unwrap(); + CreateDigestItem::SymlinkEntry(path, PathBuf::from(target)) + } else { + CreateDigestItem::Dir(path) + } + }) + .collect() }) - .collect() - }) - }; - - let mut typed_paths: Vec = Vec::with_capacity(items.len()); - let mut file_digests: HashMap = HashMap::with_capacity(items.len()); - let mut items_to_store = Vec::with_capacity(new_file_count); - - for item in &items { - match item { - CreateDigestItem::FileContent(path, bytes, is_executable) => { - let digest = Digest::of_bytes(bytes); - items_to_store.push((digest.hash, bytes.clone())); - typed_paths.push(TypedPath::File { - path, - is_executable: *is_executable, - }); - file_digests.insert(path.to_path_buf(), digest); - } - CreateDigestItem::FileEntry(path, 
digest, is_executable) => { - typed_paths.push(TypedPath::File { - path, - is_executable: *is_executable, - }); - file_digests.insert(path.to_path_buf(), *digest); - } - CreateDigestItem::SymlinkEntry(path, target) => { - typed_paths.push(TypedPath::Link { path, target }); - file_digests.insert(path.to_path_buf(), EMPTY_DIGEST); - } - CreateDigestItem::Dir(path) => { - typed_paths.push(TypedPath::Dir(path)); - file_digests.insert(path.to_path_buf(), EMPTY_DIGEST); - } + }; + + let mut typed_paths: Vec = Vec::with_capacity(items.len()); + let mut file_digests: HashMap = HashMap::with_capacity(items.len()); + let mut items_to_store = Vec::with_capacity(new_file_count); + + for item in &items { + match item { + CreateDigestItem::FileContent(path, bytes, is_executable) => { + let digest = Digest::of_bytes(bytes); + items_to_store.push((digest.hash, bytes.clone())); + typed_paths.push(TypedPath::File { + path, + is_executable: *is_executable, + }); + file_digests.insert(path.to_path_buf(), digest); + } + CreateDigestItem::FileEntry(path, digest, is_executable) => { + typed_paths.push(TypedPath::File { + path, + is_executable: *is_executable, + }); + file_digests.insert(path.to_path_buf(), *digest); + } + CreateDigestItem::SymlinkEntry(path, target) => { + typed_paths.push(TypedPath::Link { path, target }); + file_digests.insert(path.to_path_buf(), EMPTY_DIGEST); + } + CreateDigestItem::Dir(path) => { + typed_paths.push(TypedPath::Dir(path)); + file_digests.insert(path.to_path_buf(), EMPTY_DIGEST); + } + } } - } - - let store = context.core.store(); - let trie = DigestTrie::from_unique_paths(typed_paths, &file_digests).unwrap(); - async move { - store.store_file_bytes_batch(items_to_store, true).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, trie.into()) - })?) - } - .boxed() + + let store = context.core.store(); + let trie = DigestTrie::from_unique_paths(typed_paths, &file_digests).unwrap(); + async move { + store.store_file_bytes_batch(items_to_store, true).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, trie.into()) + })?) + } + .boxed() } fn digest_subset_to_digest( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - let store = context.core.store(); - async move { - let (path_globs, original_digest) = Python::with_gil(|py| { - let py_digest_subset = (*args[0]).as_ref(py); - let py_path_globs = externs::getattr(py_digest_subset, "globs").unwrap(); - let py_digest = externs::getattr(py_digest_subset, "digest").unwrap(); - let res: NodeResult<_> = Ok(( - Snapshot::lift_prepared_path_globs(py_path_globs)?, - lift_directory_digest(py_digest)?, - )); - res - })?; - let subset_params = SubsetParams { globs: path_globs }; - let digest = store.subset(original_digest, subset_params).await?; - Ok(Python::with_gil(|py| { - Snapshot::store_directory_digest(py, digest) - })?) 
- } - .boxed() + let store = context.core.store(); + async move { + let (path_globs, original_digest) = Python::with_gil(|py| { + let py_digest_subset = (*args[0]).as_ref(py); + let py_path_globs = externs::getattr(py_digest_subset, "globs").unwrap(); + let py_digest = externs::getattr(py_digest_subset, "digest").unwrap(); + let res: NodeResult<_> = Ok(( + Snapshot::lift_prepared_path_globs(py_path_globs)?, + lift_directory_digest(py_digest)?, + )); + res + })?; + let subset_params = SubsetParams { globs: path_globs }; + let digest = store.subset(original_digest, subset_params).await?; + Ok(Python::with_gil(|py| { + Snapshot::store_directory_digest(py, digest) + })?) + } + .boxed() } fn session_values(context: Context, _args: Vec) -> BoxFuture<'static, NodeResult> { - async move { context.get(SessionValues).await }.boxed() + async move { context.get(SessionValues).await }.boxed() } fn run_id(context: Context, _args: Vec) -> BoxFuture<'static, NodeResult> { - async move { context.get(RunId).await }.boxed() + async move { context.get(RunId).await }.boxed() } fn interactive_process( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - in_workunit!( + in_workunit!( "interactive_process", Level::Debug, |_workunit| async move { @@ -789,277 +794,277 @@ fn interactive_process( } fn docker_resolve_image( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let types = &context.core.types; - let docker_resolve_image_result = types.docker_resolve_image_result; - - let (image_name, platform) = Python::with_gil(|py| { - let py_docker_request = (*args[0]).as_ref(py); - let image_name: String = externs::getattr(py_docker_request, "image_name").unwrap(); - let platform: String = externs::getattr(py_docker_request, "platform").unwrap(); - (image_name, platform) - }); - - let platform = Platform::try_from(platform)?; - - let docker = DOCKER.get().await?; - let image_pull_scope = ImagePullScope::new(context.session.build_id()); - - // Ensure that the image has been pulled. - IMAGE_PULL_CACHE - .pull_image( - docker, - &context.core.executor, - &image_name, - &platform, - image_pull_scope, - ImagePullPolicy::OnlyIfLatestOrMissing, - ) - .await - .map_err(|err| format!("Failed to pull image `{image_name}`: {err}"))?; + async move { + let types = &context.core.types; + let docker_resolve_image_result = types.docker_resolve_image_result; + + let (image_name, platform) = Python::with_gil(|py| { + let py_docker_request = (*args[0]).as_ref(py); + let image_name: String = externs::getattr(py_docker_request, "image_name").unwrap(); + let platform: String = externs::getattr(py_docker_request, "platform").unwrap(); + (image_name, platform) + }); - let image_metadata = docker.inspect_image(&image_name).await.map_err(|err| { - format!( - "Failed to resolve image ID for image `{}`: {:?}", - &image_name, err - ) - })?; - let image_id = image_metadata - .id - .ok_or_else(|| format!("Image does not exist: `{}`", &image_name))?; - - Ok(Python::with_gil(|py| { - externs::unsafe_call( - py, - docker_resolve_image_result, - &[Value::from(PyString::new(py, &image_id).to_object(py))], - ) - })) - } - .boxed() + let platform = Platform::try_from(platform)?; + + let docker = DOCKER.get().await?; + let image_pull_scope = ImagePullScope::new(context.session.build_id()); + + // Ensure that the image has been pulled. 
+ IMAGE_PULL_CACHE + .pull_image( + docker, + &context.core.executor, + &image_name, + &platform, + image_pull_scope, + ImagePullPolicy::OnlyIfLatestOrMissing, + ) + .await + .map_err(|err| format!("Failed to pull image `{image_name}`: {err}"))?; + + let image_metadata = docker.inspect_image(&image_name).await.map_err(|err| { + format!( + "Failed to resolve image ID for image `{}`: {:?}", + &image_name, err + ) + })?; + let image_id = image_metadata + .id + .ok_or_else(|| format!("Image does not exist: `{}`", &image_name))?; + + Ok(Python::with_gil(|py| { + externs::unsafe_call( + py, + docker_resolve_image_result, + &[Value::from(PyString::new(py, &image_id).to_object(py))], + ) + })) + } + .boxed() } struct PreparedInferenceRequest { - digest: Digest, - /// The request that's guaranteed to have been constructed via ::prepare(). - /// - /// NB. this `inner` value is used as the cache key, so anything that can influence the dep - /// inference should (also) be inside it, not just a key on the outer struct - inner: DependencyInferenceRequest, + digest: Digest, + /// The request that's guaranteed to have been constructed via ::prepare(). + /// + /// NB. this `inner` value is used as the cache key, so anything that can influence the dep + /// inference should (also) be inside it, not just a key on the outer struct + inner: DependencyInferenceRequest, } impl PreparedInferenceRequest { - pub async fn prepare( - args: Vec, - store: &Store, - backend: &str, - impl_hash: &str, - ) -> NodeResult { - let PyNativeDependenciesRequest { - directory_digest, - metadata, - } = Python::with_gil(|py| (*args[0]).as_ref(py).extract())?; - - let (path, digest) = Self::find_one_file(directory_digest, store, backend).await?; - let str_path = path.display().to_string(); - - Ok(Self { - digest, - inner: DependencyInferenceRequest { - input_file_path: str_path, - input_file_digest: Some(digest.into()), - metadata, - impl_hash: impl_hash.to_string(), - }, - }) - } - - pub async fn read_digest(&self, store: &Store) -> NodeResult { - let bytes = store - .load_file_bytes_with(self.digest, |bytes| Vec::from(bytes)) - .await?; + pub async fn prepare( + args: Vec, + store: &Store, + backend: &str, + impl_hash: &str, + ) -> NodeResult { + let PyNativeDependenciesRequest { + directory_digest, + metadata, + } = Python::with_gil(|py| (*args[0]).as_ref(py).extract())?; + + let (path, digest) = Self::find_one_file(directory_digest, store, backend).await?; + let str_path = path.display().to_string(); + + Ok(Self { + digest, + inner: DependencyInferenceRequest { + input_file_path: str_path, + input_file_digest: Some(digest.into()), + metadata, + impl_hash: impl_hash.to_string(), + }, + }) + } - Ok( - String::from_utf8(bytes) - .map_err(|err| format!("Failed to convert digest bytes to utf-8: {err}"))?, - ) - } + pub async fn read_digest(&self, store: &Store) -> NodeResult { + let bytes = store + .load_file_bytes_with(self.digest, |bytes| Vec::from(bytes)) + .await?; - async fn find_one_file( - directory_digest: DirectoryDigest, - store: &Store, - backend: &str, - ) -> NodeResult<(PathBuf, Digest)> { - let mut path = None; - let mut digest = None; - store - .load_digest_trie(directory_digest.clone()) - .await? - .walk(SymlinkBehavior::Oblivious, &mut |node_path, entry| { - if let Entry::File(file) = entry { - path = Some(node_path.to_owned()); - digest = Some(file.digest()); + Ok(String::from_utf8(bytes) + .map_err(|err| format!("Failed to convert digest bytes to utf-8: {err}"))?) 
+ } + + async fn find_one_file( + directory_digest: DirectoryDigest, + store: &Store, + backend: &str, + ) -> NodeResult<(PathBuf, Digest)> { + let mut path = None; + let mut digest = None; + store + .load_digest_trie(directory_digest.clone()) + .await? + .walk(SymlinkBehavior::Oblivious, &mut |node_path, entry| { + if let Entry::File(file) = entry { + path = Some(node_path.to_owned()); + digest = Some(file.digest()); + } + }); + if digest.is_none() || path.is_none() { + Err(format!( + "Couldn't find a file in digest for {backend} inference: {directory_digest:?}" + ))? } - }); - if digest.is_none() || path.is_none() { - Err(format!( - "Couldn't find a file in digest for {backend} inference: {directory_digest:?}" - ))? + let path = path.unwrap(); + let digest = digest.unwrap(); + Ok((path, digest)) } - let path = path.unwrap(); - let digest = digest.unwrap(); - Ok((path, digest)) - } - - fn cache_key(&self) -> CacheKey { - CacheKey { - key_type: CacheKeyType::DepInferenceRequest.into(), - digest: Some(Digest::of_bytes(&self.inner.to_bytes()).into()), + + fn cache_key(&self) -> CacheKey { + CacheKey { + key_type: CacheKeyType::DepInferenceRequest.into(), + digest: Some(Digest::of_bytes(&self.inner.to_bytes()).into()), + } } - } } fn parse_python_deps(context: Context, args: Vec) -> BoxFuture<'static, NodeResult> { - async move { - let core = &context.core; - let store = core.store(); - let prepared_inference_request = - PreparedInferenceRequest::prepare(args, &store, "Python", python::IMPL_HASH).await?; - in_workunit!( - "parse_python_dependencies", - Level::Debug, - desc = Some(format!( - "Determine Python dependencies for {:?}", - &prepared_inference_request.inner.input_file_path - )), - |_workunit| async move { - let result: ParsedPythonDependencies = get_or_create_inferred_dependencies( - core, - &store, - prepared_inference_request, - |content, request| { - python::get_dependencies(content, request.inner.input_file_path.into()) - }, + async move { + let core = &context.core; + let store = core.store(); + let prepared_inference_request = + PreparedInferenceRequest::prepare(args, &store, "Python", python::IMPL_HASH).await?; + in_workunit!( + "parse_python_dependencies", + Level::Debug, + desc = Some(format!( + "Determine Python dependencies for {:?}", + &prepared_inference_request.inner.input_file_path + )), + |_workunit| async move { + let result: ParsedPythonDependencies = get_or_create_inferred_dependencies( + core, + &store, + prepared_inference_request, + |content, request| { + python::get_dependencies(content, request.inner.input_file_path.into()) + }, + ) + .await?; + + let result = Python::with_gil(|py| { + externs::unsafe_call( + py, + core.types.parsed_python_deps_result, + &[ + result.imports.to_object(py).into(), + result.string_candidates.to_object(py).into(), + ], + ) + }); + + Ok(result) + } ) - .await?; - - let result = Python::with_gil(|py| { - externs::unsafe_call( - py, - core.types.parsed_python_deps_result, - &[ - result.imports.to_object(py).into(), - result.string_candidates.to_object(py).into(), - ], - ) - }); - - Ok(result) - } - ) - .await - } - .boxed() + .await + } + .boxed() } fn parse_javascript_deps( - context: Context, - args: Vec, + context: Context, + args: Vec, ) -> BoxFuture<'static, NodeResult> { - async move { - let core = &context.core; - let store = core.store(); - let prepared_inference_request = - PreparedInferenceRequest::prepare(args, &store, "Javascript", javascript::IMPL_HASH).await?; - - in_workunit!( - "parse_javascript_dependencies", - 
Level::Debug, - desc = Some(format!( - "Determine Javascript dependencies for {:?}", - prepared_inference_request.inner.input_file_path - )), - |_workunit| async move { - let result: ParsedJavascriptDependencies = get_or_create_inferred_dependencies( - core, - &store, - prepared_inference_request, - |content, request| { - if let Some(dependency_inference_request::Metadata::Js(metadata)) = - request.inner.metadata - { - javascript::get_dependencies(content, request.inner.input_file_path.into(), metadata) - } else { - Err(format!( - "{:?} is not valid metadata for Javascript dependency inference", - request.inner.metadata - )) + async move { + let core = &context.core; + let store = core.store(); + let prepared_inference_request = + PreparedInferenceRequest::prepare(args, &store, "Javascript", javascript::IMPL_HASH) + .await?; + + in_workunit!( + "parse_javascript_dependencies", + Level::Debug, + desc = Some(format!( + "Determine Javascript dependencies for {:?}", + prepared_inference_request.inner.input_file_path + )), + |_workunit| async move { + let result: ParsedJavascriptDependencies = get_or_create_inferred_dependencies( + core, + &store, + prepared_inference_request, + |content, request| { + if let Some(dependency_inference_request::Metadata::Js(metadata)) = + request.inner.metadata + { + javascript::get_dependencies( + content, + request.inner.input_file_path.into(), + metadata, + ) + } else { + Err(format!( + "{:?} is not valid metadata for Javascript dependency inference", + request.inner.metadata + )) + } + }, + ) + .await?; + + let result = Python::with_gil(|py| { + externs::unsafe_call( + py, + core.types.parsed_javascript_deps_result, + &[ + result.file_imports.to_object(py).into(), + result.package_imports.to_object(py).into(), + ], + ) + }); + + Ok(result) } - }, ) - .await?; - - let result = Python::with_gil(|py| { - externs::unsafe_call( - py, - core.types.parsed_javascript_deps_result, - &[ - result.file_imports.to_object(py).into(), - result.package_imports.to_object(py).into(), - ], - ) - }); - - Ok(result) - } - ) - .await - } - .boxed() + .await + } + .boxed() } async fn get_or_create_inferred_dependencies( - core: &Arc, - store: &Store, - request: PreparedInferenceRequest, - dependencies_parser: F, + core: &Arc, + store: &Store, + request: PreparedInferenceRequest, + dependencies_parser: F, ) -> NodeResult where - T: serde::de::DeserializeOwned + serde::Serialize, - F: Fn(&str, PreparedInferenceRequest) -> Result, + T: serde::de::DeserializeOwned + serde::Serialize, + F: Fn(&str, PreparedInferenceRequest) -> Result, { - let cache_key = request.cache_key(); - let result = if let Some(result) = lookup_inferred_dependencies(&cache_key, core).await? { - result - } else { - let contents = request.read_digest(store).await?; - let result = dependencies_parser(&contents, request)?; - core - .local_cache - .store( - &cache_key, - Bytes::from( - serde_json::to_string(&result) - .map_err(|e| format!("Failed to serialize dep inference cache result: {e}"))?, - ), - ) - .await?; - result - }; - Ok(result) + let cache_key = request.cache_key(); + let result = + if let Some(result) = lookup_inferred_dependencies(&cache_key, core).await? 
{
+            result
+        } else {
+            let contents = request.read_digest(store).await?;
+            let result = dependencies_parser(&contents, request)?;
+            core.local_cache
+                .store(
+                    &cache_key,
+                    Bytes::from(serde_json::to_string(&result).map_err(|e| {
+                        format!("Failed to serialize dep inference cache result: {e}")
+                    })?),
+                )
+                .await?;
+            result
+        };
+    Ok(result)
 }
 async fn lookup_inferred_dependencies<T: serde::de::DeserializeOwned>(
-  key: &CacheKey,
-  core: &Arc<Core>,
+    key: &CacheKey,
+    core: &Arc<Core>,
 ) -> NodeResult<Option<T>> {
-  let cached_result = core.local_cache.load(key).await?;
-  Ok(
-    cached_result
-      .and_then(|bytes| serde_json::from_slice(&bytes).ok())
-      .flatten(),
-  )
+    let cached_result = core.local_cache.load(key).await?;
+    Ok(cached_result
+        .and_then(|bytes| serde_json::from_slice(&bytes).ok())
+        .flatten())
 }
diff --git a/src/rust/engine/src/lib.rs b/src/rust/engine/src/lib.rs
index d1d3aae006a..bb7185d810d 100644
--- a/src/rust/engine/src/lib.rs
+++ b/src/rust/engine/src/lib.rs
@@ -4,21 +4,21 @@
 #![deny(warnings)]
 // Enable all clippy lints except for many of the pedantic ones. It's a shame this needs to be copied and pasted across crates, but there doesn't appear to be a way to include inner attributes from a common source.
 #![deny(
-  clippy::all,
-  clippy::default_trait_access,
-  clippy::expl_impl_clone_on_copy,
-  clippy::if_not_else,
-  clippy::needless_continue,
-  clippy::unseparated_literal_suffix,
-  clippy::used_underscore_binding
+    clippy::all,
+    clippy::default_trait_access,
+    clippy::expl_impl_clone_on_copy,
+    clippy::if_not_else,
+    clippy::needless_continue,
+    clippy::unseparated_literal_suffix,
+    clippy::used_underscore_binding
 )]
 // It is often more clear to show that nothing is being moved.
 #![allow(clippy::match_ref_pats)]
 // Subjective style.
 #![allow(
-  clippy::len_without_is_empty,
-  clippy::redundant_field_names,
-  clippy::too_many_arguments
+    clippy::len_without_is_empty,
+    clippy::redundant_field_names,
+    clippy::too_many_arguments
 )]
 // Default isn't as big a deal as people seem to think it is.
#![allow(clippy::new_without_default, clippy::new_ret_no_self)] @@ -44,7 +44,7 @@ mod tasks; mod types; pub use crate::context::{ - Context, Core, ExecutionStrategyOptions, LocalStoreOptions, RemotingOptions, SessionCore, + Context, Core, ExecutionStrategyOptions, LocalStoreOptions, RemotingOptions, SessionCore, }; pub use crate::intrinsics::Intrinsics; pub use crate::python::{Failure, Function, Key, Params, TypeId, Value}; diff --git a/src/rust/engine/src/nodes.rs b/src/rust/engine/src/nodes.rs index 252b354a440..603774332a0 100644 --- a/src/rust/engine/src/nodes.rs +++ b/src/rust/engine/src/nodes.rs @@ -27,13 +27,13 @@ use crate::externs; use crate::python::{display_sorted_in_parens, throw, Failure, Key, Params, TypeId, Value}; use crate::tasks::{self, Rule}; use fs::{ - self, DigestEntry, Dir, DirectoryDigest, DirectoryListing, File, FileContent, FileEntry, - GlobExpansionConjunction, GlobMatching, Link, PathGlobs, PreparedPathGlobs, RelativePath, - StrictGlobMatching, SymlinkBehavior, SymlinkEntry, Vfs, + self, DigestEntry, Dir, DirectoryDigest, DirectoryListing, File, FileContent, FileEntry, + GlobExpansionConjunction, GlobMatching, Link, PathGlobs, PreparedPathGlobs, RelativePath, + StrictGlobMatching, SymlinkBehavior, SymlinkEntry, Vfs, }; use process_execution::{ - self, CacheName, InputDigests, Process, ProcessCacheScope, ProcessExecutionStrategy, - ProcessResultSource, + self, CacheName, InputDigests, Process, ProcessCacheScope, ProcessExecutionStrategy, + ProcessResultSource, }; use crate::externs::engine_aware::{EngineAwareParameter, EngineAwareReturnType}; @@ -44,8 +44,8 @@ use hashing::Digest; use rule_graph::{DependencyKey, Query}; use store::{self, Store, StoreError, StoreFileByDigest}; use workunit_store::{ - in_workunit, Level, Metric, ObservationMetric, RunningWorkunit, UserMetadataItem, - WorkunitMetadata, + in_workunit, Level, Metric, ObservationMetric, RunningWorkunit, UserMetadataItem, + WorkunitMetadata, }; tokio::task_local! { @@ -53,58 +53,58 @@ tokio::task_local! { } pub fn task_side_effected() -> Result<(), String> { - TASK_SIDE_EFFECTED - .try_with(|task_side_effected| { - task_side_effected.store(true, Ordering::SeqCst); - }) - .map_err(|_| { - "Side-effects are not allowed in this context: SideEffecting types must be \ + TASK_SIDE_EFFECTED + .try_with(|task_side_effected| { + task_side_effected.store(true, Ordering::SeqCst); + }) + .map_err(|_| { + "Side-effects are not allowed in this context: SideEffecting types must be \ acquired via parameters to `@rule`s." 
- .to_owned() - }) + .to_owned() + }) } pub async fn maybe_side_effecting>( - is_side_effecting: bool, - side_effected: &Arc, - f: F, + is_side_effecting: bool, + side_effected: &Arc, + f: F, ) -> T { - if is_side_effecting { - TASK_SIDE_EFFECTED.scope(side_effected.clone(), f).await - } else { - f.await - } + if is_side_effecting { + TASK_SIDE_EFFECTED.scope(side_effected.clone(), f).await + } else { + f.await + } } pub type NodeResult = Result; #[async_trait] impl Vfs for Context { - async fn read_link(&self, link: &Link) -> Result { - Ok(self.get(ReadLink(link.clone())).await?.0) - } + async fn read_link(&self, link: &Link) -> Result { + Ok(self.get(ReadLink(link.clone())).await?.0) + } - async fn scandir(&self, dir: Dir) -> Result, Failure> { - self.get(Scandir(dir)).await - } + async fn scandir(&self, dir: Dir) -> Result, Failure> { + self.get(Scandir(dir)).await + } - fn is_ignored(&self, stat: &fs::Stat) -> bool { - self.core.vfs.is_ignored(stat) - } + fn is_ignored(&self, stat: &fs::Stat) -> bool { + self.core.vfs.is_ignored(stat) + } - fn mk_error(msg: &str) -> Failure { - throw(msg.to_owned()) - } + fn mk_error(msg: &str) -> Failure { + throw(msg.to_owned()) + } } impl StoreFileByDigest for Context { - fn store_by_digest( - &self, - file: File, - ) -> future::BoxFuture<'static, Result> { - let context = self.clone(); - async move { context.get(DigestFile(file)).await }.boxed() - } + fn store_by_digest( + &self, + file: File, + ) -> future::BoxFuture<'static, Result> { + let context = self.clone(); + async move { context.get(DigestFile(file)).await }.boxed() + } } /// @@ -119,439 +119,447 @@ impl StoreFileByDigest for Context { /// #[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] pub struct Select { - pub params: Params, - pub product: TypeId, - entry: Intern>, + pub params: Params, + pub product: TypeId, + entry: Intern>, } impl Select { - pub fn new( - mut params: Params, - product: TypeId, - entry: Intern>, - ) -> Select { - params.retain(|k| match entry.as_ref() { - rule_graph::Entry::Param(type_id) => type_id == k.type_id(), - rule_graph::Entry::WithDeps(with_deps) => with_deps.params().contains(k.type_id()), - }); - Select { - params, - product, - entry, - } - } - - pub fn new_from_edges( - params: Params, - dependency_key: &DependencyKey, - edges: &rule_graph::RuleEdges, - ) -> Select { - let entry = edges - .entry_for(dependency_key) - .unwrap_or_else(|| panic!("{edges:?} did not declare a dependency on {dependency_key:?}")); - Select::new(params, dependency_key.product(), entry) - } - - fn reenter<'a>( - &self, - context: Context, - query: &'a Query, - ) -> BoxFuture<'a, NodeResult> { - let edges = context - .core - .rule_graph - .find_root(query.params.iter().cloned(), query.product) - .map(|(_, edges)| edges); - - let params = self.params.clone(); - async move { - let edges = edges?; - Select::new_from_edges(params, &DependencyKey::new(query.product), &edges) - .run_node(context) - .await + pub fn new( + mut params: Params, + product: TypeId, + entry: Intern>, + ) -> Select { + params.retain(|k| match entry.as_ref() { + rule_graph::Entry::Param(type_id) => type_id == k.type_id(), + rule_graph::Entry::WithDeps(with_deps) => with_deps.params().contains(k.type_id()), + }); + Select { + params, + product, + entry, + } } - .boxed() - } - - fn select_product<'a>( - &self, - context: Context, - dependency_key: &'a DependencyKey, - caller_description: &str, - ) -> BoxFuture<'a, NodeResult> { - let edges = context - .core - .rule_graph - .edges_for_inner(&self.entry) - 
.ok_or_else(|| { - throw(format!( - "Tried to request {dependency_key} for {caller_description} but found no edges" - )) - }); - let params = self.params.clone(); - async move { - let edges = edges?; - Select::new_from_edges(params, dependency_key, &edges) - .run_node(context) - .await + + pub fn new_from_edges( + params: Params, + dependency_key: &DependencyKey, + edges: &rule_graph::RuleEdges, + ) -> Select { + let entry = edges.entry_for(dependency_key).unwrap_or_else(|| { + panic!("{edges:?} did not declare a dependency on {dependency_key:?}") + }); + Select::new(params, dependency_key.product(), entry) } - .boxed() - } - - async fn run_node(self, context: Context) -> NodeResult { - match self.entry.as_ref() { - &rule_graph::Entry::WithDeps(wd) => match wd.as_ref() { - rule_graph::EntryWithDeps::Rule(ref rule) => match rule.rule() { - tasks::Rule::Task(task) => { - context - .get(Task { - params: self.params.clone(), - task: *task, - entry: self.entry, - side_effected: Arc::new(AtomicBool::new(false)), - }) - .await - } - Rule::Intrinsic(intrinsic) => { - let values = future::try_join_all( - intrinsic - .inputs - .iter() - .map(|dependency_key| { - self.select_product(context.clone(), dependency_key, "intrinsic") - }) - .collect::>(), - ) - .await?; - context - .core - .intrinsics - .run(intrinsic, context.clone(), values) - .await - } - }, - rule_graph::EntryWithDeps::Reentry(reentry) => { - // TODO: Actually using the `RuleEdges` of this entry to compute inputs is not - // implemented: doing so would involve doing something similar to what we do for - // intrinsics above, and waiting to compute inputs before executing the query here. - // - // That doesn't block using a singleton to provide an API type, but it would block a more - // complex use case. - // - // see https://github.com/pantsbuild/pants/issues/16751 - self.reenter(context, &reentry.query).await + + fn reenter<'a>( + &self, + context: Context, + query: &'a Query, + ) -> BoxFuture<'a, NodeResult> { + let edges = context + .core + .rule_graph + .find_root(query.params.iter().cloned(), query.product) + .map(|(_, edges)| edges); + + let params = self.params.clone(); + async move { + let edges = edges?; + Select::new_from_edges(params, &DependencyKey::new(query.product), &edges) + .run_node(context) + .await } - &rule_graph::EntryWithDeps::Root(_) => { - panic!("Not a runtime-executable entry! 
{:?}", self.entry) + .boxed() + } + + fn select_product<'a>( + &self, + context: Context, + dependency_key: &'a DependencyKey, + caller_description: &str, + ) -> BoxFuture<'a, NodeResult> { + let edges = context + .core + .rule_graph + .edges_for_inner(&self.entry) + .ok_or_else(|| { + throw(format!( + "Tried to request {dependency_key} for {caller_description} but found no edges" + )) + }); + let params = self.params.clone(); + async move { + let edges = edges?; + Select::new_from_edges(params, dependency_key, &edges) + .run_node(context) + .await } - }, - &rule_graph::Entry::Param(type_id) => { - if let Some(key) = self.params.find(type_id) { - Ok(key.to_value()) - } else { - Err(throw(format!( - "Expected a Param of type {} to be present, but had only: {}", - type_id, self.params, - ))) + .boxed() + } + + async fn run_node(self, context: Context) -> NodeResult { + match self.entry.as_ref() { + &rule_graph::Entry::WithDeps(wd) => match wd.as_ref() { + rule_graph::EntryWithDeps::Rule(ref rule) => match rule.rule() { + tasks::Rule::Task(task) => { + context + .get(Task { + params: self.params.clone(), + task: *task, + entry: self.entry, + side_effected: Arc::new(AtomicBool::new(false)), + }) + .await + } + Rule::Intrinsic(intrinsic) => { + let values = future::try_join_all( + intrinsic + .inputs + .iter() + .map(|dependency_key| { + self.select_product( + context.clone(), + dependency_key, + "intrinsic", + ) + }) + .collect::>(), + ) + .await?; + context + .core + .intrinsics + .run(intrinsic, context.clone(), values) + .await + } + }, + rule_graph::EntryWithDeps::Reentry(reentry) => { + // TODO: Actually using the `RuleEdges` of this entry to compute inputs is not + // implemented: doing so would involve doing something similar to what we do for + // intrinsics above, and waiting to compute inputs before executing the query here. + // + // That doesn't block using a singleton to provide an API type, but it would block a more + // complex use case. + // + // see https://github.com/pantsbuild/pants/issues/16751 + self.reenter(context, &reentry.query).await + } + &rule_graph::EntryWithDeps::Root(_) => { + panic!("Not a runtime-executable entry! {:?}", self.entry) + } + }, + &rule_graph::Entry::Param(type_id) => { + if let Some(key) = self.params.find(type_id) { + Ok(key.to_value()) + } else { + Err(throw(format!( + "Expected a Param of type {} to be present, but had only: {}", + type_id, self.params, + ))) + } + } } - } } - } } impl From), - Snapshot(Snapshot), - SessionValues(SessionValues), - RunId(RunId), - Task(Box), + DigestFile(DigestFile), + DownloadedFile(DownloadedFile), + ExecuteProcess(Box), + ReadLink(ReadLink), + Scandir(Scandir), + Select(Box