From c405efef538f36a4c5b49517fd411224bf515268 Mon Sep 17 00:00:00 2001 From: Chris Tolliday Date: Fri, 24 Jan 2025 16:26:35 -0800 Subject: [PATCH] Exclude source location from category key if non-generic tags are present Summary: To exclude source location from well tagged category keys so we can track them more easily across refactors. Reviewed By: Will-MingLun-Li Differential Revision: D68574898 fbshipit-source-id: ab17ad27fb3a3fd6df56e7bcc6eb15d61cfc7a58 --- app/buck2_error/BUCK | 1 - app/buck2_error/Cargo.toml | 1 - app/buck2_error/src/classify.rs | 35 +++++++++++++ app/buck2_error/src/error.rs | 50 ++++++++++++------- tests/core/build/test_error_categorization.py | 12 ++--- 5 files changed, 69 insertions(+), 30 deletions(-) diff --git a/app/buck2_error/BUCK b/app/buck2_error/BUCK index 957bc7821111f..8dd44c51dbef4 100644 --- a/app/buck2_error/BUCK +++ b/app/buck2_error/BUCK @@ -32,7 +32,6 @@ rust_library( "fbsource//third-party/rust:hex", "fbsource//third-party/rust:http", "fbsource//third-party/rust:hyper", - "fbsource//third-party/rust:itertools", "fbsource//third-party/rust:prost", "fbsource//third-party/rust:prost-types", "fbsource//third-party/rust:ref-cast", diff --git a/app/buck2_error/Cargo.toml b/app/buck2_error/Cargo.toml index c93ff117ef7d6..a11c8d50b038f 100644 --- a/app/buck2_error/Cargo.toml +++ b/app/buck2_error/Cargo.toml @@ -17,7 +17,6 @@ fancy-regex = { workspace = true } hex = { workspace = true } http = { workspace = true } hyper = { workspace = true } -itertools = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } ref-cast = { workspace = true } diff --git a/app/buck2_error/src/classify.rs b/app/buck2_error/src/classify.rs index 8bde7b72ce323..9c92fcca7e8ba 100644 --- a/app/buck2_error/src/classify.rs +++ b/app/buck2_error/src/classify.rs @@ -183,6 +183,41 @@ pub(crate) fn category_and_rank(tag: ErrorTag) -> (Option, u32) { } } +/// Errors can be categorized by tags only if they have any non-generic tags. +pub fn tag_is_generic(tag: &ErrorTag) -> bool { + if tag_is_hidden(tag) { + return true; + } + match tag { + ErrorTag::MaterializationError => true, + ErrorTag::UnusedDefaultTag => true, + ErrorTag::UnexpectedNone => true, + ErrorTag::Http => true, + ErrorTag::IoBrokenPipe => true, + ErrorTag::IoSystem => true, + ErrorTag::IoSource => true, + ErrorTag::IoNotFound => true, + ErrorTag::IoPermissionDenied => true, + ErrorTag::IoTimeout => true, + ErrorTag::IoEden => true, + ErrorTag::StarlarkError => true, + ErrorTag::Analysis => true, + ErrorTag::AnyActionExecution => true, + ErrorTag::ClientGrpc => true, + _ => false, + } +} + +/// Hidden tags only used internally, for categorization. +pub fn tag_is_hidden(tag: &ErrorTag) -> bool { + match tag { + ErrorTag::Tier0 => true, + ErrorTag::Input => true, + ErrorTag::Environment => true, + _ => false, + } +} + pub trait ErrorLike { fn best_tag(&self) -> Option; diff --git a/app/buck2_error/src/error.rs b/app/buck2_error/src/error.rs index 8eee581de2525..feaa59a7ad735 100644 --- a/app/buck2_error/src/error.rs +++ b/app/buck2_error/src/error.rs @@ -11,11 +11,12 @@ use std::fmt; use std::sync::Arc; use buck2_data::ActionError; -use itertools::Itertools; use smallvec::SmallVec; use crate::classify::best_tag; use crate::classify::error_tag_category; +use crate::classify::tag_is_generic; +use crate::classify::tag_is_hidden; use crate::context_value::ContextValue; use crate::context_value::StarlarkContext; use crate::context_value::TypedContext; @@ -146,32 +147,43 @@ impl Error { } /// Stable identifier for grouping errors. + /// + /// This tries to include the least information possible that can be used to uniquely identify an error type. pub fn category_key(&self) -> String { - let tags = self - .tags() - .into_iter() - .filter(|tag| match tag { - ErrorTag::Input | ErrorTag::Environment | ErrorTag::Tier0 => false, - _ => true, - }) - .map(|tag| tag.as_str_name()); + let tags = self.tags(); - let key_values = self.iter_context().filter_map(|kind| match kind { - ContextValue::Key(val) => Some(val.to_string()), - _ => None, - }); + let non_generic_tags: Vec = tags + .clone() + .into_iter() + .filter(|tag| !tag_is_generic(tag)) + .collect(); let source_location = if let Some(type_name) = self.source_location().type_name() { - type_name + // If type name available, include it and exclude source location. + Some(type_name.to_owned()) + } else if !non_generic_tags.is_empty() { + // Exclude source location if there are non-generic tags. + None } else { - &self.source_location().to_string() + // Only include source location if there are no non-generic tags. + Some(self.source_location().to_string()) }; - let mut values = vec![source_location] + let key_tags = tags + .into_iter() + .filter(|tag| !tag_is_hidden(tag)) + .map(|tag| tag.as_str_name().to_owned()); + + let context_key_values = self.iter_context().filter_map(|kind| match kind { + ContextValue::Key(val) => Some(val.to_string()), + _ => None, + }); + + let values: Vec = source_location .into_iter() - .chain(tags) - .map(|s| s.to_owned()) - .chain(key_values); + .chain(key_tags) + .chain(context_key_values) + .collect(); values.join(":").to_owned() } diff --git a/tests/core/build/test_error_categorization.py b/tests/core/build/test_error_categorization.py index 87c8511aeda8b..ca4ae6a8223f1 100644 --- a/tests/core/build/test_error_categorization.py +++ b/tests/core/build/test_error_categorization.py @@ -223,9 +223,7 @@ async def test_daemon_crash(buck: Buck, tmp_path: Path) -> None: assert invocation_record["best_error_tag"] == "SERVER_PANICKED" category_key = invocation_record["best_error_category_key"] - assert "buck2_client_ctx/src/daemon/client.rs" in category_key - assert "CLIENT_GRPC" in category_key - assert "SERVER_PANICKED" in category_key + assert category_key.startswith("CLIENT_GRPC:SERVER_PANICKED") # TODO dump stack trace on windows if not is_running_on_windows(): @@ -271,17 +269,13 @@ async def test_daemon_abort(buck: Buck, tmp_path: Path) -> None: if is_running_on_windows(): # TODO get windows to dump a stack trace assert "buckd stderr is empty" in error["message"] - assert "buck2_client_ctx/src/daemon/client.rs" in category_key - assert "CLIENT_GRPC" in category_key - assert "SERVER_STDERR_EMPTY" in category_key + assert category_key == "CLIENT_GRPC:SERVER_STDERR_EMPTY" assert invocation_record["best_error_tag"] == "SERVER_STDERR_EMPTY" else: # Messages from folly's signal handler. assert "*** Aborted at" in error["message"] assert "*** Signal 6 (SIGABRT)" in error["message"] - assert "buck2_client_ctx/src/daemon/client.rs" in category_key - assert "CLIENT_GRPC" in category_key - assert "SERVER_STDERR_UNKNOWN" in category_key + assert category_key.startswith("CLIENT_GRPC:SERVER_STDERR_UNKNOWN") assert invocation_record["best_error_tag"] == "SERVER_STDERR_UNKNOWN" # TODO dump stack trace on mac and windows