From d37b9b548a1b91d5400568c125e4808a381f445f Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Mon, 22 May 2023 15:50:51 +0200 Subject: [PATCH 01/22] Add Response data to Contexts --- .../src/protocol/contexts/response.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index 00126897c8b..9de270f881a 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -2,6 +2,9 @@ use crate::protocol::{Cookies, Headers}; use crate::types::{Annotated, Object, Value}; /// Response interface that contains information on a HTTP response related to the event. +/// +/// The data variable should only contain the response body. It can either be +/// a dictionary (for standard HTTP responses) or a raw response body. #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] #[cfg_attr(feature = "jsonschema", derive(JsonSchema))] pub struct ResponseContext { @@ -26,6 +29,13 @@ pub struct ResponseContext { /// HTTP response body size. pub body_size: Annotated, + /// Response data in any format that makes sense. + /// + /// SDKs should discard large and binary bodies by default. Can be given as string or + /// structural data of any format. + #[metastructure(pii = "true", bag_size = "large")] + pub data: Annotated, + /// Additional arbitrary fields for forwards compatibility. /// These fields are retained (`retain = "true"`) to keep supporting the format that the Dio integration sends: /// @@ -75,6 +85,9 @@ mod tests { ], "status_code": 500, "body_size": 1000, + "data": { + "some": 1 + }, "arbitrary_field": "arbitrary", "type": "response" }"#; @@ -90,6 +103,11 @@ mod tests { headers: Annotated::new(Headers(PairList(headers))), status_code: Annotated::new(500), body_size: Annotated::new(1000), + data: { + let mut map = Object::new(); + map.insert("some".to_string(), Annotated::new(Value::I64(1))); + Annotated::new(Value::Object(map)) + }, other: { let mut map = Object::new(); map.insert( From e3c79ee8655de4f9ac02c5154468686b0467915a Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Tue, 23 May 2023 15:34:32 +0200 Subject: [PATCH 02/22] add comments and fix fixture --- relay-general/src/pii/processor.rs | 3 +++ relay-general/src/store/normalize/contexts.rs | 3 +++ .../tests/snapshots/test_fixtures__event_schema.snap | 7 ++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index c4ef7bce70c..e2616e259f0 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -195,6 +195,9 @@ impl<'a> Processor for PiiProcessor<'a> { replay.process_child_values(self, state)?; Ok(()) } + + // TODO: data scrubbing for graph response + // process_graphql_response } fn apply_rule_to_value( diff --git a/relay-general/src/store/normalize/contexts.rs b/relay-general/src/store/normalize/contexts.rs index 0852ccb5a2a..39160e921a8 100644 --- a/relay-general/src/store/normalize/contexts.rs +++ b/relay-general/src/store/normalize/contexts.rs @@ -197,6 +197,9 @@ fn normalize_response(response: &mut ResponseContext) { response.cookies = Annotated::from(new_cookies); headers.remove("Set-Cookie"); } + + // TODO: normalize_data for response.data + // https://github.com/getsentry/relay/blob/master/relay-general/src/store/normalize/request.rs } pub fn normalize_context(context: &mut Context) { diff --git a/relay-general/tests/snapshots/test_fixtures__event_schema.snap b/relay-general/tests/snapshots/test_fixtures__event_schema.snap index ce3956b27dd..9e3a949cf20 100644 --- a/relay-general/tests/snapshots/test_fixtures__event_schema.snap +++ b/relay-general/tests/snapshots/test_fixtures__event_schema.snap @@ -1,5 +1,6 @@ --- source: relay-general/tests/test_fixtures.rs +assertion_line: 109 expression: "relay_general::protocol::event_json_schema()" --- { @@ -2979,7 +2980,7 @@ expression: "relay_general::protocol::event_json_schema()" ] }, "ResponseContext": { - "description": " Response interface that contains information on a HTTP response related to the event.", + "description": " Response interface that contains information on a HTTP response related to the event.\n\n The data variable should only contain the response body. It can either be\n a dictionary (for standard HTTP responses) or a raw response body.", "anyOf": [ { "type": "object", @@ -3006,6 +3007,10 @@ expression: "relay_general::protocol::event_json_schema()" } ] }, + "data": { + "description": " Response data in any format that makes sense.\n\n SDKs should discard large and binary bodies by default. Can be given as string or\n structural data of any format.", + "default": null + }, "headers": { "description": " A dictionary of submitted headers.\n\n If a header appears multiple times it, needs to be merged according to the HTTP standard\n for header merging. Header names are treated case-insensitively by Sentry.", "default": null, From f5ed3db8414a1d58867686bd1be9976f5c5df3e0 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Tue, 23 May 2023 17:40:11 +0200 Subject: [PATCH 03/22] add data tests --- .../src/protocol/contexts/response.rs | 6 + relay-general/src/store/normalize/contexts.rs | 200 +++++++++++++++++- .../test_fixtures__event_schema.snap | 8 + 3 files changed, 209 insertions(+), 5 deletions(-) diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index 9de270f881a..fddba81ad0a 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -36,6 +36,10 @@ pub struct ResponseContext { #[metastructure(pii = "true", bag_size = "large")] pub data: Annotated, + /// The inferred content type of the response payload. + #[metastructure(skip_serialization = "empty")] + pub inferred_content_type: Annotated, + /// Additional arbitrary fields for forwards compatibility. /// These fields are retained (`retain = "true"`) to keep supporting the format that the Dio integration sends: /// @@ -88,6 +92,7 @@ mod tests { "data": { "some": 1 }, + "inferred_content_type": "application/json", "arbitrary_field": "arbitrary", "type": "response" }"#; @@ -108,6 +113,7 @@ mod tests { map.insert("some".to_string(), Annotated::new(Value::I64(1))); Annotated::new(Value::Object(map)) }, + inferred_content_type: Annotated::new("application/json".to_string()), other: { let mut map = Object::new(); map.insert( diff --git a/relay-general/src/store/normalize/contexts.rs b/relay-general/src/store/normalize/contexts.rs index 39160e921a8..5dfa6285c83 100644 --- a/relay-general/src/store/normalize/contexts.rs +++ b/relay-general/src/store/normalize/contexts.rs @@ -2,7 +2,7 @@ use once_cell::sync::Lazy; use regex::Regex; use crate::protocol::{Context, OsContext, ResponseContext, RuntimeContext, ANDROID_MODEL_NAMES}; -use crate::types::{Annotated, Empty}; +use crate::types::{Annotated, Empty, Value}; /// Environment.OSVersion (GetVersionEx) or RuntimeInformation.OSDescription on Windows static OS_WINDOWS_REGEX1: Lazy = Lazy::new(|| { @@ -177,7 +177,82 @@ fn normalize_os_context(os: &mut OsContext) { } } +// TODO: extract urlencoded_from_str, parse_raw_response_data and normalize_response_data +// can probably be reused in other places (original from normalize/request) + +/// Decodes an urlencoded body. +fn urlencoded_from_str(raw: &str) -> Option { + // Binary strings would be decoded, but we know url-encoded bodies are ASCII. + if !raw.is_ascii() { + return None; + } + + // Avoid false positives with XML and partial JSON. + if raw.starts_with(" value, + _ => return None, + }; + + // `serde_urlencoded` can decode any string with valid characters into an object. However, we + // need to account for false-positives in the following cases: + // - An empty string "" is decoded as empty object + // - A string "foo" is decoded as {"foo": ""} (check for single empty value) + // - A base64 encoded string "dGU=" also decodes with a single empty value + // - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =) + let is_valid = object.len() > 1 + || object + .values() + .next() + .and_then(Annotated::::as_str) + .map_or(false, |s| !matches!(s, "" | "=")); + + if is_valid { + Some(Value::Object(object)) + } else { + None + } +} + +fn parse_raw_response_data(response: &mut ResponseContext) -> Option<(&'static str, Value)> { + let raw = response.data.as_str()?; + + // TODO: Try to decode base64 first + + if let Ok(value) = serde_json::from_str(raw) { + Some(("application/json", value)) + } else { + urlencoded_from_str(raw).map(|value| ("application/x-www-form-urlencoded", value)) + } +} + +fn normalize_response_data(response: &mut ResponseContext) { + // Always derive the `inferred_content_type` from the response body, even if there is a + // `Content-Type` header present. This value can technically be ingested (due to the schema) but + // should always be overwritten in normalization. Only if inference fails, fall back to the + // content type header. + if let Some((content_type, parsed_data)) = parse_raw_response_data(response) { + // Retain meta data on the body (e.g. trimming annotations) but remove anything on the + // inferred content type. + response.data.set_value(Some(parsed_data)); + response.inferred_content_type = Annotated::from(content_type.to_string()); + } else { + response.inferred_content_type = response + .headers + .value() + .and_then(|headers| headers.get_header("Content-Type")) + .map(|value| value.split(';').next().unwrap_or(value).to_string()) + .into(); + } +} + fn normalize_response(response: &mut ResponseContext) { + normalize_response_data(response); + let headers = match response.headers.value_mut() { Some(headers) => headers, None => return, @@ -197,9 +272,6 @@ fn normalize_response(response: &mut ResponseContext) { response.cookies = Annotated::from(new_cookies); headers.remove("Set-Cookie"); } - - // TODO: normalize_data for response.data - // https://github.com/getsentry/relay/blob/master/relay-general/src/store/normalize/request.rs } pub fn normalize_context(context: &mut Context) { @@ -226,7 +298,7 @@ mod tests { use similar_asserts::assert_eq; use super::*; - use crate::protocol::LenientString; + use crate::protocol::{Headers, LenientString, PairList}; #[test] fn test_get_product_name() { @@ -609,4 +681,122 @@ mod tests { assert_eq!(Some("15.0"), os.kernel_version.as_str()); assert_eq!(None, os.build.value()); } + + #[test] + fn test_infer_json() { + let mut response = ResponseContext { + data: Annotated::from(Value::String(r#"{"foo":"bar"}"#.to_string())), + ..ResponseContext::default() + }; + + let mut expected_value = Object::new(); + expected_value.insert( + "foo".to_string(), + Annotated::from(Value::String("bar".into())), + ); + + normalize_response(&mut response); + assert_eq!( + response.inferred_content_type.as_str(), + Some("application/json") + ); + assert_eq!(response.data.value(), Some(&Value::Object(expected_value))); + } + + #[test] + fn test_broken_json_with_fallback() { + let mut response = ResponseContext { + data: Annotated::from(Value::String(r#"{"foo":"b"#.to_string())), + headers: Annotated::from(Headers(PairList(vec![Annotated::new(( + Annotated::new("Content-Type".to_string().into()), + Annotated::new("text/plain; encoding=utf-8".to_string().into()), + ))]))), + ..ResponseContext::default() + }; + + normalize_response(&mut response); + assert_eq!(response.inferred_content_type.as_str(), Some("text/plain")); + assert_eq!(response.data.as_str(), Some(r#"{"foo":"b"#)); + } + + #[test] + fn test_broken_json_without_fallback() { + let mut response = ResponseContext { + data: Annotated::from(Value::String(r#"{"foo":"b"#.to_string())), + ..ResponseContext::default() + }; + + normalize_response(&mut response); + assert_eq!(response.inferred_content_type.value(), None); + assert_eq!(response.data.as_str(), Some(r#"{"foo":"b"#)); + } + + #[test] + fn test_infer_url_encoded() { + let mut response = ResponseContext { + data: Annotated::from(Value::String(r#"foo=bar"#.to_string())), + ..ResponseContext::default() + }; + + let mut expected_value = Object::new(); + expected_value.insert( + "foo".to_string(), + Annotated::from(Value::String("bar".into())), + ); + + normalize_response(&mut response); + assert_eq!( + response.inferred_content_type.as_str(), + Some("application/x-www-form-urlencoded") + ); + assert_eq!(response.data.value(), Some(&Value::Object(expected_value))); + } + + #[test] + fn test_infer_url_false_positive() { + let mut response = ResponseContext { + data: Annotated::from(Value::String("dGU=".to_string())), + ..ResponseContext::default() + }; + + normalize_response(&mut response); + assert_eq!(response.inferred_content_type.value(), None); + assert_eq!(response.data.as_str(), Some("dGU=")); + } + + #[test] + fn test_infer_url_encoded_base64() { + let mut response = ResponseContext { + data: Annotated::from(Value::String("dA==".to_string())), + ..ResponseContext::default() + }; + + normalize_response(&mut response); + assert_eq!(response.inferred_content_type.value(), None); + assert_eq!(response.data.as_str(), Some("dA==")); + } + + #[test] + fn test_infer_xml() { + let mut response = ResponseContext { + data: Annotated::from(Value::String("".to_string())), + ..ResponseContext::default() + }; + + normalize_response(&mut response); + assert_eq!(response.inferred_content_type.value(), None); + assert_eq!(response.data.as_str(), Some("")); + } + + #[test] + fn test_infer_binary() { + let mut response = ResponseContext { + data: Annotated::from(Value::String("\u{001f}1\u{0000}\u{0000}".to_string())), + ..ResponseContext::default() + }; + + normalize_response(&mut response); + assert_eq!(response.inferred_content_type.value(), None); + assert_eq!(response.data.as_str(), Some("\u{001f}1\u{0000}\u{0000}")); + } } diff --git a/relay-general/tests/snapshots/test_fixtures__event_schema.snap b/relay-general/tests/snapshots/test_fixtures__event_schema.snap index 9e3a949cf20..2d169f1e5f0 100644 --- a/relay-general/tests/snapshots/test_fixtures__event_schema.snap +++ b/relay-general/tests/snapshots/test_fixtures__event_schema.snap @@ -3023,6 +3023,14 @@ expression: "relay_general::protocol::event_json_schema()" } ] }, + "inferred_content_type": { + "description": " The inferred content type of the response payload.", + "default": null, + "type": [ + "string", + "null" + ] + }, "status_code": { "description": " HTTP status code.", "default": null, From c355585ba8d4ea47a979f315158ffb16378cd50a Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Tue, 23 May 2023 17:45:47 +0200 Subject: [PATCH 04/22] fix import --- relay-general/src/store/normalize/contexts.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/relay-general/src/store/normalize/contexts.rs b/relay-general/src/store/normalize/contexts.rs index 5dfa6285c83..3ae0422833d 100644 --- a/relay-general/src/store/normalize/contexts.rs +++ b/relay-general/src/store/normalize/contexts.rs @@ -299,6 +299,7 @@ mod tests { use super::*; use crate::protocol::{Headers, LenientString, PairList}; + use crate::types::Object; #[test] fn test_get_product_name() { From df4ce1f24fa3589fdcf2d6cdb89969d12a7db4aa Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Wed, 24 May 2023 13:23:52 +0200 Subject: [PATCH 05/22] draft --- relay-general/src/pii/processor.rs | 15 ++++++++++++--- relay-general/src/processor/traits.rs | 1 + relay-general/src/protocol/contexts/response.rs | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index e2616e259f0..b94198f49c2 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -11,7 +11,7 @@ use crate::pii::{CompiledPiiConfig, Redaction, RuleType}; use crate::processor::{ process_chunked_value, Chunk, Pii, ProcessValue, ProcessingState, Processor, ValueType, }; -use crate::protocol::{AsPair, IpAddr, NativeImagePath, PairList, Replay, User}; +use crate::protocol::{AsPair, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User}; use crate::types::{Meta, ProcessingAction, ProcessingResult, Remark, RemarkType, Value}; /// A processor that performs PII stripping. @@ -196,8 +196,17 @@ impl<'a> Processor for PiiProcessor<'a> { Ok(()) } - // TODO: data scrubbing for graph response - // process_graphql_response + // Response PII processor entry point. + fn process_response( + &mut self, + response: &mut ResponseContext, + _meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult { + // TODO: implement PII data scrubbing in response.data + + Ok(()) + } } fn apply_rule_to_value( diff --git a/relay-general/src/processor/traits.rs b/relay-general/src/processor/traits.rs index 6f3a9019d0b..75f808a885f 100644 --- a/relay-general/src/processor/traits.rs +++ b/relay-general/src/processor/traits.rs @@ -93,6 +93,7 @@ pub trait Processor: Sized { process_method!(process_trace_context, crate::protocol::TraceContext); process_method!(process_native_image_path, crate::protocol::NativeImagePath); process_method!(process_contexts, crate::protocol::Contexts); + process_method!(process_response, crate::protocol::ResponseContext); fn process_other( &mut self, diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index fddba81ad0a..c59e8fc21e1 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -7,6 +7,7 @@ use crate::types::{Annotated, Object, Value}; /// a dictionary (for standard HTTP responses) or a raw response body. #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] #[cfg_attr(feature = "jsonschema", derive(JsonSchema))] +#[metastructure(process_func = "process_response", value_type = "ResponseContext")] pub struct ResponseContext { /// The cookie values. /// From 2bb2d006ad337e83a4eb3e31957a6b41d376408e Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Wed, 24 May 2023 13:47:26 +0200 Subject: [PATCH 06/22] fix --- relay-general/src/protocol/contexts/response.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index c59e8fc21e1..a591f7c3511 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -7,7 +7,7 @@ use crate::types::{Annotated, Object, Value}; /// a dictionary (for standard HTTP responses) or a raw response body. #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] #[cfg_attr(feature = "jsonschema", derive(JsonSchema))] -#[metastructure(process_func = "process_response", value_type = "ResponseContext")] +#[metastructure(process_func = "process_response")] pub struct ResponseContext { /// The cookie values. /// From dba8dd1f958b6ec4af1ddb751b335d6651c69f4a Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Tue, 30 May 2023 17:26:01 +0200 Subject: [PATCH 07/22] scrub data and variables keus --- relay-general/src/pii/processor.rs | 92 ++++++++++++- ...__tests__scrub_request_data_variables.snap | 124 ++++++++++++++++++ relay-general/src/processor/traits.rs | 1 - .../src/protocol/contexts/response.rs | 2 +- relay-general/src/store/normalize.rs | 3 + 5 files changed, 214 insertions(+), 8 deletions(-) create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index b94198f49c2..cf03cd4b3cf 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -11,8 +11,10 @@ use crate::pii::{CompiledPiiConfig, Redaction, RuleType}; use crate::processor::{ process_chunked_value, Chunk, Pii, ProcessValue, ProcessingState, Processor, ValueType, }; -use crate::protocol::{AsPair, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User}; -use crate::types::{Meta, ProcessingAction, ProcessingResult, Remark, RemarkType, Value}; +use crate::protocol::{AsPair, Context, Event, IpAddr, NativeImagePath, PairList, Replay, User}; +use crate::types::{ + Annotated, Meta, ProcessingAction, ProcessingResult, Remark, RemarkType, Value, +}; /// A processor that performs PII stripping. pub struct PiiProcessor<'a> { @@ -196,14 +198,42 @@ impl<'a> Processor for PiiProcessor<'a> { Ok(()) } - // Response PII processor entry point. - fn process_response( + fn process_event( &mut self, - response: &mut ResponseContext, + event: &mut Event, _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { - // TODO: implement PII data scrubbing in response.data + // collect the variables keys and remove them from the event + if let Some(request) = event.request.value_mut() { + if let Some(Value::Object(data)) = request.data.value_mut() { + // TODO: scrub the key values with [Filtered] instead + // data.remove("variables"); + + data.insert( + "variables".to_string(), + Annotated::new(Value::String("[Filtered]".to_string())), + ); + } + } + + // scrub PII from the data object if they match the variables keys + if let Some(ref mut contexts) = event.contexts.value_mut() { + if let Some(Context::Response(ref mut response)) = contexts.get_context_mut("response") + { + if let Some(Value::Object(data)) = response.data.value_mut() { + // TODO: scrub the key values with [Filtered] instead + // data.remove("data"); + + data.insert( + "data".to_string(), + Annotated::new(Value::String("[Filtered]".to_string())), + ); + } + } + } + + event.process_child_values(self, state)?; Ok(()) } @@ -1293,4 +1323,54 @@ mod tests { process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap(); assert_annotated_snapshot!(breadcrumb); } + + #[test] + fn test_scrub_request_data_variables() { + let mut data = Event::from_value( + serde_json::json!({ + "event_id": "5b978c77c0344ca1a360acca3da68167", + "request": { + "method": "POST", + "url": "http://absolute.uri/foo", + "query_string": "query=foobar&page=2", + "data": { + "query": "{\n viewer {\n login\n }\n}", + "variables": { + "login": "foo" + } + }, + "graphql_error": true + }, + "contexts": { + "response": { + "type": "response", + "data": { + "data": { + "viewer": { + "login": "foo" + } + } + }, + "graphql_error": true, + "status_code": 200 + } + } + }) + .into(), + ); + + let scrubbing_config = DataScrubbingConfig { + scrub_data: true, + scrub_ip_addresses: true, + scrub_defaults: true, + ..Default::default() + }; + + let pii_config = to_pii_config(&scrubbing_config).unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + + assert_debug_snapshot!(&data); + } } diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap new file mode 100644 index 00000000000..436e307225d --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap @@ -0,0 +1,124 @@ +--- +source: relay-general/src/pii/processor.rs +expression: "&data" +--- +Event { + id: EventId( + 5b978c77-c034-4ca1-a360-acca3da68167, + ), + level: ~, + version: ~, + ty: ~, + fingerprint: ~, + culprit: ~, + transaction: ~, + transaction_info: ~, + time_spent: ~, + logentry: ~, + logger: ~, + modules: ~, + platform: ~, + timestamp: ~, + start_timestamp: ~, + received: ~, + server_name: ~, + release: ~, + dist: ~, + environment: ~, + site: ~, + user: ~, + request: Request { + url: "http://absolute.uri/foo", + method: "POST", + data: Object( + { + "query": String( + "{\n viewer {\n login\n }\n}", + ), + "variables": String( + "[Filtered]", + ), + }, + ), + query_string: Query( + PairList( + [ + ( + "query", + JsonLenientString( + "foobar", + ), + ), + ( + "page", + JsonLenientString( + "2", + ), + ), + ], + ), + ), + fragment: ~, + cookies: ~, + headers: ~, + body_size: ~, + env: ~, + inferred_content_type: ~, + other: { + "graphql_error": Bool( + true, + ), + }, + }, + contexts: Contexts( + { + "response": ContextInner( + Response( + ResponseContext { + cookies: ~, + headers: ~, + status_code: 200, + body_size: ~, + data: Object( + { + "data": String( + "[Filtered]", + ), + }, + ), + inferred_content_type: ~, + other: { + "graphql_error": Bool( + true, + ), + }, + }, + ), + ), + }, + ), + breadcrumbs: ~, + exceptions: ~, + stacktrace: ~, + template: ~, + threads: ~, + tags: ~, + extra: ~, + debug_meta: ~, + client_sdk: ~, + ingest_path: ~, + errors: ~, + key_id: ~, + project: ~, + grouping_config: ~, + checksum: ~, + csp: ~, + hpkp: ~, + expectct: ~, + expectstaple: ~, + spans: ~, + measurements: ~, + breakdowns: ~, + _metrics: ~, + other: {}, +} diff --git a/relay-general/src/processor/traits.rs b/relay-general/src/processor/traits.rs index 75f808a885f..6f3a9019d0b 100644 --- a/relay-general/src/processor/traits.rs +++ b/relay-general/src/processor/traits.rs @@ -93,7 +93,6 @@ pub trait Processor: Sized { process_method!(process_trace_context, crate::protocol::TraceContext); process_method!(process_native_image_path, crate::protocol::NativeImagePath); process_method!(process_contexts, crate::protocol::Contexts); - process_method!(process_response, crate::protocol::ResponseContext); fn process_other( &mut self, diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index a591f7c3511..81b90602320 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -7,7 +7,7 @@ use crate::types::{Annotated, Object, Value}; /// a dictionary (for standard HTTP responses) or a raw response body. #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] #[cfg_attr(feature = "jsonschema", derive(JsonSchema))] -#[metastructure(process_func = "process_response")] +// #[metastructure(process_func = "process_response")] pub struct ResponseContext { /// The cookie values. /// diff --git a/relay-general/src/store/normalize.rs b/relay-general/src/store/normalize.rs index 4e3c1a68641..11b2a4c3ccb 100644 --- a/relay-general/src/store/normalize.rs +++ b/relay-general/src/store/normalize.rs @@ -904,6 +904,9 @@ impl<'a> Processor for NormalizeProcessor<'a> { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { + // TODO: scrubb request.data by trying to convert it to a JSON object and scrubbing + // the "variables" object. + request.process_child_values(self, state)?; request::normalize_request(request)?; From 0ae5e30c38b716527a8e5507d6ead37c893e09e5 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Wed, 31 May 2023 12:27:06 +0200 Subject: [PATCH 08/22] fix scrubbing --- relay-general/src/pii/processor.rs | 71 ++++++++++++++----- ...__tests__scrub_request_data_variables.snap | 20 ++++-- 2 files changed, 68 insertions(+), 23 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index cf03cd4b3cf..db2a3269a31 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::collections::BTreeMap; use std::mem; use once_cell::sync::OnceCell; @@ -204,31 +205,46 @@ impl<'a> Processor for PiiProcessor<'a> { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { - // collect the variables keys and remove them from the event + // TODO: make keys lazy init + let mut keys = Vec::new(); + + // collect the variables keys and scrub them out if let Some(request) = event.request.value_mut() { if let Some(Value::Object(data)) = request.data.value_mut() { - // TODO: scrub the key values with [Filtered] instead - // data.remove("variables"); - - data.insert( - "variables".to_string(), - Annotated::new(Value::String("[Filtered]".to_string())), - ); + if let Some(Annotated(Some(Value::Bool(graph_ql_error)), _)) = + request.other.get("graphql_error") + { + dbg!("graphql_error request {graphql_error}"); + if *graph_ql_error { + if let Some(Annotated(Some(Value::Object(variables)), _)) = + data.get_mut("variables") + { + for item in variables.iter_mut() { + keys.push(item.0.to_string()); + item.1 + .set_value(Some(Value::String("[Filtered]".to_string()))); + } + } + } + } } } // scrub PII from the data object if they match the variables keys - if let Some(ref mut contexts) = event.contexts.value_mut() { - if let Some(Context::Response(ref mut response)) = contexts.get_context_mut("response") - { + if let Some(contexts) = event.contexts.value_mut() { + if let Some(Context::Response(response)) = contexts.get_context_mut("response") { if let Some(Value::Object(data)) = response.data.value_mut() { - // TODO: scrub the key values with [Filtered] instead - // data.remove("data"); - - data.insert( - "data".to_string(), - Annotated::new(Value::String("[Filtered]".to_string())), - ); + if let Some(Annotated(Some(Value::Bool(graph_ql_error)), _)) = + response.other.get("graphql_error") + { + if *graph_ql_error { + if let Some(Annotated(Some(Value::Object(response_data)), _)) = + data.get_mut("data") + { + scrub_data_from_response(keys.clone(), response_data); + } + } + } } } } @@ -239,6 +255,23 @@ impl<'a> Processor for PiiProcessor<'a> { } } +fn scrub_data_from_response(keys: Vec, data: &mut BTreeMap>) { + for item in data.iter_mut() { + match item.1.value_mut() { + Some(Value::Object(item_data)) => { + // TODO: can we avoid cloning every time? + scrub_data_from_response(keys.clone(), item_data); + } + _ => { + if keys.contains(item.0) { + item.1 + .set_value(Some(Value::String("[Filtered]".to_string()))); + } + } + } + } +} + fn apply_rule_to_value( meta: &mut Meta, rule: &RuleRef, @@ -1351,8 +1384,8 @@ mod tests { } } }, + "status_code": 200, "graphql_error": true, - "status_code": 200 } } }) diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap index 436e307225d..1fcc5c9044b 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap @@ -35,8 +35,12 @@ Event { "query": String( "{\n viewer {\n login\n }\n}", ), - "variables": String( - "[Filtered]", + "variables": Object( + { + "login": String( + "[Filtered]", + ), + }, ), }, ), @@ -81,8 +85,16 @@ Event { body_size: ~, data: Object( { - "data": String( - "[Filtered]", + "data": Object( + { + "viewer": Object( + { + "login": String( + "[Filtered]", + ), + }, + ), + }, ), }, ), From 19e20f304c41d860ad250f90ff1130b699b89108 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Wed, 31 May 2023 14:04:02 +0200 Subject: [PATCH 09/22] ref normalize url encoding --- relay-general/src/store/normalize.rs | 4 +- relay-general/src/store/normalize/contexts.rs | 45 ++----------------- relay-general/src/store/normalize/request.rs | 42 ++--------------- .../src/store/normalize/url_encoding.rs | 39 ++++++++++++++++ 4 files changed, 46 insertions(+), 84 deletions(-) create mode 100644 relay-general/src/store/normalize/url_encoding.rs diff --git a/relay-general/src/store/normalize.rs b/relay-general/src/store/normalize.rs index 11b2a4c3ccb..35f523fb600 100644 --- a/relay-general/src/store/normalize.rs +++ b/relay-general/src/store/normalize.rs @@ -33,6 +33,7 @@ mod mechanism; mod request; mod spans; mod stacktrace; +mod url_encoding; pub mod user_agent; @@ -904,9 +905,6 @@ impl<'a> Processor for NormalizeProcessor<'a> { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { - // TODO: scrubb request.data by trying to convert it to a JSON object and scrubbing - // the "variables" object. - request.process_child_values(self, state)?; request::normalize_request(request)?; diff --git a/relay-general/src/store/normalize/contexts.rs b/relay-general/src/store/normalize/contexts.rs index 3ae0422833d..742d9595532 100644 --- a/relay-general/src/store/normalize/contexts.rs +++ b/relay-general/src/store/normalize/contexts.rs @@ -2,6 +2,7 @@ use once_cell::sync::Lazy; use regex::Regex; use crate::protocol::{Context, OsContext, ResponseContext, RuntimeContext, ANDROID_MODEL_NAMES}; +use crate::store::normalize::url_encoding; use crate::types::{Annotated, Empty, Value}; /// Environment.OSVersion (GetVersionEx) or RuntimeInformation.OSDescription on Windows @@ -177,47 +178,6 @@ fn normalize_os_context(os: &mut OsContext) { } } -// TODO: extract urlencoded_from_str, parse_raw_response_data and normalize_response_data -// can probably be reused in other places (original from normalize/request) - -/// Decodes an urlencoded body. -fn urlencoded_from_str(raw: &str) -> Option { - // Binary strings would be decoded, but we know url-encoded bodies are ASCII. - if !raw.is_ascii() { - return None; - } - - // Avoid false positives with XML and partial JSON. - if raw.starts_with(" value, - _ => return None, - }; - - // `serde_urlencoded` can decode any string with valid characters into an object. However, we - // need to account for false-positives in the following cases: - // - An empty string "" is decoded as empty object - // - A string "foo" is decoded as {"foo": ""} (check for single empty value) - // - A base64 encoded string "dGU=" also decodes with a single empty value - // - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =) - let is_valid = object.len() > 1 - || object - .values() - .next() - .and_then(Annotated::::as_str) - .map_or(false, |s| !matches!(s, "" | "=")); - - if is_valid { - Some(Value::Object(object)) - } else { - None - } -} - fn parse_raw_response_data(response: &mut ResponseContext) -> Option<(&'static str, Value)> { let raw = response.data.as_str()?; @@ -226,7 +186,8 @@ fn parse_raw_response_data(response: &mut ResponseContext) -> Option<(&'static s if let Ok(value) = serde_json::from_str(raw) { Some(("application/json", value)) } else { - urlencoded_from_str(raw).map(|value| ("application/x-www-form-urlencoded", value)) + url_encoding::encoded_from_str(raw) + .map(|value| ("application/x-www-form-urlencoded", value)) } } diff --git a/relay-general/src/store/normalize/request.rs b/relay-general/src/store/normalize/request.rs index efbb88ca9b4..b5977fd1590 100644 --- a/relay-general/src/store/normalize/request.rs +++ b/relay-general/src/store/normalize/request.rs @@ -3,6 +3,7 @@ use regex::Regex; use url::Url; use crate::protocol::{Query, Request}; +use crate::store::normalize::url_encoding; use crate::types::{Annotated, ErrorKind, Meta, ProcessingAction, ProcessingResult, Value}; const ELLIPSIS: char = '\u{2026}'; @@ -87,44 +88,6 @@ fn normalize_method(method: &mut String, meta: &mut Meta) -> ProcessingResult { Ok(()) } -/// Decodes an urlencoded body. -fn urlencoded_from_str(raw: &str) -> Option { - // Binary strings would be decoded, but we know url-encoded bodies are ASCII. - if !raw.is_ascii() { - return None; - } - - // Avoid false positives with XML and partial JSON. - if raw.starts_with(" value, - _ => return None, - }; - - // `serde_urlencoded` can decode any string with valid characters into an object. However, we - // need to account for false-positives in the following cases: - // - An empty string "" is decoded as empty object - // - A string "foo" is decoded as {"foo": ""} (check for single empty value) - // - A base64 encoded string "dGU=" also decodes with a single empty value - // - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =) - let is_valid = object.len() > 1 - || object - .values() - .next() - .and_then(Annotated::::as_str) - .map_or(false, |s| !matches!(s, "" | "=")); - - if is_valid { - Some(Value::Object(object)) - } else { - None - } -} - fn parse_raw_data(request: &Request) -> Option<(&'static str, Value)> { let raw = request.data.as_str()?; @@ -133,7 +96,8 @@ fn parse_raw_data(request: &Request) -> Option<(&'static str, Value)> { if let Ok(value) = serde_json::from_str(raw) { Some(("application/json", value)) } else { - urlencoded_from_str(raw).map(|value| ("application/x-www-form-urlencoded", value)) + url_encoding::encoded_from_str(raw) + .map(|value| ("application/x-www-form-urlencoded", value)) } } diff --git a/relay-general/src/store/normalize/url_encoding.rs b/relay-general/src/store/normalize/url_encoding.rs new file mode 100644 index 00000000000..70f8359fb7b --- /dev/null +++ b/relay-general/src/store/normalize/url_encoding.rs @@ -0,0 +1,39 @@ +use crate::types::{Annotated, Value}; + +/// Decodes an urlencoded body. +pub fn encoded_from_str(raw: &str) -> Option { + // Binary strings would be decoded, but we know url-encoded bodies are ASCII. + if !raw.is_ascii() { + return None; + } + + // Avoid false positives with XML and partial JSON. + if raw.starts_with(" value, + _ => return None, + }; + + // `serde_urlencoded` can decode any string with valid characters into an object. However, we + // need to account for false-positives in the following cases: + // - An empty string "" is decoded as empty object + // - A string "foo" is decoded as {"foo": ""} (check for single empty value) + // - A base64 encoded string "dGU=" also decodes with a single empty value + // - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =) + let is_valid = object.len() > 1 + || object + .values() + .next() + .and_then(Annotated::::as_str) + .map_or(false, |s| !matches!(s, "" | "=")); + + if is_valid { + Some(Value::Object(object)) + } else { + None + } +} From a89711e4dc57b014fbc1a1cb5397c5e970f14b76 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Wed, 31 May 2023 17:56:27 +0200 Subject: [PATCH 10/22] add more tests --- relay-general/src/pii/processor.rs | 202 ++++++++++++++---- ...__tests__does_not_scrub_if_no_graphql.snap | 128 +++++++++++ ..._graphql_response_data_with_variables.snap | 136 ++++++++++++ ...aphql_response_data_without_variables.snap | 117 ++++++++++ 4 files changed, 538 insertions(+), 45 deletions(-) create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index db2a3269a31..734dbf50db7 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::mem; use once_cell::sync::OnceCell; @@ -205,67 +205,84 @@ impl<'a> Processor for PiiProcessor<'a> { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { - // TODO: make keys lazy init - let mut keys = Vec::new(); - - // collect the variables keys and scrub them out - if let Some(request) = event.request.value_mut() { - if let Some(Value::Object(data)) = request.data.value_mut() { - if let Some(Annotated(Some(Value::Bool(graph_ql_error)), _)) = - request.other.get("graphql_error") - { - dbg!("graphql_error request {graphql_error}"); - if *graph_ql_error { - if let Some(Annotated(Some(Value::Object(variables)), _)) = - data.get_mut("variables") - { - for item in variables.iter_mut() { - keys.push(item.0.to_string()); - item.1 - .set_value(Some(Value::String("[Filtered]".to_string()))); - } + scrub_graphql(event); + + event.process_child_values(self, state)?; + + Ok(()) + } +} + +/// Scrubs GraphQL variables from the event. +fn scrub_graphql(event: &mut Event) { + let mut keys: Option> = None; + + // collect the variables keys and scrub them out. + if let Some(request) = event.request.value_mut() { + if let Some(Value::Object(data)) = request.data.value_mut() { + if let Some(Annotated(Some(Value::Bool(graphql_error)), _)) = + request.other.get("graphql_error") + { + if *graphql_error { + if let Some(Annotated(Some(Value::Object(variables)), _)) = + data.get_mut("variables") + { + let mut current_keys: BTreeSet = BTreeSet::new(); + for (key, value) in variables.iter_mut() { + current_keys.insert(key.to_string()); + value.set_value(Some(Value::String("[Filtered]".to_string()))); } + keys = Some(current_keys); } } } } + } - // scrub PII from the data object if they match the variables keys - if let Some(contexts) = event.contexts.value_mut() { - if let Some(Context::Response(response)) = contexts.get_context_mut("response") { - if let Some(Value::Object(data)) = response.data.value_mut() { - if let Some(Annotated(Some(Value::Bool(graph_ql_error)), _)) = - response.other.get("graphql_error") - { - if *graph_ql_error { - if let Some(Annotated(Some(Value::Object(response_data)), _)) = - data.get_mut("data") - { - scrub_data_from_response(keys.clone(), response_data); + // scrub PII from the data object if they match the variables keys. + if let Some(contexts) = event.contexts.value_mut() { + if let Some(Context::Response(response)) = contexts.get_context_mut("response") { + if let Some(Value::Object(data)) = response.data.value_mut() { + if let Some(Annotated(Some(Value::Bool(graphql_error)), _)) = + response.other.get("graphql_error") + { + if *graphql_error { + let mut delete_data = false; + if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = + data.get_mut("data") + { + if let Some(keys) = keys { + if !keys.is_empty() { + scrub_graphql_data(&keys, graphql_data); + } else { + delete_data = true; + } + } else { + delete_data = true; } } + if delete_data { + // if we don't have the variable keys, we scrub the whole data object + // because the query or mutation weren't parameterized. + data.remove("data"); + } } } } } - - event.process_child_values(self, state)?; - - Ok(()) } } -fn scrub_data_from_response(keys: Vec, data: &mut BTreeMap>) { - for item in data.iter_mut() { - match item.1.value_mut() { +/// Scrubs values from the data object to [Filtered] +fn scrub_graphql_data(keys: &BTreeSet, data: &mut BTreeMap>) { + for (key, value) in data.iter_mut() { + match value.value_mut() { Some(Value::Object(item_data)) => { - // TODO: can we avoid cloning every time? - scrub_data_from_response(keys.clone(), item_data); + scrub_graphql_data(keys, item_data); } _ => { - if keys.contains(item.0) { - item.1 - .set_value(Some(Value::String("[Filtered]".to_string()))); + if keys.contains(key) { + value.set_value(Some(Value::String("[Filtered]".to_string()))); } } } @@ -1358,7 +1375,7 @@ mod tests { } #[test] - fn test_scrub_request_data_variables() { + fn test_scrub_graphql_response_data_with_variables() { let mut data = Event::from_value( serde_json::json!({ "event_id": "5b978c77c0344ca1a360acca3da68167", @@ -1406,4 +1423,99 @@ mod tests { assert_debug_snapshot!(&data); } + + #[test] + fn test_scrub_graphql_response_data_without_variables() { + let mut data = Event::from_value( + serde_json::json!({ + "event_id": "5b978c77c0344ca1a360acca3da68167", + "request": { + "method": "POST", + "url": "http://absolute.uri/foo", + "query_string": "query=foobar&page=2", + "data": { + "query": "{\n viewer {\n login\n }\n}" + }, + "graphql_error": true + }, + "contexts": { + "response": { + "type": "response", + "data": { + "data": { + "viewer": { + "login": "foo" + } + } + }, + "status_code": 200, + "graphql_error": true, + } + } + }) + .into(), + ); + + let scrubbing_config = DataScrubbingConfig { + scrub_data: true, + scrub_ip_addresses: true, + scrub_defaults: true, + ..Default::default() + }; + + let pii_config = to_pii_config(&scrubbing_config).unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + + assert_debug_snapshot!(&data); + } + + #[test] + fn test_does_not_scrub_if_no_graphql() { + let mut data = Event::from_value( + serde_json::json!({ + "event_id": "5b978c77c0344ca1a360acca3da68167", + "request": { + "method": "POST", + "url": "http://absolute.uri/foo", + "query_string": "query=foobar&page=2", + "data": { + "query": "{\n viewer {\n login\n }\n}", + "variables": { + "login": "foo" + } + }, + }, + "contexts": { + "response": { + "type": "response", + "data": { + "data": { + "viewer": { + "login": "foo" + } + } + }, + "status_code": 200, + } + } + }) + .into(), + ); + + let scrubbing_config = DataScrubbingConfig { + scrub_data: true, + scrub_ip_addresses: true, + scrub_defaults: true, + ..Default::default() + }; + + let pii_config = to_pii_config(&scrubbing_config).unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + + assert_debug_snapshot!(&data); + } } diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap new file mode 100644 index 00000000000..064bde5799c --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap @@ -0,0 +1,128 @@ +--- +source: relay-general/src/pii/processor.rs +expression: "&data" +--- +Event { + id: EventId( + 5b978c77-c034-4ca1-a360-acca3da68167, + ), + level: ~, + version: ~, + ty: ~, + fingerprint: ~, + culprit: ~, + transaction: ~, + transaction_info: ~, + time_spent: ~, + logentry: ~, + logger: ~, + modules: ~, + platform: ~, + timestamp: ~, + start_timestamp: ~, + received: ~, + server_name: ~, + release: ~, + dist: ~, + environment: ~, + site: ~, + user: ~, + request: Request { + url: "http://absolute.uri/foo", + method: "POST", + data: Object( + { + "query": String( + "{\n viewer {\n login\n }\n}", + ), + "variables": Object( + { + "login": String( + "foo", + ), + }, + ), + }, + ), + query_string: Query( + PairList( + [ + ( + "query", + JsonLenientString( + "foobar", + ), + ), + ( + "page", + JsonLenientString( + "2", + ), + ), + ], + ), + ), + fragment: ~, + cookies: ~, + headers: ~, + body_size: ~, + env: ~, + inferred_content_type: ~, + other: {}, + }, + contexts: Contexts( + { + "response": ContextInner( + Response( + ResponseContext { + cookies: ~, + headers: ~, + status_code: 200, + body_size: ~, + data: Object( + { + "data": Object( + { + "viewer": Object( + { + "login": String( + "foo", + ), + }, + ), + }, + ), + }, + ), + inferred_content_type: ~, + other: {}, + }, + ), + ), + }, + ), + breadcrumbs: ~, + exceptions: ~, + stacktrace: ~, + template: ~, + threads: ~, + tags: ~, + extra: ~, + debug_meta: ~, + client_sdk: ~, + ingest_path: ~, + errors: ~, + key_id: ~, + project: ~, + grouping_config: ~, + checksum: ~, + csp: ~, + hpkp: ~, + expectct: ~, + expectstaple: ~, + spans: ~, + measurements: ~, + breakdowns: ~, + _metrics: ~, + other: {}, +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap new file mode 100644 index 00000000000..1fcc5c9044b --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap @@ -0,0 +1,136 @@ +--- +source: relay-general/src/pii/processor.rs +expression: "&data" +--- +Event { + id: EventId( + 5b978c77-c034-4ca1-a360-acca3da68167, + ), + level: ~, + version: ~, + ty: ~, + fingerprint: ~, + culprit: ~, + transaction: ~, + transaction_info: ~, + time_spent: ~, + logentry: ~, + logger: ~, + modules: ~, + platform: ~, + timestamp: ~, + start_timestamp: ~, + received: ~, + server_name: ~, + release: ~, + dist: ~, + environment: ~, + site: ~, + user: ~, + request: Request { + url: "http://absolute.uri/foo", + method: "POST", + data: Object( + { + "query": String( + "{\n viewer {\n login\n }\n}", + ), + "variables": Object( + { + "login": String( + "[Filtered]", + ), + }, + ), + }, + ), + query_string: Query( + PairList( + [ + ( + "query", + JsonLenientString( + "foobar", + ), + ), + ( + "page", + JsonLenientString( + "2", + ), + ), + ], + ), + ), + fragment: ~, + cookies: ~, + headers: ~, + body_size: ~, + env: ~, + inferred_content_type: ~, + other: { + "graphql_error": Bool( + true, + ), + }, + }, + contexts: Contexts( + { + "response": ContextInner( + Response( + ResponseContext { + cookies: ~, + headers: ~, + status_code: 200, + body_size: ~, + data: Object( + { + "data": Object( + { + "viewer": Object( + { + "login": String( + "[Filtered]", + ), + }, + ), + }, + ), + }, + ), + inferred_content_type: ~, + other: { + "graphql_error": Bool( + true, + ), + }, + }, + ), + ), + }, + ), + breadcrumbs: ~, + exceptions: ~, + stacktrace: ~, + template: ~, + threads: ~, + tags: ~, + extra: ~, + debug_meta: ~, + client_sdk: ~, + ingest_path: ~, + errors: ~, + key_id: ~, + project: ~, + grouping_config: ~, + checksum: ~, + csp: ~, + hpkp: ~, + expectct: ~, + expectstaple: ~, + spans: ~, + measurements: ~, + breakdowns: ~, + _metrics: ~, + other: {}, +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap new file mode 100644 index 00000000000..607e9f3e2f0 --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap @@ -0,0 +1,117 @@ +--- +source: relay-general/src/pii/processor.rs +expression: "&data" +--- +Event { + id: EventId( + 5b978c77-c034-4ca1-a360-acca3da68167, + ), + level: ~, + version: ~, + ty: ~, + fingerprint: ~, + culprit: ~, + transaction: ~, + transaction_info: ~, + time_spent: ~, + logentry: ~, + logger: ~, + modules: ~, + platform: ~, + timestamp: ~, + start_timestamp: ~, + received: ~, + server_name: ~, + release: ~, + dist: ~, + environment: ~, + site: ~, + user: ~, + request: Request { + url: "http://absolute.uri/foo", + method: "POST", + data: Object( + { + "query": String( + "{\n viewer {\n login\n }\n}", + ), + }, + ), + query_string: Query( + PairList( + [ + ( + "query", + JsonLenientString( + "foobar", + ), + ), + ( + "page", + JsonLenientString( + "2", + ), + ), + ], + ), + ), + fragment: ~, + cookies: ~, + headers: ~, + body_size: ~, + env: ~, + inferred_content_type: ~, + other: { + "graphql_error": Bool( + true, + ), + }, + }, + contexts: Contexts( + { + "response": ContextInner( + Response( + ResponseContext { + cookies: ~, + headers: ~, + status_code: 200, + body_size: ~, + data: Object( + {}, + ), + inferred_content_type: ~, + other: { + "graphql_error": Bool( + true, + ), + }, + }, + ), + ), + }, + ), + breadcrumbs: ~, + exceptions: ~, + stacktrace: ~, + template: ~, + threads: ~, + tags: ~, + extra: ~, + debug_meta: ~, + client_sdk: ~, + ingest_path: ~, + errors: ~, + key_id: ~, + project: ~, + grouping_config: ~, + checksum: ~, + csp: ~, + hpkp: ~, + expectct: ~, + expectstaple: ~, + spans: ~, + measurements: ~, + breakdowns: ~, + _metrics: ~, + other: {}, +} From 06d905862533d0cf4b881ec7dcd77dc456618ef1 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Wed, 31 May 2023 18:01:44 +0200 Subject: [PATCH 11/22] make tests simpler --- relay-general/src/pii/processor.rs | 15 ---------- ...__tests__does_not_scrub_if_no_graphql.snap | 29 ++++--------------- ..._graphql_response_data_with_variables.snap | 29 ++++--------------- ...aphql_response_data_without_variables.snap | 29 ++++--------------- 4 files changed, 15 insertions(+), 87 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 734dbf50db7..58c7328be1b 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -1378,11 +1378,7 @@ mod tests { fn test_scrub_graphql_response_data_with_variables() { let mut data = Event::from_value( serde_json::json!({ - "event_id": "5b978c77c0344ca1a360acca3da68167", "request": { - "method": "POST", - "url": "http://absolute.uri/foo", - "query_string": "query=foobar&page=2", "data": { "query": "{\n viewer {\n login\n }\n}", "variables": { @@ -1401,7 +1397,6 @@ mod tests { } } }, - "status_code": 200, "graphql_error": true, } } @@ -1428,11 +1423,7 @@ mod tests { fn test_scrub_graphql_response_data_without_variables() { let mut data = Event::from_value( serde_json::json!({ - "event_id": "5b978c77c0344ca1a360acca3da68167", "request": { - "method": "POST", - "url": "http://absolute.uri/foo", - "query_string": "query=foobar&page=2", "data": { "query": "{\n viewer {\n login\n }\n}" }, @@ -1448,7 +1439,6 @@ mod tests { } } }, - "status_code": 200, "graphql_error": true, } } @@ -1475,11 +1465,7 @@ mod tests { fn test_does_not_scrub_if_no_graphql() { let mut data = Event::from_value( serde_json::json!({ - "event_id": "5b978c77c0344ca1a360acca3da68167", "request": { - "method": "POST", - "url": "http://absolute.uri/foo", - "query_string": "query=foobar&page=2", "data": { "query": "{\n viewer {\n login\n }\n}", "variables": { @@ -1497,7 +1483,6 @@ mod tests { } } }, - "status_code": 200, } } }) diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap index 064bde5799c..75e733dd210 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap @@ -3,9 +3,7 @@ source: relay-general/src/pii/processor.rs expression: "&data" --- Event { - id: EventId( - 5b978c77-c034-4ca1-a360-acca3da68167, - ), + id: ~, level: ~, version: ~, ty: ~, @@ -28,8 +26,8 @@ Event { site: ~, user: ~, request: Request { - url: "http://absolute.uri/foo", - method: "POST", + url: ~, + method: ~, data: Object( { "query": String( @@ -44,24 +42,7 @@ Event { ), }, ), - query_string: Query( - PairList( - [ - ( - "query", - JsonLenientString( - "foobar", - ), - ), - ( - "page", - JsonLenientString( - "2", - ), - ), - ], - ), - ), + query_string: ~, fragment: ~, cookies: ~, headers: ~, @@ -77,7 +58,7 @@ Event { ResponseContext { cookies: ~, headers: ~, - status_code: 200, + status_code: ~, body_size: ~, data: Object( { diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap index 1fcc5c9044b..d28cb437699 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap @@ -3,9 +3,7 @@ source: relay-general/src/pii/processor.rs expression: "&data" --- Event { - id: EventId( - 5b978c77-c034-4ca1-a360-acca3da68167, - ), + id: ~, level: ~, version: ~, ty: ~, @@ -28,8 +26,8 @@ Event { site: ~, user: ~, request: Request { - url: "http://absolute.uri/foo", - method: "POST", + url: ~, + method: ~, data: Object( { "query": String( @@ -44,24 +42,7 @@ Event { ), }, ), - query_string: Query( - PairList( - [ - ( - "query", - JsonLenientString( - "foobar", - ), - ), - ( - "page", - JsonLenientString( - "2", - ), - ), - ], - ), - ), + query_string: ~, fragment: ~, cookies: ~, headers: ~, @@ -81,7 +62,7 @@ Event { ResponseContext { cookies: ~, headers: ~, - status_code: 200, + status_code: ~, body_size: ~, data: Object( { diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap index 607e9f3e2f0..fe7267a9a9e 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap @@ -3,9 +3,7 @@ source: relay-general/src/pii/processor.rs expression: "&data" --- Event { - id: EventId( - 5b978c77-c034-4ca1-a360-acca3da68167, - ), + id: ~, level: ~, version: ~, ty: ~, @@ -28,8 +26,8 @@ Event { site: ~, user: ~, request: Request { - url: "http://absolute.uri/foo", - method: "POST", + url: ~, + method: ~, data: Object( { "query": String( @@ -37,24 +35,7 @@ Event { ), }, ), - query_string: Query( - PairList( - [ - ( - "query", - JsonLenientString( - "foobar", - ), - ), - ( - "page", - JsonLenientString( - "2", - ), - ), - ], - ), - ), + query_string: ~, fragment: ~, cookies: ~, headers: ~, @@ -74,7 +55,7 @@ Event { ResponseContext { cookies: ~, headers: ~, - status_code: 200, + status_code: ~, body_size: ~, data: Object( {}, From f837ad136180b77b94f8d8ec37b27e026285c821 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 10:31:55 +0200 Subject: [PATCH 12/22] add api_target --- relay-general/src/pii/processor.rs | 78 +++++++++---------- ...__tests__does_not_scrub_if_no_graphql.snap | 1 + ..._graphql_response_data_with_variables.snap | 13 +--- ...aphql_response_data_without_variables.snap | 13 +--- relay-general/src/protocol/request.rs | 11 +++ .../test_fixtures__event_schema.snap | 9 ++- 6 files changed, 64 insertions(+), 61 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 58c7328be1b..8ff5557db37 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -217,23 +217,27 @@ impl<'a> Processor for PiiProcessor<'a> { fn scrub_graphql(event: &mut Event) { let mut keys: Option> = None; + let mut is_graphql = false; + // collect the variables keys and scrub them out. if let Some(request) = event.request.value_mut() { if let Some(Value::Object(data)) = request.data.value_mut() { - if let Some(Annotated(Some(Value::Bool(graphql_error)), _)) = - request.other.get("graphql_error") - { - if *graphql_error { - if let Some(Annotated(Some(Value::Object(variables)), _)) = - data.get_mut("variables") - { - let mut current_keys: BTreeSet = BTreeSet::new(); - for (key, value) in variables.iter_mut() { - current_keys.insert(key.to_string()); - value.set_value(Some(Value::String("[Filtered]".to_string()))); - } - keys = Some(current_keys); + if let Some(api_target) = request.api_target.value() { + if api_target.to_lowercase().eq("graphql") { + is_graphql = true; + } + } + + if is_graphql { + if let Some(Annotated(Some(Value::Object(variables)), _)) = + data.get_mut("variables") + { + let mut current_keys: BTreeSet = BTreeSet::new(); + for (key, value) in variables.iter_mut() { + current_keys.insert(key.to_string()); + value.set_value(Some(Value::String("[Filtered]".to_string()))); } + keys = Some(current_keys); } } } @@ -243,29 +247,25 @@ fn scrub_graphql(event: &mut Event) { if let Some(contexts) = event.contexts.value_mut() { if let Some(Context::Response(response)) = contexts.get_context_mut("response") { if let Some(Value::Object(data)) = response.data.value_mut() { - if let Some(Annotated(Some(Value::Bool(graphql_error)), _)) = - response.other.get("graphql_error") - { - if *graphql_error { - let mut delete_data = false; - if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = - data.get_mut("data") - { - if let Some(keys) = keys { - if !keys.is_empty() { - scrub_graphql_data(&keys, graphql_data); - } else { - delete_data = true; - } + if is_graphql { + let mut delete_data = false; + if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = + data.get_mut("data") + { + if let Some(keys) = keys { + if !keys.is_empty() { + scrub_graphql_data(&keys, graphql_data); } else { delete_data = true; } + } else { + delete_data = true; } - if delete_data { - // if we don't have the variable keys, we scrub the whole data object - // because the query or mutation weren't parameterized. - data.remove("data"); - } + } + if delete_data { + // if we don't have the variable keys, we scrub the whole data object + // because the query or mutation weren't parameterized. + data.remove("data"); } } } @@ -273,7 +273,7 @@ fn scrub_graphql(event: &mut Event) { } } -/// Scrubs values from the data object to [Filtered] +/// Scrubs values from the data object to `[Filtered]`. fn scrub_graphql_data(keys: &BTreeSet, data: &mut BTreeMap>) { for (key, value) in data.iter_mut() { match value.value_mut() { @@ -1385,7 +1385,7 @@ mod tests { "login": "foo" } }, - "graphql_error": true + "api_target": "graphql" }, "contexts": { "response": { @@ -1396,8 +1396,7 @@ mod tests { "login": "foo" } } - }, - "graphql_error": true, + } } } }) @@ -1427,7 +1426,7 @@ mod tests { "data": { "query": "{\n viewer {\n login\n }\n}" }, - "graphql_error": true + "api_target": "graphql" }, "contexts": { "response": { @@ -1438,8 +1437,7 @@ mod tests { "login": "foo" } } - }, - "graphql_error": true, + } } } }) @@ -1482,7 +1480,7 @@ mod tests { "login": "foo" } } - }, + } } } }) diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap index 75e733dd210..df30a515747 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__does_not_scrub_if_no_graphql.snap @@ -49,6 +49,7 @@ Event { body_size: ~, env: ~, inferred_content_type: ~, + api_target: ~, other: {}, }, contexts: Contexts( diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap index d28cb437699..b98eaa23225 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_with_variables.snap @@ -49,11 +49,8 @@ Event { body_size: ~, env: ~, inferred_content_type: ~, - other: { - "graphql_error": Bool( - true, - ), - }, + api_target: "graphql", + other: {}, }, contexts: Contexts( { @@ -80,11 +77,7 @@ Event { }, ), inferred_content_type: ~, - other: { - "graphql_error": Bool( - true, - ), - }, + other: {}, }, ), ), diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap index fe7267a9a9e..6e3dfd08cde 100644 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_graphql_response_data_without_variables.snap @@ -42,11 +42,8 @@ Event { body_size: ~, env: ~, inferred_content_type: ~, - other: { - "graphql_error": Bool( - true, - ), - }, + api_target: "graphql", + other: {}, }, contexts: Contexts( { @@ -61,11 +58,7 @@ Event { {}, ), inferred_content_type: ~, - other: { - "graphql_error": Bool( - true, - ), - }, + other: {}, }, ), ), diff --git a/relay-general/src/protocol/request.rs b/relay-general/src/protocol/request.rs index aea4e460b46..3104c935af8 100644 --- a/relay-general/src/protocol/request.rs +++ b/relay-general/src/protocol/request.rs @@ -497,6 +497,15 @@ pub struct Request { #[metastructure(skip_serialization = "empty")] pub inferred_content_type: Annotated, + /// The API target/specification that made the request. + /// + /// Values can be `graphql`, `rest`, etc. + /// + /// The [data] field should contain the request and response bodies based on its target specification. + /// + /// This information can be used to better data scrubbing and normalization. + pub api_target: Annotated, + /// Additional arbitrary fields for forwards compatibility. #[metastructure(additional_properties, pii = "true")] pub other: Object, @@ -667,6 +676,7 @@ mod tests { "REMOTE_ADDR": "213.47.147.207" }, "inferred_content_type": "application/json", + "api_target": "graphql", "other": "value" }"#; @@ -709,6 +719,7 @@ mod tests { map }), inferred_content_type: Annotated::new("application/json".to_string()), + api_target: Annotated::new("graphql".to_string()), other: { let mut map = Object::new(); map.insert( diff --git a/relay-general/tests/snapshots/test_fixtures__event_schema.snap b/relay-general/tests/snapshots/test_fixtures__event_schema.snap index c2ef75d1920..252e7272626 100644 --- a/relay-general/tests/snapshots/test_fixtures__event_schema.snap +++ b/relay-general/tests/snapshots/test_fixtures__event_schema.snap @@ -1,6 +1,5 @@ --- source: relay-general/tests/test_fixtures.rs -assertion_line: 109 expression: "relay_general::protocol::event_json_schema()" --- { @@ -2841,6 +2840,14 @@ expression: "relay_general::protocol::event_json_schema()" { "type": "object", "properties": { + "api_target": { + "description": " The API target/specification that made the request.\n\n Values can be `graphql`, `rest`, etc.\n\n The [data] field should contain the request and response bodies based on its target specification.\n\n This information can be used to better data scrubbing and normalization.", + "default": null, + "type": [ + "string", + "null" + ] + }, "body_size": { "description": " HTTP request body size.", "default": null, From dda85759ec4c94d6722de027bfa0bca0aa93bce2 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 10:36:59 +0200 Subject: [PATCH 13/22] remove comment --- relay-general/src/protocol/contexts/response.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index 81b90602320..fddba81ad0a 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -7,7 +7,6 @@ use crate::types::{Annotated, Object, Value}; /// a dictionary (for standard HTTP responses) or a raw response body. #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] #[cfg_attr(feature = "jsonschema", derive(JsonSchema))] -// #[metastructure(process_func = "process_response")] pub struct ResponseContext { /// The cookie values. /// From b640995afa848e8b5c0a4bf0fd9811eabcf1e34d Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 10:40:47 +0200 Subject: [PATCH 14/22] fix rustdoc --- relay-general/src/protocol/request.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-general/src/protocol/request.rs b/relay-general/src/protocol/request.rs index 3104c935af8..5292fe6c4f7 100644 --- a/relay-general/src/protocol/request.rs +++ b/relay-general/src/protocol/request.rs @@ -501,7 +501,7 @@ pub struct Request { /// /// Values can be `graphql`, `rest`, etc. /// - /// The [data] field should contain the request and response bodies based on its target specification. + /// The data field should contain the request and response bodies based on its target specification. /// /// This information can be used to better data scrubbing and normalization. pub api_target: Annotated, From 28733e34048f40aa17005dda2bb18a86b02226a9 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 10:48:50 +0200 Subject: [PATCH 15/22] fix doc --- relay-general/tests/snapshots/test_fixtures__event_schema.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-general/tests/snapshots/test_fixtures__event_schema.snap b/relay-general/tests/snapshots/test_fixtures__event_schema.snap index 252e7272626..2424695e800 100644 --- a/relay-general/tests/snapshots/test_fixtures__event_schema.snap +++ b/relay-general/tests/snapshots/test_fixtures__event_schema.snap @@ -2841,7 +2841,7 @@ expression: "relay_general::protocol::event_json_schema()" "type": "object", "properties": { "api_target": { - "description": " The API target/specification that made the request.\n\n Values can be `graphql`, `rest`, etc.\n\n The [data] field should contain the request and response bodies based on its target specification.\n\n This information can be used to better data scrubbing and normalization.", + "description": " The API target/specification that made the request.\n\n Values can be `graphql`, `rest`, etc.\n\n The data field should contain the request and response bodies based on its target specification.\n\n This information can be used to better data scrubbing and normalization.", "default": null, "type": [ "string", From e9ae8a42a320412f2cd5853d0c8a8a018aae99f2 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 10:54:22 +0200 Subject: [PATCH 16/22] add changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fd13fd3b2e..84ed104a905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +**Features**: + +- Add `data` and `api_target` fields to `ResponseContext` and scrub `graphql` bodies. ([#2141](https://github.com/getsentry/relay/pull/2141)) + ## 23.5.2 **Features**: From ac80e06863d8522331df692ea6021d0554756c70 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 12:01:40 +0200 Subject: [PATCH 17/22] fixes code review --- relay-general/src/pii/processor.rs | 2 +- relay-general/src/protocol/request.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 8ff5557db37..9a98d1d65e2 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -223,7 +223,7 @@ fn scrub_graphql(event: &mut Event) { if let Some(request) = event.request.value_mut() { if let Some(Value::Object(data)) = request.data.value_mut() { if let Some(api_target) = request.api_target.value() { - if api_target.to_lowercase().eq("graphql") { + if api_target.eq_ignore_ascii_case("graphql") { is_graphql = true; } } diff --git a/relay-general/src/protocol/request.rs b/relay-general/src/protocol/request.rs index 5292fe6c4f7..506acc964c1 100644 --- a/relay-general/src/protocol/request.rs +++ b/relay-general/src/protocol/request.rs @@ -503,7 +503,7 @@ pub struct Request { /// /// The data field should contain the request and response bodies based on its target specification. /// - /// This information can be used to better data scrubbing and normalization. + /// This information can be used for better data scrubbing and normalization. pub api_target: Annotated, /// Additional arbitrary fields for forwards compatibility. From 68b794b4bd1f9f05d8612f640b4a380c8b43c01f Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 12:17:29 +0200 Subject: [PATCH 18/22] change String to &str --- relay-general/src/pii/processor.rs | 10 +++++----- .../tests/snapshots/test_fixtures__event_schema.snap | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 9a98d1d65e2..03fd66c2b62 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -215,7 +215,7 @@ impl<'a> Processor for PiiProcessor<'a> { /// Scrubs GraphQL variables from the event. fn scrub_graphql(event: &mut Event) { - let mut keys: Option> = None; + let mut keys: Option> = None; let mut is_graphql = false; @@ -232,9 +232,9 @@ fn scrub_graphql(event: &mut Event) { if let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables") { - let mut current_keys: BTreeSet = BTreeSet::new(); + let mut current_keys: BTreeSet<&str> = BTreeSet::new(); for (key, value) in variables.iter_mut() { - current_keys.insert(key.to_string()); + current_keys.insert(key); value.set_value(Some(Value::String("[Filtered]".to_string()))); } keys = Some(current_keys); @@ -274,14 +274,14 @@ fn scrub_graphql(event: &mut Event) { } /// Scrubs values from the data object to `[Filtered]`. -fn scrub_graphql_data(keys: &BTreeSet, data: &mut BTreeMap>) { +fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap>) { for (key, value) in data.iter_mut() { match value.value_mut() { Some(Value::Object(item_data)) => { scrub_graphql_data(keys, item_data); } _ => { - if keys.contains(key) { + if keys.contains(key.as_str()) { value.set_value(Some(Value::String("[Filtered]".to_string()))); } } diff --git a/relay-general/tests/snapshots/test_fixtures__event_schema.snap b/relay-general/tests/snapshots/test_fixtures__event_schema.snap index 2424695e800..971a896ddbc 100644 --- a/relay-general/tests/snapshots/test_fixtures__event_schema.snap +++ b/relay-general/tests/snapshots/test_fixtures__event_schema.snap @@ -2841,7 +2841,7 @@ expression: "relay_general::protocol::event_json_schema()" "type": "object", "properties": { "api_target": { - "description": " The API target/specification that made the request.\n\n Values can be `graphql`, `rest`, etc.\n\n The data field should contain the request and response bodies based on its target specification.\n\n This information can be used to better data scrubbing and normalization.", + "description": " The API target/specification that made the request.\n\n Values can be `graphql`, `rest`, etc.\n\n The data field should contain the request and response bodies based on its target specification.\n\n This information can be used for better data scrubbing and normalization.", "default": null, "type": [ "string", From 3d2cc5f4f591384ef702327e98a81f7b09526e82 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 13:05:42 +0200 Subject: [PATCH 19/22] remove Option from keys --- relay-general/src/pii/processor.rs | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 03fd66c2b62..78d74ec3da7 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -215,7 +215,7 @@ impl<'a> Processor for PiiProcessor<'a> { /// Scrubs GraphQL variables from the event. fn scrub_graphql(event: &mut Event) { - let mut keys: Option> = None; + let mut keys: BTreeSet<&str> = BTreeSet::new(); let mut is_graphql = false; @@ -232,12 +232,10 @@ fn scrub_graphql(event: &mut Event) { if let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables") { - let mut current_keys: BTreeSet<&str> = BTreeSet::new(); for (key, value) in variables.iter_mut() { - current_keys.insert(key); + keys.insert(key); value.set_value(Some(Value::String("[Filtered]".to_string()))); } - keys = Some(current_keys); } } } @@ -248,25 +246,17 @@ fn scrub_graphql(event: &mut Event) { if let Some(Context::Response(response)) = contexts.get_context_mut("response") { if let Some(Value::Object(data)) = response.data.value_mut() { if is_graphql { - let mut delete_data = false; if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data") { - if let Some(keys) = keys { - if !keys.is_empty() { - scrub_graphql_data(&keys, graphql_data); - } else { - delete_data = true; - } + if !keys.is_empty() { + scrub_graphql_data(&keys, graphql_data); } else { - delete_data = true; + // if we don't have the variable keys, we scrub the whole data object + // because the query or mutation weren't parameterized. + data.remove("data"); } } - if delete_data { - // if we don't have the variable keys, we scrub the whole data object - // because the query or mutation weren't parameterized. - data.remove("data"); - } } } } From 0ba12b738583b43cc9e65fb15f629b350e8d8aba Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Thu, 1 Jun 2023 14:01:17 +0200 Subject: [PATCH 20/22] fi response body type --- relay-general/src/store/normalize.rs | 1 - relay-general/src/store/normalize/contexts.rs | 75 +------------------ relay-general/src/store/normalize/request.rs | 42 ++++++++++- .../src/store/normalize/url_encoding.rs | 39 ---------- 4 files changed, 40 insertions(+), 117 deletions(-) delete mode 100644 relay-general/src/store/normalize/url_encoding.rs diff --git a/relay-general/src/store/normalize.rs b/relay-general/src/store/normalize.rs index c03cd2568de..684c45c2332 100644 --- a/relay-general/src/store/normalize.rs +++ b/relay-general/src/store/normalize.rs @@ -36,7 +36,6 @@ mod mechanism; mod request; mod spans; mod stacktrace; -mod url_encoding; pub mod user_agent; diff --git a/relay-general/src/store/normalize/contexts.rs b/relay-general/src/store/normalize/contexts.rs index 742d9595532..7a7e931cf0d 100644 --- a/relay-general/src/store/normalize/contexts.rs +++ b/relay-general/src/store/normalize/contexts.rs @@ -2,7 +2,6 @@ use once_cell::sync::Lazy; use regex::Regex; use crate::protocol::{Context, OsContext, ResponseContext, RuntimeContext, ANDROID_MODEL_NAMES}; -use crate::store::normalize::url_encoding; use crate::types::{Annotated, Empty, Value}; /// Environment.OSVersion (GetVersionEx) or RuntimeInformation.OSDescription on Windows @@ -181,13 +180,10 @@ fn normalize_os_context(os: &mut OsContext) { fn parse_raw_response_data(response: &mut ResponseContext) -> Option<(&'static str, Value)> { let raw = response.data.as_str()?; - // TODO: Try to decode base64 first - if let Ok(value) = serde_json::from_str(raw) { Some(("application/json", value)) } else { - url_encoding::encoded_from_str(raw) - .map(|value| ("application/x-www-form-urlencoded", value)) + None } } @@ -692,73 +688,4 @@ mod tests { assert_eq!(response.inferred_content_type.value(), None); assert_eq!(response.data.as_str(), Some(r#"{"foo":"b"#)); } - - #[test] - fn test_infer_url_encoded() { - let mut response = ResponseContext { - data: Annotated::from(Value::String(r#"foo=bar"#.to_string())), - ..ResponseContext::default() - }; - - let mut expected_value = Object::new(); - expected_value.insert( - "foo".to_string(), - Annotated::from(Value::String("bar".into())), - ); - - normalize_response(&mut response); - assert_eq!( - response.inferred_content_type.as_str(), - Some("application/x-www-form-urlencoded") - ); - assert_eq!(response.data.value(), Some(&Value::Object(expected_value))); - } - - #[test] - fn test_infer_url_false_positive() { - let mut response = ResponseContext { - data: Annotated::from(Value::String("dGU=".to_string())), - ..ResponseContext::default() - }; - - normalize_response(&mut response); - assert_eq!(response.inferred_content_type.value(), None); - assert_eq!(response.data.as_str(), Some("dGU=")); - } - - #[test] - fn test_infer_url_encoded_base64() { - let mut response = ResponseContext { - data: Annotated::from(Value::String("dA==".to_string())), - ..ResponseContext::default() - }; - - normalize_response(&mut response); - assert_eq!(response.inferred_content_type.value(), None); - assert_eq!(response.data.as_str(), Some("dA==")); - } - - #[test] - fn test_infer_xml() { - let mut response = ResponseContext { - data: Annotated::from(Value::String("".to_string())), - ..ResponseContext::default() - }; - - normalize_response(&mut response); - assert_eq!(response.inferred_content_type.value(), None); - assert_eq!(response.data.as_str(), Some("")); - } - - #[test] - fn test_infer_binary() { - let mut response = ResponseContext { - data: Annotated::from(Value::String("\u{001f}1\u{0000}\u{0000}".to_string())), - ..ResponseContext::default() - }; - - normalize_response(&mut response); - assert_eq!(response.inferred_content_type.value(), None); - assert_eq!(response.data.as_str(), Some("\u{001f}1\u{0000}\u{0000}")); - } } diff --git a/relay-general/src/store/normalize/request.rs b/relay-general/src/store/normalize/request.rs index b5977fd1590..efbb88ca9b4 100644 --- a/relay-general/src/store/normalize/request.rs +++ b/relay-general/src/store/normalize/request.rs @@ -3,7 +3,6 @@ use regex::Regex; use url::Url; use crate::protocol::{Query, Request}; -use crate::store::normalize::url_encoding; use crate::types::{Annotated, ErrorKind, Meta, ProcessingAction, ProcessingResult, Value}; const ELLIPSIS: char = '\u{2026}'; @@ -88,6 +87,44 @@ fn normalize_method(method: &mut String, meta: &mut Meta) -> ProcessingResult { Ok(()) } +/// Decodes an urlencoded body. +fn urlencoded_from_str(raw: &str) -> Option { + // Binary strings would be decoded, but we know url-encoded bodies are ASCII. + if !raw.is_ascii() { + return None; + } + + // Avoid false positives with XML and partial JSON. + if raw.starts_with(" value, + _ => return None, + }; + + // `serde_urlencoded` can decode any string with valid characters into an object. However, we + // need to account for false-positives in the following cases: + // - An empty string "" is decoded as empty object + // - A string "foo" is decoded as {"foo": ""} (check for single empty value) + // - A base64 encoded string "dGU=" also decodes with a single empty value + // - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =) + let is_valid = object.len() > 1 + || object + .values() + .next() + .and_then(Annotated::::as_str) + .map_or(false, |s| !matches!(s, "" | "=")); + + if is_valid { + Some(Value::Object(object)) + } else { + None + } +} + fn parse_raw_data(request: &Request) -> Option<(&'static str, Value)> { let raw = request.data.as_str()?; @@ -96,8 +133,7 @@ fn parse_raw_data(request: &Request) -> Option<(&'static str, Value)> { if let Ok(value) = serde_json::from_str(raw) { Some(("application/json", value)) } else { - url_encoding::encoded_from_str(raw) - .map(|value| ("application/x-www-form-urlencoded", value)) + urlencoded_from_str(raw).map(|value| ("application/x-www-form-urlencoded", value)) } } diff --git a/relay-general/src/store/normalize/url_encoding.rs b/relay-general/src/store/normalize/url_encoding.rs deleted file mode 100644 index 70f8359fb7b..00000000000 --- a/relay-general/src/store/normalize/url_encoding.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::types::{Annotated, Value}; - -/// Decodes an urlencoded body. -pub fn encoded_from_str(raw: &str) -> Option { - // Binary strings would be decoded, but we know url-encoded bodies are ASCII. - if !raw.is_ascii() { - return None; - } - - // Avoid false positives with XML and partial JSON. - if raw.starts_with(" value, - _ => return None, - }; - - // `serde_urlencoded` can decode any string with valid characters into an object. However, we - // need to account for false-positives in the following cases: - // - An empty string "" is decoded as empty object - // - A string "foo" is decoded as {"foo": ""} (check for single empty value) - // - A base64 encoded string "dGU=" also decodes with a single empty value - // - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =) - let is_valid = object.len() > 1 - || object - .values() - .next() - .and_then(Annotated::::as_str) - .map_or(false, |s| !matches!(s, "" | "=")); - - if is_valid { - Some(Value::Object(object)) - } else { - None - } -} From 85cec171912b2b89050546a1d5cfa3fdf0e7f801 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Fri, 2 Jun 2023 14:40:37 +0200 Subject: [PATCH 21/22] code review --- relay-general/src/pii/processor.rs | 27 ++++++++++--------- .../src/protocol/contexts/response.rs | 2 +- relay-general/src/protocol/request.rs | 2 +- relay-general/src/store/normalize/contexts.rs | 8 +++--- .../test_fixtures__event_schema.snap | 4 +-- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 78d74ec3da7..4b91e3e1d5b 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -219,7 +219,7 @@ fn scrub_graphql(event: &mut Event) { let mut is_graphql = false; - // collect the variables keys and scrub them out. + // Collect the variables keys and scrub them out. if let Some(request) = event.request.value_mut() { if let Some(Value::Object(data)) = request.data.value_mut() { if let Some(api_target) = request.api_target.value() { @@ -241,21 +241,22 @@ fn scrub_graphql(event: &mut Event) { } } - // scrub PII from the data object if they match the variables keys. + if !is_graphql { + return; + } + + // Scrub PII from the data object if they match the variables keys. if let Some(contexts) = event.contexts.value_mut() { if let Some(Context::Response(response)) = contexts.get_context_mut("response") { if let Some(Value::Object(data)) = response.data.value_mut() { - if is_graphql { - if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = - data.get_mut("data") - { - if !keys.is_empty() { - scrub_graphql_data(&keys, graphql_data); - } else { - // if we don't have the variable keys, we scrub the whole data object - // because the query or mutation weren't parameterized. - data.remove("data"); - } + if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data") + { + if !keys.is_empty() { + scrub_graphql_data(&keys, graphql_data); + } else { + // If we don't have the variable keys, we scrub the whole data object + // because the query or mutation weren't parameterized. + data.remove("data"); } } } diff --git a/relay-general/src/protocol/contexts/response.rs b/relay-general/src/protocol/contexts/response.rs index fddba81ad0a..22a9360074f 100644 --- a/relay-general/src/protocol/contexts/response.rs +++ b/relay-general/src/protocol/contexts/response.rs @@ -31,7 +31,7 @@ pub struct ResponseContext { /// Response data in any format that makes sense. /// - /// SDKs should discard large and binary bodies by default. Can be given as string or + /// SDKs should discard large and binary bodies by default. Can be given as a string or /// structural data of any format. #[metastructure(pii = "true", bag_size = "large")] pub data: Annotated, diff --git a/relay-general/src/protocol/request.rs b/relay-general/src/protocol/request.rs index 506acc964c1..8a213dbdb6a 100644 --- a/relay-general/src/protocol/request.rs +++ b/relay-general/src/protocol/request.rs @@ -445,7 +445,7 @@ pub struct Request { /// Request data in any format that makes sense. /// - /// SDKs should discard large and binary bodies by default. Can be given as string or + /// SDKs should discard large and binary bodies by default. Can be given as a string or /// structural data of any format. #[metastructure(pii = "true", bag_size = "large")] pub data: Annotated, diff --git a/relay-general/src/store/normalize/contexts.rs b/relay-general/src/store/normalize/contexts.rs index 7a7e931cf0d..7d881b39a99 100644 --- a/relay-general/src/store/normalize/contexts.rs +++ b/relay-general/src/store/normalize/contexts.rs @@ -180,11 +180,9 @@ fn normalize_os_context(os: &mut OsContext) { fn parse_raw_response_data(response: &mut ResponseContext) -> Option<(&'static str, Value)> { let raw = response.data.as_str()?; - if let Ok(value) = serde_json::from_str(raw) { - Some(("application/json", value)) - } else { - None - } + serde_json::from_str(raw) + .ok() + .map(|value| ("application/json", value)) } fn normalize_response_data(response: &mut ResponseContext) { diff --git a/relay-general/tests/snapshots/test_fixtures__event_schema.snap b/relay-general/tests/snapshots/test_fixtures__event_schema.snap index 971a896ddbc..843ef792145 100644 --- a/relay-general/tests/snapshots/test_fixtures__event_schema.snap +++ b/relay-general/tests/snapshots/test_fixtures__event_schema.snap @@ -2871,7 +2871,7 @@ expression: "relay_general::protocol::event_json_schema()" ] }, "data": { - "description": " Request data in any format that makes sense.\n\n SDKs should discard large and binary bodies by default. Can be given as string or\n structural data of any format.", + "description": " Request data in any format that makes sense.\n\n SDKs should discard large and binary bodies by default. Can be given as a string or\n structural data of any format.", "default": null }, "env": { @@ -3015,7 +3015,7 @@ expression: "relay_general::protocol::event_json_schema()" ] }, "data": { - "description": " Response data in any format that makes sense.\n\n SDKs should discard large and binary bodies by default. Can be given as string or\n structural data of any format.", + "description": " Response data in any format that makes sense.\n\n SDKs should discard large and binary bodies by default. Can be given as a string or\n structural data of any format.", "default": null }, "headers": { From 342bba1cda818ce0b4e0b811f744298e19e78ed1 Mon Sep 17 00:00:00 2001 From: Manoel Aranda Neto Date: Tue, 6 Jun 2023 14:21:38 +0200 Subject: [PATCH 22/22] delete old snap file --- ...__tests__scrub_request_data_variables.snap | 136 ------------------ 1 file changed, 136 deletions(-) delete mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap deleted file mode 100644 index 1fcc5c9044b..00000000000 --- a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_request_data_variables.snap +++ /dev/null @@ -1,136 +0,0 @@ ---- -source: relay-general/src/pii/processor.rs -expression: "&data" ---- -Event { - id: EventId( - 5b978c77-c034-4ca1-a360-acca3da68167, - ), - level: ~, - version: ~, - ty: ~, - fingerprint: ~, - culprit: ~, - transaction: ~, - transaction_info: ~, - time_spent: ~, - logentry: ~, - logger: ~, - modules: ~, - platform: ~, - timestamp: ~, - start_timestamp: ~, - received: ~, - server_name: ~, - release: ~, - dist: ~, - environment: ~, - site: ~, - user: ~, - request: Request { - url: "http://absolute.uri/foo", - method: "POST", - data: Object( - { - "query": String( - "{\n viewer {\n login\n }\n}", - ), - "variables": Object( - { - "login": String( - "[Filtered]", - ), - }, - ), - }, - ), - query_string: Query( - PairList( - [ - ( - "query", - JsonLenientString( - "foobar", - ), - ), - ( - "page", - JsonLenientString( - "2", - ), - ), - ], - ), - ), - fragment: ~, - cookies: ~, - headers: ~, - body_size: ~, - env: ~, - inferred_content_type: ~, - other: { - "graphql_error": Bool( - true, - ), - }, - }, - contexts: Contexts( - { - "response": ContextInner( - Response( - ResponseContext { - cookies: ~, - headers: ~, - status_code: 200, - body_size: ~, - data: Object( - { - "data": Object( - { - "viewer": Object( - { - "login": String( - "[Filtered]", - ), - }, - ), - }, - ), - }, - ), - inferred_content_type: ~, - other: { - "graphql_error": Bool( - true, - ), - }, - }, - ), - ), - }, - ), - breadcrumbs: ~, - exceptions: ~, - stacktrace: ~, - template: ~, - threads: ~, - tags: ~, - extra: ~, - debug_meta: ~, - client_sdk: ~, - ingest_path: ~, - errors: ~, - key_id: ~, - project: ~, - grouping_config: ~, - checksum: ~, - csp: ~, - hpkp: ~, - expectct: ~, - expectstaple: ~, - spans: ~, - measurements: ~, - breakdowns: ~, - _metrics: ~, - other: {}, -}