Skip to content

Commit 6e73e52

Browse files
committed
AVRO-3814: Fix schema resolution for records in union types (#2441)
* AVRO-3814: Add a minimal test-case to reproduce

  Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-3814: Fix schema resolution for records in union types

  The logic for validating records in Value::validate_internal() was too strict when resolving union types containing a record. This could lead to a situation where schema resolution would fail because the correct schema to use for a union type could not be identified. This commit fixes this by passing a boolean `schema_resolution` to `Value::validate_internal()` which governs whether schema-resolution rules should be applied.

* AVRO-3814: Ensure the deserialized value is validated against the schema

* AVRO-3814: Extend test case for validate_record

* AVRO-3814: Revert whitespace changes

* AVRO-3814: Remove confusing comments

* AVRO-3786: Add test-cases and fix for AVRO-3786

* AVRO-3786: Revert change to UnionSchema::find_schema_with_known_schemata

* AVRO-3814: [Rust] Use types::Value::resolve_internal() instead of validate_internal() when looking for the matching schema in a union

  Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-3814: Revert changes to validate_internal()

  Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-3814: Remove obsolete rustdoc for arguments

  Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

---------

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: Rik Heijdens <[email protected]>
(cherry picked from commit 598911d)
1 parent 56f08b8 commit 6e73e52

File tree

3 files changed

+858
-3
lines changed

3 files changed

+858
-3
lines changed

lang/rust/avro/src/schema.rs

+128-2
Original file line numberDiff line numberDiff line change
@@ -837,9 +837,11 @@ impl UnionSchema {
837837
// extend known schemas with just resolved names
838838
collected_names.extend(resolved_names);
839839
let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone());
840+
840841
value
841-
.validate_internal(schema, &collected_names, namespace)
842-
.is_none()
842+
.clone()
843+
.resolve_internal(schema, &collected_names, namespace, &None)
844+
.is_ok()
843845
})
844846
}
845847
}
@@ -5171,4 +5173,128 @@ mod tests {
51715173

51725174
Ok(())
51735175
}
5176+
5177+
#[test]
5178+
fn test_avro_3814_schema_resolution_failure() -> TestResult {
5179+
// Define a reader schema: a nested record with an optional field.
5180+
let reader_schema = json!(
5181+
{
5182+
"type": "record",
5183+
"name": "MyOuterRecord",
5184+
"fields": [
5185+
{
5186+
"name": "inner_record",
5187+
"type": [
5188+
"null",
5189+
{
5190+
"type": "record",
5191+
"name": "MyRecord",
5192+
"fields": [
5193+
{"name": "a", "type": "string"}
5194+
]
5195+
}
5196+
],
5197+
"default": null
5198+
}
5199+
]
5200+
}
5201+
);
5202+
5203+
// Define a writer schema: a nested record with an optional field, which
5204+
// may optionally contain an enum.
5205+
let writer_schema = json!(
5206+
{
5207+
"type": "record",
5208+
"name": "MyOuterRecord",
5209+
"fields": [
5210+
{
5211+
"name": "inner_record",
5212+
"type": [
5213+
"null",
5214+
{
5215+
"type": "record",
5216+
"name": "MyRecord",
5217+
"fields": [
5218+
{"name": "a", "type": "string"},
5219+
{
5220+
"name": "b",
5221+
"type": [
5222+
"null",
5223+
{
5224+
"type": "enum",
5225+
"name": "MyEnum",
5226+
"symbols": ["A", "B", "C"],
5227+
"default": "C"
5228+
}
5229+
],
5230+
"default": null
5231+
},
5232+
]
5233+
}
5234+
]
5235+
}
5236+
],
5237+
"default": null
5238+
}
5239+
);
5240+
5241+
// Use different structs to represent the "Reader" and the "Writer"
5242+
// to mimic two different versions of a producer & consumer application.
5243+
#[derive(Serialize, Deserialize, Debug)]
5244+
struct MyInnerRecordReader {
5245+
a: String,
5246+
}
5247+
5248+
#[derive(Serialize, Deserialize, Debug)]
5249+
struct MyRecordReader {
5250+
inner_record: Option<MyInnerRecordReader>,
5251+
}
5252+
5253+
#[derive(Serialize, Deserialize, Debug)]
5254+
enum MyEnum {
5255+
A,
5256+
B,
5257+
C,
5258+
}
5259+
5260+
#[derive(Serialize, Deserialize, Debug)]
5261+
struct MyInnerRecordWriter {
5262+
a: String,
5263+
b: Option<MyEnum>,
5264+
}
5265+
5266+
#[derive(Serialize, Deserialize, Debug)]
5267+
struct MyRecordWriter {
5268+
inner_record: Option<MyInnerRecordWriter>,
5269+
}
5270+
5271+
let s = MyRecordWriter {
5272+
inner_record: Some(MyInnerRecordWriter {
5273+
a: "foo".to_string(),
5274+
b: None,
5275+
}),
5276+
};
5277+
5278+
// Serialize using the writer schema.
5279+
let writer_schema = Schema::parse(&writer_schema)?;
5280+
let avro_value = crate::to_value(s)?;
5281+
assert!(
5282+
avro_value.validate(&writer_schema),
5283+
"value is valid for schema",
5284+
);
5285+
let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
5286+
5287+
// Now, attempt to deserialize using the reader schema.
5288+
let reader_schema = Schema::parse(&reader_schema)?;
5289+
let mut x = &datum[..];
5290+
5291+
// Deserialization should succeed and we should be able to resolve the schema.
5292+
let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
5293+
assert!(deser_value.validate(&reader_schema));
5294+
5295+
// Verify that we can read a field from the record.
5296+
let d: MyRecordReader = crate::from_value(&deser_value)?;
5297+
assert_eq!(d.inner_record.unwrap().a, "foo".to_string());
5298+
Ok(())
5299+
}
51745300
}

lang/rust/avro/src/types.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ impl Value {
377377
}
378378
}
379379

380+
/// Validates the value against the provided schema.
380381
pub(crate) fn validate_internal<S: std::borrow::Borrow<Schema> + Debug>(
381382
&self,
382383
schema: &Schema,
@@ -516,6 +517,7 @@ impl Value {
516517
let non_nullable_fields_count =
517518
fields.iter().filter(|&rf| !rf.is_nullable()).count();
518519

520+
// If the record contains fewer fields than the number of non-nullable fields required by the schema, it is invalid.
519521
if record_fields.len() < non_nullable_fields_count {
520522
return Some(format!(
521523
"The value's records length ({}) doesn't match the schema ({} non-nullable fields)",
@@ -603,7 +605,7 @@ impl Value {
603605
self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None)
604606
}
605607

606-
fn resolve_internal(
608+
pub(crate) fn resolve_internal(
607609
mut self,
608610
schema: &Schema,
609611
names: &NamesRef,

0 commit comments

Comments
 (0)