diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 5bbe1e5e648f..103fe3face51 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -743,6 +743,11 @@ message Struct{ repeated Field sub_field_types = 1; } +message Map { + Field field_type = 1; + bool keys_sorted = 2; +} + enum UnionMode{ sparse = 0; dense = 1; @@ -894,6 +899,7 @@ message ArrowType{ Struct STRUCT = 28; Union UNION = 29; Dictionary DICTIONARY = 30; + Map MAP = 33; } } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 1c84da002503..335f6f1c59da 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -1165,6 +1165,9 @@ impl serde::Serialize for ArrowType { arrow_type::ArrowTypeEnum::Dictionary(v) => { struct_ser.serialize_field("DICTIONARY", v)?; } + arrow_type::ArrowTypeEnum::Map(v) => { + struct_ser.serialize_field("MAP", v)?; + } } } struct_ser.end() @@ -1214,6 +1217,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "STRUCT", "UNION", "DICTIONARY", + "MAP", ]; #[allow(clippy::enum_variant_names)] @@ -1250,6 +1254,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { Struct, Union, Dictionary, + Map, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -1303,6 +1308,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "STRUCT" => Ok(GeneratedField::Struct), "UNION" => Ok(GeneratedField::Union), "DICTIONARY" => Ok(GeneratedField::Dictionary), + "MAP" => Ok(GeneratedField::Map), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -1542,6 +1548,13 @@ impl<'de> serde::Deserialize<'de> for ArrowType { return Err(serde::de::Error::duplicate_field("DICTIONARY")); } arrow_type_enum__ = map.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::Dictionary) +; + } + GeneratedField::Map => { + if arrow_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("MAP")); + } + arrow_type_enum__ = map.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::Map) ; } } @@ -11139,6 +11152,116 @@ impl<'de> serde::Deserialize<'de> for LogicalPlanNode { deserializer.deserialize_struct("datafusion.LogicalPlanNode", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for Map { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.field_type.is_some() { + len += 1; + } + if self.keys_sorted { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.Map", len)?; + if let Some(v) = self.field_type.as_ref() { + struct_ser.serialize_field("fieldType", v)?; + } + if self.keys_sorted { + struct_ser.serialize_field("keysSorted", &self.keys_sorted)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for Map { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "field_type", + "fieldType", + "keys_sorted", + "keysSorted", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + FieldType, + KeysSorted, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "fieldType" | "field_type" => Ok(GeneratedField::FieldType), + "keysSorted" | "keys_sorted" => Ok(GeneratedField::KeysSorted), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = Map; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.Map") + } + + fn visit_map(self, mut map: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut field_type__ = None; + let mut keys_sorted__ = None; + while let Some(k) = map.next_key()? { + match k { + GeneratedField::FieldType => { + if field_type__.is_some() { + return Err(serde::de::Error::duplicate_field("fieldType")); + } + field_type__ = map.next_value()?; + } + GeneratedField::KeysSorted => { + if keys_sorted__.is_some() { + return Err(serde::de::Error::duplicate_field("keysSorted")); + } + keys_sorted__ = Some(map.next_value()?); + } + } + } + Ok(Map { + field_type: field_type__, + keys_sorted: keys_sorted__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.Map", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for NegativeNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 5497a878372c..029380a99ed5 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -931,6 +931,14 @@ pub struct Struct { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct Map { + #[prost(message, optional, boxed, tag = "1")] + pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag = "2")] + pub keys_sorted: bool, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct Union { #[prost(message, repeated, tag = "1")] pub union_types: ::prost::alloc::vec::Vec, @@ -1139,7 +1147,7 @@ pub struct Decimal128 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32, 15, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32, 15, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33" )] pub arrow_type_enum: ::core::option::Option, } @@ -1217,6 +1225,8 @@ pub mod arrow_type { Union(super::Union), #[prost(message, tag = "30")] Dictionary(::prost::alloc::boxed::Box), + #[prost(message, tag = "33")] + Map(::prost::alloc::boxed::Box), } } /// Useful for representing an empty enum variant in rust diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 498563b2ab47..1b704f3aa526 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -351,6 +351,12 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { let value_datatype = dict.as_ref().value.as_deref().required("value")?; DataType::Dictionary(Box::new(key_datatype), Box::new(value_datatype)) } + arrow_type::ArrowTypeEnum::Map(map) => { + let field: Field = + map.as_ref().field_type.as_deref().required("field_type")?; + let keys_sorted = map.keys_sorted; + DataType::Map(Box::new(field), keys_sorted) + } }) } } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 6f44542b90b2..bf3c733c005a 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -2212,6 +2212,17 @@ mod roundtrip_tests { 4, )), ), + DataType::Map( + new_box_field( + "entries", + DataType::Struct(vec![ + Field::new("keys", DataType::Utf8, false), + Field::new("values", DataType::Int32, true), + ]), + true, + ), + false, + ), ]; for test_case in test_cases.into_iter() { diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 90c99c0d96bd..c240ef853f5b 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -218,9 +218,12 @@ impl TryFrom<&DataType> for protobuf::arrow_type::ArrowTypeEnum { DataType::Decimal256(_, _) => { return Err(Error::General("Proto serialization error: The Decimal256 data type is not yet supported".to_owned())) } - DataType::Map(_, _) => { - return Err(Error::General( - "Proto serialization error: The Map data type is not yet supported".to_owned() + DataType::Map(field, sorted) => { + Self::Map(Box::new( + protobuf::Map { + field_type: Some(Box::new(field.as_ref().try_into()?)), + keys_sorted: *sorted, + } )) } DataType::RunEndEncoded(_, _) => {