diff --git a/datafusion/proto/src/common.rs b/datafusion/proto/src/common.rs index ed826f587413..cbbb469f0863 100644 --- a/datafusion/proto/src/common.rs +++ b/datafusion/proto/src/common.rs @@ -17,26 +17,22 @@ use datafusion_common::{DataFusionError, Result}; -pub fn csv_delimiter_to_string(b: u8) -> Result { - let b = &[b]; - let b = std::str::from_utf8(b) - .map_err(|_| DataFusionError::Internal("Invalid CSV delimiter".to_owned()))?; - Ok(b.to_owned()) -} - -pub fn str_to_byte(s: &String) -> Result { +pub(crate) fn str_to_byte(s: &String, description: &str) -> Result { if s.len() != 1 { - return Err(DataFusionError::Internal( - "Invalid CSV delimiter".to_owned(), - )); + return Err(DataFusionError::Internal(format!( + "Invalid CSV {description}: expected single character, got {s}" + ))); } Ok(s.as_bytes()[0]) } -pub fn byte_to_string(b: u8) -> Result { +pub(crate) fn byte_to_string(b: u8, description: &str) -> Result { let b = &[b]; - let b = std::str::from_utf8(b) - .map_err(|_| DataFusionError::Internal("Invalid CSV delimiter".to_owned()))?; + let b = std::str::from_utf8(b).map_err(|_| { + DataFusionError::Internal(format!( + "Invalid CSV {description}: can not represent {b:0x?} as utf8" + )) + })?; Ok(b.to_owned()) } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 27f9389678cf..831ac10197fd 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -353,10 +353,10 @@ impl AsLogicalPlan for LogicalPlanNode { }) => { let mut csv = CsvFormat::default() .with_has_header(*has_header) - .with_delimiter(str_to_byte(delimiter)?) - .with_quote(str_to_byte(quote)?); + .with_delimiter(str_to_byte(delimiter, "delimiter")?) + .with_quote(str_to_byte(quote, "quote")?); if let Some(protobuf::csv_format::OptionalEscape::Escape(escape)) = optional_escape { - csv = csv.with_quote(str_to_byte(escape)?); + csv = csv.with_quote(str_to_byte(escape, "escape")?); } Arc::new(csv)}, FileFormatType::Avro(..) => Arc::new(AvroFormat), @@ -850,12 +850,12 @@ impl AsLogicalPlan for LogicalPlanNode { FileFormatType::Parquet(protobuf::ParquetFormat {}) } else if let Some(csv) = any.downcast_ref::() { FileFormatType::Csv(protobuf::CsvFormat { - delimiter: byte_to_string(csv.delimiter())?, + delimiter: byte_to_string(csv.delimiter(), "delimiter")?, has_header: csv.has_header(), - quote: byte_to_string(csv.quote())?, + quote: byte_to_string(csv.quote(), "quote")?, optional_escape: if let Some(escape) = csv.escape() { Some(protobuf::csv_format::OptionalEscape::Escape( - byte_to_string(escape)?, + byte_to_string(escape, "escape")?, )) } else { None diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index cebe4eddcc75..e97a773d3472 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -51,8 +51,8 @@ use datafusion_common::{DataFusionError, Result}; use prost::bytes::BufMut; use prost::Message; +use crate::common::str_to_byte; use crate::common::{byte_to_string, proto_error}; -use crate::common::{csv_delimiter_to_string, str_to_byte}; use crate::physical_plan::from_proto::{ parse_physical_expr, parse_physical_sort_expr, parse_protobuf_file_scan_config, }; @@ -155,13 +155,13 @@ impl AsExecutionPlan for PhysicalPlanNode { registry, )?, scan.has_header, - str_to_byte(&scan.delimiter)?, - str_to_byte(&scan.quote)?, + str_to_byte(&scan.delimiter, "delimiter")?, + str_to_byte(&scan.quote, "quote")?, if let Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape( escape, )) = &scan.optional_escape { - Some(str_to_byte(escape)?) + Some(str_to_byte(escape, "escape")?) } else { None }, @@ -1079,11 +1079,11 @@ impl AsExecutionPlan for PhysicalPlanNode { protobuf::CsvScanExecNode { base_conf: Some(exec.base_config().try_into()?), has_header: exec.has_header(), - delimiter: csv_delimiter_to_string(exec.delimiter())?, - quote: byte_to_string(exec.quote())?, + delimiter: byte_to_string(exec.delimiter(), "delimiter")?, + quote: byte_to_string(exec.quote(), "quote")?, optional_escape: if let Some(escape) = exec.escape() { Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape( - byte_to_string(escape)?, + byte_to_string(escape, "escape")?, )) } else { None