From e29fe220305cd9a83dbd713343058bec9c89fc5c Mon Sep 17 00:00:00 2001
From: sacundim
Date: Tue, 21 Feb 2023 22:57:28 -0800
Subject: [PATCH] Unit test cases for https://github.com/apache/arrow-rs/issues/3744 (CSV reader infers Date64 type for fields like "2020-03-19 00:00:00" that it can't parse to Date64)

---
 arrow-cast/src/cast.rs      | 12 +++++++++++-
 arrow-csv/src/reader/mod.rs | 13 +++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 49461b14c339..210adb012849 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -4887,7 +4887,17 @@ mod tests {
             Some("Not a valid date"),
             None,
         ])) as ArrayRef;
-        for array in &[a1, a2] {
+        let a3 = Arc::new(StringArray::from(vec![
+            Some("2020-09-08 12:00:00"),
+            Some("Not a valid date"),
+            None,
+        ])) as ArrayRef;
+        let a4 = Arc::new(LargeStringArray::from(vec![
+            Some("2020-09-08 12:00:00"),
+            Some("Not a valid date"),
+            None,
+        ])) as ArrayRef;
+        for array in &[a1, a2, a3, a4] {
             let to_type = DataType::Date64;
             let b = cast(array, &to_type).unwrap();
             let c = b.as_any().downcast_ref::<Date64Array>().unwrap();
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index 29bdeb4e2895..6341b170fd5d 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -1789,6 +1789,19 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_can_parse_inferred_date64() {
+        let raw = "1900-02-28 12:34:56";
+        assert_eq!(
+            infer_field_schema(raw, None),
+            DataType::Date64
+        );
+        assert_eq!(
+            parse_item::<Date64Type>(raw),
+            Some(-2203932304000)
+        );
+    }
+
     #[test]
     fn test_parse_decimal() {
         let tests = [