From 8cd6129cf244a058cc5767c34da3dcd0a393e62e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 2 Nov 2022 11:25:53 -0600 Subject: [PATCH] Improve FieldNotFound errors (#4084) --- datafusion/common/src/column.rs | 13 ++++++-- datafusion/common/src/dfschema.rs | 18 +++++++++-- datafusion/common/src/error.rs | 43 +++++++++++++++---------- datafusion/core/tests/sql/references.rs | 2 +- datafusion/expr/src/expr_rewriter.rs | 2 +- datafusion/sql/src/planner.rs | 4 +-- 6 files changed, 55 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 2c196b10b5bf..d8adaacf20d5 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -34,6 +34,14 @@ pub struct Column { } impl Column { + /// Create Column from optional qualifier and name + pub fn new(relation: Option>, name: impl Into) -> Self { + Self { + relation: relation.map(|r| r.into()), + name: name.into(), + } + } + /// Create Column from unqualified name. pub fn from_name(name: impl Into) -> Self { Self { @@ -120,12 +128,11 @@ impl Column { } Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { - qualifier: self.relation.clone(), - name: self.name, + field: Column::new(self.relation.clone(), self.name), valid_fields: Some( schemas .iter() - .flat_map(|s| s.fields().iter().map(|f| f.qualified_name())) + .flat_map(|s| s.fields().iter().map(|f| f.qualified_column())) .collect(), ), })) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index e391c5fc1ca0..59dd8791ba93 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -596,6 +596,18 @@ mod tests { use arrow::datatypes::DataType; use std::collections::BTreeMap; + #[test] + fn qualifier_in_name() -> Result<()> { + let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; + // lookup with unqualified name "t1.c0" + let err = schema.index_of_column_by_name(None, "t1.c0").err().unwrap(); + assert_eq!( + "Schema error: No field named 't1.c0'. Valid fields are 't1'.'c0', 't1'.'c1'.", + &format!("{}", err) + ); + Ok(()) + } + #[test] fn from_unqualified_field() { let field = Field::new("c0", DataType::Boolean, true); @@ -663,7 +675,7 @@ mod tests { assert!(join.is_err()); assert_eq!( "Schema error: Schema contains duplicate \ - qualified field name \'t1.c0\'", + qualified field name \'t1\'.\'c0\'", &format!("{}", join.err().unwrap()) ); Ok(()) @@ -712,7 +724,7 @@ mod tests { assert!(join.is_err()); assert_eq!( "Schema error: Schema contains qualified \ - field name \'t1.c0\' and unqualified field name \'c0\' which would be ambiguous", + field name \'t1\'.\'c0\' and unqualified field name \'c0\' which would be ambiguous", &format!("{}", join.err().unwrap()) ); Ok(()) @@ -722,7 +734,7 @@ mod tests { #[test] fn helpful_error_messages() -> Result<()> { let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; - let expected_help = "Valid fields are \'t1.c0\', \'t1.c1\'."; + let expected_help = "Valid fields are \'t1\'.\'c0\', \'t1\'.\'c1\'."; // Pertinent message parts let expected_err_msg = "Fully qualified field name \'t1.c0\'"; assert!(schema diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index c2d3e389cae5..afd03976e174 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -22,7 +22,7 @@ use std::fmt::{Display, Formatter}; use std::io; use std::result; -use crate::DFSchema; +use crate::{Column, DFSchema}; #[cfg(feature = "avro")] use apache_avro::Error as AvroError; use arrow::error::ArrowError; @@ -123,9 +123,8 @@ pub enum SchemaError { DuplicateUnqualifiedField { name: String }, /// No field with this name FieldNotFound { - qualifier: Option, - name: String, - valid_fields: Option>, + field: Column, + valid_fields: Option>, }, } @@ -136,9 +135,14 @@ pub fn field_not_found( schema: &DFSchema, ) -> DataFusionError { DataFusionError::SchemaError(SchemaError::FieldNotFound { - qualifier, - name: name.to_string(), - valid_fields: Some(schema.field_names()), + field: Column::new(qualifier, name), + valid_fields: Some( + schema + .fields() + .iter() + .map(|f| f.qualified_column()) + .collect(), + ), }) } @@ -146,23 +150,28 @@ impl Display for SchemaError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::FieldNotFound { - qualifier, - name, + field, valid_fields, } => { write!(f, "No field named ")?; - if let Some(q) = qualifier { - write!(f, "'{}.{}'", q, name)?; + if let Some(q) = &field.relation { + write!(f, "'{}'.'{}'", q, field.name)?; } else { - write!(f, "'{}'", name)?; + write!(f, "'{}'", field.name)?; } - if let Some(field_names) = valid_fields { + if let Some(fields) = valid_fields { write!( f, ". Valid fields are {}", - field_names + fields .iter() - .map(|name| format!("'{}'", name)) + .map(|field| { + if let Some(q) = &field.relation { + format!("'{}'.'{}'", q, field.name) + } else { + format!("'{}'", field.name) + } + }) .collect::>() .join(", ") )?; @@ -172,7 +181,7 @@ impl Display for SchemaError { Self::DuplicateQualifiedField { qualifier, name } => { write!( f, - "Schema contains duplicate qualified field name '{}.{}'", + "Schema contains duplicate qualified field name '{}'.'{}'", qualifier, name ) } @@ -185,7 +194,7 @@ impl Display for SchemaError { } Self::AmbiguousReference { qualifier, name } => { if let Some(q) = qualifier { - write!(f, "Schema contains qualified field name '{}.{}' and unqualified field name '{}' which would be ambiguous", q, name, name) + write!(f, "Schema contains qualified field name '{}'.'{}' and unqualified field name '{}' which would be ambiguous", q, name, name) } else { write!(f, "Ambiguous reference to unqualified field '{}'", name) } diff --git a/datafusion/core/tests/sql/references.rs b/datafusion/core/tests/sql/references.rs index 77afdb66c888..e63acc444184 100644 --- a/datafusion/core/tests/sql/references.rs +++ b/datafusion/core/tests/sql/references.rs @@ -67,7 +67,7 @@ async fn qualified_table_references_and_fields() -> Result<()> { let error = ctx.create_logical_plan(sql).unwrap_err(); assert_contains!( error.to_string(), - "No field named 'f1.c1'. Valid fields are 'test.f.c1', 'test.test.c2'" + "No field named 'f1'.'c1'. Valid fields are 'test'.'f.c1', 'test'.'test.c2'" ); // however, enclosing it in double quotes is ok diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index 8ac645617a45..40516c53767e 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -673,7 +673,7 @@ mod test { .to_string(); assert_eq!( error, - "Schema error: No field named 'b'. Valid fields are 'tableA.a'." + "Schema error: No field named 'b'. Valid fields are 'tableA'.'a'." ); } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 68b84cc9f127..21761a0bc536 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -3585,8 +3585,8 @@ mod tests { let sql = "SELECT SUM(age) FROM person GROUP BY doesnotexist"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!("Schema error: No field named 'doesnotexist'. Valid fields are 'SUM(person.age)', \ - 'person.id', 'person.first_name', 'person.last_name', 'person.age', 'person.state', \ - 'person.salary', 'person.birth_date', 'person.😀'.", format!("{}", err)); + 'person'.'id', 'person'.'first_name', 'person'.'last_name', 'person'.'age', 'person'.'state', \ + 'person'.'salary', 'person'.'birth_date', 'person'.'😀'.", format!("{}", err)); } #[test]