Skip to content

Commit

Permalink
Alternate format support for Timestamp casting (DATETIME for MySQL) (#23
Browse files Browse the repository at this point in the history
)
  • Loading branch information
sgrebnov authored Jul 21, 2024
1 parent 790f9c6 commit ffe792d
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 8 deletions.
67 changes: 66 additions & 1 deletion datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use arrow_schema::TimeUnit;
use regex::Regex;
use sqlparser::{
ast::{self, Ident, ObjectName},
ast::{self, Ident, ObjectName, TimezoneInfo},
keywords::ALL_KEYWORDS,
};

Expand Down Expand Up @@ -70,6 +73,22 @@ pub trait Dialect: Send + Sync {
/// The SQL data type this dialect uses for `int64` casts.
///
/// This default implementation returns `ast::DataType::BigInt(None)`.
fn int64_cast_dtype(&self) -> ast::DataType {
ast::DataType::BigInt(None)
}

/// The SQL type to use for Timestamp casting.
///
/// Most dialects use `Timestamp`, but some, like MySQL, require `Datetime`.
/// Others, like Dremio, do not support `WithTimeZone` and always require a
/// plain `Timestamp`.
///
/// This default implementation ignores the time unit and returns
/// `ast::DataType::Timestamp(None, _)`, using `TimezoneInfo::WithTimeZone`
/// when `tz` is `Some` and `TimezoneInfo::None` otherwise.
fn timestamp_cast_dtype(
    &self,
    _time_unit: &TimeUnit,
    tz: &Option<Arc<str>>,
) -> ast::DataType {
    // Only the presence of a time zone matters here, not its value.
    let zone_marker = if tz.is_some() {
        TimezoneInfo::WithTimeZone
    } else {
        TimezoneInfo::None
    };
    ast::DataType::Timestamp(None, zone_marker)
}
}

/// `IntervalStyle` to use for unparsing
Expand Down Expand Up @@ -157,6 +176,14 @@ impl Dialect for MySqlDialect {
/// Returns a custom data type named `SIGNED`, with no modifiers, as the
/// `int64` cast target for this dialect.
fn int64_cast_dtype(&self) -> ast::DataType {
    let signed = ObjectName(vec![Ident::new("SIGNED")]);
    ast::DataType::Custom(signed, Vec::new())
}

/// Always returns `ast::DataType::Datetime(None)` as the timestamp cast
/// target for this dialect.
///
/// Both the time unit and the time zone arguments are ignored.
fn timestamp_cast_dtype(
&self,
_time_unit: &TimeUnit,
_tz: &Option<Arc<str>>,
) -> ast::DataType {
ast::DataType::Datetime(None)
}
}

/// Unparser dialect type for SQLite; the struct has no fields.
pub struct SqliteDialect {}
Expand All @@ -176,6 +203,8 @@ pub struct CustomDialect {
use_char_for_utf8_cast: bool,
date_subfield_extract_style: DateFieldExtractStyle,
int64_cast_dtype: ast::DataType,
timestamp_cast_dtype: ast::DataType,
timestamp_cast_dtype_tz: ast::DataType,
}

impl Default for CustomDialect {
Expand All @@ -189,6 +218,11 @@ impl Default for CustomDialect {
use_char_for_utf8_cast: false,
date_subfield_extract_style: DateFieldExtractStyle::DatePart,
int64_cast_dtype: ast::DataType::BigInt(None),
timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
timestamp_cast_dtype_tz: ast::DataType::Timestamp(
None,
TimezoneInfo::WithTimeZone,
),
}
}
}
Expand Down Expand Up @@ -235,6 +269,18 @@ impl Dialect for CustomDialect {
/// Returns a clone of the `int64_cast_dtype` value stored on this dialect.
fn int64_cast_dtype(&self) -> ast::DataType {
self.int64_cast_dtype.clone()
}

/// Returns one of the two timestamp cast types stored on this dialect.
///
/// When `tz` is `Some`, a clone of `timestamp_cast_dtype_tz` is returned;
/// otherwise a clone of `timestamp_cast_dtype`. The time unit is ignored.
fn timestamp_cast_dtype(
    &self,
    _time_unit: &TimeUnit,
    tz: &Option<Arc<str>>,
) -> ast::DataType {
    // Select the stored type first, then clone once.
    let configured = match tz {
        Some(_) => &self.timestamp_cast_dtype_tz,
        None => &self.timestamp_cast_dtype,
    };
    configured.clone()
}
}

// create a CustomDialectBuilder
Expand All @@ -247,6 +293,8 @@ pub struct CustomDialectBuilder {
use_char_for_utf8_cast: bool,
date_subfield_extract_style: DateFieldExtractStyle,
int64_cast_dtype: ast::DataType,
timestamp_cast_dtype: ast::DataType,
timestamp_cast_dtype_tz: ast::DataType,
}

impl CustomDialectBuilder {
Expand All @@ -260,6 +308,11 @@ impl CustomDialectBuilder {
use_char_for_utf8_cast: false,
date_subfield_extract_style: DateFieldExtractStyle::DatePart,
int64_cast_dtype: ast::DataType::BigInt(None),
timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
timestamp_cast_dtype_tz: ast::DataType::Timestamp(
None,
TimezoneInfo::WithTimeZone,
),
}
}

Expand All @@ -273,6 +326,8 @@ impl CustomDialectBuilder {
use_char_for_utf8_cast: self.use_char_for_utf8_cast,
date_subfield_extract_style: self.date_subfield_extract_style,
int64_cast_dtype: self.int64_cast_dtype,
timestamp_cast_dtype: self.timestamp_cast_dtype,
timestamp_cast_dtype_tz: self.timestamp_cast_dtype_tz,
}
}

Expand Down Expand Up @@ -327,4 +382,14 @@ impl CustomDialectBuilder {
self.int64_cast_dtype = int64_cast_dtype;
self
}

pub fn with_timestamp_cast_dtype(
mut self,
timestamp_cast_dtype: ast::DataType,
timestamp_cast_dtype_tz: ast::DataType,
) -> Self {
self.timestamp_cast_dtype = timestamp_cast_dtype;
self.timestamp_cast_dtype_tz = timestamp_cast_dtype_tz;
self
}
}
48 changes: 41 additions & 7 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1286,13 +1286,8 @@ impl Unparser<'_> {
} else {
ast::DataType::Double
}),
DataType::Timestamp(_, tz) => {
let tz_info = match tz {
Some(_) => TimezoneInfo::WithTimeZone,
None => TimezoneInfo::None,
};

Ok(ast::DataType::Timestamp(None, tz_info))
DataType::Timestamp(time_unit, tz) => {
Ok(self.dialect.timestamp_cast_dtype(time_unit, tz))
}
DataType::Date32 => Ok(ast::DataType::Date),
DataType::Date64 => Ok(self.ast_type_for_date64_in_cast()),
Expand Down Expand Up @@ -2158,4 +2153,43 @@ mod tests {
}
Ok(())
}

#[test]
fn custom_dialect_with_teimstamp_cast_dtype() -> Result<()> {
    // A custom dialect built with no overrides.
    let default_dialect = CustomDialectBuilder::new().build();
    // A custom dialect whose timestamp cast types are both `Datetime`.
    let mysql_dialect = CustomDialectBuilder::new()
        .with_timestamp_cast_dtype(
            ast::DataType::Datetime(None),
            ast::DataType::Datetime(None),
        )
        .build();

    let timestamp = DataType::Timestamp(TimeUnit::Nanosecond, None);
    let timestamp_with_tz =
        DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into()));

    // (dialect, Arrow type to cast to, SQL type name expected in the CAST)
    let cases = [
        (&default_dialect, &timestamp, "TIMESTAMP"),
        (
            &default_dialect,
            &timestamp_with_tz,
            "TIMESTAMP WITH TIME ZONE",
        ),
        (&mysql_dialect, &timestamp, "DATETIME"),
        (&mysql_dialect, &timestamp_with_tz, "DATETIME"),
    ];

    for (dialect, data_type, identifier) in cases {
        let cast = Expr::Cast(Cast {
            expr: Box::new(col("a")),
            data_type: data_type.clone(),
        });

        let actual = Unparser::new(dialect).expr_to_sql(&cast)?.to_string();
        let expected = format!("CAST(a AS {identifier})");

        assert_eq!(actual, expected);
    }
    Ok(())
}
}

0 comments on commit ffe792d

Please sign in to comment.