Skip to content

Commit 65dd364

Browse files
authored
Improve unparser MySQL compatibility (#11589)
* Configurable date field extraction style for unparsing (#21) * Add support for IntervalStyle::MySQL (#18) * Support alternate format for Int64 unparsing (SIGNED for MySQL) (#22) * Alternate format support for Timestamp casting (DATETIME for MySQL) (#23) * Improve * Fix clippy and docs
1 parent 268be45 commit 65dd364

File tree

2 files changed

+507
-45
lines changed

2 files changed

+507
-45
lines changed

datafusion/sql/src/unparser/dialect.rs

+146-9
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::sync::Arc;
19+
20+
use arrow_schema::TimeUnit;
1821
use regex::Regex;
19-
use sqlparser::{ast, keywords::ALL_KEYWORDS};
22+
use sqlparser::{
23+
ast::{self, Ident, ObjectName, TimezoneInfo},
24+
keywords::ALL_KEYWORDS,
25+
};
2026

2127
/// `Dialect` to use for Unparsing
2228
///
@@ -36,8 +42,8 @@ pub trait Dialect: Send + Sync {
3642
true
3743
}
3844

39-
// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
40-
// E.g. Trino, Athena and Dremio does not have DATETIME data type
45+
/// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
46+
/// E.g. Trino, Athena and Dremio do not have a DATETIME data type
4147
fn use_timestamp_for_date64(&self) -> bool {
4248
false
4349
}
@@ -46,23 +52,50 @@ pub trait Dialect: Send + Sync {
4652
IntervalStyle::PostgresVerbose
4753
}
4854

49-
// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
50-
// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
55+
/// The SQL type to use for Arrow Float64 unparsing: DOUBLE by default, DOUBLE PRECISION for some dialects
56+
/// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
5157
fn float64_ast_dtype(&self) -> sqlparser::ast::DataType {
5258
sqlparser::ast::DataType::Double
5359
}
5460

55-
// The SQL type to use for Arrow Utf8 unparsing
56-
// Most dialects use VARCHAR, but some, like MySQL, require CHAR
61+
/// The SQL type to use for Arrow Utf8 unparsing
62+
/// Most dialects use VARCHAR, but some, like MySQL, require CHAR
5763
fn utf8_cast_dtype(&self) -> ast::DataType {
5864
ast::DataType::Varchar(None)
5965
}
6066

61-
// The SQL type to use for Arrow LargeUtf8 unparsing
62-
// Most dialects use TEXT, but some, like MySQL, require CHAR
67+
/// The SQL type to use for Arrow LargeUtf8 unparsing
68+
/// Most dialects use TEXT, but some, like MySQL, require CHAR
6369
fn large_utf8_cast_dtype(&self) -> ast::DataType {
6470
ast::DataType::Text
6571
}
72+
73+
/// The date field extract style to use: `DateFieldExtractStyle`
74+
fn date_field_extract_style(&self) -> DateFieldExtractStyle {
75+
DateFieldExtractStyle::DatePart
76+
}
77+
78+
/// The SQL type to use for Arrow Int64 unparsing
79+
/// Most dialects use BigInt, but some, like MySQL, require SIGNED
80+
fn int64_cast_dtype(&self) -> ast::DataType {
81+
ast::DataType::BigInt(None)
82+
}
83+
84+
/// The SQL type to use for Timestamp unparsing
85+
/// Most dialects use Timestamp, but some, like MySQL, require Datetime
86+
/// Some dialects, like Dremio, do not support WithTimeZone and always require Timestamp
87+
fn timestamp_cast_dtype(
88+
&self,
89+
_time_unit: &TimeUnit,
90+
tz: &Option<Arc<str>>,
91+
) -> ast::DataType {
92+
let tz_info = match tz {
93+
Some(_) => TimezoneInfo::WithTimeZone,
94+
None => TimezoneInfo::None,
95+
};
96+
97+
ast::DataType::Timestamp(None, tz_info)
98+
}
6699
}
67100

68101
/// `IntervalStyle` to use for unparsing
@@ -80,6 +113,19 @@ pub enum IntervalStyle {
80113
MySQL,
81114
}
82115

116+
/// Datetime subfield extraction style for unparsing
117+
///
118+
/// <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>
119+
/// Different DBMSs follow different standards; popular ones are:
120+
/// `date_part('YEAR', date '2001-02-16')`
121+
/// `EXTRACT(YEAR FROM date '2001-02-16')`
122+
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
123+
#[derive(Clone, Copy, PartialEq)]
124+
pub enum DateFieldExtractStyle {
125+
DatePart,
126+
Extract,
127+
}
128+
83129
pub struct DefaultDialect {}
84130

85131
impl Dialect for DefaultDialect {
@@ -133,6 +179,22 @@ impl Dialect for MySqlDialect {
133179
fn large_utf8_cast_dtype(&self) -> ast::DataType {
134180
ast::DataType::Char(None)
135181
}
182+
183+
fn date_field_extract_style(&self) -> DateFieldExtractStyle {
184+
DateFieldExtractStyle::Extract
185+
}
186+
187+
fn int64_cast_dtype(&self) -> ast::DataType {
188+
ast::DataType::Custom(ObjectName(vec![Ident::new("SIGNED")]), vec![])
189+
}
190+
191+
fn timestamp_cast_dtype(
192+
&self,
193+
_time_unit: &TimeUnit,
194+
_tz: &Option<Arc<str>>,
195+
) -> ast::DataType {
196+
ast::DataType::Datetime(None)
197+
}
136198
}
137199

138200
pub struct SqliteDialect {}
@@ -151,6 +213,10 @@ pub struct CustomDialect {
151213
float64_ast_dtype: sqlparser::ast::DataType,
152214
utf8_cast_dtype: ast::DataType,
153215
large_utf8_cast_dtype: ast::DataType,
216+
date_field_extract_style: DateFieldExtractStyle,
217+
int64_cast_dtype: ast::DataType,
218+
timestamp_cast_dtype: ast::DataType,
219+
timestamp_tz_cast_dtype: ast::DataType,
154220
}
155221

156222
impl Default for CustomDialect {
@@ -163,6 +229,13 @@ impl Default for CustomDialect {
163229
float64_ast_dtype: sqlparser::ast::DataType::Double,
164230
utf8_cast_dtype: ast::DataType::Varchar(None),
165231
large_utf8_cast_dtype: ast::DataType::Text,
232+
date_field_extract_style: DateFieldExtractStyle::DatePart,
233+
int64_cast_dtype: ast::DataType::BigInt(None),
234+
timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
235+
timestamp_tz_cast_dtype: ast::DataType::Timestamp(
236+
None,
237+
TimezoneInfo::WithTimeZone,
238+
),
166239
}
167240
}
168241
}
@@ -206,6 +279,26 @@ impl Dialect for CustomDialect {
206279
fn large_utf8_cast_dtype(&self) -> ast::DataType {
207280
self.large_utf8_cast_dtype.clone()
208281
}
282+
283+
fn date_field_extract_style(&self) -> DateFieldExtractStyle {
284+
self.date_field_extract_style
285+
}
286+
287+
fn int64_cast_dtype(&self) -> ast::DataType {
288+
self.int64_cast_dtype.clone()
289+
}
290+
291+
fn timestamp_cast_dtype(
292+
&self,
293+
_time_unit: &TimeUnit,
294+
tz: &Option<Arc<str>>,
295+
) -> ast::DataType {
296+
if tz.is_some() {
297+
self.timestamp_tz_cast_dtype.clone()
298+
} else {
299+
self.timestamp_cast_dtype.clone()
300+
}
301+
}
209302
}
210303

211304
/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
@@ -230,6 +323,10 @@ pub struct CustomDialectBuilder {
230323
float64_ast_dtype: sqlparser::ast::DataType,
231324
utf8_cast_dtype: ast::DataType,
232325
large_utf8_cast_dtype: ast::DataType,
326+
date_field_extract_style: DateFieldExtractStyle,
327+
int64_cast_dtype: ast::DataType,
328+
timestamp_cast_dtype: ast::DataType,
329+
timestamp_tz_cast_dtype: ast::DataType,
233330
}
234331

235332
impl Default for CustomDialectBuilder {
@@ -248,6 +345,13 @@ impl CustomDialectBuilder {
248345
float64_ast_dtype: sqlparser::ast::DataType::Double,
249346
utf8_cast_dtype: ast::DataType::Varchar(None),
250347
large_utf8_cast_dtype: ast::DataType::Text,
348+
date_field_extract_style: DateFieldExtractStyle::DatePart,
349+
int64_cast_dtype: ast::DataType::BigInt(None),
350+
timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
351+
timestamp_tz_cast_dtype: ast::DataType::Timestamp(
352+
None,
353+
TimezoneInfo::WithTimeZone,
354+
),
251355
}
252356
}
253357

@@ -260,6 +364,10 @@ impl CustomDialectBuilder {
260364
float64_ast_dtype: self.float64_ast_dtype,
261365
utf8_cast_dtype: self.utf8_cast_dtype,
262366
large_utf8_cast_dtype: self.large_utf8_cast_dtype,
367+
date_field_extract_style: self.date_field_extract_style,
368+
int64_cast_dtype: self.int64_cast_dtype,
369+
timestamp_cast_dtype: self.timestamp_cast_dtype,
370+
timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
263371
}
264372
}
265373

@@ -293,6 +401,7 @@ impl CustomDialectBuilder {
293401
self
294402
}
295403

404+
/// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
296405
pub fn with_float64_ast_dtype(
297406
mut self,
298407
float64_ast_dtype: sqlparser::ast::DataType,
@@ -301,16 +410,44 @@ impl CustomDialectBuilder {
301410
self
302411
}
303412

413+
/// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
304414
pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
305415
self.utf8_cast_dtype = utf8_cast_dtype;
306416
self
307417
}
308418

419+
/// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
309420
pub fn with_large_utf8_cast_dtype(
310421
mut self,
311422
large_utf8_cast_dtype: ast::DataType,
312423
) -> Self {
313424
self.large_utf8_cast_dtype = large_utf8_cast_dtype;
314425
self
315426
}
427+
428+
/// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
429+
pub fn with_date_field_extract_style(
430+
mut self,
431+
date_field_extract_style: DateFieldExtractStyle,
432+
) -> Self {
433+
self.date_field_extract_style = date_field_extract_style;
434+
self
435+
}
436+
437+
/// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
438+
pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
439+
self.int64_cast_dtype = int64_cast_dtype;
440+
self
441+
}
442+
443+
/// Customize the dialect with specific SQL types for Timestamp casting, one for timestamps without a time zone and one for timestamps with a time zone: Timestamp, Datetime, etc.
444+
pub fn with_timestamp_cast_dtype(
445+
mut self,
446+
timestamp_cast_dtype: ast::DataType,
447+
timestamp_tz_cast_dtype: ast::DataType,
448+
) -> Self {
449+
self.timestamp_cast_dtype = timestamp_cast_dtype;
450+
self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
451+
self
452+
}
316453
}

0 commit comments

Comments
 (0)