15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
+ use std:: sync:: Arc ;
19
+
20
+ use arrow_schema:: TimeUnit ;
18
21
use regex:: Regex ;
19
- use sqlparser:: { ast, keywords:: ALL_KEYWORDS } ;
22
+ use sqlparser:: {
23
+ ast:: { self , Ident , ObjectName , TimezoneInfo } ,
24
+ keywords:: ALL_KEYWORDS ,
25
+ } ;
20
26
21
27
/// `Dialect` to use for Unparsing
22
28
///
@@ -36,8 +42,8 @@ pub trait Dialect: Send + Sync {
36
42
true
37
43
}
38
44
39
- // Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
40
- // E.g. Trino, Athena and Dremio does not have DATETIME data type
45
+ /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
46
+ /// E.g. Trino, Athena and Dremio does not have DATETIME data type
41
47
fn use_timestamp_for_date64 ( & self ) -> bool {
42
48
false
43
49
}
@@ -46,23 +52,50 @@ pub trait Dialect: Send + Sync {
46
52
IntervalStyle :: PostgresVerbose
47
53
}
48
54
49
- // Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
50
- // E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
55
+ /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
56
+ /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
51
57
fn float64_ast_dtype ( & self ) -> sqlparser:: ast:: DataType {
52
58
sqlparser:: ast:: DataType :: Double
53
59
}
54
60
55
- // The SQL type to use for Arrow Utf8 unparsing
56
- // Most dialects use VARCHAR, but some, like MySQL, require CHAR
61
+ /// The SQL type to use for Arrow Utf8 unparsing
62
+ /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
57
63
fn utf8_cast_dtype ( & self ) -> ast:: DataType {
58
64
ast:: DataType :: Varchar ( None )
59
65
}
60
66
61
- // The SQL type to use for Arrow LargeUtf8 unparsing
62
- // Most dialects use TEXT, but some, like MySQL, require CHAR
67
+ /// The SQL type to use for Arrow LargeUtf8 unparsing
68
+ /// Most dialects use TEXT, but some, like MySQL, require CHAR
63
69
fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
64
70
ast:: DataType :: Text
65
71
}
72
+
73
+ /// The date field extract style to use: `DateFieldExtractStyle`
74
+ fn date_field_extract_style ( & self ) -> DateFieldExtractStyle {
75
+ DateFieldExtractStyle :: DatePart
76
+ }
77
+
78
+ /// The SQL type to use for Arrow Int64 unparsing
79
+ /// Most dialects use BigInt, but some, like MySQL, require SIGNED
80
+ fn int64_cast_dtype ( & self ) -> ast:: DataType {
81
+ ast:: DataType :: BigInt ( None )
82
+ }
83
+
84
+ /// The SQL type to use for Timestamp unparsing
85
+ /// Most dialects use Timestamp, but some, like MySQL, require Datetime
86
+ /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
87
+ fn timestamp_cast_dtype (
88
+ & self ,
89
+ _time_unit : & TimeUnit ,
90
+ tz : & Option < Arc < str > > ,
91
+ ) -> ast:: DataType {
92
+ let tz_info = match tz {
93
+ Some ( _) => TimezoneInfo :: WithTimeZone ,
94
+ None => TimezoneInfo :: None ,
95
+ } ;
96
+
97
+ ast:: DataType :: Timestamp ( None , tz_info)
98
+ }
66
99
}
67
100
68
101
/// `IntervalStyle` to use for unparsing
@@ -80,6 +113,19 @@ pub enum IntervalStyle {
80
113
MySQL ,
81
114
}
82
115
116
/// Datetime subfield extraction style for unparsing.
///
/// See <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>.
///
/// Different DBMSs follow different standards; popular ones are:
///
/// ```sql
/// date_part('YEAR', date '2001-02-16')
/// EXTRACT(YEAR from date '2001-02-16')
/// ```
///
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// The `date_part('<field>', <expr>)` function-call form.
    DatePart,
    /// The `EXTRACT(<field> FROM <expr>)` form.
    Extract,
}
128
+
83
129
/// Field-less dialect type; its `Dialect` implementation follows this definition.
pub struct DefaultDialect {}
84
130
85
131
impl Dialect for DefaultDialect {
@@ -133,6 +179,22 @@ impl Dialect for MySqlDialect {
133
179
/// MySQL override: LargeUtf8 is cast to CHAR.
fn large_utf8_cast_dtype(&self) -> ast::DataType {
    ast::DataType::Char(None)
}
182
+
183
+ fn date_field_extract_style ( & self ) -> DateFieldExtractStyle {
184
+ DateFieldExtractStyle :: Extract
185
+ }
186
+
187
+ fn int64_cast_dtype ( & self ) -> ast:: DataType {
188
+ ast:: DataType :: Custom ( ObjectName ( vec ! [ Ident :: new( "SIGNED" ) ] ) , vec ! [ ] )
189
+ }
190
+
191
+ fn timestamp_cast_dtype (
192
+ & self ,
193
+ _time_unit : & TimeUnit ,
194
+ _tz : & Option < Arc < str > > ,
195
+ ) -> ast:: DataType {
196
+ ast:: DataType :: Datetime ( None )
197
+ }
136
198
}
137
199
138
200
/// Field-less dialect type for SQLite.
// NOTE(review): the `Dialect` impl for this type is outside this excerpt.
pub struct SqliteDialect {}
@@ -151,6 +213,10 @@ pub struct CustomDialect {
151
213
float64_ast_dtype : sqlparser:: ast:: DataType ,
152
214
utf8_cast_dtype : ast:: DataType ,
153
215
large_utf8_cast_dtype : ast:: DataType ,
216
+ date_field_extract_style : DateFieldExtractStyle ,
217
+ int64_cast_dtype : ast:: DataType ,
218
+ timestamp_cast_dtype : ast:: DataType ,
219
+ timestamp_tz_cast_dtype : ast:: DataType ,
154
220
}
155
221
156
222
impl Default for CustomDialect {
@@ -163,6 +229,13 @@ impl Default for CustomDialect {
163
229
float64_ast_dtype : sqlparser:: ast:: DataType :: Double ,
164
230
utf8_cast_dtype : ast:: DataType :: Varchar ( None ) ,
165
231
large_utf8_cast_dtype : ast:: DataType :: Text ,
232
+ date_field_extract_style : DateFieldExtractStyle :: DatePart ,
233
+ int64_cast_dtype : ast:: DataType :: BigInt ( None ) ,
234
+ timestamp_cast_dtype : ast:: DataType :: Timestamp ( None , TimezoneInfo :: None ) ,
235
+ timestamp_tz_cast_dtype : ast:: DataType :: Timestamp (
236
+ None ,
237
+ TimezoneInfo :: WithTimeZone ,
238
+ ) ,
166
239
}
167
240
}
168
241
}
@@ -206,6 +279,26 @@ impl Dialect for CustomDialect {
206
279
/// Returns a copy of the configured LargeUtf8 cast type.
fn large_utf8_cast_dtype(&self) -> ast::DataType {
    self.large_utf8_cast_dtype.clone()
}
282
+
283
+ fn date_field_extract_style ( & self ) -> DateFieldExtractStyle {
284
+ self . date_field_extract_style
285
+ }
286
+
287
+ fn int64_cast_dtype ( & self ) -> ast:: DataType {
288
+ self . int64_cast_dtype . clone ( )
289
+ }
290
+
291
+ fn timestamp_cast_dtype (
292
+ & self ,
293
+ _time_unit : & TimeUnit ,
294
+ tz : & Option < Arc < str > > ,
295
+ ) -> ast:: DataType {
296
+ if tz. is_some ( ) {
297
+ self . timestamp_tz_cast_dtype . clone ( )
298
+ } else {
299
+ self . timestamp_cast_dtype . clone ( )
300
+ }
301
+ }
209
302
}
210
303
211
304
/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
@@ -230,6 +323,10 @@ pub struct CustomDialectBuilder {
230
323
float64_ast_dtype : sqlparser:: ast:: DataType ,
231
324
utf8_cast_dtype : ast:: DataType ,
232
325
large_utf8_cast_dtype : ast:: DataType ,
326
+ date_field_extract_style : DateFieldExtractStyle ,
327
+ int64_cast_dtype : ast:: DataType ,
328
+ timestamp_cast_dtype : ast:: DataType ,
329
+ timestamp_tz_cast_dtype : ast:: DataType ,
233
330
}
234
331
235
332
impl Default for CustomDialectBuilder {
@@ -248,6 +345,13 @@ impl CustomDialectBuilder {
248
345
float64_ast_dtype : sqlparser:: ast:: DataType :: Double ,
249
346
utf8_cast_dtype : ast:: DataType :: Varchar ( None ) ,
250
347
large_utf8_cast_dtype : ast:: DataType :: Text ,
348
+ date_field_extract_style : DateFieldExtractStyle :: DatePart ,
349
+ int64_cast_dtype : ast:: DataType :: BigInt ( None ) ,
350
+ timestamp_cast_dtype : ast:: DataType :: Timestamp ( None , TimezoneInfo :: None ) ,
351
+ timestamp_tz_cast_dtype : ast:: DataType :: Timestamp (
352
+ None ,
353
+ TimezoneInfo :: WithTimeZone ,
354
+ ) ,
251
355
}
252
356
}
253
357
@@ -260,6 +364,10 @@ impl CustomDialectBuilder {
260
364
float64_ast_dtype : self . float64_ast_dtype ,
261
365
utf8_cast_dtype : self . utf8_cast_dtype ,
262
366
large_utf8_cast_dtype : self . large_utf8_cast_dtype ,
367
+ date_field_extract_style : self . date_field_extract_style ,
368
+ int64_cast_dtype : self . int64_cast_dtype ,
369
+ timestamp_cast_dtype : self . timestamp_cast_dtype ,
370
+ timestamp_tz_cast_dtype : self . timestamp_tz_cast_dtype ,
263
371
}
264
372
}
265
373
@@ -293,6 +401,7 @@ impl CustomDialectBuilder {
293
401
self
294
402
}
295
403
404
+ /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
296
405
pub fn with_float64_ast_dtype (
297
406
mut self ,
298
407
float64_ast_dtype : sqlparser:: ast:: DataType ,
@@ -301,16 +410,44 @@ impl CustomDialectBuilder {
301
410
self
302
411
}
303
412
413
+ /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
304
414
pub fn with_utf8_cast_dtype ( mut self , utf8_cast_dtype : ast:: DataType ) -> Self {
305
415
self . utf8_cast_dtype = utf8_cast_dtype;
306
416
self
307
417
}
308
418
419
+ /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
309
420
pub fn with_large_utf8_cast_dtype (
310
421
mut self ,
311
422
large_utf8_cast_dtype : ast:: DataType ,
312
423
) -> Self {
313
424
self . large_utf8_cast_dtype = large_utf8_cast_dtype;
314
425
self
315
426
}
427
+
428
+ /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
429
+ pub fn with_date_field_extract_style (
430
+ mut self ,
431
+ date_field_extract_style : DateFieldExtractStyle ,
432
+ ) -> Self {
433
+ self . date_field_extract_style = date_field_extract_style;
434
+ self
435
+ }
436
+
437
+ /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
438
+ pub fn with_int64_cast_dtype ( mut self , int64_cast_dtype : ast:: DataType ) -> Self {
439
+ self . int64_cast_dtype = int64_cast_dtype;
440
+ self
441
+ }
442
+
443
+ /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
444
+ pub fn with_timestamp_cast_dtype (
445
+ mut self ,
446
+ timestamp_cast_dtype : ast:: DataType ,
447
+ timestamp_tz_cast_dtype : ast:: DataType ,
448
+ ) -> Self {
449
+ self . timestamp_cast_dtype = timestamp_cast_dtype;
450
+ self . timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
451
+ self
452
+ }
316
453
}
0 commit comments