@@ -291,6 +291,28 @@ impl FromStr for BuiltinScalarFunction {
291
291
}
292
292
}
293
293
294
/// Generates a private helper that maps the type of a string argument to a
/// function's return type.
///
/// Macro arguments:
/// * `$func` - name of the helper function to generate.
/// * `$large_utf8_type` - expression returned when the argument is `DataType::LargeUtf8`.
/// * `$utf8_type` - expression returned when the argument is `DataType::Utf8`.
///
/// The generated function has the signature
/// `fn(arg_type: &DataType, name: &str) -> Result<DataType>`, where `name` is
/// the SQL-facing function name used only to build the error message.
///
/// Any other argument type yields `DataFusionError::Internal`.
macro_rules! make_utf8_to_return_type {
    ($func:ident, $large_utf8_type:expr, $utf8_type:expr) => {
        fn $func(arg_type: &DataType, name: &str) -> Result<DataType> {
            match arg_type {
                DataType::LargeUtf8 => Ok($large_utf8_type),
                DataType::Utf8 => Ok($utf8_type),
                // this error is internal as `data_types` should have captured this.
                _ => Err(DataFusionError::Internal(format!(
                    // `{}` (Display), not `{:?}` (Debug): Debug would wrap the
                    // name in quotes and change the message text, e.g.
                    // `The "btrim" function ...` instead of `The btrim function ...`.
                    "The {} function can only accept strings.",
                    name
                ))),
            }
        }
    };
}
311
+
312
+ make_utf8_to_return_type ! ( utf8_to_str_type, DataType :: LargeUtf8 , DataType :: Utf8 ) ;
313
+ make_utf8_to_return_type ! ( utf8_to_int_type, DataType :: Int64 , DataType :: Int32 ) ;
314
+ make_utf8_to_return_type ! ( utf8_to_binary_type, DataType :: Binary , DataType :: Binary ) ;
315
+
294
316
/// Returns the datatype of the scalar function
295
317
pub fn return_type (
296
318
fun : & BuiltinScalarFunction ,
@@ -310,259 +332,49 @@ pub fn return_type(
310
332
arg_types. len ( ) as i32 ,
311
333
) ) ,
312
334
BuiltinScalarFunction :: Ascii => Ok ( DataType :: Int32 ) ,
313
- BuiltinScalarFunction :: BitLength => Ok ( match arg_types[ 0 ] {
314
- DataType :: LargeUtf8 => DataType :: Int64 ,
315
- DataType :: Utf8 => DataType :: Int32 ,
316
- _ => {
317
- // this error is internal as `data_types` should have captured this.
318
- return Err ( DataFusionError :: Internal (
319
- "The bit_length function can only accept strings." . to_string ( ) ,
320
- ) ) ;
321
- }
322
- } ) ,
323
- BuiltinScalarFunction :: Btrim => Ok ( match arg_types[ 0 ] {
324
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
325
- DataType :: Utf8 => DataType :: Utf8 ,
326
- _ => {
327
- // this error is internal as `data_types` should have captured this.
328
- return Err ( DataFusionError :: Internal (
329
- "The btrim function can only accept strings." . to_string ( ) ,
330
- ) ) ;
331
- }
332
- } ) ,
333
- BuiltinScalarFunction :: CharacterLength => Ok ( match arg_types[ 0 ] {
334
- DataType :: LargeUtf8 => DataType :: Int64 ,
335
- DataType :: Utf8 => DataType :: Int32 ,
336
- _ => {
337
- // this error is internal as `data_types` should have captured this.
338
- return Err ( DataFusionError :: Internal (
339
- "The character_length function can only accept strings." . to_string ( ) ,
340
- ) ) ;
341
- }
342
- } ) ,
335
+ BuiltinScalarFunction :: BitLength => utf8_to_int_type ( & arg_types[ 0 ] , "bit_length" ) ,
336
+ BuiltinScalarFunction :: Btrim => utf8_to_str_type ( & arg_types[ 0 ] , "btrim" ) ,
337
+ BuiltinScalarFunction :: CharacterLength => {
338
+ utf8_to_int_type ( & arg_types[ 0 ] , "character_length" )
339
+ }
343
340
BuiltinScalarFunction :: Chr => Ok ( DataType :: Utf8 ) ,
344
341
BuiltinScalarFunction :: Concat => Ok ( DataType :: Utf8 ) ,
345
342
BuiltinScalarFunction :: ConcatWithSeparator => Ok ( DataType :: Utf8 ) ,
346
343
BuiltinScalarFunction :: DatePart => Ok ( DataType :: Int32 ) ,
347
344
BuiltinScalarFunction :: DateTrunc => {
348
345
Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) )
349
346
}
350
- BuiltinScalarFunction :: InitCap => Ok ( match arg_types[ 0 ] {
351
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
352
- DataType :: Utf8 => DataType :: Utf8 ,
353
- _ => {
354
- // this error is internal as `data_types` should have captured this.
355
- return Err ( DataFusionError :: Internal (
356
- "The initcap function can only accept strings." . to_string ( ) ,
357
- ) ) ;
358
- }
359
- } ) ,
360
- BuiltinScalarFunction :: Left => Ok ( match arg_types[ 0 ] {
361
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
362
- DataType :: Utf8 => DataType :: Utf8 ,
363
- _ => {
364
- // this error is internal as `data_types` should have captured this.
365
- return Err ( DataFusionError :: Internal (
366
- "The left function can only accept strings." . to_string ( ) ,
367
- ) ) ;
368
- }
369
- } ) ,
370
- BuiltinScalarFunction :: Lower => Ok ( match arg_types[ 0 ] {
371
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
372
- DataType :: Utf8 => DataType :: Utf8 ,
373
- _ => {
374
- // this error is internal as `data_types` should have captured this.
375
- return Err ( DataFusionError :: Internal (
376
- "The upper function can only accept strings." . to_string ( ) ,
377
- ) ) ;
378
- }
379
- } ) ,
380
- BuiltinScalarFunction :: Lpad => Ok ( match arg_types[ 0 ] {
381
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
382
- DataType :: Utf8 => DataType :: Utf8 ,
383
- _ => {
384
- // this error is internal as `data_types` should have captured this.
385
- return Err ( DataFusionError :: Internal (
386
- "The lpad function can only accept strings." . to_string ( ) ,
387
- ) ) ;
388
- }
389
- } ) ,
390
- BuiltinScalarFunction :: Ltrim => Ok ( match arg_types[ 0 ] {
391
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
392
- DataType :: Utf8 => DataType :: Utf8 ,
393
- _ => {
394
- // this error is internal as `data_types` should have captured this.
395
- return Err ( DataFusionError :: Internal (
396
- "The ltrim function can only accept strings." . to_string ( ) ,
397
- ) ) ;
398
- }
399
- } ) ,
400
- BuiltinScalarFunction :: MD5 => Ok ( match arg_types[ 0 ] {
401
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
402
- DataType :: Utf8 => DataType :: Utf8 ,
403
- _ => {
404
- // this error is internal as `data_types` should have captured this.
405
- return Err ( DataFusionError :: Internal (
406
- "The md5 function can only accept strings." . to_string ( ) ,
407
- ) ) ;
408
- }
409
- } ) ,
347
+ BuiltinScalarFunction :: InitCap => utf8_to_str_type ( & arg_types[ 0 ] , "initcap" ) ,
348
+ BuiltinScalarFunction :: Left => utf8_to_str_type ( & arg_types[ 0 ] , "left" ) ,
349
+ BuiltinScalarFunction :: Lower => utf8_to_str_type ( & arg_types[ 0 ] , "lower" ) ,
350
+ BuiltinScalarFunction :: Lpad => utf8_to_str_type ( & arg_types[ 0 ] , "lpad" ) ,
351
+ BuiltinScalarFunction :: Ltrim => utf8_to_str_type ( & arg_types[ 0 ] , "ltrim" ) ,
352
+ BuiltinScalarFunction :: MD5 => utf8_to_str_type ( & arg_types[ 0 ] , "md5" ) ,
410
353
BuiltinScalarFunction :: NullIf => {
411
354
// NULLIF has two args and they might get coerced, get a preview of this
412
355
let coerced_types = data_types ( arg_types, & signature ( fun) ) ;
413
356
coerced_types. map ( |typs| typs[ 0 ] . clone ( ) )
414
357
}
415
- BuiltinScalarFunction :: OctetLength => Ok ( match arg_types[ 0 ] {
416
- DataType :: LargeUtf8 => DataType :: Int64 ,
417
- DataType :: Utf8 => DataType :: Int32 ,
418
- _ => {
419
- // this error is internal as `data_types` should have captured this.
420
- return Err ( DataFusionError :: Internal (
421
- "The octet_length function can only accept strings." . to_string ( ) ,
422
- ) ) ;
423
- }
424
- } ) ,
425
- BuiltinScalarFunction :: RegexpReplace => Ok ( match arg_types[ 0 ] {
426
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
427
- DataType :: Utf8 => DataType :: Utf8 ,
428
- _ => {
429
- // this error is internal as `data_types` should have captured this.
430
- return Err ( DataFusionError :: Internal (
431
- "The regexp_replace function can only accept strings." . to_string ( ) ,
432
- ) ) ;
433
- }
434
- } ) ,
435
- BuiltinScalarFunction :: Repeat => Ok ( match arg_types[ 0 ] {
436
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
437
- DataType :: Utf8 => DataType :: Utf8 ,
438
- _ => {
439
- // this error is internal as `data_types` should have captured this.
440
- return Err ( DataFusionError :: Internal (
441
- "The repeat function can only accept strings." . to_string ( ) ,
442
- ) ) ;
443
- }
444
- } ) ,
445
- BuiltinScalarFunction :: Replace => Ok ( match arg_types[ 0 ] {
446
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
447
- DataType :: Utf8 => DataType :: Utf8 ,
448
- _ => {
449
- // this error is internal as `data_types` should have captured this.
450
- return Err ( DataFusionError :: Internal (
451
- "The replace function can only accept strings." . to_string ( ) ,
452
- ) ) ;
453
- }
454
- } ) ,
455
- BuiltinScalarFunction :: Reverse => Ok ( match arg_types[ 0 ] {
456
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
457
- DataType :: Utf8 => DataType :: Utf8 ,
458
- _ => {
459
- // this error is internal as `data_types` should have captured this.
460
- return Err ( DataFusionError :: Internal (
461
- "The reverse function can only accept strings." . to_string ( ) ,
462
- ) ) ;
463
- }
464
- } ) ,
465
- BuiltinScalarFunction :: Right => Ok ( match arg_types[ 0 ] {
466
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
467
- DataType :: Utf8 => DataType :: Utf8 ,
468
- _ => {
469
- // this error is internal as `data_types` should have captured this.
470
- return Err ( DataFusionError :: Internal (
471
- "The right function can only accept strings." . to_string ( ) ,
472
- ) ) ;
473
- }
474
- } ) ,
475
- BuiltinScalarFunction :: Rpad => Ok ( match arg_types[ 0 ] {
476
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
477
- DataType :: Utf8 => DataType :: Utf8 ,
478
- _ => {
479
- // this error is internal as `data_types` should have captured this.
480
- return Err ( DataFusionError :: Internal (
481
- "The rpad function can only accept strings." . to_string ( ) ,
482
- ) ) ;
483
- }
484
- } ) ,
485
- BuiltinScalarFunction :: Rtrim => Ok ( match arg_types[ 0 ] {
486
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
487
- DataType :: Utf8 => DataType :: Utf8 ,
488
- _ => {
489
- // this error is internal as `data_types` should have captured this.
490
- return Err ( DataFusionError :: Internal (
491
- "The rtrim function can only accept strings." . to_string ( ) ,
492
- ) ) ;
493
- }
494
- } ) ,
495
- BuiltinScalarFunction :: SHA224 => Ok ( match arg_types[ 0 ] {
496
- DataType :: LargeUtf8 => DataType :: Binary ,
497
- DataType :: Utf8 => DataType :: Binary ,
498
- _ => {
499
- // this error is internal as `data_types` should have captured this.
500
- return Err ( DataFusionError :: Internal (
501
- "The sha224 function can only accept strings." . to_string ( ) ,
502
- ) ) ;
503
- }
504
- } ) ,
505
- BuiltinScalarFunction :: SHA256 => Ok ( match arg_types[ 0 ] {
506
- DataType :: LargeUtf8 => DataType :: Binary ,
507
- DataType :: Utf8 => DataType :: Binary ,
508
- _ => {
509
- // this error is internal as `data_types` should have captured this.
510
- return Err ( DataFusionError :: Internal (
511
- "The sha256 function can only accept strings." . to_string ( ) ,
512
- ) ) ;
513
- }
514
- } ) ,
515
- BuiltinScalarFunction :: SHA384 => Ok ( match arg_types[ 0 ] {
516
- DataType :: LargeUtf8 => DataType :: Binary ,
517
- DataType :: Utf8 => DataType :: Binary ,
518
- _ => {
519
- // this error is internal as `data_types` should have captured this.
520
- return Err ( DataFusionError :: Internal (
521
- "The sha384 function can only accept strings." . to_string ( ) ,
522
- ) ) ;
523
- }
524
- } ) ,
525
- BuiltinScalarFunction :: SHA512 => Ok ( match arg_types[ 0 ] {
526
- DataType :: LargeUtf8 => DataType :: Binary ,
527
- DataType :: Utf8 => DataType :: Binary ,
528
- _ => {
529
- // this error is internal as `data_types` should have captured this.
530
- return Err ( DataFusionError :: Internal (
531
- "The sha512 function can only accept strings." . to_string ( ) ,
532
- ) ) ;
533
- }
534
- } ) ,
535
- BuiltinScalarFunction :: SplitPart => Ok ( match arg_types[ 0 ] {
536
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
537
- DataType :: Utf8 => DataType :: Utf8 ,
538
- _ => {
539
- // this error is internal as `data_types` should have captured this.
540
- return Err ( DataFusionError :: Internal (
541
- "The split_part function can only accept strings." . to_string ( ) ,
542
- ) ) ;
543
- }
544
- } ) ,
358
+ BuiltinScalarFunction :: OctetLength => {
359
+ utf8_to_int_type ( & arg_types[ 0 ] , "octet_length" )
360
+ }
361
+ BuiltinScalarFunction :: RegexpReplace => {
362
+ utf8_to_str_type ( & arg_types[ 0 ] , "regex_replace" )
363
+ }
364
+ BuiltinScalarFunction :: Repeat => utf8_to_str_type ( & arg_types[ 0 ] , "repeat" ) ,
365
+ BuiltinScalarFunction :: Replace => utf8_to_str_type ( & arg_types[ 0 ] , "replace" ) ,
366
+ BuiltinScalarFunction :: Reverse => utf8_to_str_type ( & arg_types[ 0 ] , "reverse" ) ,
367
+ BuiltinScalarFunction :: Right => utf8_to_str_type ( & arg_types[ 0 ] , "right" ) ,
368
+ BuiltinScalarFunction :: Rpad => utf8_to_str_type ( & arg_types[ 0 ] , "rpad" ) ,
369
+ BuiltinScalarFunction :: Rtrim => utf8_to_str_type ( & arg_types[ 0 ] , "rtrimp" ) ,
370
+ BuiltinScalarFunction :: SHA224 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha224" ) ,
371
+ BuiltinScalarFunction :: SHA256 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha256" ) ,
372
+ BuiltinScalarFunction :: SHA384 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha384" ) ,
373
+ BuiltinScalarFunction :: SHA512 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha512" ) ,
374
+ BuiltinScalarFunction :: SplitPart => utf8_to_str_type ( & arg_types[ 0 ] , "split_part" ) ,
545
375
BuiltinScalarFunction :: StartsWith => Ok ( DataType :: Boolean ) ,
546
- BuiltinScalarFunction :: Strpos => Ok ( match arg_types[ 0 ] {
547
- DataType :: LargeUtf8 => DataType :: Int64 ,
548
- DataType :: Utf8 => DataType :: Int32 ,
549
- _ => {
550
- // this error is internal as `data_types` should have captured this.
551
- return Err ( DataFusionError :: Internal (
552
- "The strpos function can only accept strings." . to_string ( ) ,
553
- ) ) ;
554
- }
555
- } ) ,
556
- BuiltinScalarFunction :: Substr => Ok ( match arg_types[ 0 ] {
557
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
558
- DataType :: Utf8 => DataType :: Utf8 ,
559
- _ => {
560
- // this error is internal as `data_types` should have captured this.
561
- return Err ( DataFusionError :: Internal (
562
- "The substr function can only accept strings." . to_string ( ) ,
563
- ) ) ;
564
- }
565
- } ) ,
376
+ BuiltinScalarFunction :: Strpos => utf8_to_int_type ( & arg_types[ 0 ] , "strpos" ) ,
377
+ BuiltinScalarFunction :: Substr => utf8_to_str_type ( & arg_types[ 0 ] , "substr" ) ,
566
378
BuiltinScalarFunction :: ToHex => Ok ( match arg_types[ 0 ] {
567
379
DataType :: Int8 | DataType :: Int16 | DataType :: Int32 | DataType :: Int64 => {
568
380
DataType :: Utf8
@@ -578,36 +390,9 @@ pub fn return_type(
578
390
Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) )
579
391
}
580
392
BuiltinScalarFunction :: Now => Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ) ,
581
- BuiltinScalarFunction :: Translate => Ok ( match arg_types[ 0 ] {
582
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
583
- DataType :: Utf8 => DataType :: Utf8 ,
584
- _ => {
585
- // this error is internal as `data_types` should have captured this.
586
- return Err ( DataFusionError :: Internal (
587
- "The translate function can only accept strings." . to_string ( ) ,
588
- ) ) ;
589
- }
590
- } ) ,
591
- BuiltinScalarFunction :: Trim => Ok ( match arg_types[ 0 ] {
592
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
593
- DataType :: Utf8 => DataType :: Utf8 ,
594
- _ => {
595
- // this error is internal as `data_types` should have captured this.
596
- return Err ( DataFusionError :: Internal (
597
- "The trim function can only accept strings." . to_string ( ) ,
598
- ) ) ;
599
- }
600
- } ) ,
601
- BuiltinScalarFunction :: Upper => Ok ( match arg_types[ 0 ] {
602
- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
603
- DataType :: Utf8 => DataType :: Utf8 ,
604
- _ => {
605
- // this error is internal as `data_types` should have captured this.
606
- return Err ( DataFusionError :: Internal (
607
- "The upper function can only accept strings." . to_string ( ) ,
608
- ) ) ;
609
- }
610
- } ) ,
393
+ BuiltinScalarFunction :: Translate => utf8_to_str_type ( & arg_types[ 0 ] , "translate" ) ,
394
+ BuiltinScalarFunction :: Trim => utf8_to_str_type ( & arg_types[ 0 ] , "trim" ) ,
395
+ BuiltinScalarFunction :: Upper => utf8_to_str_type ( & arg_types[ 0 ] , "upper" ) ,
611
396
BuiltinScalarFunction :: RegexpMatch => Ok ( match arg_types[ 0 ] {
612
397
DataType :: LargeUtf8 => {
613
398
DataType :: List ( Box :: new ( Field :: new ( "item" , DataType :: LargeUtf8 , true ) ) )
0 commit comments