@@ -346,8 +346,6 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
346
346
/*
347
347
* Parses (almost) standard ISO 8601 date strings. The differences are:
348
348
*
349
- * + The date "20100312" is parsed as the year 20100312, not as
350
- * equivalent to "2010-03-12". The '-' in the dates are not optional.
351
349
* + Only seconds may have a decimal point, with up to 18 digits after it
352
350
* (maximum attoseconds precision).
353
351
* + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
@@ -396,6 +394,16 @@ parse_iso_8601_datetime(char *str, int len,
396
394
char * substr , sublen ;
397
395
PANDAS_DATETIMEUNIT bestunit ;
398
396
397
+ /* If the date components are separated by one of the valid separators,
398
+ * months/days without leading 0s will be parsed
399
+ * (though not iso8601). If the components aren't separated,
400
+ * an error code will be returned because the date is ambiguous.
401
+ */
402
+ int has_sep = 0 ;
403
+ char sep ;
404
+ char valid_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
405
+ int valid_sep_len = 5 ;
406
+
399
407
/* Initialize the output to all zeros */
400
408
memset (out , 0 , sizeof (pandas_datetimestruct ));
401
409
out -> month = 1 ;
@@ -523,12 +531,16 @@ parse_iso_8601_datetime(char *str, int len,
523
531
goto parse_error ;
524
532
}
525
533
526
- /* PARSE THE YEAR (digits until the '-' character ) */
534
+ /* PARSE THE YEAR (4 digits ) */
527
535
out -> year = 0 ;
528
- while (sublen > 0 && isdigit (* substr )) {
529
- out -> year = 10 * out -> year + (* substr - '0' );
530
- ++ substr ;
531
- -- sublen ;
536
+ if (sublen >= 4 && isdigit (substr [0 ]) && isdigit (substr [1 ]) &&
537
+ isdigit (substr [2 ]) && isdigit (substr [3 ])) {
538
+
539
+ out -> year = 1000 * (substr [0 ] - '0' ) + 100 * (substr [1 ] - '0' ) +
540
+ 10 * (substr [2 ] - '0' ) + (substr [3 ] - '0' );
541
+
542
+ substr += 4 ;
543
+ sublen -= 4 ;;
532
544
}
533
545
534
546
/* Negate the year if necessary */
@@ -538,29 +550,49 @@ parse_iso_8601_datetime(char *str, int len,
538
550
/* Check whether it's a leap-year */
539
551
year_leap = is_leapyear (out -> year );
540
552
541
- /* Next character must be a '-' or the end of the string */
553
+ /* Next character must be a separator, start of month or end */
542
554
if (sublen == 0 ) {
543
555
if (out_local != NULL ) {
544
556
* out_local = 0 ;
545
557
}
546
558
bestunit = PANDAS_FR_Y ;
547
559
goto finish ;
548
560
}
549
- else if (* substr == '-' ) {
550
- ++ substr ;
551
- -- sublen ;
552
- }
553
- else {
554
- goto parse_error ;
561
+ else if (!isdigit (* substr )) {
562
+ for (i = 0 ; i < valid_sep_len ; ++ i ) {
563
+ if (* substr == valid_sep [i ]) {
564
+ has_sep = 1 ;
565
+ sep = valid_sep [i ];
566
+ ++ substr ;
567
+ -- sublen ;
568
+ break ;
569
+ }
570
+ }
571
+ if (i == valid_sep_len ) {
572
+ goto parse_error ;
573
+ }
555
574
}
556
575
557
- /* Can't have a trailing '-' */
576
+ /* Can't have a trailing sep */
558
577
if (sublen == 0 ) {
559
578
goto parse_error ;
560
579
}
561
580
581
+
562
582
/* PARSE THE MONTH (2 digits, or 1 digit when a separator is used) */
563
- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
583
+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
584
+ || (sublen == 1 && isdigit (substr [0 ])))) {
585
+ out -> month = (substr [0 ] - '0' );
586
+
587
+ if (out -> month < 1 ) {
588
+ PyErr_Format (PyExc_ValueError ,
589
+ "Month out of range in datetime string \"%s\"" , str );
590
+ goto error ;
591
+ }
592
+ ++ substr ;
593
+ -- sublen ;
594
+ }
595
+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
564
596
out -> month = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
565
597
566
598
if (out -> month < 1 || out -> month > 12 ) {
@@ -577,18 +609,22 @@ parse_iso_8601_datetime(char *str, int len,
577
609
578
610
/* Next character must be the separator, a digit (start of the day) or the end of the string */
579
611
if (sublen == 0 ) {
612
+ /* dates of form YYYYMM are not valid */
613
+ if (!has_sep ) {
614
+ goto parse_error ;
615
+ }
580
616
if (out_local != NULL ) {
581
617
* out_local = 0 ;
582
618
}
583
619
bestunit = PANDAS_FR_M ;
584
620
goto finish ;
585
621
}
586
- else if (* substr == '-' ) {
622
+ else if (has_sep && * substr == sep ) {
587
623
++ substr ;
588
624
-- sublen ;
589
625
}
590
- else {
591
- goto parse_error ;
626
+ else if (! isdigit ( * substr )) {
627
+ goto parse_error ;
592
628
}
593
629
594
630
/* Can't have a trailing separator */
@@ -597,7 +633,19 @@ parse_iso_8601_datetime(char *str, int len,
597
633
}
598
634
599
635
/* PARSE THE DAY (2 digits, or 1 digit when a separator is used) */
600
- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
636
+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
637
+ || (sublen == 1 && isdigit (substr [0 ])))) {
638
+ out -> day = (substr [0 ] - '0' );
639
+
640
+ if (out -> day < 1 ) {
641
+ PyErr_Format (PyExc_ValueError ,
642
+ "Day out of range in datetime string \"%s\"" , str );
643
+ goto error ;
644
+ }
645
+ ++ substr ;
646
+ -- sublen ;
647
+ }
648
+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
601
649
out -> day = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
602
650
603
651
if (out -> day < 1 ||
@@ -633,14 +681,19 @@ parse_iso_8601_datetime(char *str, int len,
633
681
if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
634
682
out -> hour = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
635
683
636
- if (out -> hour < 0 || out -> hour >= 24 ) {
684
+ if (out -> hour >= 24 ) {
637
685
PyErr_Format (PyExc_ValueError ,
638
686
"Hours out of range in datetime string \"%s\"" , str );
639
687
goto error ;
640
688
}
641
689
substr += 2 ;
642
690
sublen -= 2 ;
643
691
}
692
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
693
+ out -> hour = substr [0 ] - '0' ;
694
+ ++ substr ;
695
+ -- sublen ;
696
+ }
644
697
else {
645
698
goto parse_error ;
646
699
}
@@ -664,14 +717,19 @@ parse_iso_8601_datetime(char *str, int len,
664
717
if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
665
718
out -> min = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
666
719
667
- if (out -> hour < 0 || out -> min >= 60 ) {
720
+ if (out -> min >= 60 ) {
668
721
PyErr_Format (PyExc_ValueError ,
669
722
"Minutes out of range in datetime string \"%s\"" , str );
670
723
goto error ;
671
724
}
672
725
substr += 2 ;
673
726
sublen -= 2 ;
674
727
}
728
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
729
+ out -> min = substr [0 ] - '0' ;
730
+ ++ substr ;
731
+ -- sublen ;
732
+ }
675
733
else {
676
734
goto parse_error ;
677
735
}
@@ -695,14 +753,19 @@ parse_iso_8601_datetime(char *str, int len,
695
753
if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
696
754
out -> sec = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
697
755
698
- if (out -> sec < 0 || out -> sec >= 60 ) {
756
+ if (out -> sec >= 60 ) {
699
757
PyErr_Format (PyExc_ValueError ,
700
758
"Seconds out of range in datetime string \"%s\"" , str );
701
759
goto error ;
702
760
}
703
761
substr += 2 ;
704
762
sublen -= 2 ;
705
763
}
764
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
765
+ out -> sec = substr [0 ] - '0' ;
766
+ ++ substr ;
767
+ -- sublen ;
768
+ }
706
769
else {
707
770
goto parse_error ;
708
771
}
@@ -781,6 +844,12 @@ parse_iso_8601_datetime(char *str, int len,
781
844
}
782
845
783
846
parse_timezone :
847
+ /* trim any whitespace between time/timezone */
848
+ while (sublen > 0 && isspace (* substr )) {
849
+ ++ substr ;
850
+ -- sublen ;
851
+ }
852
+
784
853
if (sublen == 0 ) {
785
854
// Unlike NumPy, treating no time zone as naive
786
855
goto finish ;
@@ -832,6 +901,11 @@ parse_iso_8601_datetime(char *str, int len,
832
901
goto error ;
833
902
}
834
903
}
904
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
905
+ offset_hour = substr [0 ] - '0' ;
906
+ ++ substr ;
907
+ -- sublen ;
908
+ }
835
909
else {
836
910
goto parse_error ;
837
911
}
@@ -856,6 +930,11 @@ parse_iso_8601_datetime(char *str, int len,
856
930
goto error ;
857
931
}
858
932
}
933
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
934
+ offset_minute = substr [0 ] - '0' ;
935
+ ++ substr ;
936
+ -- sublen ;
937
+ }
859
938
else {
860
939
goto parse_error ;
861
940
}
0 commit comments