Skip to content

Commit 3ee5008

Browse files
committed
#26 Remove year limitations from Python's datetime
Permit processing and representation of years before 1 AD and after 9999 AD by removing use of Python's `datetime` and related modules, which are limited to these years. This changes the public API so the following methods return `struct_time` objects instead of `date` or `datetime` objects: `lower_strict()`, `upper_strict()`, `lower_fuzzy()`, `upper_fuzzy()`. Details: - stop using `datetime` modules internally when parsing and processing EDTF syntax - change `_strict_date()` and all dependent methods to return `struct_time` instead of objects from the `datetime` module. This affects the public API methods listed above - remove deliberate coercion of out-of-range year values to `date.min` and `date.max` boundaries - update tests to exercise broader date ranges
1 parent 539bfda commit 3ee5008

File tree

2 files changed

+164
-74
lines changed

2 files changed

+164
-74
lines changed

edtf/parser/parser_classes.py

+127-52
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import calendar
22
import re
3+
from time import struct_time
34
from datetime import date, datetime
4-
from dateutil.parser import parse
5+
from operator import add, sub
6+
from exceptions import OverflowError
7+
58
from dateutil.relativedelta import relativedelta
9+
610
from edtf import appsettings
711

812
EARLIEST = 'earliest'
@@ -17,6 +21,91 @@
1721
PRECISION_DAY = "day"
1822

1923

24+
TIME_EMPTY_TIME = [0, 0, 0] # tm_hour, tm_min, tm_sec
25+
TIME_EMPTY_EXTRAS = [0, 0, -1] # tm_wday, tm_yday, tm_isdst
26+
27+
28+
def days_in_month(year, month):
    """
    Look up how many days the given month has in the given year.

    `month` is numbered 1 (January) through 12 (December). February is 29
    days long when `calendar.isleap(year)` is true, otherwise 28. Any other
    month value raises `KeyError`.
    """
    # Resolve February first so the leap-year test always runs, whatever
    # month was requested.
    february = 29 if calendar.isleap(year) else 28
    lengths = (31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    by_month = dict(zip(range(1, 13), lengths))
    return by_month[month]
48+
49+
50+
def apply_relativedelta(op, time_struct, delta):
    """
    Apply `relativedelta` to `struct_time` data structure.

    This function is required because we cannot use standard `datetime` module
    objects for conversion when the date/time is, or will become, outside the
    boundary years 1 AD to 9999 AD.

    :param op: two-argument callable that combines a `datetime` with `delta`;
        callers in this module pass `operator.add` or `operator.sub`.
    :param time_struct: the `struct_time` to adjust. Only its first six
        fields (year through second) are used.
    :param delta: the offset to apply. A falsy value means "no change".
    :return: `time_struct` itself when `delta` is falsy, otherwise a new
        `struct_time` holding the adjusted date/time.
    :raises OverflowError, ValueError: if the arithmetic still fails after
        the year has been moved into the supported range (for example when
        `delta` itself spans thousands of years).
    """
    if not delta:
        return time_struct  # No work to do

    try:
        dt_result = op(datetime(*time_struct[:6]), delta)
    except (OverflowError, ValueError):
        # Year is not within supported 1 to 9999 AD range
        pass
    else:
        return dt_to_struct_time(dt_result)

    # Here we fake the year to one in the acceptable range to avoid having to
    # write our own date rolling logic

    # The Gregorian leap-year pattern repeats exactly every 400 years, so
    # moving the year by a multiple of 400 keeps every leap day where it
    # belongs. Shifting by whole millennia does not: 11200 is a leap year
    # but 2200 is not, so 11200-02-29 could not even be constructed.
    actual_year = time_struct.tm_year
    adjusted_year = 2000 + actual_year % 400  # always within 2000..2399
    year_shift = adjusted_year - actual_year
    # Apply delta to the date/time with adjusted year
    dt = datetime(adjusted_year, *time_struct[1:6])
    dt_result = op(dt, delta)
    # Convert result year back to its original 400-year cycle
    final_year = dt_result.year - year_shift
    return struct_time(
        (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS))
84+
85+
86+
def dt_to_struct_time(dt):
    """
    Convert a `datetime.date` or `datetime.datetime` to a `struct_time`
    representation *with placeholder values* for data fields that we cannot
    always rely on for ancient or far-future dates: tm_wday and tm_yday are
    set to 0 and tm_isdst to -1 (see `TIME_EMPTY_EXTRAS`).

    A plain `date` gets a time of 00:00:00 (see `TIME_EMPTY_TIME`); a
    `datetime` keeps its hour, minute and second.

    NOTE: If it wasn't for the requirement that the extra fields are unset
    we could use the `timetuple()` method instead of this function.

    :param dt: a `datetime.date` or `datetime.datetime` instance.
    :return: the equivalent `struct_time`.
    :raises NotImplementedError: if `dt` is neither a `date` nor a `datetime`.
    """
    # `datetime` is a subclass of `date`, so it has to be tested first;
    # otherwise every `datetime` would match the `date` branch and lose its
    # time-of-day fields.
    if isinstance(dt, datetime):
        return struct_time(
            [dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second] +
            TIME_EMPTY_EXTRAS
        )
    elif isinstance(dt, date):
        return struct_time(
            [dt.year, dt.month, dt.day] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS
        )
    else:
        raise NotImplementedError(
            "Cannot convert %s to `struct_time`" % type(dt))
107+
108+
20109
class EDTFObject(object):
21110
"""
22111
Object to attact to a parser to become instantiated when the parser
@@ -86,57 +175,63 @@ def set_is_uncertain(self, val):
86175

87176
def lower_fuzzy(self):
88177
strict_val = self.lower_strict()
89-
# Do not exceed or adjust boundary datetimes
90-
if strict_val in (date.min, date.max):
91-
return strict_val
92-
return strict_val - self._get_fuzzy_padding(EARLIEST)
178+
return apply_relativedelta(sub, strict_val, self._get_fuzzy_padding(EARLIEST))
93179

94180
def upper_fuzzy(self):
95181
strict_val = self.upper_strict()
96-
# Do not exceed or adjust boundary datetimes
97-
if strict_val in (date.min, date.max):
98-
return strict_val
99-
return strict_val + self._get_fuzzy_padding(LATEST)
182+
return apply_relativedelta(add, strict_val, self._get_fuzzy_padding(LATEST))
100183

101184
def __eq__(self, other):
102185
if isinstance(other, EDTFObject):
103186
return str(self) == str(other)
104187
elif isinstance(other, date):
105188
return str(self) == other.isoformat()
189+
elif isinstance(other, struct_time):
190+
return self._strict_date() == other
106191
return False
107192

108193
def __ne__(self, other):
109194
if isinstance(other, EDTFObject):
110195
return str(self) != str(other)
111196
elif isinstance(other, date):
112197
return str(self) != other.isoformat()
198+
elif isinstance(other, struct_time):
199+
return self._strict_date() != other
113200
return True
114201

115202
def __gt__(self, other):
116203
if isinstance(other, EDTFObject):
117204
return self.lower_strict() > other.lower_strict()
118205
elif isinstance(other, date):
206+
return self.lower_strict() > dt_to_struct_time(other)
207+
elif isinstance(other, struct_time):
119208
return self.lower_strict() > other
120209
raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__))
121210

122211
def __ge__(self, other):
123212
if isinstance(other, EDTFObject):
124213
return self.lower_strict() >= other.lower_strict()
125214
elif isinstance(other, date):
215+
return self.lower_strict() >= dt_to_struct_time(other)
216+
elif isinstance(other, struct_time):
126217
return self.lower_strict() >= other
127218
raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__))
128219

129220
def __lt__(self, other):
130221
if isinstance(other, EDTFObject):
131222
return self.lower_strict() < other.lower_strict()
132223
elif isinstance(other, date):
224+
return self.lower_strict() < dt_to_struct_time(other)
225+
elif isinstance(other, struct_time):
133226
return self.lower_strict() < other
134227
raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__))
135228

136229
def __le__(self, other):
137230
if isinstance(other, EDTFObject):
138231
return self.lower_strict() <= other.lower_strict()
139232
elif isinstance(other, date):
233+
return self.lower_strict() <= dt_to_struct_time(other)
234+
elif isinstance(other, struct_time):
140235
return self.lower_strict() <= other
141236
raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__))
142237

@@ -204,49 +299,28 @@ def _precise_month(self, lean):
204299
else:
205300
return 1 if lean == EARLIEST else 12
206301

207-
@staticmethod
208-
def _days_in_month(yr, month):
209-
return calendar.monthrange(int(yr), int(month))[1]
210-
211302
def _precise_day(self, lean):
212303
if not self.day or self.day == 'uu':
213304
if lean == EARLIEST:
214305
return 1
215306
else:
216-
return self._days_in_month(
307+
return days_in_month(
217308
self._precise_year(LATEST), self._precise_month(LATEST)
218309
)
219310
else:
220311
return int(self.day)
221312

222313
def _strict_date(self, lean):
223-
py = self._precise_year(lean)
224-
if py < 1: # year is not positive
225-
return date.min
226-
227-
parts = {
228-
'year': py,
229-
'month': self._precise_month(lean),
230-
'day': self._precise_day(lean),
231-
}
232-
233-
isoish = "%(year)s-%(month)02d-%(day)02d" % parts
234-
235-
try:
236-
dt = parse(
237-
isoish,
238-
fuzzy=True,
239-
yearfirst=True,
240-
dayfirst=False,
241-
default=date.max if lean == LATEST else date.min
242-
)
243-
return dt
244-
245-
except ValueError: # year is out of range
246-
if isoish < date.min.isoformat():
247-
return date.min
248-
else:
249-
return date.max
314+
"""
315+
Return a `time.struct_time` representation of the date.
316+
"""
317+
return struct_time(
318+
(
319+
self._precise_year(lean),
320+
self._precise_month(lean),
321+
self._precise_day(lean),
322+
) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS)
323+
)
250324

251325
@property
252326
def precision(self):
@@ -274,11 +348,15 @@ def _strict_date(self, lean):
274348
def __eq__(self, other):
275349
if isinstance(other, datetime):
276350
return self.isoformat() == other.isoformat()
351+
elif isinstance(other, struct_time):
352+
return self._strict_date() == other
277353
return super(DateAndTime, self).__eq__(other)
278354

279355
def __ne__(self, other):
280356
if isinstance(other, datetime):
281357
return self.isoformat() != other.isoformat()
358+
elif isinstance(other, struct_time):
359+
return self._strict_date() != other
282360
return super(DateAndTime, self).__ne__(other)
283361

284362

@@ -299,7 +377,7 @@ def _strict_date(self, lean):
299377
return r
300378
except AttributeError: # it's a string, or no date. Result depends on the upper date
301379
upper = self.upper._strict_date(LATEST)
302-
return upper - appsettings.DELTA_IF_UNKNOWN
380+
return apply_relativedelta(sub, upper, appsettings.DELTA_IF_UNKNOWN)
303381
else:
304382
try:
305383
r = self.upper._strict_date(lean)
@@ -308,10 +386,10 @@ def _strict_date(self, lean):
308386
return r
309387
except AttributeError: # an 'unknown' or 'open' string - depends on the lower date
310388
if self.upper and (self.upper == "open" or self.upper.date == "open"):
311-
return date.today() # it's still happening
389+
return dt_to_struct_time(date.today()) # it's still happening
312390
else:
313391
lower = self.lower._strict_date(EARLIEST)
314-
return lower + appsettings.DELTA_IF_UNKNOWN
392+
return apply_relativedelta(add, lower, appsettings.DELTA_IF_UNKNOWN)
315393

316394

317395
# (* ************************** Level 1 *************************** *)
@@ -360,7 +438,7 @@ def __str__(self):
360438

361439
def _strict_date(self, lean):
362440
if self.date == "open":
363-
return date.today()
441+
return dt_to_struct_time(date.today())
364442
if self.date =="unknown":
365443
return None # depends on the other date
366444
return self.date._strict_date(lean)
@@ -406,15 +484,12 @@ def _precise_year(self):
406484

407485
def _strict_date(self, lean):
408486
py = self._precise_year()
409-
if py >= date.max.year:
410-
return date.max
411-
if py <= date.min.year:
412-
return date.min
413-
414487
if lean == EARLIEST:
415-
return date(py, 1, 1)
488+
return struct_time(
489+
[py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
416490
else:
417-
return date(py, 12, 31)
491+
return struct_time(
492+
[py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
418493

419494

420495
class Season(Date):

0 commit comments

Comments
 (0)