Skip to content

Commit 84739a8

Browse files
committed
Improves parsing of ISO 8601 strings
1 parent ebd3c28 commit 84739a8

File tree

3 files changed

+217
-10
lines changed

3 files changed

+217
-10
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
- Added `on()` and `at()` methods which replace `with_date()` and `with_time()`.
88

9+
### Changed
10+
11+
- Improved parsing of ISO 8601 strings.
12+
913
### Deprecated
1014

1115
- `with_date()` and `with_time()` are deprecated. Use `on()` and `at()` instead.

pendulum/parsing/parser.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import re
44
import copy
55

6+
from datetime import datetime
67
from dateutil import parser
78

89
from .exceptions import ParserError
@@ -15,8 +16,8 @@ class Parser(object):
1516
'^'
1617
'(\d{4})' # Year
1718
'('
18-
' ((?:-|/)?(\d{1,2}))?' # Month (optional)
19-
' ((?:-|/)?(\d{1,2}))?' # Day (optional)
19+
' ((?:-|/)?(\d{2,3}))?' # Month (optional)
20+
' ((?:-|/)?(\d{2}))?' # Day (optional)
2021
')?'
2122

2223
# Time (Optional)
@@ -30,13 +31,24 @@ class Parser(object):
3031
' )?'
3132
# Timezone offset
3233
' ('
33-
' (-|\+)\d{2}:?\d{2}|Z' # Offset (+HH:mm or +HHmm or Z)
34+
' (-|\+)\d{2}:?(?:\d{2})?|Z' # Offset (+HH:mm or +HHmm or +HH or Z)
3435
' )?'
3536
')?'
3637
'$',
3738
re.VERBOSE
3839
)
3940

41+
# ISO 8601 week date, in extended (YYYY-Www[-D]) or basic (YYYYWww[D]) form.
# Groups: 1 = year, 2 = week number, 3 = weekday digit (None when omitted).
# Raw strings are used so that ``\d`` is not an invalid string escape, and
# the trailing separator is only accepted when a weekday digit follows it
# (so a dangling '2012-W05-' is rejected).
ISO8601_WEEK = re.compile(
    r'^'
    r'(\d{4})'      # Year
    r'-?'           # Separator (optional)
    r'W'            # W separator
    r'(\d{2})'      # Week number
    r'(?:-?(\d))?'  # Separator (optional) + weekday (optional)
    r'$'
)
51+
4052
DEFAULT_OPTIONS = {
4153
'day_first': False
4254
}
@@ -64,6 +76,11 @@ def parse_common(self, text):
6476
day = 1
6577
else:
6678
if m.group(4) and m.group(6):
79+
if len(m.group(4)) == 3:
80+
# Should not happen
81+
# Ordinal day with extra day set
82+
raise ParserError('Invalid date string: {}'.format(text))
83+
6784
# Month and day
6885
if self._options['day_first']:
6986
month = int(m.group(6))
@@ -73,8 +90,17 @@ def parse_common(self, text):
7390
day = int(m.group(6))
7491
else:
7592
# Only month
76-
month = int(m.group(4) or m.group(6))
77-
day = 1
93+
if m.group(4) and len(m.group(4)) == 3:
94+
# Ordinal day
95+
dt = datetime.strptime(
96+
'{}-{}'.format(year, m.group(4)),
97+
'%Y-%j'
98+
)
99+
month = dt.month
100+
day = dt.day
101+
else:
102+
month = int(m.group(4) or m.group(6))
103+
day = 1
78104

79105
parsed = {
80106
'year': year,
@@ -111,6 +137,9 @@ def parse_common(self, text):
111137
negative = True if tz.startswith('-') else False
112138
tz = tz[1:]
113139
if ':' not in tz:
140+
if len(tz) == 2:
141+
tz = '{}00'.format(tz)
142+
114143
off_hour = tz[0:2]
115144
off_minute = tz[2:4]
116145
else:
@@ -125,6 +154,34 @@ def parse_common(self, text):
125154

126155
return parsed
127156

157+
def parse_8601_week(self, text):
    """
    Parses a string written in ISO 8601 week notation.

    Accepted forms are whatever ``self.ISO8601_WEEK`` matches, e.g.
    ``2012-W05``, ``2012W05``, ``2012-W05-5`` and ``2012W055``.
    When the weekday is omitted, Monday (1) is assumed.

    The date is computed with ISO 8601 week rules (week 1 is the week
    containing January 4th, weeks start on Monday) rather than with
    ``strptime``'s ``%W``/``%w`` directives, which number weeks from the
    first Monday of the year and do not accept 7 for Sunday.

    :param text: The string to parse.
    :type text: str

    :return: A dict with the keys ``year``, ``month``, ``day``, ``hour``,
        ``minute``, ``second``, ``subsecond`` and ``offset``, or an empty
        dict if ``text`` is not in week notation.
    :rtype: dict

    :raises ValueError: If the weekday is not in 1..7 or the week number
        does not exist in the given ISO year.
    """
    m = self.ISO8601_WEEK.match(text)

    if not m:
        return {}

    year = int(m.group(1))
    week = int(m.group(2))
    # The weekday is optional and defaults to Monday.
    weekday = int(m.group(3) or 1)

    # ValueError is kept as the failure type: it is what strptime()
    # raised for out-of-range components before.
    if not 1 <= weekday <= 7:
        raise ValueError('Invalid weekday in week date: {}'.format(text))

    # December 28th always falls in the last ISO week of its year,
    # so its week number is the number of weeks (52 or 53) in that year.
    last_week = datetime(year, 12, 28).isocalendar()[1]
    if not 1 <= week <= last_week:
        raise ValueError('Invalid week number in week date: {}'.format(text))

    # January 4th always falls in ISO week 1; step back to that week's
    # Monday, then forward to the requested week and weekday.
    jan4 = datetime(year, 1, 4)
    week_one_monday = jan4.toordinal() - (jan4.isoweekday() - 1)
    dt = datetime.fromordinal(
        week_one_monday + (week - 1) * 7 + (weekday - 1)
    )

    # Week notation carries no time or offset information:
    # the result is midnight with no offset.
    return {
        'year': dt.year,
        'month': dt.month,
        'day': dt.day,
        'hour': 0,
        'minute': 0,
        'second': 0,
        'subsecond': 0,
        'offset': None,
    }
184+
128185
def parse(self, text):
129186
"""
130187
Parses a string with the given options.
@@ -134,6 +191,11 @@ def parse(self, text):
134191
135192
:rtype: dict
136193
"""
194+
# ISO8601 week notation
195+
parsed = self.parse_8601_week(text)
196+
if parsed:
197+
return parsed
198+
137199
parsed = self.parse_common(text)
138200
if parsed:
139201
return parsed
@@ -150,6 +212,6 @@ def parse(self, text):
150212
'hour': dt.hour,
151213
'minute': dt.minute,
152214
'second': dt.second,
153-
'subsecond': dt.microsecond,
215+
'subsecond': dt.microsecond * 1000,
154216
'offset': dt.utcoffset().total_seconds() if dt.tzinfo else None,
155217
}

tests/parsing_test/test_parser.py

Lines changed: 145 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,19 +147,68 @@ def test_rfc_3339_extended_nanoseconds(self):
147147
self.assertEqual(123456789, parsed['subsecond'])
148148
self.assertEqual(19800, parsed['offset'])
149149

150-
def test_iso_8601(self):
151-
text = '201610'
150+
def test_iso_8601_date(self):
151+
text = '2012'
152152

153153
parsed = Parser().parse(text)
154-
self.assertEqual(2016, parsed['year'])
155-
self.assertEqual(10, parsed['month'])
154+
self.assertEqual(2012, parsed['year'])
155+
self.assertEqual(1, parsed['month'])
156156
self.assertEqual(1, parsed['day'])
157157
self.assertEqual(0, parsed['hour'])
158158
self.assertEqual(0, parsed['minute'])
159159
self.assertEqual(0, parsed['second'])
160160
self.assertEqual(0, parsed['subsecond'])
161161
self.assertEqual(None, parsed['offset'])
162162

163+
text = '2012-05-03'
164+
165+
parsed = Parser().parse(text)
166+
self.assertEqual(2012, parsed['year'])
167+
self.assertEqual(5, parsed['month'])
168+
self.assertEqual(3, parsed['day'])
169+
self.assertEqual(0, parsed['hour'])
170+
self.assertEqual(0, parsed['minute'])
171+
self.assertEqual(0, parsed['second'])
172+
self.assertEqual(0, parsed['subsecond'])
173+
self.assertEqual(None, parsed['offset'])
174+
175+
text = '20120503'
176+
177+
parsed = Parser().parse(text)
178+
self.assertEqual(2012, parsed['year'])
179+
self.assertEqual(5, parsed['month'])
180+
self.assertEqual(3, parsed['day'])
181+
self.assertEqual(0, parsed['hour'])
182+
self.assertEqual(0, parsed['minute'])
183+
self.assertEqual(0, parsed['second'])
184+
self.assertEqual(0, parsed['subsecond'])
185+
self.assertEqual(None, parsed['offset'])
186+
187+
text = '2012-05'
188+
189+
parsed = Parser().parse(text)
190+
self.assertEqual(2012, parsed['year'])
191+
self.assertEqual(5, parsed['month'])
192+
self.assertEqual(1, parsed['day'])
193+
self.assertEqual(0, parsed['hour'])
194+
self.assertEqual(0, parsed['minute'])
195+
self.assertEqual(0, parsed['second'])
196+
self.assertEqual(0, parsed['subsecond'])
197+
self.assertEqual(None, parsed['offset'])
198+
199+
text = '201205'
200+
201+
parsed = Parser().parse(text)
202+
self.assertEqual(2012, parsed['year'])
203+
self.assertEqual(5, parsed['month'])
204+
self.assertEqual(1, parsed['day'])
205+
self.assertEqual(0, parsed['hour'])
206+
self.assertEqual(0, parsed['minute'])
207+
self.assertEqual(0, parsed['second'])
208+
self.assertEqual(0, parsed['subsecond'])
209+
self.assertEqual(None, parsed['offset'])
210+
211+
def test_iso8601_datetime(self):
163212
text = '2016-10-01T14'
164213

165214
parsed = Parser().parse(text)
@@ -232,6 +281,98 @@ def test_iso_8601(self):
232281
self.assertEqual(400000000, parsed['subsecond'])
233282
self.assertEqual(19800, parsed['offset'])
234283

284+
text = '2008-09-03T20:56:35.450686+01'
285+
286+
parsed = Parser().parse(text)
287+
self.assertEqual(2008, parsed['year'])
288+
self.assertEqual(9, parsed['month'])
289+
self.assertEqual(3, parsed['day'])
290+
self.assertEqual(20, parsed['hour'])
291+
self.assertEqual(56, parsed['minute'])
292+
self.assertEqual(35, parsed['second'])
293+
self.assertEqual(450686000, parsed['subsecond'])
294+
self.assertEqual(3600, parsed['offset'])
295+
296+
def test_iso8601_week_number(self):
    """Week-notation strings resolve to the expected 2012 calendar date."""
    # (input text, expected month, expected day); the year is always 2012.
    cases = (
        ('2012-W05', 1, 30),
        ('2012W05', 1, 30),
        ('2012-W05-5', 2, 3),
        ('2012W055', 2, 3),
    )

    for text, expected_month, expected_day in cases:
        parsed = Parser().parse(text)
        self.assertEqual(2012, parsed['year'])
        self.assertEqual(expected_month, parsed['month'])
        self.assertEqual(expected_day, parsed['day'])
        # No time component: everything else stays at its zero value.
        self.assertEqual(0, parsed['hour'])
        self.assertEqual(0, parsed['minute'])
        self.assertEqual(0, parsed['second'])
        self.assertEqual(0, parsed['subsecond'])
        self.assertEqual(None, parsed['offset'])
344+
345+
def test_iso8601_ordinal(self):
    """Ordinal dates (day 007 of 2012) parse to January 7th, 2012."""
    # Extended form first, then basic form; both must give the same result.
    for text in ('2012-007', '2012007'):
        parsed = Parser().parse(text)
        self.assertEqual(2012, parsed['year'])
        self.assertEqual(1, parsed['month'])
        self.assertEqual(7, parsed['day'])
        # No time component: everything else stays at its zero value.
        self.assertEqual(0, parsed['hour'])
        self.assertEqual(0, parsed['minute'])
        self.assertEqual(0, parsed['second'])
        self.assertEqual(0, parsed['subsecond'])
        self.assertEqual(None, parsed['offset'])
369+
370+
def test_iso8601_ordinal_invalid(self):
    """An ordinal day followed by an extra day component is rejected."""
    parser = Parser()

    with self.assertRaises(ParserError):
        parser.parse('2012-007-05')
374+
375+
235376
def test_invalid(self):
236377
text = '201610T'
237378

0 commit comments

Comments
 (0)