Skip to content

Commit b7bb1a5

Browse files
authored
fix: assume current year for formats without it (#1288)
1 parent bdc57d3 commit b7bb1a5

File tree

4 files changed

+120
-2
lines changed

4 files changed

+120
-2
lines changed

dateparser/date.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
set_correct_day_from_settings,
2020
set_correct_month_from_settings,
2121
)
22+
from dateparser.utils.strptime import strptime as patched_strptime
2223

2324
APOSTROPHE_LOOK_ALIKE_CHARS = [
2425
"\N{RIGHT SINGLE QUOTATION MARK}", # '\u2019'
@@ -182,7 +183,7 @@ def parse_with_formats(date_string, date_formats, settings):
182183
period = "day"
183184
for date_format in date_formats:
184185
try:
185-
date_obj = datetime.strptime(date_string, date_format)
186+
date_obj = patched_strptime(date_string, date_format)
186187
except ValueError:
187188
continue
188189
else:

dateparser/utils/strptime.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,34 @@ def patch_strptime():
9090
__strptime = patch_strptime()
9191

9292

93-
def strptime(date_string, format):
93+
def _prepare_format(date_string: str, og_format: str) -> tuple[str, str]:
    """Inject the current year when a format names a day of month but no year.

    The format is scanned for strptime directives. If it contains ``%d`` and
    none of ``%Y``, ``%y`` or ``%G``, the current year is prepended to
    ``date_string`` and ``%Y`` is prepended to ``og_format`` (each followed by
    a single space). Otherwise both arguments are returned untouched.

    :param date_string: the text that is going to be parsed.
    :param og_format: the strptime format supplied by the caller.
    :return: a ``(date_string, format)`` pair to hand over to strptime.
    """
    # Adapted from std lib: https://github.com/python/cpython/blob/e34a5e33049ce845de646cf24a498766a2da3586/Lib/_strptime.py#L448
    # The format is escaped before scanning. This is not cosmetic: escaping
    # changes which characters follow a "%" and therefore which directives
    # the scan below reports.
    escaped = re.sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", og_format)
    escaped = re.sub(r"\s+", r"\\s+", escaped)
    escaped = re.sub(r"'", "['\u02bc]", escaped)

    # Collect the directive part (group 1) of every "%..." occurrence.
    directives = {
        found[1]
        for found in re.finditer(r"%[-_0^#]*[0-9]*([OE]?\\?.?)", escaped)
    }
    has_year = not directives.isdisjoint(("Y", "y", "G"))
    has_day_of_month = "d" in directives

    if has_year or not has_day_of_month:
        return date_string, og_format

    this_year = datetime.today().year
    return f"{this_year} {date_string}", f"%Y {og_format}"
117+
118+
119+
def strptime(date_string: str, format: str) -> datetime:
120+
date_string, format = _prepare_format(date_string, format)
94121
obj = datetime(*__strptime(date_string, format)[:-3])
95122

96123
if "%f" in format:

tests/test_parser.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from datetime import datetime, time
2+
import warnings
23

34
from parameterized import param, parameterized
45

@@ -507,6 +508,30 @@ def then_error_is_raised_when_date_is_parsed(self, date_string):
507508
with self.assertRaises(ValueError):
508509
self.parser.parse(date_string, self.settings)
509510

511+
@parameterized.expand(
    [
        param(date_string="oct 14"),
        param(date_string="14-October-2025"),
        param(date_string="2024-11-27"),
        param(date_string="tomorrow"),
        param(date_string="1484823450"),
        param(date_string="In two months"),
    ]
)
def test_parser_does_not_raise_ambiguious_date_deprecation_warning(
    self, date_string
):
    # Parse each input while recording every warning, then check that none
    # of the recorded messages contains the "day of month without a year"
    # text.
    # NOTE(review): the misspelling "ambiguious" is intentional here; it is
    # presumably the exact wording of the warning being guarded against, so
    # it must not be "corrected" -- confirm against the emitting library.
    needle = "day of month without a year specified is ambiguious"
    with warnings.catch_warnings(record=True) as caught:
        # "always" keeps warnings from being suppressed by earlier filters.
        warnings.simplefilter("always")
        self.when_date_is_parsed(date_string)
    offending = [entry for entry in caught if needle in str(entry.message)]
    self.assertEqual(len(offending), 0)
534+
510535

511536
class TestTimeParser(BaseTestCase):
512537
@parameterized.expand(

tests/test_utils_strptime.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import locale
22
from datetime import datetime
33
from unittest import SkipTest
4+
import warnings
45

56
from parameterized import param, parameterized
67

@@ -173,3 +174,67 @@ def test_parsing_date_should_fail_using_datetime_strptime_if_locale_is_non_engli
173174
def test_microseconds_are_parsed_correctly(self, date_string, fmt, expected):
174175
self.when_date_string_is_parsed(date_string, fmt)
175176
self.then_date_object_is(expected)
177+
178+
@parameterized.expand(
    [
        # Each input is paired with a format that actually matches it, so
        # the parse path is exercised end to end. Previously "oct 14" was
        # paired with "%m %d" and "10-14" with "%b %d", neither of which can
        # ever match its input.
        param(date_string="oct 14", fmt=r"%b %d"),
        param(date_string="10-14", fmt=r"%m-%d"),
        param(date_string="12 Dec 10:30:55.000111", fmt="%d %b %H:%M:%S.%f"),
        param(date_string="Wed 12 December 22:41", fmt="%a %d %B %H:%M"),
    ]
)
def test_dates_with_no_year_do_not_raise_a_deprecation_warning(
    self, date_string, fmt
):
    # Parse a year-less date with an explicit format while recording every
    # warning, then check that none of the recorded messages contains the
    # "day of month without a year" text.
    # NOTE(review): the misspelling "ambiguious" is intentional here; it is
    # presumably the exact wording of the warning being guarded against, so
    # it must not be "corrected" -- confirm against the emitting library.
    needle = "day of month without a year specified is ambiguious"
    with warnings.catch_warnings(record=True) as caught:
        # "always" keeps warnings from being suppressed by earlier filters.
        warnings.simplefilter("always")
        self.when_date_string_is_parsed(date_string, fmt)
    year_warnings = [
        entry for entry in caught if needle in str(entry.message)
    ]
    self.assertEqual(len(year_warnings), 0)
199+
200+
@parameterized.expand(
    [
        param(
            date_string="oct 14",
            fmt=r"%b %d",
            expected=datetime(2010, 10, 14, 0, 0),
        ),
        param(
            date_string="10 14",
            fmt=r"%m %d",
            expected=datetime(2010, 10, 14, 0, 0),
        ),
        param(
            date_string="14 Oct",
            fmt=r"%d %b",
            expected=datetime(2010, 10, 14, 0, 0),
        ),
        param(
            "Monday 21 January",
            "%A %d %B",
            expected=datetime(2010, 1, 21, 0, 0),
        ),
        param(
            "Tue 2 Mar",
            "%a %d %b",
            expected=datetime(2010, 3, 2, 0, 0),
        ),
        param(
            "Friday 12 December 10:30",
            "%A %d %B %H:%M",
            expected=datetime(2010, 12, 12, 10, 30),
        ),
    ]
)
def test_dates_with_no_year_use_the_current_year(
    self, date_string: str, fmt: str, expected: datetime
):
    # The year in each `expected` value (2010) is only a placeholder: it is
    # replaced with the current year before comparing, so the test checks
    # that year-less input is resolved against "today".
    self.when_date_string_is_parsed(date_string, fmt)
    this_year = datetime.today().year
    self.assertEqual(self.result, expected.replace(year=this_year))

0 commit comments

Comments
 (0)