diff --git a/dateparser/date.py b/dateparser/date.py
index 5d8f9a63a..e23444720 100644
--- a/dateparser/date.py
+++ b/dateparser/date.py
@@ -19,6 +19,7 @@
     set_correct_day_from_settings,
     set_correct_month_from_settings,
 )
+from dateparser.utils.strptime import strptime as patched_strptime
 
 APOSTROPHE_LOOK_ALIKE_CHARS = [
     "\N{RIGHT SINGLE QUOTATION MARK}",  # '\u2019'
@@ -182,7 +183,7 @@ def parse_with_formats(date_string, date_formats, settings):
     period = "day"
     for date_format in date_formats:
         try:
-            date_obj = datetime.strptime(date_string, date_format)
+            date_obj = patched_strptime(date_string, date_format)
         except ValueError:
             continue
         else:
diff --git a/dateparser/utils/strptime.py b/dateparser/utils/strptime.py
index 226716c8a..b22356753 100644
--- a/dateparser/utils/strptime.py
+++ b/dateparser/utils/strptime.py
@@ -90,7 +90,38 @@ def patch_strptime():
 __strptime = patch_strptime()
 
 
-def strptime(date_string, format):
+def _prepare_format(date_string: str, og_format: str) -> tuple[str, str]:
+    """Add the current year to inputs whose format has a day but no year.
+
+    When ``og_format`` contains a day-of-month directive (``%d``) and no
+    year directive (``%Y``, ``%y`` or ``%G``), return ``date_string`` and
+    ``og_format`` prefixed with the current year and ``%Y``. Otherwise
+    return both arguments unchanged.
+    """
+    # Adapted from std lib: https://github.com/python/cpython/blob/e34a5e33049ce845de646cf24a498766a2da3586/Lib/_strptime.py#L448
+    escaped_format = re.sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", og_format)
+    escaped_format = re.sub(r"\s+", r"\\s+", escaped_format)
+    escaped_format = re.sub(r"'", "['\u02bc]", escaped_format)
+    year_in_format = False
+    day_of_month_in_format = False
+
+    # Only the directive letters are needed, so iterate over the matches
+    # rather than running a substitution whose result is discarded.
+    for match in re.finditer(r"%[-_0^#]*[0-9]*([OE]?\\?.?)", escaped_format):
+        format_char = match[1]
+        if format_char in ("Y", "y", "G"):
+            year_in_format = True
+        elif format_char == "d":
+            day_of_month_in_format = True
+
+    if day_of_month_in_format and not year_in_format:
+        current_year = datetime.today().year
+        return f"{current_year} {date_string}", f"%Y {og_format}"
+    return date_string, og_format
+
+
+def strptime(date_string: str, format: str) -> datetime:
+    date_string, format = _prepare_format(date_string, format)
     obj = datetime(*__strptime(date_string, format)[:-3])
 
     if "%f" in format:
diff --git a/tests/test_parser.py b/tests/test_parser.py
index fbeeb8884..c1c5f6def 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,4 +1,5 @@
 from datetime import datetime, time
+import warnings
 
 from parameterized import param, parameterized
 
@@ -507,6 +508,31 @@ def then_error_is_raised_when_date_is_parsed(self, date_string):
         with self.assertRaises(ValueError):
             self.parser.parse(date_string, self.settings)
 
+    @parameterized.expand(
+        [
+            param(date_string="oct 14"),
+            param(date_string="14-October-2025"),
+            param(date_string="2024-11-27"),
+            param(date_string="tomorrow"),
+            param(date_string="1484823450"),
+            param(date_string="In two months"),
+        ]
+    )
+    def test_parser_does_not_raise_ambiguous_date_deprecation_warning(
+        self, date_string
+    ):
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            self.when_date_is_parsed(date_string)
+        # Match on the stable part of the warning message so the check does
+        # not depend on the exact spelling of its final word.
+        year_warnings = [
+            warn
+            for warn in w
+            if "day of month without a year specified" in str(warn.message)
+        ]
+        self.assertEqual(len(year_warnings), 0)
+
 
 class TestTimeParser(BaseTestCase):
     @parameterized.expand(
diff --git a/tests/test_utils_strptime.py b/tests/test_utils_strptime.py
index 690e55be1..529158af7 100644
--- a/tests/test_utils_strptime.py
+++ b/tests/test_utils_strptime.py
@@ -1,6 +1,7 @@
 import locale
 from datetime import datetime
 from unittest import SkipTest
+import warnings
 
 from parameterized import param, parameterized
 
@@ -173,3 +174,68 @@ def test_parsing_date_should_fail_using_datetime_strptime_if_locale_is_non_engli
     def test_microseconds_are_parsed_correctly(self, date_string, fmt, expected):
         self.when_date_string_is_parsed(date_string, fmt)
         self.then_date_object_is(expected)
+
+    @parameterized.expand(
+        [
+            param(date_string="oct 14", fmt=r"%b %d"),
+            param(date_string="10-14", fmt=r"%m-%d"),
+            param(date_string="12 Dec 10:30:55.000111", fmt="%d %b %H:%M:%S.%f"),
+            param(date_string="Wed 12 December 22:41", fmt="%a %d %B %H:%M"),
+        ]
+    )
+    def test_dates_with_no_year_do_not_raise_a_deprecation_warning(
+        self, date_string, fmt
+    ):
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            self.when_date_string_is_parsed(date_string, fmt)
+        # Match on the stable part of the warning message so the check does
+        # not depend on the exact spelling of its final word.
+        year_warnings = [
+            warn
+            for warn in w
+            if "day of month without a year specified" in str(warn.message)
+        ]
+        self.assertEqual(len(year_warnings), 0)
+
+    @parameterized.expand(
+        [
+            param(
+                date_string="oct 14",
+                fmt=r"%b %d",
+                expected=datetime(2010, 10, 14, 0, 0),
+            ),
+            param(
+                date_string="10 14",
+                fmt=r"%m %d",
+                expected=datetime(2010, 10, 14, 0, 0),
+            ),
+            param(
+                date_string="14 Oct",
+                fmt=r"%d %b",
+                expected=datetime(2010, 10, 14, 0, 0),
+            ),
+            param(
+                "Monday 21 January",
+                "%A %d %B",
+                expected=datetime(2010, 1, 21, 0, 0),
+            ),
+            param(
+                "Tue 2 Mar",
+                "%a %d %b",
+                expected=datetime(2010, 3, 2, 0, 0),
+            ),
+            param(
+                "Friday 12 December 10:30",
+                "%A %d %B %H:%M",
+                expected=datetime(2010, 12, 12, 10, 30),
+            ),
+        ]
+    )
+    def test_dates_with_no_year_use_the_current_year(
+        self, date_string: str, fmt: str, expected: datetime
+    ):
+        self.when_date_string_is_parsed(date_string, fmt)
+        current_year = datetime.today().year
+        expected = expected.replace(year=current_year)
+        self.assertEqual(self.result, expected)