Skip to content

Commit bdc57d3

Browse files
authored
Add time span support (#1284)
* Add time span functionality to search_dates for date range expressions * Add docs
1 parent 0667cb3 commit bdc57d3

File tree

8 files changed

+338
-1
lines changed

8 files changed

+338
-1
lines changed

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Key Features
5858
- `Search
5959
dates <https://dateparser.readthedocs.io/en/latest/introduction.html#search-for-dates-in-longer-chunks-of-text>`__
6060
in longer texts.
61+
- Time span detection for expressions like "past month", "last week".
6162

6263
Online demo
6364
-----------

dateparser/conf.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class Settings:
2727
* `SKIP_TOKENS`
2828
* `NORMALIZE`
2929
* `RETURN_TIME_AS_PERIOD`
30+
* `RETURN_TIME_SPAN`
31+
* `DEFAULT_START_OF_WEEK`
32+
* `DEFAULT_DAYS_IN_MONTH`
3033
* `PARSERS`
3134
* `DEFAULT_LANGUAGES`
3235
* `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
@@ -229,6 +232,14 @@ def check_settings(settings):
229232
"CACHE_SIZE_LIMIT": {
230233
"type": int,
231234
},
235+
"RETURN_TIME_SPAN": {"type": bool},
236+
"DEFAULT_START_OF_WEEK": {
237+
"values": ("monday", "sunday"),
238+
"type": str,
239+
},
240+
"DEFAULT_DAYS_IN_MONTH": {
241+
"type": int,
242+
},
232243
}
233244

234245
modified_settings = settings._mod_settings # check only modified settings

dateparser/search/search.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections.abc import Set
2+
from datetime import datetime
23

34
import regex as re
45

@@ -7,6 +8,7 @@
78
from dateparser.date import DateDataParser
89
from dateparser.languages.loader import LocaleDataLoader
910
from dateparser.search.text_detection import FullTextLanguageDetector
11+
from dateparser.utils.time_spans import detect_time_span, generate_time_span
1012

1113
RELATIVE_REG = re.compile("(ago|in|from now|tomorrow|today|yesterday)")
1214

@@ -185,8 +187,23 @@ def search_parse(self, shortname, text, settings):
185187
translated=translated,
186188
settings=settings,
187189
)
190+
191+
results = list(zip(substrings, [i[0]["date_obj"] for i in parsed]))
192+
193+
if getattr(settings, "RETURN_TIME_SPAN", False):
194+
span_info = detect_time_span(text)
195+
if span_info:
196+
base_date = getattr(settings, "RELATIVE_BASE", None) or datetime.now()
197+
start_date, end_date = generate_time_span(
198+
span_info, base_date, settings
199+
)
200+
201+
matched_text = span_info["matched_text"]
202+
results.append((matched_text + " (start)", start_date))
203+
results.append((matched_text + " (end)", end_date))
204+
188205
parser._settings = Settings()
189-
return list(zip(substrings, [i[0]["date_obj"] for i in parsed]))
206+
return results
190207

191208

192209
class DateSearchWithDetection:

dateparser/utils/time_spans.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""
2+
Utilities for handling time spans and date ranges.
3+
"""
4+
5+
import re
6+
from datetime import datetime, timedelta
7+
from dateutil.relativedelta import relativedelta
8+
9+
10+
def get_week_start(date, start_of_week="monday"):
    """Return the first day of the week that contains ``date``.

    Args:
        date: A ``date`` or ``datetime`` (anything that provides
            ``weekday()`` and supports subtracting a ``timedelta``).
        start_of_week: ``"monday"`` or ``"sunday"``. Matching ignores case
            and surrounding whitespace. Any other value is treated like
            ``"sunday"``.

    Returns:
        ``date`` moved back by whole days to the first day of its week.
        Only days are subtracted, so the time-of-day of a ``datetime`` is
        left untouched.
    """
    # Normalise first so that "Monday" or "MONDAY" is not silently handled
    # as a Sunday-based week.
    first_day = str(start_of_week).strip().lower()

    # weekday() numbers Monday as 0 and Sunday as 6.
    if first_day == "monday":
        days_back = date.weekday()
    else:  # sunday
        days_back = (date.weekday() + 1) % 7

    return date - timedelta(days=days_back)
18+
19+
20+
def get_week_end(date, start_of_week="monday"):
    """Return the last day of the week that contains ``date``.

    The result is the value computed by ``get_week_start`` for the same
    arguments, moved forward by six days.
    """
    six_days = timedelta(days=6)
    return get_week_start(date, start_of_week) + six_days
24+
25+
26+
# One precompiled pattern covering every supported span expression:
#   [for the|during the|in the] <direction word> (<N> day(s)|week(s)|month(s) | week | month)
# Each direction and each unit has its own named group so the result can be
# derived from *which* group matched rather than from the matched text.
_TIME_SPAN_REGEX = re.compile(
    r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?"
    r"(?:(?P<past>past|last|previous)|(?P<future>next|coming|following))\s+"
    r"(?:"
    r"(?P<number>\d+)\s+(?:(?P<days>days?)|(?P<weeks>weeks?)|(?P<months>months?))"
    r"|(?P<week>week)|(?P<month>month)"
    r")\b",
    re.IGNORECASE,
)

# Unit group names of _TIME_SPAN_REGEX; each doubles as the reported "type".
_TIME_SPAN_TYPES = ("days", "weeks", "months", "week", "month")


def detect_time_span(text):
    """Detect the first time span expression in ``text``.

    Recognised expressions are a direction word (``past``/``last``/
    ``previous`` or ``next``/``coming``/``following``) followed by either
    ``week``/``month`` or a number and ``day(s)``/``week(s)``/``month(s)``,
    optionally preceded by ``for the``, ``during the`` or ``in the``.
    Matching is case-insensitive.

    When the text contains several span expressions, the one that starts
    earliest in the text is reported.

    Args:
        text: The string to scan. ``None``, an empty string or a non-string
            value yields ``None`` instead of raising.

    Returns:
        ``None`` when no span expression is found, otherwise a dict with:

        * ``"type"``: ``"week"`` or ``"month"`` for un-numbered spans;
          ``"days"``, ``"weeks"`` or ``"months"`` for numbered spans
          (always plural, even when the text says e.g. ``1 day``).
        * ``"direction"``: ``"past"`` or ``"future"``.
        * ``"matched_text"``: the matched substring, including any lead-in.
        * ``"start_pos"`` / ``"end_pos"``: offsets of the match in ``text``.
        * ``"number"``: the integer count; present only for numbered spans.
    """
    if not text or not isinstance(text, str):
        return None

    match = _TIME_SPAN_REGEX.search(text)
    if match is None:
        return None

    # Exactly one unit group takes part in any successful match.
    span_type = next(
        name for name in _TIME_SPAN_TYPES if match.group(name) is not None
    )

    result = {
        "type": span_type,
        "direction": "past" if match.group("past") is not None else "future",
        "matched_text": match.group(0),
        "start_pos": match.start(),
        "end_pos": match.end(),
    }

    number = match.group("number")
    if number is not None:
        result["number"] = int(number)

    return result
98+
99+
100+
def generate_time_span(span_info, base_date=None, settings=None):
    """Compute the ``(start_date, end_date)`` pair for a detected span.

    Args:
        span_info: Mapping with ``"type"`` and ``"direction"`` keys and an
            optional ``"number"`` (defaults to 1), in the shape produced by
            ``detect_time_span``.
        base_date: Reference point for the span; ``datetime.now()`` is used
            when it is ``None``.
        settings: Optional object whose ``DEFAULT_START_OF_WEEK`` and
            ``DEFAULT_DAYS_IN_MONTH`` attributes are read; ``"monday"`` and
            ``30`` are used when ``settings`` is ``None`` or an attribute
            is missing.

    Returns:
        A ``(start_date, end_date)`` tuple:

        * ``"week"``: the full week before (past) or after (future) the
          week containing the base date.
        * ``"month"``: ``DEFAULT_DAYS_IN_MONTH`` days ending at (past) or
          starting at (future) the base date.
        * ``"days"`` / ``"weeks"`` / ``"months"``: ``number`` units ending
          at or starting at the base date; months are calendar months.
        * any other type: a single day ending at or starting at the base
          date.

        Any direction other than ``"past"`` is handled as future. Dates are
        only shifted by the offsets above; the time-of-day of the base date
        is carried over unchanged.
    """
    anchor = datetime.now() if base_date is None else base_date

    week_first_day, month_length = "monday", 30
    if settings is not None:
        week_first_day = getattr(settings, "DEFAULT_START_OF_WEEK", "monday")
        month_length = getattr(settings, "DEFAULT_DAYS_IN_MONTH", 30)

    kind = span_info["type"]
    looking_back = span_info["direction"] == "past"
    count = span_info.get("number", 1)

    # "week" is the only type aligned to calendar boundaries, so it is
    # handled on its own before the rolling-window types.
    if kind == "week":
        this_week = get_week_start(anchor, week_first_day)
        if looking_back:
            return (this_week - timedelta(days=7), this_week - timedelta(days=1))
        first_day = this_week + timedelta(days=7)
        return (first_day, first_day + timedelta(days=6))

    # Every remaining type is a window of fixed length touching the anchor.
    if kind == "month":
        length = relativedelta(days=month_length)
    elif kind == "days":
        length = timedelta(days=count)
    elif kind == "weeks":
        length = timedelta(weeks=count)
    elif kind == "months":
        length = relativedelta(months=count)
    else:
        length = timedelta(days=1)

    if looking_back:
        return (anchor - length, anchor)
    return (anchor, anchor + length)

dateparser_data/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
"DEFAULT_LANGUAGES": [],
2727
# Optional language detection
2828
"LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": 0.5,
29+
# Time span settings
30+
"RETURN_TIME_SPAN": False,
31+
"DEFAULT_START_OF_WEEK": "monday",
32+
"DEFAULT_DAYS_IN_MONTH": 30,
2933
# Other settings
3034
"RETURN_TIME_AS_PERIOD": False,
3135
"PARSERS": default_parsers,

docs/introduction.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,17 @@ You can extract dates from longer strings of text. They are returned as list of
211211
:members: search_dates
212212
:noindex:
213213

214+
Time Span Detection
215+
-------------------
216+
217+
The `search_dates` function can detect time spans from expressions like "past month", "last week", etc. When `RETURN_TIME_SPAN` is enabled, it returns start and end dates for the detected period.
218+
219+
.. code-block:: python
220+
221+
>>> search_dates("Messages from the past month", settings={'RETURN_TIME_SPAN': True})
222+
[('past month (start)', datetime.datetime(2024, 11, 7, 0, 0)),
223+
('past month (end)', datetime.datetime(2024, 12, 7, 23, 59, 59, 999999))]
224+
214225
Advanced Usage
215226
==============
216227
If you need more control over what is being parsed, check the :ref:`settings` section as well as the :ref:`using-datedataparser` section.

docs/settings.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,27 @@ For example, assuming current date is `June 16, 2015`:
112112
>>> parse('tomorrow', settings={'RELATIVE_BASE': datetime.datetime(2020, 1, 1)})
113113
datetime.datetime(2020, 1, 2, 0, 0)
114114

115+
``RETURN_TIME_SPAN``
116+
~~~~~~~~~~~~~~~~~~~~
117+
118+
**default**: ``False``
119+
120+
When enabled, `search_dates` detects time span expressions (e.g., "past month", "last week") and adds a start and an end entry for the detected span to the results it returns.
121+
122+
``DEFAULT_START_OF_WEEK``
123+
~~~~~~~~~~~~~~~~~~~~~~~~~
124+
125+
**default**: ``'monday'``
126+
127+
Sets which day starts the week for time span calculations. Options: ``'monday'``, ``'sunday'``.
128+
129+
``DEFAULT_DAYS_IN_MONTH``
130+
~~~~~~~~~~~~~~~~~~~~~~~~~
131+
132+
**default**: ``30``
133+
134+
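Sets the number of days to use for single-month expressions such as "past month" or "next month" in time span detection. Numbered expressions such as "last 3 months" are computed in calendar months and are not affected by this setting.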
135+
115136
``STRICT_PARSING``: defaults to ``False``.
116137

117138
When set to ``True``, no result is returned at all if any of the ``day``, ``month`` or ``year`` parts is missing:

0 commit comments

Comments
 (0)