|
6 | 6 | from .formatter import Formatter |
7 | 7 |
|
8 | 8 |
|
# Pre-compiled regular expressions used as building blocks when a format
# string is turned into a parsing pattern. The numeric suffix of each name
# gives the number of digits matched (e.g. ``_MATCH_1_TO_3`` matches one to
# three digits).
#
# Raw strings are used so that regex escapes such as ``\d`` are never
# interpreted (or warned about) as Python string escapes.
_MATCH_1 = re.compile(r'\d')
_MATCH_2 = re.compile(r'\d\d')
_MATCH_3 = re.compile(r'\d{3}')
_MATCH_4 = re.compile(r'\d{4}')
_MATCH_6 = re.compile(r'[+-]?\d{6}')
_MATCH_1_TO_2 = re.compile(r'\d\d?')
_MATCH_1_TO_3 = re.compile(r'\d{1,3}')
_MATCH_1_TO_4 = re.compile(r'\d{1,4}')
_MATCH_1_TO_6 = re.compile(r'[+-]?\d{1,6}')
_MATCH_3_TO_4 = re.compile(r'\d{3}\d?')
_MATCH_5_TO_6 = re.compile(r'\d{5}\d?')
_MATCH_UNSIGNED = re.compile(r'\d+')
_MATCH_SIGNED = re.compile(r'[+-]?\d+')
# "Z" (any case) or a signed hours/minutes offset with an optional colon.
_MATCH_OFFSET = re.compile(r'(?i)Z|[+-]\d\d:?\d\d')
# Same as above, but the minutes part may be left out.
_MATCH_SHORT_OFFSET = re.compile(r'(?i)Z|[+-]\d\d(?::?\d\d)?')
# Signed integer with an optional fraction of up to three digits.
_MATCH_TIMESTAMP = re.compile(r'[+-]?\d+(\.\d{1,3})?')
# Not a raw string on purpose: the ``\uXXXX`` escapes must still be resolved
# by the string literal, so only the regex escapes have their backslash
# doubled. The resulting pattern text is identical to the previous one.
_MATCH_WORD = re.compile(
    "[0-9]*['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+"
    "|[\u0600-\u06FF\\/]+(\\s*?[\u0600-\u06FF]+){1,2}"
)
| 26 | + |
| 27 | + |
| 28 | + |
9 | 29 | class AlternativeFormatter(Formatter): |
10 | 30 |
|
11 | 31 | _TOKENS = '\[([^\[]*)\]|\\\(.)|' \ |
@@ -99,6 +119,71 @@ class AlternativeFormatter(Formatter): |
99 | 119 | 'LLLL': 'dddd, MMMM D, YYYY h:mm A', |
100 | 120 | } |
101 | 121 |
|
# Maps each format token to the regular expression(s) accepted for it when
# parsing. A value is either a single compiled pattern, a tuple of compiled
# patterns (joined as alternatives when the parsing pattern is built), or
# ``None`` for tokens that cannot be parsed yet and are rejected as
# unsupported.
_REGEX_TOKENS = {
    # Year / quarter
    'Y': _MATCH_SIGNED,
    'YY': (_MATCH_1_TO_2, _MATCH_2),
    'YYYY': (_MATCH_1_TO_4, _MATCH_4),
    'Q': _MATCH_1,
    'Qo': None,
    # Month
    'M': _MATCH_1_TO_2,
    'MM': (_MATCH_1_TO_2, _MATCH_2),
    'MMM': None,
    'MMMM': None,
    # Day of month / day of year
    'D': _MATCH_1_TO_2,
    'DD': (_MATCH_1_TO_2, _MATCH_2),
    'DDD': _MATCH_1_TO_3,
    'DDDD': _MATCH_3,
    'Do': None,
    # Hour, minute, second
    'H': _MATCH_1_TO_2,
    'HH': (_MATCH_1_TO_2, _MATCH_2),
    'h': _MATCH_1_TO_2,
    'hh': (_MATCH_1_TO_2, _MATCH_2),
    'm': _MATCH_1_TO_2,
    'mm': (_MATCH_1_TO_2, _MATCH_2),
    's': _MATCH_1_TO_2,
    'ss': (_MATCH_1_TO_2, _MATCH_2),
    # Fractional seconds
    'S': (_MATCH_1_TO_3, _MATCH_1),
    'SS': (_MATCH_1_TO_3, _MATCH_2),
    'SSS': (_MATCH_1_TO_3, _MATCH_3),
    'SSSS': _MATCH_UNSIGNED,
    'SSSSS': _MATCH_UNSIGNED,
    'SSSSSS': _MATCH_UNSIGNED,
    # Meridiem
    'a': None,
    # Timestamps; 'X' reuses the shared timestamp pattern instead of
    # compiling an identical expression a second time.
    'x': _MATCH_SIGNED,
    'X': _MATCH_TIMESTAMP,
}
| 155 | + |
# Maps each format token to a callable converting the matched text into the
# value stored in the parse result.
_PARSE_TOKENS = {
    'YYYY': lambda year: int(year),
    # Two-digit years are placed in the 1900s.
    'YY': lambda year: 1900 + int(year),
    # 'Y' has a matching pattern, so it needs a converter as well; without
    # one, parsing a 'Y' format failed with a KeyError.
    'Y': lambda year: int(year),
    'Q': lambda quarter: int(quarter),
    # Textual month names are not converted yet.
    'MMMM': lambda month: None,
    'MMM': lambda month: None,
    'MM': lambda month: int(month),
    'M': lambda month: int(month),
    'DDDD': lambda day: int(day),
    'DDD': lambda day: int(day),
    'DD': lambda day: int(day),
    'D': lambda day: int(day),
    'HH': lambda hour: int(hour),
    'H': lambda hour: int(hour),
    'hh': lambda hour: int(hour),
    'h': lambda hour: int(hour),
    'mm': lambda minute: int(minute),
    'm': lambda minute: int(minute),
    'ss': lambda second: int(second),
    's': lambda second: int(second),
    # Fractional seconds: the matched digits are the decimal part of a
    # second, so they are right-padded (or truncated) to six digits to get
    # microseconds. The patterns for these tokens accept a variable number
    # of digits, so scaling by a fixed factor per token gave wrong (even
    # out-of-range) values whenever the digit count differed from the
    # token width.
    'S': lambda us: int(us[:6].ljust(6, '0')),
    'SS': lambda us: int(us[:6].ljust(6, '0')),
    'SSS': lambda us: int(us[:6].ljust(6, '0')),
    'SSSS': lambda us: int(us[:6].ljust(6, '0')),
    'SSSSS': lambda us: int(us[:6].ljust(6, '0')),
    'SSSSSS': lambda us: int(us[:6].ljust(6, '0')),
    # Meridiem is not converted yet.
    'a': lambda meridiem: None,
    # 'X' is returned as given; 'x' is divided by 1000.
    'X': lambda ts: float(ts),
    'x': lambda ts: float(ts) / 1e3,
}
| 186 | + |
102 | 187 | def format(self, dt, fmt, locale=None): |
103 | 188 | """ |
104 | 189 | Formats a Pendulum instance with a given format and locale. |
@@ -232,3 +317,97 @@ def _format_localizable_token(self, dt, token, locale): |
232 | 317 | return self._format_localizable_token(dt, token, 'en') |
233 | 318 |
|
234 | 319 | return trans |
| 320 | + |
def parse(self, time, fmt):
    """
    Parses a time string matching a given format into its components.

    The format is split into tokens; each token is replaced by a named
    regex group and the text between tokens is matched literally. The
    whole string has to match the format, from start to end.

    :param time: The timestring
    :type time: str

    :param fmt: The format
    :type fmt: str

    :return: A dictionary of the parsed components (entries that the
        format does not provide are left to ``None``). If the format
        holds no token at all, the time string is returned unchanged.
    :rtype: dict

    :raises ValueError: If the string does not match the format or
        if the format uses an unsupported token.
    """
    matches = list(self._FORMAT_RE.finditer(fmt))
    if not matches:
        return time

    parsed = {
        'year': None,
        'month': None,
        'day': None,
        'hour': None,
        'minute': None,
        'second': None,
        'microsecond': None,
        'tz': None,
        'quarter': None,
        'day_of_week': None,
        'day_of_year': None,
        'meridiem': None,
        'timestamp': None
    }

    # Build the pattern piece by piece so that the text found between
    # two tokens can be escaped: characters such as "." or "(" in the
    # format must match themselves, not act as regex syntax.
    chunks = []
    position = 0
    for match in matches:
        chunks.append(re.escape(fmt[position:match.start()]))
        chunks.append(self._replace_tokens(match.group(0)))
        position = match.end()

    chunks.append(re.escape(fmt[position:]))

    # \Z anchors the end so that trailing characters are rejected;
    # re.match() already anchors the beginning.
    found = re.match('(?:{})\\Z'.format(''.join(chunks)), time)
    if found is None:
        raise ValueError('String does not match format {}'.format(fmt))

    # The match is used directly: every named group is read exactly once.
    self._get_parsed_values(found, parsed)

    return parsed
| 361 | + |
def _get_parsed_values(self, m, parsed):
    """
    Stores the value of every named group of a match.

    :param m: The match; the named groups of its pattern are used
        as token names
    :param parsed: The dictionary receiving the parsed values
    :type parsed: dict
    """
    named_groups = m.re.groupindex
    for name in named_groups:
        # Unmatched optional groups yield None, exactly as a lookup
        # by group number would.
        matched_text = m.group(named_groups[name])
        self._get_parsed_value(name, matched_text, parsed)
| 365 | + |
def _get_parsed_value(self, token, value, parsed):
    """
    Converts the text matched for a token and stores the result.

    :param token: The format token (it must have a converter
        in ``_PARSE_TOKENS``)
    :type token: str

    :param value: The text matched for the token
    :type value: str

    :param parsed: The dictionary receiving the parsed value
    :type parsed: dict

    :raises KeyError: If the token has no converter.
    """
    parsed_token = self._PARSE_TOKENS[token](value)

    # The order of the checks matters: the first matching one wins,
    # so the exact day-of-year tokens must be tested before the
    # generic 'D' check.
    if 'Y' in token:
        parsed['year'] = parsed_token
    elif token == 'Q':
        parsed['quarter'] = parsed_token
    elif 'M' in token:
        # Covers M, MM, MMM and MMMM. The former dedicated MMM/MMMM
        # branch could never be reached (and targeted day_of_week).
        parsed['month'] = parsed_token
    elif token in ('DDDD', 'DDD'):
        parsed['day_of_year'] = parsed_token
    elif 'D' in token:
        parsed['day'] = parsed_token
    elif 'H' in token or token in ('hh', 'h'):
        parsed['hour'] = parsed_token
    elif 'm' in token:
        parsed['minute'] = parsed_token
    elif 's' in token:
        parsed['second'] = parsed_token
    elif 'S' in token:
        parsed['microsecond'] = parsed_token
    elif token == 'a':
        # Keep whatever the converter produced rather than dropping it.
        parsed['meridiem'] = parsed_token
    elif token in ('X', 'x'):
        parsed['timestamp'] = parsed_token
| 395 | + |
def _replace_tokens(self, token):
    """
    Returns the regex fragment matching a single format token.

    Escaped text (``[...]`` or a backslash followed by a character)
    is matched literally; any other token becomes a group named
    after the token.

    :param token: The format token
    :type token: str

    :rtype: str

    :raises ValueError: If the token is unknown or not supported
        for parsing.
    """
    # Literal text must be regex-escaped: it is inserted in a pattern,
    # so a raw "(" or "." would otherwise be read as regex syntax.
    if token.startswith('[') and token.endswith(']'):
        return re.escape(token[1:-1])

    if token.startswith('\\'):
        return re.escape(token[1:])

    # Unknown tokens and tokens mapped to None are both unsupported.
    candidates = self._REGEX_TOKENS.get(token)
    if not candidates:
        raise ValueError('Unsupported token: {}'.format(token))

    if not isinstance(candidates, tuple):
        candidates = (candidates,)

    alternatives = '|'.join(p.pattern for p in candidates)

    return '(?P<{}>{})'.format(token, alternatives)
0 commit comments