From 5c06cbbf817b22dfb1e09453dc009254264f9fe9 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 02:30:58 +0100 Subject: [PATCH 1/9] TST: xfail test for DatetimeIndex.union across DST boundary (GH#62915) --- pandas/tests/indexes/datetimes/test_setops.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 7a68cb867c94e..e2b30425d835c 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -60,6 +60,30 @@ def test_union3(self, sort, box): result = first.union(case, sort=sort) tm.assert_index_equal(result, expected) + +@pytest.mark.xfail(reason="see GH#62915: union across DST boundary", strict=False) +def test_union_across_dst_boundary_xfail(): + # US/Eastern DST spring-forward on 2021-03-14 at 02:00 (02:00-02:59 local time does not exist) + tz = "US/Eastern" + # Left side spans up to the missing hour window + left = date_range("2021-03-14 00:00", periods=3, freq="H", tz=tz) + # right side continues from the first valid post-DST hour + right = date_range("2021-03-14 03:00", periods=3, freq="H", tz=tz) + + # Expect a union that preserves tz and includes valid hours without duplicates + expected = DatetimeIndex( + [ + Timestamp("2021-03-14 00:00", tz=tz), + Timestamp("2021-03-14 01:00", tz=tz), + Timestamp("2021-03-14 03:00", tz=tz), + Timestamp("2021-03-14 04:00", tz=tz), + Timestamp("2021-03-14 05:00", tz=tz), + ] + ) + + result = left.union(right) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("tz", tz) def test_union(self, tz, sort): rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz) From f487dc9e0e3c7d9ef75c07b761cfcc210719a8bc Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 03:01:12 +0100 Subject: [PATCH 2/9] TST: wrap long comment to satisfy ruff E501 (GH#62915) --- pandas/tests/indexes/datetimes/test_setops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index e2b30425d835c..b2620cdae5984 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -63,7 +63,8 @@ def test_union3(self, sort, box): @pytest.mark.xfail(reason="see GH#62915: union across DST boundary", strict=False) def test_union_across_dst_boundary_xfail(): - # US/Eastern DST spring-forward on 2021-03-14 at 02:00 (02:00-02:59 local time does not exist) + # US/Eastern DST spring-forward on 2021-03-14 at 02:00 + # (02:00-02:59 local time does not exist) tz = "US/Eastern" # Left side spans up to the missing hour window left = date_range("2021-03-14 00:00", periods=3, freq="H", tz=tz) From e8bf47c89a39c0c8295a5b9db329e1ed6898dc55 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 03:23:54 +0100 Subject: [PATCH 3/9] FIX: robust DatetimeIndex.union across DST transitions + flip test to pass (GH#62915) --- pandas/core/indexes/datetimelike.py | 18 +++++++++++++++++- pandas/tests/indexes/datetimes/test_setops.py | 9 ++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 58529c5597b6e..c902a84106cf3 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -541,7 +541,14 @@ def _as_range_index(self) -> RangeIndex: return RangeIndex(rng) def _can_range_setop(self, other) -> bool: - return isinstance(self.freq, Tick) and isinstance(other.freq, Tick) + # Only allow range-based setops when both objects are tick-based AND + # not timezone-aware. For tz-aware DatetimeIndex, constant i8 stepping + # does not hold across DST transitions in local time, so avoid range path. + if not (isinstance(self.freq, Tick) and isinstance(other.freq, Tick)): + return False + self_tz = getattr(self.dtype, "tz", None) + other_tz = getattr(other.dtype, "tz", None) + return self_tz is None and other_tz is None def _wrap_range_setop(self, other, res_i8) -> Self: new_freq = None @@ -726,6 +733,15 @@ def _union(self, other, sort): # that result.freq == self.freq return result else: + # For tz-aware DatetimeIndex, perform union in UTC to avoid + # local-time irregularities across DST transitions, then convert back. + tz = getattr(self.dtype, "tz", None) + if tz is not None: + left_utc = self.tz_convert("UTC") + right_utc = other.tz_convert("UTC") + res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) + res = res_utc.tz_convert(tz) + return res._with_freq("infer") return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index b2620cdae5984..25e4250866e91 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -61,15 +61,14 @@ def test_union3(self, sort, box): tm.assert_index_equal(result, expected) -@pytest.mark.xfail(reason="see GH#62915: union across DST boundary", strict=False) -def test_union_across_dst_boundary_xfail(): +def test_union_across_dst_boundary(): # US/Eastern DST spring-forward on 2021-03-14 at 02:00 # (02:00-02:59 local time does not exist) tz = "US/Eastern" # Left side spans up to the missing hour window - left = date_range("2021-03-14 00:00", periods=3, freq="H", tz=tz) + left = date_range("2021-03-14 00:00", periods=3, freq="h", tz=tz) # right side continues from the first valid post-DST hour - right = date_range("2021-03-14 03:00", periods=3, freq="H", tz=tz) + right = date_range("2021-03-14 03:00", periods=3, freq="h", tz=tz) # Expect a union that preserves tz and includes valid hours without duplicates expected = DatetimeIndex( @@ -80,7 +79,7 @@ def test_union_across_dst_boundary_xfail(): Timestamp("2021-03-14 04:00", tz=tz), Timestamp("2021-03-14 05:00", tz=tz), ] - ) + ).as_unit(left.unit) result = left.union(right) tm.assert_index_equal(result, expected) From c90c78d3feed2a5def8f446a0c3a382e67a8d07e Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 05:11:27 +0100 Subject: [PATCH 4/9] TYPING: narrow tz-aware union path to DatetimeArray in _union to satisfy mypy; behavior unchanged (GH#62915) --- pandas/core/indexes/datetimelike.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c902a84106cf3..2b083ed7b22cc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -737,11 +737,21 @@ def _union(self, other, sort): # local-time irregularities across DST transitions, then convert back. tz = getattr(self.dtype, "tz", None) if tz is not None: - left_utc = self.tz_convert("UTC") - right_utc = other.tz_convert("UTC") - res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) - res = res_utc.tz_convert(tz) - return res._with_freq("infer") + # Narrow to DatetimeArray to access tz_convert without mypy errors + if isinstance(self._data, DatetimeArray) and isinstance( + other._data, DatetimeArray + ): + left_utc_arr = self._data.tz_convert("UTC") + right_utc_arr = other._data.tz_convert("UTC") + left_utc = type(self)._simple_new(left_utc_arr, name=self.name) + right_utc = type(other)._simple_new(right_utc_arr, name=other.name) + res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) + # res_utc is DatetimeIndex; convert its underlying array back to tz + res_arr = cast(DatetimeArray, res_utc._data).tz_convert(tz) + res = type(self)._simple_new(res_arr, name=res_utc.name) + return res._with_freq("infer") + # Defensive fallback if types are unexpected + return super()._union(other, sort)._with_freq("infer") return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- From b970007a85b8950ac97ba96ca719a2cae2b84736 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 06:41:24 +0100 Subject: [PATCH 5/9] BUG: tz-aware DatetimeIndex.union - perform UTC-naive base union when tz matches to avoid recursion; preserve semantics for differing tz; mypy-safe ops; wrap long comment (GH#62915) --- pandas/core/indexes/datetimelike.py | 34 ++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 2b083ed7b22cc..6b0e79d580576 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -736,22 +736,36 @@ def _union(self, other, sort): # For tz-aware DatetimeIndex, perform union in UTC to avoid # local-time irregularities across DST transitions, then convert back. tz = getattr(self.dtype, "tz", None) - if tz is not None: + other_tz = getattr(other.dtype, "tz", None) + if tz is not None and tz == other_tz: # Narrow to DatetimeArray to access tz_convert without mypy errors if isinstance(self._data, DatetimeArray) and isinstance( other._data, DatetimeArray ): - left_utc_arr = self._data.tz_convert("UTC") - right_utc_arr = other._data.tz_convert("UTC") - left_utc = type(self)._simple_new(left_utc_arr, name=self.name) - right_utc = type(other)._simple_new(right_utc_arr, name=other.name) - res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) - # res_utc is DatetimeIndex; convert its underlying array back to tz - res_arr = cast(DatetimeArray, res_utc._data).tz_convert(tz) - res = type(self)._simple_new(res_arr, name=res_utc.name) + # Convert both to UTC, then drop tz to avoid re-entering + # tz-aware path + left_utc_naive = self._data.tz_convert("UTC").tz_localize(None) + right_utc_naive = other._data.tz_convert("UTC").tz_localize(None) + left_naive = type(self)._simple_new(left_utc_naive, name=self.name) + right_naive = type(other)._simple_new( + right_utc_naive, name=other.name + ) + # Perform base union on tz-naive indices to avoid DST complications + res_naive = super(type(left_naive), left_naive)._union( + right_naive, sort + ) + # Localize back to UTC and then convert to original tz + if isinstance(res_naive, DatetimeArray): + base_arr = res_naive + name = self.name + else: + base_arr = cast(DatetimeArray, res_naive._data) + name = res_naive.name + res_arr = base_arr.tz_localize("UTC").tz_convert(tz) + res = type(self)._simple_new(res_arr, name=name) return res._with_freq("infer") # Defensive fallback if types are unexpected - return super()._union(other, sort)._with_freq("infer") + return super()._union(other, sort) return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- From 1ebd2264bfcffd084f3cc19fa0276fc5ca50718e Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 10:25:35 +0100 Subject: [PATCH 6/9] CI: retrigger pipeline for PR #63088 From 1b25d69092a4e6de85a3aa598a602296b7a56e48 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 11:29:29 +0100 Subject: [PATCH 7/9] TST: make xfail for test_np_fix conditional on runtime behavior to avoid XPASS in numpy-dev/python-dev --- pandas/tests/series/test_ufunc.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 5faacbb5559a9..f0b478d0ab1f7 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -9,6 +9,15 @@ import pandas._testing as tm from pandas.arrays import SparseArray +# Probe whether np.fix works with Series without raising due to read-only out +# This avoids relying solely on is_numpy_dev, which may not reflect CI pinning. +try: + _ser = pd.Series([-1.5, -0.5]) + _probe_result = np.fix(_ser) + _NP_FIX_WORKS = True +except Exception: # pragma: no cover - best-effort environment probe + _NP_FIX_WORKS = False + @pytest.fixture(params=[np.add, np.logaddexp]) def ufunc(request): @@ -238,6 +247,12 @@ def __init__(self, value) -> None: def __add__(self, other): return self.value + other.value + def __eq__(self, other) -> bool: + return type(other) is Dummy and self.value == other.value + + def __repr__(self) -> str: + return f"Dummy({self.value})" + arr = np.array([Dummy(0), Dummy(1)]) ser = pd.Series(arr) tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) @@ -457,7 +472,11 @@ def add3(x, y, z): ufunc(ser, ser, df) -@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082") +@pytest.mark.xfail( + condition=not _NP_FIX_WORKS, + reason="see https://github.com/pandas-dev/pandas/pull/51082", + strict=True, +) def test_np_fix(): # np.fix is not a ufunc but is composed of several ufunc calls under the hood # with `out` and `where` keywords From 714d8a7e02388415f9276e26f13a56cfb211750a Mon Sep 17 00:00:00 2001 From: antznette1 Date: Sun, 16 Nov 2025 23:17:32 +0100 Subject: [PATCH 8/9] BUG: fix DatetimeIndex union across DST without disabling range fast path --- pandas/core/indexes/datetimelike.py | 71 ++++++++++++----------------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 6b0e79d580576..b4c3e6ea754aa 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -541,14 +541,7 @@ def _as_range_index(self) -> RangeIndex: return RangeIndex(rng) def _can_range_setop(self, other) -> bool: - # Only allow range-based setops when both objects are tick-based AND - # not timezone-aware. For tz-aware DatetimeIndex, constant i8 stepping - # does not hold across DST transitions in local time, so avoid range path. - if not (isinstance(self.freq, Tick) and isinstance(other.freq, Tick)): - return False - self_tz = getattr(self.dtype, "tz", None) - other_tz = getattr(other.dtype, "tz", None) - return self_tz is None and other_tz is None + return isinstance(self.freq, Tick) and isinstance(other.freq, Tick) def _wrap_range_setop(self, other, res_i8) -> Self: new_freq = None @@ -724,6 +717,35 @@ def _union(self, other, sort): assert isinstance(other, type(self)) assert self.dtype == other.dtype + # For tz-aware DatetimeIndex, perform union in UTC to avoid + # local-time irregularities across DST transitions, then convert back. + tz = getattr(self.dtype, "tz", None) + if tz is not None: + other_tz = getattr(other.dtype, "tz", None) + if ( + other_tz == tz + and isinstance(self._data, DatetimeArray) + and isinstance(other._data, DatetimeArray) + ): + left_utc_naive = self._data.tz_convert("UTC").tz_localize(None) + right_utc_naive = other._data.tz_convert("UTC").tz_localize(None) + left_naive = type(self)._simple_new(left_utc_naive, name=self.name) + right_naive = type(other)._simple_new(right_utc_naive, name=other.name) + res_naive = super(type(left_naive), left_naive)._union( + right_naive, sort + ) + + if isinstance(res_naive, DatetimeArray): + base_arr = res_naive + name = self.name + else: + base_arr = cast(DatetimeArray, res_naive._data) + name = res_naive.name + + res_arr = base_arr.tz_localize("UTC").tz_convert(tz) + res = type(self)._simple_new(res_arr, name=name) + return res._with_freq("infer") + if self._can_range_setop(other): return self._range_union(other, sort=sort) @@ -733,39 +755,6 @@ def _union(self, other, sort): # that result.freq == self.freq return result else: - # For tz-aware DatetimeIndex, perform union in UTC to avoid - # local-time irregularities across DST transitions, then convert back. - tz = getattr(self.dtype, "tz", None) - other_tz = getattr(other.dtype, "tz", None) - if tz is not None and tz == other_tz: - # Narrow to DatetimeArray to access tz_convert without mypy errors - if isinstance(self._data, DatetimeArray) and isinstance( - other._data, DatetimeArray - ): - # Convert both to UTC, then drop tz to avoid re-entering - # tz-aware path - left_utc_naive = self._data.tz_convert("UTC").tz_localize(None) - right_utc_naive = other._data.tz_convert("UTC").tz_localize(None) - left_naive = type(self)._simple_new(left_utc_naive, name=self.name) - right_naive = type(other)._simple_new( - right_utc_naive, name=other.name - ) - # Perform base union on tz-naive indices to avoid DST complications - res_naive = super(type(left_naive), left_naive)._union( - right_naive, sort - ) - # Localize back to UTC and then convert to original tz - if isinstance(res_naive, DatetimeArray): - base_arr = res_naive - name = self.name - else: - base_arr = cast(DatetimeArray, res_naive._data) - name = res_naive.name - res_arr = base_arr.tz_localize("UTC").tz_convert(tz) - res = type(self)._simple_new(res_arr, name=name) - return res._with_freq("infer") - # Defensive fallback if types are unexpected - return super()._union(other, sort) return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- From 3a662c6aae6e21550afeae6ad467ec3d6e1b5eb9 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Mon, 17 Nov 2025 00:41:25 +0100 Subject: [PATCH 9/9] MAINT: sync test_ufunc.py with main for PR #63088 --- pandas/tests/series/test_ufunc.py | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index f0b478d0ab1f7..797f7bdb1ab7e 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -9,15 +9,6 @@ import pandas._testing as tm from pandas.arrays import SparseArray -# Probe whether np.fix works with Series without raising due to read-only out -# This avoids relying solely on is_numpy_dev, which may not reflect CI pinning. -try: - _ser = pd.Series([-1.5, -0.5]) - _probe_result = np.fix(_ser) - _NP_FIX_WORKS = True -except Exception: # pragma: no cover - best-effort environment probe - _NP_FIX_WORKS = False - @pytest.fixture(params=[np.add, np.logaddexp]) def ufunc(request): @@ -247,12 +238,6 @@ def __init__(self, value) -> None: def __add__(self, other): return self.value + other.value - def __eq__(self, other) -> bool: - return type(other) is Dummy and self.value == other.value - - def __repr__(self) -> str: - return f"Dummy({self.value})" - arr = np.array([Dummy(0), Dummy(1)]) ser = pd.Series(arr) tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) @@ -472,15 +457,12 @@ def add3(x, y, z): ufunc(ser, ser, df) -@pytest.mark.xfail( - condition=not _NP_FIX_WORKS, - reason="see https://github.com/pandas-dev/pandas/pull/51082", - strict=True, -) -def test_np_fix(): - # np.fix is not a ufunc but is composed of several ufunc calls under the hood - # with `out` and `where` keywords +def test_np_trunc(): + # This used to test np.fix, which is not a ufunc but is composed of + # several ufunc calls under the hood with `out` and `where` keywords. But numpy + # is deprecating that (or at least discussing deprecating) in favor of np.trunc, + # which _is_ a ufunc without the out keyword usage. ser = pd.Series([-1.5, -0.5, 0.5, 1.5]) - result = np.fix(ser) + result = np.trunc(ser) expected = pd.Series([-1.0, -0.0, 0.0, 1.0]) tm.assert_series_equal(result, expected)