Skip to content
Open
29 changes: 29 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,35 @@ def _union(self, other, sort):
assert isinstance(other, type(self))
assert self.dtype == other.dtype

# For tz-aware DatetimeIndex, perform union in UTC to avoid
# local-time irregularities across DST transitions, then convert back.
tz = getattr(self.dtype, "tz", None)
if tz is not None:
other_tz = getattr(other.dtype, "tz", None)
if (
other_tz == tz
and isinstance(self._data, DatetimeArray)
and isinstance(other._data, DatetimeArray)
):
left_utc_naive = self._data.tz_convert("UTC").tz_localize(None)
right_utc_naive = other._data.tz_convert("UTC").tz_localize(None)
left_naive = type(self)._simple_new(left_utc_naive, name=self.name)
right_naive = type(other)._simple_new(right_utc_naive, name=other.name)
res_naive = super(type(left_naive), left_naive)._union(
right_naive, sort
)

if isinstance(res_naive, DatetimeArray):
base_arr = res_naive
name = self.name
else:
base_arr = cast(DatetimeArray, res_naive._data)
name = res_naive.name

res_arr = base_arr.tz_localize("UTC").tz_convert(tz)
res = type(self)._simple_new(res_arr, name=name)
return res._with_freq("infer")

if self._can_range_setop(other):
return self._range_union(other, sort=sort)

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,30 @@ def test_union3(self, sort, box):
result = first.union(case, sort=sort)
tm.assert_index_equal(result, expected)


def test_union_across_dst_boundary():
    # In US/Eastern the clocks spring forward on 2021-03-14 at 02:00, so the
    # wall-clock times 02:00-02:59 do not exist on that day.
    zone = "US/Eastern"

    # Two three-period hourly ranges: one starting at local midnight, before
    # the gap, and one starting at 03:00, the first valid hour after it.
    before_gap = date_range("2021-03-14 00:00", periods=3, freq="h", tz=zone)
    after_gap = date_range("2021-03-14 03:00", periods=3, freq="h", tz=zone)
    result = before_gap.union(after_gap)

    # The union must keep the timezone and hold every valid hour exactly
    # once; the expected index is built in the same resolution as the input.
    wall_times = (
        "2021-03-14 00:00",
        "2021-03-14 01:00",
        "2021-03-14 03:00",
        "2021-03-14 04:00",
        "2021-03-14 05:00",
    )
    expected = DatetimeIndex(
        [Timestamp(stamp, tz=zone) for stamp in wall_times]
    ).as_unit(before_gap.unit)

    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("tz", tz)
def test_union(self, tz, sort):
rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,11 +457,12 @@ def add3(x, y, z):
ufunc(ser, ser, df)


@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082")
def test_np_fix():
# np.fix is not a ufunc but is composed of several ufunc calls under the hood
# with `out` and `where` keywords
def test_np_trunc():
    # This used to test np.fix, which is not a ufunc but is composed of
    # several ufunc calls under the hood with `out` and `where` keywords. But
    # numpy is deprecating that (or at least discussing deprecating it) in
    # favor of np.trunc, which _is_ a ufunc without the out keyword usage.
    ser = pd.Series([-1.5, -0.5, 0.5, 1.5])
    # Only np.trunc is exercised: a leftover np.fix call here would be
    # overwritten immediately and could raise before the assertion runs.
    result = np.trunc(ser)
    # Truncation rounds toward zero, so -0.5 becomes negative zero.
    expected = pd.Series([-1.0, -0.0, 0.0, 1.0])
    tm.assert_series_equal(result, expected)
Loading