diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1383202154f04..1e613bd562d4d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -927,6 +927,7 @@ Other - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`) - Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`) - Bug in :meth:`Series.describe` where median percentile was always included when the ``percentiles`` argument was passed (:issue:`60550`). +- Bug in :meth:`Series.describe` where statistics with multiple dtypes for ExtensionArrays were coerced to ``float64`` which raised a ``DimensionalityError`` (:issue:`61707`) - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. 
(:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 944e28a9b0238..b02e902f2af11 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -251,6 +251,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: import pyarrow as pa dtype = ArrowDtype(pa.float64()) + + elif any(type(item) != type(d[0]) for item in d): + # GH61707: describe() doesn't work on EAs + # when series entries cannot be cast to float64, set dtype=None + dtype = None + else: dtype = Float64Dtype() elif series.dtype.kind in "iufb": diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py index c3246fd35227c..59e45e49a8ad0 100644 --- a/pandas/tests/series/methods/test_describe.py +++ b/pandas/tests/series/methods/test_describe.py @@ -92,6 +92,29 @@ def test_describe_empty_object(self): # ensure NaN, not None assert np.isnan(result.iloc[2]) assert np.isnan(result.iloc[3]) + + def test_series_cast_to_float64_fails(self): + # https://github.com/pandas-dev/pandas/issues/61707 + from decimal import Decimal + + from pandas.tests.extension.decimal import to_decimal + + s = Series(to_decimal([1, 2.5, 3]), dtype="decimal") + + expected = Series( + [ + 3, + Decimal("2.166666666666666666666666667"), + Decimal("0.8498365855987974716713706849"), + Decimal("1"), + Decimal("3"), + ], + index=["count", "mean", "std", "min", "max"], + dtype="object", + ) + + result = s.describe(percentiles=[]) + tm.assert_series_equal(result, expected) def test_describe_with_tz(self, tz_naive_fixture): # GH 21332