Skip to content
Open
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ Other enhancements
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
- :meth:`DataFrame.rank` now preserves the dtype for extension arrays (:issue:`52829`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be dtype_backend, not dtype?

- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
Expand Down
1 change: 0 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,6 @@ def rank(
)
else:
raise TypeError("Array with ndim > 2 are not supported.")

return ranks


Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2408,6 +2408,7 @@ def _rank(
"""
See Series.rank.__doc__.
"""

if axis != 0:
raise NotImplementedError

Expand Down
56 changes: 33 additions & 23 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9276,34 +9276,35 @@ def rank(
msg = "na_option must be one of 'keep', 'top', or 'bottom'"
raise ValueError(msg)

def ranker(data):
if data.ndim == 2:
# i.e. DataFrame, we cast to ndarray
values = data.values
else:
# i.e. Series, can dispatch to EA
values = data._values

if isinstance(values, ExtensionArray):
ranks = values._rank(
axis=axis_int,
def ranker(blk_values):
if isinstance(blk_values, ExtensionArray) and blk_values.ndim == 1:
ranks = blk_values._rank(
axis=0,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)
else:
ranks = algos.rank(
values,
axis=axis_int,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)

ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
return ranks_obj.__finalize__(self, method="rank")
if axis_int == 0:
ranks = algos.rank(
blk_values.T,
axis=axis_int,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
).T
else:
ranks = algos.rank(
blk_values,
axis=axis_int,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)
return ranks

if numeric_only:
if self.ndim == 1 and not is_numeric_dtype(self.dtype):
Expand All @@ -9316,7 +9317,16 @@ def ranker(data):
else:
data = self

return ranker(data)
should_transpose = axis_int == 1

if should_transpose:
data = data.T
applied = data._mgr.apply(ranker)
result = self._constructor_from_mgr(applied, axes=applied.axes)
if should_transpose:
result = result.T

return result.__finalize__(self, method="rank")

@doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
def compare(
Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/frame/methods/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Infinity,
NegInfinity,
)
import pandas.util._test_decorators as td

from pandas import (
DataFrame,
Expand Down Expand Up @@ -498,3 +499,49 @@ def test_rank_string_dtype(self, string_dtype_no_object):
exp_dtype = "float64"
expected = Series([1, 2, None, 3], dtype=exp_dtype)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"method,og_dtype,expected_dtype",
[
("average", "UInt32", "Float64"),
("average", "Float32", "Float64"),
pytest.param(
"average",
"int32[pyarrow]",
"double[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
("min", "Int32", "Float64"),
("min", "Float32", "Float64"),
pytest.param(
"min",
"int32[pyarrow]",
"double[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
],
)
def test_rank_extension_array_dtype(self, method, og_dtype, expected_dtype):
# GH#52829
result = DataFrame([4, 89, 33], dtype=og_dtype).rank()
if method == "average":
expected = DataFrame([1.0, 3.0, 2.0], dtype=expected_dtype)
else:
expected = DataFrame([1, 3, 2], dtype=expected_dtype)
tm.assert_frame_equal(result, expected)

def test_rank_mixed_extension_array_dtype(self):
pytest.importorskip("pyarrow")
result = DataFrame(
{
"base": Series([4, 5, 6]),
"extension": Series([7, 8, 9], dtype="int32[pyarrow]"),
}
).rank(method="min")
expected = DataFrame(
{
"base": Series([1.0, 2.0, 3.0], dtype="float64"),
"extension": Series([1, 2, 3], dtype="uint64[pyarrow]"),
}
)
tm.assert_frame_equal(result, expected)
Loading