From d041ff258b26e7080f6dee69d7b53bbacda00da8 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sat, 19 Jul 2025 21:11:18 +0200 Subject: [PATCH 1/4] fix(DataFrame): to_dict("index") and typevar --- pandas-stubs/core/frame.pyi | 45 +++++++++++++++------ tests/test_frame.py | 81 ++++++++++++++++++++++++++++++------- 2 files changed, 99 insertions(+), 27 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index ed3892817..fad0640fb 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -19,6 +19,7 @@ from typing import ( Generic, Literal, NoReturn, + TypeVar, final, overload, ) @@ -165,6 +166,8 @@ from pandas._typing import ( from pandas.io.formats.style import Styler from pandas.plotting import PlotAccessor +_T_MUTABLE_MAPPING = TypeVar("_T_MUTABLE_MAPPING", bound=MutableMapping, covariant=True) + class _iLocIndexerFrame(_iLocIndexer, Generic[_T]): @overload def __getitem__(self, idx: tuple[int, int]) -> Scalar: ... @@ -396,9 +399,9 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): self, orient: Literal["records"], *, - into: MutableMapping | type[MutableMapping], + into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], index: Literal[True] = ..., - ) -> list[MutableMapping[Hashable, Any]]: ... + ) -> list[_T_MUTABLE_MAPPING]: ... @overload def to_dict( self, @@ -410,39 +413,55 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def to_dict( self, - orient: Literal["dict", "list", "series", "index"], + orient: Literal["index"], + *, + into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], + index: Literal[True] = ..., + ) -> MutableMapping[Hashable, _T_MUTABLE_MAPPING]: ... + @overload + def to_dict( + self, + orient: Literal["index"], + *, + into: type[dict] = ..., + index: Literal[True] = ..., + ) -> dict[Hashable, dict[Hashable, Any]]: ... + @overload + def to_dict( + self, + orient: Literal["dict", "list", "series"], *, - into: MutableMapping | type[MutableMapping], + into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], index: Literal[True] = ..., - ) -> MutableMapping[Hashable, Any]: ... + ) -> _T_MUTABLE_MAPPING: ... @overload def to_dict( self, orient: Literal["split", "tight"], *, - into: MutableMapping | type[MutableMapping], + into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], index: bool = ..., - ) -> MutableMapping[Hashable, Any]: ... + ) -> _T_MUTABLE_MAPPING: ... @overload def to_dict( self, - orient: Literal["dict", "list", "series", "index"] = ..., + orient: Literal["dict", "list", "series"] = ..., *, - into: MutableMapping | type[MutableMapping], + into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], index: Literal[True] = ..., - ) -> MutableMapping[Hashable, Any]: ... + ) -> _T_MUTABLE_MAPPING: ... @overload def to_dict( self, orient: Literal["split", "tight"] = ..., *, - into: MutableMapping | type[MutableMapping], + into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], index: bool = ..., - ) -> MutableMapping[Hashable, Any]: ... + ) -> _T_MUTABLE_MAPPING: ... @overload def to_dict( self, - orient: Literal["dict", "list", "series", "index"] = ..., + orient: Literal["dict", "list", "series"] = ..., *, into: type[dict] = ..., index: Literal[True] = ..., diff --git a/tests/test_frame.py b/tests/test_frame.py index ffd85d609..1f9f1c29d 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -3638,33 +3638,83 @@ def test_to_records() -> None: ) -def test_to_dict() -> None: +def test_to_dict_simple() -> None: check(assert_type(DF.to_dict(), dict[Hashable, Any]), dict) check(assert_type(DF.to_dict("split"), dict[Hashable, Any]), dict) + check(assert_type(DF.to_dict("records"), list[dict[Hashable, Any]]), list) + + if TYPE_CHECKING_INVALID_USAGE: + + def test(mapping: Mapping) -> None: # pyright: ignore[reportUnusedFunction] + DF.to_dict( # type: ignore[call-overload] + into=mapping # pyright: ignore[reportArgumentType,reportCallIssue] + ) + + +def test_to_dict_into_defaultdict_any() -> None: + """Test DataFrame.to_dict with `into=defaultdict[Any, list]`""" + + data = pd.DataFrame({("str", "rts"): [[1, 2, 4], [2, 3], [3]]}) + target: defaultdict[Hashable, list[Any]] = defaultdict(list) - target: MutableMapping = defaultdict(list) check( - assert_type(DF.to_dict(into=target), MutableMapping[Hashable, Any]), defaultdict + assert_type(data.to_dict(into=target), defaultdict[Hashable, list[Any]]), + defaultdict, ) - target = defaultdict(list) check( - assert_type(DF.to_dict("tight", into=target), MutableMapping[Hashable, Any]), + assert_type( + data.to_dict("index", into=target), + MutableMapping[Hashable, defaultdict[Hashable, list[Any]]], + ), + defaultdict, + ) + check( + assert_type( + data.to_dict("tight", into=target), defaultdict[Hashable, list[Any]] + ), defaultdict, ) - target = defaultdict(list) - check(assert_type(DF.to_dict("records"), list[dict[Hashable, Any]]), list) check( assert_type( - DF.to_dict("records", into=target), list[MutableMapping[Hashable, Any]] + data.to_dict("records", into=target), list[defaultdict[Hashable, list[Any]]] ), list, ) - if TYPE_CHECKING_INVALID_USAGE: - def test(mapping: Mapping) -> None: # pyright: ignore[reportUnusedFunction] - DF.to_dict( # type: ignore[call-overload] - into=mapping # pyright: ignore[reportArgumentType,reportCallIssue] - ) + +def test_to_dict_into_defaultdict_typed() -> None: + """Test DataFrame.to_dict with `into=defaultdict[tuple[str, str], list[int]]`""" + + data = pd.DataFrame({("str", "rts"): [[1, 2, 4], [2, 3], [3]]}) + target: defaultdict[tuple[str, str], list[int]] = defaultdict(list) + target[("str", "rts")].append(1) + + check( + assert_type(data.to_dict(into=target), defaultdict[tuple[str, str], list[int]]), + defaultdict, + tuple, + ) + check( + assert_type( + data.to_dict("index", into=target), + MutableMapping[Hashable, defaultdict[tuple[str, str], list[int]]], + ), + defaultdict, + ) + check( + assert_type( + data.to_dict("tight", into=target), defaultdict[tuple[str, str], list[int]] + ), + defaultdict, + ) + check( + assert_type( + data.to_dict("records", into=target), + list[defaultdict[tuple[str, str], list[int]]], + ), + list, + defaultdict, + ) def test_neg() -> None: @@ -4111,7 +4161,10 @@ def test_to_dict_index() -> None: assert_type(df.to_dict(orient="series", index=True), dict[Hashable, Any]), dict ) check( - assert_type(df.to_dict(orient="index", index=True), dict[Hashable, Any]), dict + assert_type( + df.to_dict(orient="index", index=True), dict[Hashable, dict[Hashable, Any]] + ), + dict, ) check( assert_type(df.to_dict(orient="split", index=True), dict[Hashable, Any]), dict From 7da086e39ec731eda03c6f10d0c1cb4e0b7c810d Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sat, 19 Jul 2025 22:22:07 +0200 Subject: [PATCH 2/4] feat: https://github.com/pandas-dev/pandas/blob/v2.3.1/pandas/core/common.py#L416-L417 --- pandas-stubs/core/frame.pyi | 9 +++++++++ tests/test_frame.py | 12 +++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index fad0640fb..7f3a0c57c 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -2,6 +2,7 @@ from builtins import ( bool as _bool, str as _str, ) +from collections import defaultdict from collections.abc import ( Callable, Hashable, @@ -395,6 +396,14 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): na_value: Scalar = ..., ) -> np.ndarray: ... @overload + def to_dict( + self, + orient=..., + *, + into: type[defaultdict], + index: Literal[True] = ..., + ) -> Never: ... + @overload def to_dict( self, orient: Literal["records"], diff --git a/tests/test_frame.py b/tests/test_frame.py index 1f9f1c29d..ea0041388 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -2,6 +2,7 @@ from collections import defaultdict from collections.abc import ( + Callable, Hashable, Iterable, Iterator, @@ -20,7 +21,6 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Generic, TypedDict, TypeVar, @@ -39,6 +39,7 @@ ) import pytest from typing_extensions import ( + Never, TypeAlias, assert_never, assert_type, @@ -3650,6 +3651,15 @@ def test(mapping: Mapping) -> None: # pyright: ignore[reportUnusedFunction] into=mapping # pyright: ignore[reportArgumentType,reportCallIssue] ) + assert_type(DF.to_dict(into=defaultdict), Never) + assert_type(DF.to_dict("records", into=defaultdict), Never) + assert_type(DF.to_dict("index", into=defaultdict), Never) + assert_type(DF.to_dict("dict", into=defaultdict), Never) + assert_type(DF.to_dict("list", into=defaultdict), Never) + assert_type(DF.to_dict("series", into=defaultdict), Never) + assert_type(DF.to_dict("split", into=defaultdict), Never) + assert_type(DF.to_dict("tight", into=defaultdict), Never) + def test_to_dict_into_defaultdict_any() -> None: """Test DataFrame.to_dict with `into=defaultdict[Any, list]`""" From afb5729c46d497a5615323ce3b47bd5c1548d9b1 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 20 Jul 2025 13:30:21 +0200 Subject: [PATCH 3/4] fix: https://github.com/pandas-dev/pandas-stubs/pull/1283#discussion_r2217455479 --- pandas-stubs/core/frame.pyi | 49 ++++++++++++------------ tests/test_frame.py | 75 ++++++++++++++++++------------------- 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 7f3a0c57c..00c0966d6 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -2,7 +2,10 @@ from builtins import ( bool as _bool, str as _str, ) -from collections import defaultdict +from collections import ( + OrderedDict, + defaultdict, +) from collections.abc import ( Callable, Hashable, @@ -398,7 +401,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def to_dict( self, - orient=..., + orient: str = ..., *, into: type[defaultdict], index: Literal[True] = ..., @@ -424,33 +427,33 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): self, orient: Literal["index"], *, - into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], + into: defaultdict, index: Literal[True] = ..., - ) -> MutableMapping[Hashable, _T_MUTABLE_MAPPING]: ... + ) -> defaultdict[Hashable, dict[Hashable, Any]]: ... @overload def to_dict( self, orient: Literal["index"], *, - into: type[dict] = ..., + into: OrderedDict | type[OrderedDict], index: Literal[True] = ..., - ) -> dict[Hashable, dict[Hashable, Any]]: ... + ) -> OrderedDict[Hashable, dict[Hashable, Any]]: ... @overload def to_dict( self, - orient: Literal["dict", "list", "series"], + orient: Literal["index"], *, - into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], + into: MutableMapping | type[MutableMapping], index: Literal[True] = ..., - ) -> _T_MUTABLE_MAPPING: ... + ) -> MutableMapping[Hashable, dict[Hashable, Any]]: ... @overload def to_dict( self, - orient: Literal["split", "tight"], + orient: Literal["index"], *, - into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], - index: bool = ..., - ) -> _T_MUTABLE_MAPPING: ... + into: type[dict] = ..., + index: Literal[True] = ..., + ) -> dict[Hashable, dict[Hashable, Any]]: ... @overload def to_dict( self, @@ -460,14 +463,6 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): index: Literal[True] = ..., ) -> _T_MUTABLE_MAPPING: ... @overload - def to_dict( - self, - orient: Literal["split", "tight"] = ..., - *, - into: _T_MUTABLE_MAPPING | type[_T_MUTABLE_MAPPING], - index: bool = ..., - ) -> _T_MUTABLE_MAPPING: ... - @overload def to_dict( self, orient: Literal["dict", "list", "series"] = ..., @@ -478,11 +473,19 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): @overload def to_dict( self, - orient: Literal["split", "tight"] = ..., + orient: Literal["split", "tight"], + *, + into: MutableMapping | type[MutableMapping], + index: bool = ..., + ) -> MutableMapping[str, list]: ... + @overload + def to_dict( + self, + orient: Literal["split", "tight"], *, into: type[dict] = ..., index: bool = ..., - ) -> dict[Hashable, Any]: ... + ) -> dict[str, list]: ... def to_gbq( self, destination_table: str, diff --git a/tests/test_frame.py b/tests/test_frame.py index ea0041388..69113aec8 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1,6 +1,9 @@ from __future__ import annotations -from collections import defaultdict +from collections import ( + OrderedDict, + defaultdict, +) from collections.abc import ( Callable, Hashable, @@ -3641,8 +3644,13 @@ def test_to_records() -> None: def test_to_dict_simple() -> None: check(assert_type(DF.to_dict(), dict[Hashable, Any]), dict) - check(assert_type(DF.to_dict("split"), dict[Hashable, Any]), dict) check(assert_type(DF.to_dict("records"), list[dict[Hashable, Any]]), list) + check(assert_type(DF.to_dict("index"), dict[Hashable, dict[Hashable, Any]]), dict) + check(assert_type(DF.to_dict("dict"), dict[Hashable, Any]), dict) + check(assert_type(DF.to_dict("list"), dict[Hashable, Any]), dict) + check(assert_type(DF.to_dict("series"), dict[Hashable, Any]), dict) + check(assert_type(DF.to_dict("split"), dict[str, list]), dict, str) + check(assert_type(DF.to_dict("tight"), dict[str, list]), dict, str) if TYPE_CHECKING_INVALID_USAGE: @@ -3661,69 +3669,58 @@ def test(mapping: Mapping) -> None: # pyright: ignore[reportUnusedFunction] assert_type(DF.to_dict("tight", into=defaultdict), Never) -def test_to_dict_into_defaultdict_any() -> None: - """Test DataFrame.to_dict with `into=defaultdict[Any, list]`""" +def test_to_dict_into_defaultdict() -> None: + """Test DataFrame.to_dict with `into` is an instance of defaultdict[Any, list]""" data = pd.DataFrame({("str", "rts"): [[1, 2, 4], [2, 3], [3]]}) - target: defaultdict[Hashable, list[Any]] = defaultdict(list) + target: defaultdict[Any, list] = defaultdict(list) check( - assert_type(data.to_dict(into=target), defaultdict[Hashable, list[Any]]), + assert_type(data.to_dict(into=target), defaultdict[Any, list]), defaultdict, + tuple, ) check( - assert_type( + assert_type( # type: ignore[assert-type] data.to_dict("index", into=target), - MutableMapping[Hashable, defaultdict[Hashable, list[Any]]], + defaultdict[Hashable, dict[Hashable, Any]], ), defaultdict, ) check( - assert_type( - data.to_dict("tight", into=target), defaultdict[Hashable, list[Any]] - ), + assert_type(data.to_dict("tight", into=target), MutableMapping[str, list]), defaultdict, + str, ) check( - assert_type( - data.to_dict("records", into=target), list[defaultdict[Hashable, list[Any]]] - ), + assert_type(data.to_dict("records", into=target), list[defaultdict[Any, list]]), list, + defaultdict, ) -def test_to_dict_into_defaultdict_typed() -> None: - """Test DataFrame.to_dict with `into=defaultdict[tuple[str, str], list[int]]`""" +def test_to_dict_into_ordered_dict() -> None: + """Test DataFrame.to_dict with `into=OrderedDict`""" data = pd.DataFrame({("str", "rts"): [[1, 2, 4], [2, 3], [3]]}) - target: defaultdict[tuple[str, str], list[int]] = defaultdict(list) - target[("str", "rts")].append(1) - check( - assert_type(data.to_dict(into=target), defaultdict[tuple[str, str], list[int]]), - defaultdict, - tuple, - ) + check(assert_type(data.to_dict(into=OrderedDict), OrderedDict), OrderedDict, tuple) check( assert_type( - data.to_dict("index", into=target), - MutableMapping[Hashable, defaultdict[tuple[str, str], list[int]]], + data.to_dict("index", into=OrderedDict), + OrderedDict[Hashable, dict[Hashable, Any]], ), - defaultdict, + OrderedDict, ) check( - assert_type( - data.to_dict("tight", into=target), defaultdict[tuple[str, str], list[int]] - ), - defaultdict, + assert_type(data.to_dict("tight", into=OrderedDict), MutableMapping[str, list]), + OrderedDict, + str, ) check( - assert_type( - data.to_dict("records", into=target), - list[defaultdict[tuple[str, str], list[int]]], - ), + assert_type(data.to_dict("records", into=OrderedDict), list[OrderedDict]), list, - defaultdict, + OrderedDict, ) @@ -4177,16 +4174,16 @@ def test_to_dict_index() -> None: dict, ) check( - assert_type(df.to_dict(orient="split", index=True), dict[Hashable, Any]), dict + assert_type(df.to_dict(orient="split", index=True), dict[str, list]), dict, str ) check( - assert_type(df.to_dict(orient="tight", index=True), dict[Hashable, Any]), dict + assert_type(df.to_dict(orient="tight", index=True), dict[str, list]), dict, str ) check( - assert_type(df.to_dict(orient="tight", index=False), dict[Hashable, Any]), dict + assert_type(df.to_dict(orient="tight", index=False), dict[str, list]), dict, str ) check( - assert_type(df.to_dict(orient="split", index=False), dict[Hashable, Any]), dict + assert_type(df.to_dict(orient="split", index=False), dict[str, list]), dict, str ) if TYPE_CHECKING_INVALID_USAGE: check(assert_type(df.to_dict(orient="records", index=False), list[dict[Hashable, Any]]), list) # type: ignore[assert-type, call-overload] # pyright: ignore[reportArgumentType,reportAssertTypeFailure,reportCallIssue] From 81c57e3aa6c41096b53947d25611b2384656dfd6 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Wed, 30 Jul 2025 18:03:18 +0200 Subject: [PATCH 4/4] fix(comment): https://github.com/pandas-dev/pandas-stubs/pull/1283#discussion_r2229500036 --- pandas-stubs/core/frame.pyi | 2 +- tests/test_frame.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 00c0966d6..b5fd68e5c 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -443,7 +443,7 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): self, orient: Literal["index"], *, - into: MutableMapping | type[MutableMapping], + into: type[MutableMapping], index: Literal[True] = ..., ) -> MutableMapping[Hashable, dict[Hashable, Any]]: ... @overload diff --git a/tests/test_frame.py b/tests/test_frame.py index 69113aec8..1aea31aba 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -3681,7 +3681,7 @@ def test_to_dict_into_defaultdict() -> None: tuple, ) check( - assert_type( # type: ignore[assert-type] + assert_type( data.to_dict("index", into=target), defaultdict[Hashable, dict[Hashable, Any]], ),