Merge branch 'main' into issue_61863

khemkaran10 · web-flow · commit 9b74934558dd · 2025-07-17T21:10:33.000+05:30
diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml
@@ -57,8 +57,6 @@ jobs:
       run: python web/pandas_web.py web/pandas --target-path=web/build
 
     - name: Build documentation
-      # TEMP don't let errors fail the build until all string dtype changes are fixed
-      continue-on-error: true
       run: doc/make.py --warnings-are-errors
 
     - name: Build the interactive terminal
diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
@@ -590,7 +590,7 @@ arguments. The special value ``all`` can also be used:
 
 .. ipython:: python
 
-    frame.describe(include=["object"])
+    frame.describe(include=["str"])
     frame.describe(include=["number"])
     frame.describe(include="all")
 
diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst
@@ -184,7 +184,7 @@ API changes
   .. ipython:: python
      :okwarning:
 
-     dfc.loc[0]['A'] = 1111
+     dfc.loc[0]['B'] = 1111
 
   ::
 
@@ -198,7 +198,7 @@ API changes
 
   .. ipython:: python
 
-     dfc.loc[0, 'A'] = 11
+     dfc.loc[0, 'B'] = 1111
      dfc
 
 - ``Panel.reindex`` has the following call signature ``Panel.reindex(items=None, major_axis=None, minor_axis=None, **kwargs)``
diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst
@@ -1025,20 +1025,49 @@ Other:
 - :func:`describe` on mixed-types DataFrames is more flexible. Type-based column filtering is now possible via the ``include``/``exclude`` arguments.
   See the :ref:`docs <basics.describe>` (:issue:`8164`).
 
-  .. ipython:: python
+  .. code-block:: python
 
-    df = pd.DataFrame({'catA': ['foo', 'foo', 'bar'] * 8,
-                       'catB': ['a', 'b', 'c', 'd'] * 6,
-                       'numC': np.arange(24),
-                       'numD': np.arange(24.) + .5})
-    df.describe(include=["object"])
-    df.describe(include=["number", "object"], exclude=["float"])
+    >>> df = pd.DataFrame({'catA': ['foo', 'foo', 'bar'] * 8,
+    ...                    'catB': ['a', 'b', 'c', 'd'] * 6,
+    ...                    'numC': np.arange(24),
+    ...                    'numD': np.arange(24.) + .5})
+    >>> df.describe(include=["object"])
+           catA catB
+    count    24   24
+    unique    2    4
+    top     foo    a
+    freq     16    6
+    >>> df.describe(include=["number", "object"], exclude=["float"])
+           catA catB       numC
+    count    24   24  24.000000
+    unique    2    4        NaN
+    top     foo    a        NaN
+    freq     16    6        NaN
+    mean    NaN  NaN  11.500000
+    std     NaN  NaN   7.071068
+    min     NaN  NaN   0.000000
+    25%     NaN  NaN   5.750000
+    50%     NaN  NaN  11.500000
+    75%     NaN  NaN  17.250000
+    max     NaN  NaN  23.000000
 
   Requesting all columns is possible with the shorthand 'all'
 
-  .. ipython:: python
+  .. code-block:: python
 
-    df.describe(include='all')
+    >>> df.describe(include='all')
+           catA catB       numC       numD
+    count    24   24  24.000000  24.000000
+    unique    2    4        NaN        NaN
+    top     foo    a        NaN        NaN
+    freq     16    6        NaN        NaN
+    mean    NaN  NaN  11.500000  12.000000
+    std     NaN  NaN   7.071068   7.071068
+    min     NaN  NaN   0.000000   0.500000
+    25%     NaN  NaN   5.750000   6.250000
+    50%     NaN  NaN  11.500000  12.000000
+    75%     NaN  NaN  17.250000  17.750000
+    max     NaN  NaN  23.000000  23.500000
 
   Without those arguments, ``describe`` will behave as before, including only numerical columns or, if none are, only categorical columns. See also the :ref:`docs <basics.describe>`
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -794,28 +794,28 @@ def categories(self) -> Index:
 
         >>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
         >>> ser.cat.categories
-        Index(['a', 'b', 'c'], dtype='object')
+        Index(['a', 'b', 'c'], dtype='str')
 
         >>> raw_cat = pd.Categorical(["a", "b", "c", "a"], categories=["b", "c", "d"])
         >>> ser = pd.Series(raw_cat)
         >>> ser.cat.categories
-        Index(['b', 'c', 'd'], dtype='object')
+        Index(['b', 'c', 'd'], dtype='str')
 
         For :class:`pandas.Categorical`:
 
         >>> cat = pd.Categorical(["a", "b"], ordered=True)
         >>> cat.categories
-        Index(['a', 'b'], dtype='object')
+        Index(['a', 'b'], dtype='str')
 
         For :class:`pandas.CategoricalIndex`:
 
         >>> ci = pd.CategoricalIndex(["a", "c", "b", "a", "c", "b"])
         >>> ci.categories
-        Index(['a', 'b', 'c'], dtype='object')
+        Index(['a', 'b', 'c'], dtype='str')
 
         >>> ci = pd.CategoricalIndex(["a", "c"], categories=["c", "b", "a"])
         >>> ci.categories
-        Index(['c', 'b', 'a'], dtype='object')
+        Index(['c', 'b', 'a'], dtype='str')
         """
         return self.dtype.categories
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -647,7 +647,7 @@ def categories(self) -> Index:
         --------
         >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=True)
         >>> cat_type.categories
-        Index(['a', 'b'], dtype='object')
+        Index(['a', 'b'], dtype='str')
         """
         return self._categories
 
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -158,9 +158,9 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
 
     >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]])
     >>> df
-         0     1    2
-    0  ant   bee  cat
-    1  dog  None  fly
+         0    1    2
+    0  ant  bee  cat
+    1  dog  NaN  fly
     >>> pd.isna(df)
            0      1      2
     0  False  False  False
@@ -373,9 +373,9 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
 
     >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]])
     >>> df
-         0     1    2
-    0  ant   bee  cat
-    1  dog  None  fly
+         0    1    2
+    0  ant  bee  cat
+    1  dog  NaN  fly
     >>> pd.notna(df)
           0      1     2
     0  True   True  True
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1015,8 +1015,7 @@ def axes(self) -> list[Index]:
         --------
         >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
         >>> df.axes
-        [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'],
-        dtype='object')]
+        [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='str')]
         """
         return [self.index, self.columns]
 
@@ -14070,7 +14069,7 @@ def values(self) -> np.ndarray:
         ...     columns=("name", "max_speed", "rank"),
         ... )
         >>> df2.dtypes
-        name          object
+        name             str
         max_speed    float64
         rank          object
         dtype: object
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -4628,13 +4628,13 @@ def ngroup(self, ascending: bool = True):
         --------
         >>> df = pd.DataFrame({"color": ["red", None, "red", "blue", "blue", "red"]})
         >>> df
-           color
-        0    red
-        1   None
-        2    red
-        3   blue
-        4   blue
-        5    red
+          color
+        0   red
+        1   NaN
+        2   red
+        3  blue
+        4  blue
+        5   red
         >>> df.groupby("color").ngroup()
         0    1.0
         1    NaN
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -368,7 +368,7 @@ class Index(IndexOpsMixin, PandasObject):
     Index([1, 2, 3], dtype='int64')
 
     >>> pd.Index(list("abc"))
-    Index(['a', 'b', 'c'], dtype='object')
+    Index(['a', 'b', 'c'], dtype='str')
 
     >>> pd.Index([1, 2, 3], dtype="uint8")
     Index([1, 2, 3], dtype='uint8')
@@ -7599,7 +7599,7 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index:
     Examples
     --------
     >>> ensure_index(["a", "b"])
-    Index(['a', 'b'], dtype='object')
+    Index(['a', 'b'], dtype='str')
 
     >>> ensure_index([("a", "a"), ("b", "c")])
     Index([('a', 'a'), ('b', 'c')], dtype='object')
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
@@ -77,7 +77,7 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame:
     >>> df_not_necessarily_pandas = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
     >>> interchange_object = df_not_necessarily_pandas.__dataframe__()
     >>> interchange_object.column_names()
-    Index(['A', 'B'], dtype='object')
+    Index(['A', 'B'], dtype='str')
     >>> df_pandas = pd.api.interchange.from_dataframe(
     ...     interchange_object.select_columns_by_name(["A"])
     ... )
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
@@ -258,7 +258,7 @@ def concat(
     1    b
     0    c
     1    d
-    dtype: object
+    dtype: str
 
     Clear the existing index and reset it in the result
     by setting the ``ignore_index`` option to ``True``.
@@ -268,7 +268,7 @@ def concat(
     1    b
     2    c
     3    d
-    dtype: object
+    dtype: str
 
     Add a hierarchical index at the outermost level of
     the data with the ``keys`` option.
@@ -278,7 +278,7 @@ def concat(
         1    b
     s2  0    c
         1    d
-    dtype: object
+    dtype: str
 
     Label the index keys you create with the ``names`` option.
 
@@ -288,7 +288,7 @@ def concat(
                  1         b
     s2           0         c
                  1         d
-    dtype: object
+    dtype: str
 
     Combine two ``DataFrame`` objects with identical columns.