diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst index faa61cf4bd3bc..03244c808ad03 100644 --- a/doc/source/whatsnew/v2.3.2.rst +++ b/doc/source/whatsnew/v2.3.2.rst @@ -22,7 +22,10 @@ become the default string dtype in pandas 3.0. See Bug fixes ^^^^^^^^^ -- +- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the + "string" type in the JSON Table Schema for :class:`StringDtype` columns + (:issue:`61889`) + .. --------------------------------------------------------------------------- .. _whatsnew_232.contributors: diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 4d9fba72cf173..c72411d87eabf 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -90,8 +90,6 @@ def as_json_table_type(x: DtypeObj) -> str: return "datetime" elif lib.is_np_dtype(x, "m"): return "duration" - elif isinstance(x, ExtensionDtype): - return "any" elif is_string_dtype(x): return "string" else: @@ -197,7 +195,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: """ typ = field["type"] if typ == "string": - return "object" + return field.get("extDtype", None) elif typ == "integer": return field.get("extDtype", "int64") elif typ == "number": diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 1c7320aa7a083..aac271b3f1f79 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -69,7 +69,7 @@ def test_build_table_schema(self, df_schema, using_infer_string): "primaryKey": ["idx"], } if using_infer_string: - expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"} + expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"} assert result == expected result = build_table_schema(df_schema) assert "pandas_version" in result @@ -119,10 +119,10 @@ def test_multiindex(self, df_schema, using_infer_string): if using_infer_string: expected["fields"][0] = { "name": "level_0", - "type": "any", + "type": "string", "extDtype": "str", } - expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"} + expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"} assert result == expected df.index.names = ["idx0", None] @@ -305,7 +305,7 @@ def test_to_json(self, df_table, using_infer_string): ] if using_infer_string: - fields[2] = {"name": "B", "type": "any", "extDtype": "str"} + fields[2] = {"name": "B", "type": "string", "extDtype": "str"} schema = {"fields": fields, "primaryKey": ["idx"]} data = [ @@ -544,7 +544,7 @@ def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered): }, CategoricalDtype(categories=["a", "b", "c"], ordered=True), ), - ({"type": "string"}, "object"), + ({"type": "string"}, None), ], ) def test_convert_json_field_to_pandas_type(self, inp, exp): diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index b7bb057bc538e..3da180db94db5 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -50,7 +50,7 @@ def test_build_table_schema(self): {"name": "index", "type": "integer"}, {"name": "A", "type": "any", "extDtype": "DateDtype"}, {"name": "B", "type": "number", "extDtype": "decimal"}, - {"name": "C", "type": "any", "extDtype": "string"}, + {"name": "C", "type": "string", "extDtype": "string"}, {"name": "D", "type": "integer", "extDtype": "Int64"}, ], "primaryKey": ["index"], @@ -96,10 +96,10 @@ def test_as_json_table_type_ext_decimal_dtype(self): ], ) def test_as_json_table_type_ext_string_array_dtype(self, string_data): - assert as_json_table_type(string_data.dtype) == "any" + assert as_json_table_type(string_data.dtype) == "string" def test_as_json_table_type_ext_string_dtype(self): - assert as_json_table_type(StringDtype()) == "any" + assert as_json_table_type(StringDtype()) == "string" @pytest.mark.parametrize( "integer_data", @@ -204,7 +204,7 @@ def test_build_string_series(self, sa): fields = [ {"name": "id", "type": "integer"}, - {"name": "a", "type": "any", "extDtype": "string"}, + {"name": "a", "type": "string", "extDtype": "string"}, ] schema = {"fields": fields, "primaryKey": ["id"]} @@ -256,7 +256,7 @@ def test_to_json(self, df): OrderedDict({"name": "idx", "type": "integer"}), OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}), OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}), - OrderedDict({"name": "C", "type": "any", "extDtype": "string"}), + OrderedDict({"name": "C", "type": "string", "extDtype": "string"}), OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}), ]