Skip to content

Commit f7a2cfd

Browse files
[backport 2.3.x] BUG/DEPR: logical operation with bool and string (#61995) (#62114)
Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
1 parent 7981a43 commit f7a2cfd

File tree

4 files changed

+65
-0
lines changed

4 files changed

+65
-0
lines changed

doc/source/whatsnew/v2.3.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Bug fixes
2525
- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
2626
"string" type in the JSON Table Schema for :class:`StringDtype` columns
2727
(:issue:`61889`)
28+
- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`)
2829
- Fixed :meth:`~Series.str.match`, :meth:`~Series.str.fullmatch` and :meth:`~Series.str.contains`
2930
with compiled regex for the Arrow-backed string dtype (:issue:`61964`, :issue:`61942`)
3031

pandas/core/arrays/arrow/array.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,25 @@ def _logical_method(self, other, op):
829829
# integer types. Otherwise these are boolean ops.
830830
if pa.types.is_integer(self._pa_array.type):
831831
return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS)
832+
elif (
833+
(
834+
pa.types.is_string(self._pa_array.type)
835+
or pa.types.is_large_string(self._pa_array.type)
836+
)
837+
and op in (roperator.ror_, roperator.rand_, roperator.rxor)
838+
and isinstance(other, np.ndarray)
839+
and other.dtype == bool
840+
):
841+
# GH#60234 backward compatibility for the move to StringDtype in 3.0
842+
op_name = op.__name__[1:].strip("_")
843+
warnings.warn(
844+
f"'{op_name}' operations between boolean dtype and {self.dtype} are "
845+
"deprecated and will raise in a future version. Explicitly "
846+
"cast the strings to a boolean dtype before operating instead.",
847+
DeprecationWarning,
848+
stacklevel=find_stack_level(),
849+
)
850+
return op(other, self.astype(bool))
832851
else:
833852
return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)
834853

pandas/core/arrays/string_.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
missing,
5050
nanops,
5151
ops,
52+
roperator,
5253
)
5354
from pandas.core.algorithms import isin
5455
from pandas.core.array_algos import masked_reductions
@@ -385,6 +386,26 @@ class BaseStringArray(ExtensionArray):
385386

386387
dtype: StringDtype
387388

389+
# TODO(4.0): Once the deprecation here is enforced, this method can be
390+
# removed and we use the parent class method instead.
391+
def _logical_method(self, other, op):
    """
    Handle reversed logical ops where ``other`` is a bool ndarray.

    Only one combination is handled: ``op`` is one of ``roperator.ror_``,
    ``roperator.rand_`` or ``roperator.rxor`` and ``other`` is an
    ``np.ndarray`` whose dtype equals ``bool``. In that case a
    ``DeprecationWarning`` is issued and ``op(other, self.astype(bool))``
    is returned. Every other combination returns ``NotImplemented``.
    """
    if op not in (roperator.ror_, roperator.rand_, roperator.rxor):
        return NotImplemented
    if not (isinstance(other, np.ndarray) and other.dtype == bool):
        return NotImplemented

    # GH#60234 backward compatibility for the move to StringDtype in 3.0
    # Drop the leading "r" and any surrounding underscores from the
    # function name so the message names the plain operator
    # (e.g. a name like "ror_" becomes "or").
    op_name = op.__name__[1:].strip("_")
    msg = (
        f"'{op_name}' operations between boolean dtype and {self.dtype} are "
        "deprecated and will raise in a future version. Explicitly "
        "cast the strings to a boolean dtype before operating instead."
    )
    warnings.warn(msg, DeprecationWarning, stacklevel=find_stack_level())

    as_bool = self.astype(bool)
    return op(other, as_bool)
408+
388409
@doc(ExtensionArray.tolist)
389410
def tolist(self):
390411
if self.ndim > 1:

pandas/tests/strings/test_strings.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,3 +776,27 @@ def test_series_str_decode():
776776
result = Series([b"x", b"y"]).str.decode(encoding="UTF-8", errors="strict")
777777
expected = Series(["x", "y"], dtype="str")
778778
tm.assert_series_equal(result, expected)
779+
780+
781+
def test_reversed_logical_ops(any_string_dtype):
    # GH#60234
    # A bool Series on the left combined with a string-like Series on the
    # right must match the result of casting the right side to bool first.
    # A DeprecationWarning is expected for every dtype except object.
    dtype = any_string_dtype
    if dtype == object:
        warn = None
    else:
        warn = DeprecationWarning

    left = Series([True, False, False, True])
    right = Series(["", "", "b", "c"], dtype=dtype)
    right_as_bool_ops = (
        lambda a, b: a | b,
        lambda a, b: a & b,
        lambda a, b: a ^ b,
    )

    msg = "operations between boolean dtype and"
    for binop in right_as_bool_ops:
        with tm.assert_produces_warning(warn, match=msg):
            result = binop(left, right)
        # The explicit cast happens outside the warning check, so the
        # expected value is built without triggering the deprecated path.
        expected = binop(left, right.astype(bool))
        tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)