diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst
index 53a8d28687518..0d99ac289f6c9 100644
--- a/doc/source/whatsnew/v2.3.2.rst
+++ b/doc/source/whatsnew/v2.3.2.rst
@@ -26,6 +26,7 @@ Bug fixes
   "string" type in the JSON Table Schema for :class:`StringDtype` columns
   (:issue:`61889`)
 - Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`)
+- Fixed ``Series.str.contains`` with a compiled regex on Arrow string dtype: the pattern string is now passed on to pyarrow, and patterns compiled with flags fall back to the object-dtype implementation (:issue:`61942`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_232.contributors:
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 2ca12870709f0..ea13062448dfe 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -346,6 +346,13 @@ def _str_contains(
     ):
         if flags:
             return super()._str_contains(pat, case, flags, na, regex)
+        if isinstance(pat, re.Pattern):
+            if pat.flags & ~re.UNICODE:
+                # Flags baked into the compiled pattern (e.g. re.IGNORECASE)
+                # would be lost by passing only ``pat.pattern`` on; use the
+                # object-dtype implementation, which keeps the pattern as is.
+                return super()._str_contains(pat, case, flags, na, regex)
+            pat = pat.pattern
 
         return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
 
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 30e6ebf0eed13..425030ed63fb5 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -281,6 +281,28 @@ def test_contains_nan(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
+def test_str_contains_compiled_regex_arrow_dtype(any_string_dtype):
+    # GH#61942
+    ser = Series(["foo", "bar", "baz"], dtype=any_string_dtype)
+    pat = re.compile("ba.")
+    result = ser.str.contains(pat)
+    # A compiled pattern must behave exactly like its pattern string, so the
+    # string form supplies the expected dtype for every string dtype.
+    expected = ser.str.contains(pat.pattern)
+    tm.assert_series_equal(result, expected)
+    assert result.tolist() == [False, True, True]
+
+
+def test_str_contains_compiled_regex_keeps_flags(any_string_dtype):
+    # GH#61942: flags compiled into the pattern must not be dropped
+    ser = Series(["foo", "bar", "baz"], dtype=any_string_dtype)
+    pat = re.compile("BA.", flags=re.IGNORECASE)
+    result = ser.str.contains(pat)
+    expected = ser.str.contains("BA.", flags=re.IGNORECASE)
+    tm.assert_series_equal(result, expected)
+    assert result.tolist() == [False, True, True]
+
+
 # --------------------------------------------------------------------------------------
 # str.startswith
 # --------------------------------------------------------------------------------------