From 091baa8c975aa68f6e04d13229cb716bb994e723 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 28 Oct 2024 16:14:09 +0100
Subject: [PATCH 01/10] String dtype: use ObjectEngine for indexing for now
 (correctness over performance)

---
 pandas/core/indexes/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 4a90b164c89cc..af2f83927f197 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -875,8 +875,7 @@ def _engine(
             # error: Item "ExtensionArray" of "Union[ExtensionArray,
             # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
             target_values = self._data._ndarray  # type: ignore[union-attr]
-        elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
-            return libindex.StringEngine(target_values)
+        # TODO re-enable StringEngine for string dtype
 
         # error: Argument 1 to "ExtensionEngine" has incompatible type
         # "ndarray[Any, Any]"; expected "ExtensionArray"

From cfb73f5ab1c4d9c9e78751ff28a2ae1997ae473a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 15 Nov 2024 17:36:14 +0100
Subject: [PATCH 02/10] add string-specific ObjectEngine subclass for
 pre-processing of input values

---
 pandas/_libs/index.pyi      |  1 +
 pandas/_libs/index.pyx      | 10 ++++++++++
 pandas/core/indexes/base.py |  2 ++
 3 files changed, 13 insertions(+)

diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
index bf6d8ba8973d3..99b45f474e4f3 100644
--- a/pandas/_libs/index.pyi
+++ b/pandas/_libs/index.pyi
@@ -54,6 +54,7 @@ class UInt16Engine(IndexEngine): ...
 class UInt8Engine(IndexEngine): ...
 class ObjectEngine(IndexEngine): ...
 class StringEngine(IndexEngine): ...
+class StringObjectEngine(ObjectEngine): ...
 class DatetimeEngine(Int64Engine): ...
 class TimedeltaEngine(DatetimeEngine): ...
 class PeriodEngine(Int64Engine): ...
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 1506a76aa94a6..94698aadac771 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -557,6 +557,16 @@ cdef class StringEngine(IndexEngine):
             raise KeyError(val)
         return str(val)
 
+cdef class StringObjectEngine(ObjectEngine):
+
+    cdef _check_type(self, object val):
+        if isinstance(val, str):
+            return val
+        elif checknull(val):
+            return np.nan
+        else:
+            raise KeyError(val)
+
 
 cdef class DatetimeEngine(Int64Engine):
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index af2f83927f197..26f2e27924493 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -876,6 +876,8 @@ def _engine(
             # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
             target_values = self._data._ndarray  # type: ignore[union-attr]
         # TODO re-enable StringEngine for string dtype
+        elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
+            return libindex.StringObjectEngine(target_values)
 
         # error: Argument 1 to "ExtensionEngine" has incompatible type
         # "ndarray[Any, Any]"; expected "ExtensionArray"

From 6892f8364f5595ec097b0fb9f039ee058cf7ac7e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 15 Nov 2024 19:55:37 +0100
Subject: [PATCH 03/10] remove xfails

---
 pandas/tests/frame/indexing/test_indexing.py | 3 ---
 pandas/tests/reshape/test_pivot.py           | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 84c01e0be3b6f..a9bc485283985 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -9,8 +9,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas._libs import iNaT
 from pandas.errors import InvalidIndexError
 
@@ -503,7 +501,6 @@ def test_setitem_ambig(self, using_infer_string):
         else:
             assert dm[2].dtype == np.object_
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_None(self, float_frame):
         # GH #766
         float_frame[None] = float_frame["A"]
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index d8a9acdc561fd..3f48d6af7df2a 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -2698,9 +2698,6 @@ def test_pivot_index_is_none(self):
         expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(
-        using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
-    )
     def test_pivot_values_is_none(self):
         # GH#48293
         df = DataFrame({None: [1], "b": 2, "c": 3})

From e007299cebe221d396fef2046bdc510450457e6d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 15 Nov 2024 20:14:26 +0100
Subject: [PATCH 04/10] add tests for get_loc + fix for NA variant of string
 dtype

---
 pandas/_libs/index.pyx                       |  9 +++++-
 pandas/core/indexes/base.py                  |  3 +-
 pandas/tests/indexes/string/test_indexing.py | 31 ++++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 94698aadac771..c219d0b63870f 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -559,11 +559,18 @@ cdef class StringEngine(IndexEngine):
 
 cdef class StringObjectEngine(ObjectEngine):
 
+    cdef:
+        object na_value
+
+    def __init__(self, ndarray values, na_value):
+        super().__init__(values)
+        self.na_value = na_value
+
     cdef _check_type(self, object val):
         if isinstance(val, str):
             return val
         elif checknull(val):
-            return np.nan
+            return self.na_value
         else:
             raise KeyError(val)
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 26f2e27924493..71abcd2c6e13f 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -875,9 +875,8 @@ def _engine(
             # error: Item "ExtensionArray" of "Union[ExtensionArray,
             # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
             target_values = self._data._ndarray  # type: ignore[union-attr]
-        # TODO re-enable StringEngine for string dtype
         elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
-            return libindex.StringObjectEngine(target_values)
+            return libindex.StringObjectEngine(target_values, self.dtype.na_value)
 
         # error: Argument 1 to "ExtensionEngine" has incompatible type
         # "ndarray[Any, Any]"; expected "ExtensionArray"
diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
index 755b7109a5a04..4b2de683c353e 100644
--- a/pandas/tests/indexes/string/test_indexing.py
+++ b/pandas/tests/indexes/string/test_indexing.py
@@ -6,6 +6,37 @@
 import pandas._testing as tm
 
 
+class TestGetLoc:
+    def test_get_loc(self, any_string_dtype):
+        index = Index(["a", "b", "c"], dtype=any_string_dtype)
+        assert index.get_loc("b") == 1
+
+    def test_get_loc_raises(self, any_string_dtype):
+        index = Index(["a", "b", "c"], dtype=any_string_dtype)
+        with pytest.raises(KeyError, match="d"):
+            index.get_loc("d")
+
+    def test_get_loc_invalid_value(self, any_string_dtype):
+        index = Index(["a", "b", "c"], dtype=any_string_dtype)
+        with pytest.raises(KeyError, match="1"):
+            index.get_loc(1)
+
+    def test_get_loc_non_unique(self, any_string_dtype):
+        index = Index(["a", "b", "a"], dtype=any_string_dtype)
+        result = index.get_loc("a")
+        expected = np.array([True, False, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture):
+        index = Index(["a", "b", "c"], dtype=any_string_dtype)
+        with pytest.raises(KeyError):
+            index.get_loc(nulls_fixture)
+
+    def test_get_loc_missing(self, any_string_dtype, nulls_fixture):
+        index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype)
+        assert index.get_loc(nulls_fixture) == 2
+
+
 class TestGetIndexer:
     @pytest.mark.parametrize(
         "method,expected",

From bb148ba8a40dd9d4344ad9464ed5396473824aff Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 15 Nov 2024 20:51:04 +0100
Subject: [PATCH 05/10] support get_indexer

---
 pandas/core/indexes/base.py                  |  4 +++
 pandas/tests/indexes/string/test_indexing.py | 32 ++++++++++++++++----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 71abcd2c6e13f..837a678736cd0 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6222,6 +6222,10 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
                 # let's instead try with a straight Index
                 self = Index(self._values)
 
+        elif self.dtype == "string" and other.dtype == "object":
+            if lib.is_string_array(other._values, skipna=True):
+                return self, other.astype(self.dtype)
+
         if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
             # Reverse op so we dont need to re-implement on the subclasses
             other, self = other._maybe_downcast_for_indexing(self)
diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
index 4b2de683c353e..dc8e1c0ba9bee 100644
--- a/pandas/tests/indexes/string/test_indexing.py
+++ b/pandas/tests/indexes/string/test_indexing.py
@@ -72,12 +72,32 @@ def test_get_indexer_strings_raises(self, any_string_dtype):
                 ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
             )
 
+    @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
+    def test_get_indexer_missing(self, any_string_dtype, null):
+        # NaT and Decimal("NaN") from null_fixture are not supported for string dtype
+        index = Index(["a", "b", null], dtype=any_string_dtype)
+        result = index.get_indexer(["a", null, "c"])
+        expected = np.array([0, 2, -1], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
 
 class TestGetIndexerNonUnique:
-    @pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
-    def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
-        index = Index(["a", "b", None], dtype=any_string_dtype)
-        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
+    @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
+    def test_get_indexer_non_unique_nas(self, request, any_string_dtype, null):
+        if (
+            any_string_dtype == "string"
+            and any_string_dtype.na_value is pd.NA
+            and isinstance(null, float)
+        ):
+            # TODO(infer_string)
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason="NA-variant string dtype does not work with NaN"
+                )
+            )
+
+        index = Index(["a", "b", null], dtype=any_string_dtype)
+        indexer, missing = index.get_indexer_non_unique([null])
 
         expected_indexer = np.array([2], dtype=np.intp)
         expected_missing = np.array([], dtype=np.intp)
@@ -85,8 +105,8 @@ def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
         tm.assert_numpy_array_equal(missing, expected_missing)
 
         # actually non-unique
-        index = Index(["a", None, "b", None], dtype=any_string_dtype)
-        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
+        index = Index(["a", null, "b", null], dtype=any_string_dtype)
+        indexer, missing = index.get_indexer_non_unique([null])
 
         expected_indexer = np.array([1, 3], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)

From a669d75f661783f0e1029efca621319ce2e2f38e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 16 Nov 2024 15:26:14 +0100
Subject: [PATCH 06/10] update tests

---
 pandas/tests/frame/test_arithmetic.py | 23 +++++++++++++++--------
 pandas/tests/reshape/test_pivot.py    |  9 +++++----
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 6b61fe8b05219..f9342cf33f6f9 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -11,8 +11,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -2101,12 +2099,21 @@ def test_enum_column_equality():
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_mixed_col_index_dtype():
+def test_mixed_col_index_dtype(any_string_dtype):
     # GH 47382
-    df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
-    df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
-    df1.columns = df2.columns.astype("string")
+    df1 = DataFrame(
+        columns=Index(list("abc"), dtype=any_string_dtype), data=1.0, index=[0]
+    )
+    df2 = DataFrame(columns=Index(list("abc"), dtype="object"), data=0.0, index=[0])
+
     result = df1 + df2
-    expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
+    expected = DataFrame(
+        columns=Index(list("abc"), dtype=any_string_dtype), data=1.0, index=[0]
+    )
+    tm.assert_frame_equal(result, expected)
+
+    result = df2 + df1
+    expected = DataFrame(
+        columns=Index(list("abc"), dtype="object"), data=1.0, index=[0]
+    )
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 3f48d6af7df2a..a70f31ddc8c62 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -2664,6 +2664,8 @@ def test_pivot_columns_not_given(self):
         with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
             df.pivot()
 
+    # this still fails because columns=None gets passed down to unstack as level=None
+    # while at that point None was converted to NaN
     @pytest.mark.xfail(
         using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
     )
@@ -2682,10 +2684,7 @@ def test_pivot_columns_is_none(self):
         expected = DataFrame({1: 3}, index=Index([2], name="b"))
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(
-        using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
-    )
-    def test_pivot_index_is_none(self):
+    def test_pivot_index_is_none(self, using_infer_string):
         # GH#48293
         df = DataFrame({None: [1], "b": 2, "c": 3})
 
@@ -2696,6 +2695,8 @@ def test_pivot_index_is_none(self):
 
         result = df.pivot(columns="b", index=None, values="c")
         expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
+        if using_infer_string:
+            expected.index.name = np.nan
         tm.assert_frame_equal(result, expected)
 
     def test_pivot_values_is_none(self):

From fccd220c32977937932042cdd57986fd73775631 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 17 Nov 2024 09:58:31 +0100
Subject: [PATCH 07/10] update xfail for parser test

---
 pandas/tests/io/parser/common/test_common_basic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 511db2c6a33d8..3680273f5e98a 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -15,6 +15,7 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
 from pandas.errors import (
     EmptyDataError,
     ParserError,
@@ -766,7 +767,7 @@ def test_dict_keys_as_names(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 @xfail_pyarrow  # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0
 def test_encoding_surrogatepass(all_parsers):
     # GH39017

From 81423007ea7ba53518c2e8007a86766c3b66a8f5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 17 Nov 2024 10:02:17 +0100
Subject: [PATCH 08/10] try to fix typing

---
 pandas/_libs/index.pyi      | 4 +++-
 pandas/core/indexes/base.py | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
index 99b45f474e4f3..3af2856d2fbbf 100644
--- a/pandas/_libs/index.pyi
+++ b/pandas/_libs/index.pyi
@@ -54,7 +54,6 @@ class UInt16Engine(IndexEngine): ...
 class UInt8Engine(IndexEngine): ...
 class ObjectEngine(IndexEngine): ...
 class StringEngine(IndexEngine): ...
-class StringObjectEngine(ObjectEngine): ...
 class DatetimeEngine(Int64Engine): ...
 class TimedeltaEngine(DatetimeEngine): ...
 class PeriodEngine(Int64Engine): ...
@@ -73,6 +72,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
 class MaskedUInt8Engine(MaskedIndexEngine): ...
 class MaskedBoolEngine(MaskedUInt8Engine): ...
 
+class StringObjectEngine(ObjectEngine):
+    def __init__(self, values: object, na_value) -> None: ...
+
 class BaseMultiIndexCodesEngine:
     levels: list[np.ndarray]
     offsets: np.ndarray  # np.ndarray[..., ndim=1]
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2eaa2c7944fd2..6c363864b5d03 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -876,7 +876,7 @@ def _engine(
             # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
             target_values = self._data._ndarray  # type: ignore[union-attr]
         elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
-            return libindex.StringObjectEngine(target_values, self.dtype.na_value)
+            return libindex.StringObjectEngine(target_values, self.dtype.na_value)  # type: ignore[union-attr]
 
         # error: Argument 1 to "ExtensionEngine" has incompatible type
         # "ndarray[Any, Any]"; expected "ExtensionArray"
@@ -6223,7 +6223,7 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
                 self = Index(self._values)
 
         elif self.dtype == "string" and other.dtype == "object":
-            if lib.is_string_array(other._values, skipna=True):
+            if lib.is_string_array(other._values, skipna=True):  # type: ignore[arg-type]
                 return self, other.astype(self.dtype)
 
         if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):

From 43a3edfbc2bd871b5879acfbf882dd28678cafc1 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 23 Nov 2024 16:46:55 +0100
Subject: [PATCH 09/10] limit get_loc to exact match for now

---
 pandas/_libs/index.pyx                       | 10 +++++-
 pandas/core/indexes/base.py                  |  5 ---
 pandas/tests/frame/indexing/test_indexing.py |  3 ++
 pandas/tests/frame/test_arithmetic.py        | 23 +++++-------
 pandas/tests/indexes/string/test_indexing.py | 38 ++++++++++----------
 pandas/tests/reshape/test_pivot.py           | 12 ++++---
 6 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index c219d0b63870f..688f943760d1f 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -561,15 +561,23 @@ cdef class StringObjectEngine(ObjectEngine):
 
     cdef:
         object na_value
+        bint uses_na
 
     def __init__(self, ndarray values, na_value):
         super().__init__(values)
         self.na_value = na_value
+        self.uses_na = na_value is C_NA
+
+    cdef bint _checknull(self, object val):
+        if self.uses_na:
+            return val is C_NA
+        else:
+            return util.is_nan(val)
 
     cdef _check_type(self, object val):
         if isinstance(val, str):
             return val
-        elif checknull(val):
+        elif self._checknull(val):
             return self.na_value
         else:
             raise KeyError(val)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 6c363864b5d03..165fe109c4c94 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5974,7 +5974,6 @@ def _should_fallback_to_positional(self) -> bool:
     def get_indexer_non_unique(
         self, target
     ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
-        target = ensure_index(target)
         target = self._maybe_cast_listlike_indexer(target)
 
         if not self._should_compare(target) and not self._should_partial_index(target):
@@ -6222,10 +6221,6 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
                 # let's instead try with a straight Index
                 self = Index(self._values)
 
-        elif self.dtype == "string" and other.dtype == "object":
-            if lib.is_string_array(other._values, skipna=True):  # type: ignore[arg-type]
-                return self, other.astype(self.dtype)
-
         if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
             # Reverse op so we dont need to re-implement on the subclasses
             other, self = other._maybe_downcast_for_indexing(self)
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index a9bc485283985..84c01e0be3b6f 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -9,6 +9,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas._libs import iNaT
 from pandas.errors import InvalidIndexError
 
@@ -501,6 +503,7 @@ def test_setitem_ambig(self, using_infer_string):
         else:
             assert dm[2].dtype == np.object_
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_None(self, float_frame):
         # GH #766
         float_frame[None] = float_frame["A"]
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index f9342cf33f6f9..6b61fe8b05219 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -11,6 +11,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -2099,21 +2101,12 @@ def test_enum_column_equality():
     tm.assert_series_equal(result, expected)
 
 
-def test_mixed_col_index_dtype(any_string_dtype):
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+def test_mixed_col_index_dtype():
     # GH 47382
-    df1 = DataFrame(
-        columns=Index(list("abc"), dtype=any_string_dtype), data=1.0, index=[0]
-    )
-    df2 = DataFrame(columns=Index(list("abc"), dtype="object"), data=0.0, index=[0])
-
+    df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
+    df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
+    df1.columns = df2.columns.astype("string")
     result = df1 + df2
-    expected = DataFrame(
-        columns=Index(list("abc"), dtype=any_string_dtype), data=1.0, index=[0]
-    )
-    tm.assert_frame_equal(result, expected)
-
-    result = df2 + df1
-    expected = DataFrame(
-        columns=Index(list("abc"), dtype="object"), data=1.0, index=[0]
-    )
+    expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
index dc8e1c0ba9bee..3afcec9d73fdd 100644
--- a/pandas/tests/indexes/string/test_indexing.py
+++ b/pandas/tests/indexes/string/test_indexing.py
@@ -6,6 +6,13 @@
 import pandas._testing as tm
 
 
+def _isnan(val):
+    try:
+        return val is not pd.NA and np.isnan(val)
+    except TypeError:
+        return False
+
+
 class TestGetLoc:
     def test_get_loc(self, any_string_dtype):
         index = Index(["a", "b", "c"], dtype=any_string_dtype)
@@ -34,7 +41,14 @@ def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture):
 
     def test_get_loc_missing(self, any_string_dtype, nulls_fixture):
         index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype)
-        assert index.get_loc(nulls_fixture) == 2
+        if any_string_dtype == "string" and (
+            (any_string_dtype.na_value is pd.NA and nulls_fixture is not pd.NA)
+            or (_isnan(any_string_dtype.na_value) and not _isnan(nulls_fixture))
+        ):
+            with pytest.raises(KeyError):
+                index.get_loc(nulls_fixture)
+        else:
+            assert index.get_loc(nulls_fixture) == 2
 
 
 class TestGetIndexer:
@@ -83,32 +97,20 @@ def test_get_indexer_missing(self, any_string_dtype, null):
 
 class TestGetIndexerNonUnique:
     @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
-    def test_get_indexer_non_unique_nas(self, request, any_string_dtype, null):
-        if (
-            any_string_dtype == "string"
-            and any_string_dtype.na_value is pd.NA
-            and isinstance(null, float)
-        ):
-            # TODO(infer_string)
-            request.applymarker(
-                pytest.mark.xfail(
-                    reason="NA-variant string dtype does not work with NaN"
-                )
-            )
-
+    def test_get_indexer_non_unique_nas(self, any_string_dtype, null):
         index = Index(["a", "b", null], dtype=any_string_dtype)
-        indexer, missing = index.get_indexer_non_unique([null])
+        indexer, missing = index.get_indexer_non_unique(["a", null])
 
-        expected_indexer = np.array([2], dtype=np.intp)
+        expected_indexer = np.array([0, 2], dtype=np.intp)
         expected_missing = np.array([], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)
         tm.assert_numpy_array_equal(missing, expected_missing)
 
         # actually non-unique
         index = Index(["a", null, "b", null], dtype=any_string_dtype)
-        indexer, missing = index.get_indexer_non_unique([null])
+        indexer, missing = index.get_indexer_non_unique(["a", null])
 
-        expected_indexer = np.array([1, 3], dtype=np.intp)
+        expected_indexer = np.array([0, 1, 3], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)
         tm.assert_numpy_array_equal(missing, expected_missing)
 
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 374d236c8ff39..f42f7f8232229 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -2668,8 +2668,6 @@ def test_pivot_columns_not_given(self):
         with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
             df.pivot()
 
-    # this still fails because columns=None gets passed down to unstack as level=None
-    # while at that point None was converted to NaN
     @pytest.mark.xfail(
         using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
     )
@@ -2688,7 +2686,10 @@ def test_pivot_columns_is_none(self):
         expected = DataFrame({1: 3}, index=Index([2], name="b"))
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_index_is_none(self, using_infer_string):
+    @pytest.mark.xfail(
+        using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
+    )
+    def test_pivot_index_is_none(self):
         # GH#48293
         df = DataFrame({None: [1], "b": 2, "c": 3})
 
@@ -2699,10 +2700,11 @@ def test_pivot_index_is_none(self, using_infer_string):
 
         result = df.pivot(columns="b", index=None, values="c")
         expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
-        if using_infer_string:
-            expected.index.name = np.nan
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
+    )
     def test_pivot_values_is_none(self):
         # GH#48293
         df = DataFrame({None: [1], "b": 2, "c": 3})

From c546a518aedbac0a96a4f2aea7ea8d18cb8a4d81 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 23 Nov 2024 17:20:33 +0100
Subject: [PATCH 10/10] fix for non-infer_string mode

---
 pandas/tests/indexes/string/test_indexing.py | 41 +++++++++++++++++---
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
index 3afcec9d73fdd..d1a278af337b7 100644
--- a/pandas/tests/indexes/string/test_indexing.py
+++ b/pandas/tests/indexes/string/test_indexing.py
@@ -87,22 +87,43 @@ def test_get_indexer_strings_raises(self, any_string_dtype):
             )
 
     @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
-    def test_get_indexer_missing(self, any_string_dtype, null):
+    def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string):
         # NaT and Decimal("NaN") from null_fixture are not supported for string dtype
         index = Index(["a", "b", null], dtype=any_string_dtype)
         result = index.get_indexer(["a", null, "c"])
-        expected = np.array([0, 2, -1], dtype=np.intp)
+        if using_infer_string:
+            expected = np.array([0, 2, -1], dtype=np.intp)
+        elif any_string_dtype == "string" and (
+            (any_string_dtype.na_value is pd.NA and null is not pd.NA)
+            or (_isnan(any_string_dtype.na_value) and not _isnan(null))
+        ):
+            expected = np.array([0, -1, -1], dtype=np.intp)
+        else:
+            expected = np.array([0, 2, -1], dtype=np.intp)
+
         tm.assert_numpy_array_equal(result, expected)
 
 
 class TestGetIndexerNonUnique:
     @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
-    def test_get_indexer_non_unique_nas(self, any_string_dtype, null):
+    def test_get_indexer_non_unique_nas(
+        self, any_string_dtype, null, using_infer_string
+    ):
         index = Index(["a", "b", null], dtype=any_string_dtype)
         indexer, missing = index.get_indexer_non_unique(["a", null])
 
-        expected_indexer = np.array([0, 2], dtype=np.intp)
-        expected_missing = np.array([], dtype=np.intp)
+        if using_infer_string:
+            expected_indexer = np.array([0, 2], dtype=np.intp)
+            expected_missing = np.array([], dtype=np.intp)
+        elif any_string_dtype == "string" and (
+            (any_string_dtype.na_value is pd.NA and null is not pd.NA)
+            or (_isnan(any_string_dtype.na_value) and not _isnan(null))
+        ):
+            expected_indexer = np.array([0, -1], dtype=np.intp)
+            expected_missing = np.array([1], dtype=np.intp)
+        else:
+            expected_indexer = np.array([0, 2], dtype=np.intp)
+            expected_missing = np.array([], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)
         tm.assert_numpy_array_equal(missing, expected_missing)
 
@@ -110,7 +131,15 @@ def test_get_indexer_non_unique_nas(self, any_string_dtype, null):
         index = Index(["a", null, "b", null], dtype=any_string_dtype)
         indexer, missing = index.get_indexer_non_unique(["a", null])
 
-        expected_indexer = np.array([0, 1, 3], dtype=np.intp)
+        if using_infer_string:
+            expected_indexer = np.array([0, 1, 3], dtype=np.intp)
+        elif any_string_dtype == "string" and (
+            (any_string_dtype.na_value is pd.NA and null is not pd.NA)
+            or (_isnan(any_string_dtype.na_value) and not _isnan(null))
+        ):
+            pass
+        else:
+            expected_indexer = np.array([0, 1, 3], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)
         tm.assert_numpy_array_equal(missing, expected_missing)