From 7929800bc03f7a5c3a456fbc953121799951b6ed Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 27 Dec 2023 17:13:58 +0100
Subject: [PATCH 1/3] CoW: Boolean indexer in MultiIndex raising read-only error

---
 pandas/core/indexes/multi.py            |  2 ++
 pandas/tests/copy_view/test_indexing.py | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 2a4e027e2b806..681a3287e46e6 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -3487,6 +3487,8 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
                         "cannot index with a boolean indexer that "
                         "is not the same length as the index"
                     )
+                if isinstance(k, (ABCSeries, Index)):
+                    k = k._values
                 lvl_indexer = np.asarray(k)
 
             elif is_list_like(k):
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 9afc98e558c11..7f7be355b76c7 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -1180,6 +1180,24 @@ def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write):
     tm.assert_series_equal(ser, expected)
 
 
+def test_midx_read_only_bool_indexer():
+    def mklbl(prefix, n):
+        return [f"{prefix}{i}" for i in range(n)]
+
+    idx = pd.MultiIndex.from_product(
+        [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)]
+    )
+    cols = pd.MultiIndex.from_tuples(
+        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"]
+    )
+    df = DataFrame(1, index=idx, columns=cols).sort_index().sort_index(axis=1)
+
+    mask = df[("a", "foo")] == 1
+    result = df.loc[pd.IndexSlice[mask, :, ["C1", "C3"]], :]
+    expected = df.loc[pd.IndexSlice[:, :, ["C1", "C3"]], :]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_loc_enlarging_with_dataframe(using_copy_on_write):
     df = DataFrame({"a": [1, 2, 3]})
     rhs = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})

From b7ac9be3a7d4c6a8fa1574eeea3eadfc905debdd Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Wed, 27 Dec 2023 20:44:48 +0100
Subject: [PATCH 2/3] Fixup

---
 pandas/core/indexes/multi.py            | 4 +---
 pandas/tests/copy_view/test_indexing.py | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 681a3287e46e6..fd1d60d5befdc 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -3487,9 +3487,7 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
                         "cannot index with a boolean indexer that "
                         "is not the same length as the index"
                     )
-                if isinstance(k, (ABCSeries, Index)):
-                    k = k._values
-                lvl_indexer = np.asarray(k)
+                lvl_indexer = np.asarray(k).copy()
 
             elif is_list_like(k):
                 # a collection of labels to include from this level (these are or'd)
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 7f7be355b76c7..3a1b732fce86e 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -1193,9 +1193,11 @@ def mklbl(prefix, n):
     df = DataFrame(1, index=idx, columns=cols).sort_index().sort_index(axis=1)
 
     mask = df[("a", "foo")] == 1
+    expected_mask = mask.copy()
     result = df.loc[pd.IndexSlice[mask, :, ["C1", "C3"]], :]
     expected = df.loc[pd.IndexSlice[:, :, ["C1", "C3"]], :]
     tm.assert_frame_equal(result, expected)
+    tm.assert_series_equal(mask, expected_mask)
 
 
 def test_loc_enlarging_with_dataframe(using_copy_on_write):

From b8d5c3ea182fbd4d94934b34cbd3bb0669d0173b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 28 Dec 2023 16:42:48 +0100
Subject: [PATCH 3/3] Add whatsnew

---
 doc/source/whatsnew/v2.2.0.rst          | 1 +
 pandas/core/indexes/multi.py            | 4 +++-
 pandas/tests/copy_view/test_indexing.py | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 5ee94b74c527e..38e82d43b3e92 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -773,6 +773,7 @@ Interval
 
 Indexing
 ^^^^^^^^
+- Bug in :meth:`DataFrame.loc` mutating a boolean indexer when :class:`DataFrame` has a :class:`MultiIndex` (:issue:`56635`)
 - Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`)
 - Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
 - Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index fd1d60d5befdc..02a841a2075fd 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -3487,7 +3487,9 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
                         "cannot index with a boolean indexer that "
                         "is not the same length as the index"
                     )
-                lvl_indexer = np.asarray(k).copy()
+                lvl_indexer = np.asarray(k)
+                if indexer is None:
+                    lvl_indexer = lvl_indexer.copy()
 
             elif is_list_like(k):
                 # a collection of labels to include from this level (these are or'd)
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 3a1b732fce86e..91cd77741f79b 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -1181,6 +1181,7 @@ def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write):
 
 
 def test_midx_read_only_bool_indexer():
+    # GH#56635
     def mklbl(prefix, n):
         return [f"{prefix}{i}" for i in range(n)]
 