diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3e359335719..fa7680df8a5 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -5474,11 +5474,17 @@ def test_memory_usage_list(): @pytest.mark.parametrize("rows", [10, 100]) def test_memory_usage_multi(rows): deep = True + # We need to sample without replacement to guarantee that the size of + # each level is always the same (exactly `rows` unique values). df = pd.DataFrame( { "A": np.arange(rows, dtype="int32"), - "B": np.random.choice(np.arange(3, dtype="int64"), rows), - "C": np.random.choice(np.arange(3, dtype="float64"), rows), + "B": np.random.choice( + np.arange(rows, dtype="int64"), rows, replace=False + ), + "C": np.random.choice( + np.arange(rows, dtype="float64"), rows, replace=False + ), } ).set_index(["B", "C"]) gdf = cudf.from_pandas(df) @@ -5486,8 +5492,8 @@ def test_memory_usage_multi(rows): # of the underlying columns, levels, and codes expect = rows * 16 # Source Columns expect += rows * 16 # Codes - expect += 3 * 8 # Level 0 - expect += 3 * 8 # Level 1 + expect += rows * 8 # Level 0 + expect += rows * 8 # Level 1 assert expect == gdf.index.memory_usage(deep=deep)