Skip to content

Commit

Permalink
Fix flaky memory usage test by guaranteeing array size.
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Jan 24, 2022
1 parent cfb6cbe commit b99c80d
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5474,20 +5474,26 @@ def test_memory_usage_list():
@pytest.mark.parametrize("rows", [10, 100])
def test_memory_usage_multi(rows):
    """Check that a cudf MultiIndex reports the expected memory usage.

    The expected footprint is modeled as the sum of the underlying source
    columns, the codes, and the level arrays. Sampling for the index levels
    is done WITHOUT replacement so every value is distinct, which guarantees
    each level holds exactly ``rows`` entries — with replacement the number
    of unique level values would vary run to run and the test would be flaky.
    """
    deep = True
    # We need to sample without replacement to guarantee that the size of the
    # levels are always the same.
    df = pd.DataFrame(
        {
            "A": np.arange(rows, dtype="int32"),
            "B": np.random.choice(
                np.arange(rows, dtype="int64"), rows, replace=False
            ),
            "C": np.random.choice(
                np.arange(rows, dtype="float64"), rows, replace=False
            ),
        }
    ).set_index(["B", "C"])
    gdf = cudf.from_pandas(df)
    # Assume MultiIndex memory footprint is just that
    # of the underlying columns, levels, and codes.
    expect = rows * 16  # Source Columns: int64 + float64 = 16 bytes per row
    expect += rows * 16  # Codes: two code arrays of 8 bytes per row
    expect += rows * 8  # Level 0: `rows` distinct int64 values
    expect += rows * 8  # Level 1: `rows` distinct float64 values

    assert expect == gdf.index.memory_usage(deep=deep)

Expand Down

0 comments on commit b99c80d

Please sign in to comment.