rapidsai · rapids-bot · Apr 24, 2023 · Apr 20, 2023 · Apr 21, 2023 · Apr 22, 2023
diff --git a/benchmarks/cugraph/pytest-based/bench_algos.py b/benchmarks/cugraph/pytest-based/bench_algos.py
@@ -66,16 +66,23 @@ def createGraph(csvFileName, graphType=None):
         else:
             graphType = cugraph.Graph()
 
+    gdf = utils.read_csv_file(csvFileName)
+    if len(gdf.columns) == 2:
+        edge_attr = None
+    else:
+        edge_attr = "2"
+
     return cugraph.from_cudf_edgelist(
-        utils.read_csv_file(csvFileName),
-        source="0", destination="1", edge_attr="2",
+        gdf,
+        source="0", destination="1", edge_attr=edge_attr,
         create_using=graphType,
         renumber=True)
 
 
 # Record the current RMM settings so reinitialize() will be called only when a
-# change is needed (RMM defaults both values to False). This allows the
-# --no-rmm-reinit option to prevent reinitialize() from being called at all
+# change is needed (RMM defaults both values to False). Unless the pytest user
+# passes --allow-rmm-reinit, the RMM options cannot be selected directly; this
+# restriction keeps reinitialize() from being called more than once
 # (see conftest.py for details).
 RMM_SETTINGS = {"managed_mem": False,
                 "pool_alloc": False}
@@ -208,7 +215,8 @@ def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed):
 
 
 def bench_bfs(gpubenchmark, anyGraphWithAdjListComputed):
-    gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, 0)
+    start = anyGraphWithAdjListComputed.edgelist.edgelist_df["src"][0]
+    gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, start)
 
 
 def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed):
@@ -217,7 +225,8 @@ def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed):
 
 
 def bench_sssp(gpubenchmark, anyGraphWithAdjListComputed):
-    gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, 0)
+    start = anyGraphWithAdjListComputed.edgelist.edgelist_df["src"][0]
+    gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, start)
 
 
 def bench_jaccard(gpubenchmark, graphWithAdjListComputed):
@@ -232,8 +241,11 @@ def bench_louvain(gpubenchmark, graphWithAdjListComputed):
 
 def bench_weakly_connected_components(gpubenchmark,
                                       anyGraphWithAdjListComputed):
-    gpubenchmark(cugraph.weakly_connected_components,
-                 anyGraphWithAdjListComputed)
+    if anyGraphWithAdjListComputed.is_directed():
+        G = anyGraphWithAdjListComputed.to_undirected()
+    else:
+        G = anyGraphWithAdjListComputed
+    gpubenchmark(cugraph.weakly_connected_components, G)
 
 
 def bench_overlap(gpubenchmark, anyGraphWithAdjListComputed):

diff --git a/benchmarks/cugraph/pytest-based/conftest.py b/benchmarks/cugraph/pytest-based/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -12,24 +12,31 @@
 # limitations under the License.
 
 def pytest_addoption(parser):
-    parser.addoption("--no-rmm-reinit", action="store_true", default=False,
-                     help="Do not reinit RMM to run benchmarks with different"
-                          " managed memory and pool allocator options.")
+    parser.addoption("--allow-rmm-reinit",
+                     action="store_true",
+                     default=False,
+                     help="Allow RMM to be reinitialized, possibly multiple times within "
+                     "the same process, in order to run benchmarks with different managed "
+                     "memory and pool allocator options. This is not the default behavior "
+                     "since it does not represent a typical use case, and support for "
+                     "this may be limited. Instead, consider multiple pytest runs that "
+                     "use a fixed set of RMM settings.")
 
 
 def pytest_sessionstart(session):
-    # if the --no-rmm-reinit option is given, set (or add to) the CLI "mark
-    # expression" (-m) the markers for no managedmem and no poolallocator. This
-    # will cause the RMM reinit() function to not be called.
-    if session.config.getoption("no_rmm_reinit"):
-        newMarkexpr = "managedmem_off and poolallocator_off"
+    # If the --allow-rmm-reinit option is not given, set (or add to) the CLI
+    # "mark expression" (-m) the managedmem_off and poolallocator_on markers.
+    # This results in the RMM reinit() function being called only once in
+    # the running process (the typical use case).
+    if session.config.getoption("allow_rmm_reinit") is False:
         currentMarkexpr = session.config.getoption("markexpr")
 
         if ("managedmem" in currentMarkexpr) or \
            ("poolallocator" in currentMarkexpr):
             raise RuntimeError("managedmem and poolallocator markers cannot "
-                               "be used with --no-rmm-reinit")
+                               "be used without --allow-rmm-reinit.")
 
+        newMarkexpr = "managedmem_off and poolallocator_on"
         if currentMarkexpr:
             newMarkexpr = f"({currentMarkexpr}) and ({newMarkexpr})"
 

@@ -80,7 +80,7 @@ pushd benchmarks
 pytest \
   --capture=no \
   --verbose \
-  -m "managedmem_on and poolallocator_on and tiny" \
+  -m tiny \
   --benchmark-disable \
   cugraph/pytest-based/bench_algos.py
 popd
@@ -198,13 +198,13 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
       --cov-report=term \
       .
     popd
-    
+
     # Reactivate the test environment back
     set +u
     conda deactivate
     conda activate test
     set -u
-    
+
   else
     rapids-logger "skipping cugraph_pyg pytest on ARM64"
   fi

@@ -224,25 +224,23 @@ def create_obj_from_csv(
 
 
 def read_csv_file(csv_file, read_weights_in_sp=True):
-    print("Reading " + str(csv_file) + "...")
     if read_weights_in_sp is True:
         return cudf.read_csv(
             csv_file,
             delimiter=" ",
-            dtype=["int32", "int32", "float32"],
+            dtype={"0": "int32", "1": "int32", "2": "float32"},
             header=None,
         )
     else:
         return cudf.read_csv(
             csv_file,
             delimiter=" ",
-            dtype=["int32", "int32", "float64"],
+            dtype={"0": "int32", "1": "int32", "2": "float64"},
             header=None,
         )
 
 
 def read_dask_cudf_csv_file(csv_file, read_weights_in_sp=True, single_partition=True):
-    print("Reading " + str(csv_file) + "...")
     if read_weights_in_sp is True:
         if single_partition:
             chunksize = os.path.getsize(csv_file)