From c8536d55f22af1b3ecd36eedf4863e321a460dfc Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 3 Nov 2023 17:22:59 -0700 Subject: [PATCH 1/8] optimize_dgl_csc_codepath --- .../dataloading/utils/sampling_helpers.py | 81 ++++++++++--------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index a4f64668348..00e33fe4958 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -14,7 +14,6 @@ from typing import List, Tuple, Dict, Optional from collections import defaultdict import cudf -import cupy from cugraph.utilities.utils import import_optional from cugraph_dgl.nn import SparseGraph @@ -76,6 +75,8 @@ def _get_renumber_map(df): renumber_map_batch_indices = map[1 : map_starting_offset - 1].reset_index(drop=True) renumber_map_batch_indices = renumber_map_batch_indices - map_starting_offset + renumber_map = renumber_map.reset_index(drop=True) + map_end_offset = map_starting_offset + len(renumber_map) # We only need to drop rows if the length of dataframe is determined by the map # that is if map_length > sampled edges length @@ -444,53 +445,64 @@ def _process_sampled_df_csc( destinations, respectively. """ # dropna - major_offsets = df.major_offsets.dropna().values - label_hop_offsets = df.label_hop_offsets.dropna().values - renumber_map_offsets = df.renumber_map_offsets.dropna().values - renumber_map = df.map.dropna().values - minors = df.minors.dropna().values - - n_batches = renumber_map_offsets.size - 1 - n_hops = int((label_hop_offsets.size - 1) / n_batches) + major_offsets = torch.as_tensor(df.major_offsets.dropna().values, device="cuda") + label_hop_offsets = torch.as_tensor( + df.label_hop_offsets.dropna().values, device="cuda" + ) + renumber_map_offsets = torch.as_tensor( + df.renumber_map_offsets.dropna().values, device="cuda" + ) + renumber_map = torch.as_tensor(df.map.dropna().values, device="cuda") + minors = torch.as_tensor(df.minors.dropna().values, device="cuda") + n_batches = len(renumber_map_offsets) - 1 + n_hops = int((len(label_hop_offsets) - 1) / n_batches) # make global offsets local - major_offsets -= major_offsets[0] - label_hop_offsets -= label_hop_offsets[0] - renumber_map_offsets -= renumber_map_offsets[0] + # Have to make a clone as pytorch does not allow + # in-place operations on tensors + major_offsets -= major_offsets[0].clone() + label_hop_offsets -= label_hop_offsets[0].clone() + renumber_map_offsets -= renumber_map_offsets[0].clone() # get the sizes of each adjacency matrix (for MFGs) mfg_sizes = (label_hop_offsets[1:] - label_hop_offsets[:-1]).reshape( (n_batches, n_hops) ) n_nodes = renumber_map_offsets[1:] - renumber_map_offsets[:-1] - mfg_sizes = cupy.hstack((mfg_sizes, n_nodes.reshape(n_batches, -1))) + mfg_sizes = torch.hstack((mfg_sizes, n_nodes.reshape(n_batches, -1))) if reverse_hop_id: - mfg_sizes = mfg_sizes[:, ::-1] + mfg_sizes = mfg_sizes.flip(1) tensors_dict = {} renumber_map_list = [] + # Note: minors and major_offsets from BulkSampler are of type int32 + # and int64 respectively. Since pylibcugraphops binding code doesn't + # support distinct node and edge index type, we simply casting both + # to int32 for now. + minors = torch.as_tensor(minors, device="cuda").int() + major_offsets = torch.as_tensor(major_offsets, device="cuda").int() + renumber_map = torch.as_tensor(renumber_map, device="cuda") + renumber_map_offsets = torch.as_tensor(renumber_map_offsets, device="cuda") + + # Note: We transfer tensors to CPU here to avoid the overhead of + # transferring them in each iteration of the for loop below. + major_offsets_cpu = major_offsets.to("cpu").numpy() + label_hop_offsets_cpu = label_hop_offsets.to("cpu").numpy() + for batch_id in range(n_batches): batch_dict = {} - for hop_id in range(n_hops): hop_dict = {} idx = batch_id * n_hops + hop_id # idx in label_hop_offsets - major_offsets_start = label_hop_offsets[idx].item() - major_offsets_end = label_hop_offsets[idx + 1].item() - minors_start = major_offsets[major_offsets_start].item() - minors_end = major_offsets[major_offsets_end].item() - # Note: minors and major_offsets from BulkSampler are of type int32 - # and int64 respectively. Since pylibcugraphops binding code doesn't - # support distinct node and edge index type, we simply casting both - # to int32 for now. - hop_dict["minors"] = torch.as_tensor( - minors[minors_start:minors_end], device="cuda" - ).int() - hop_dict["major_offsets"] = torch.as_tensor( + major_offsets_start = label_hop_offsets_cpu[idx] + major_offsets_end = label_hop_offsets_cpu[idx + 1] + minors_start = major_offsets_cpu[major_offsets_start] + minors_end = major_offsets_cpu[major_offsets_end] + hop_dict["minors"] = minors[minors_start:minors_end] + hop_dict["major_offsets"] = ( major_offsets[major_offsets_start : major_offsets_end + 1] - - major_offsets[major_offsets_start], - device="cuda", - ).int() + - major_offsets[major_offsets_start] + ) if reverse_hop_id: batch_dict[n_hops - 1 - hop_id] = hop_dict else: @@ -499,12 +511,9 @@ def _process_sampled_df_csc( tensors_dict[batch_id] = batch_dict renumber_map_list.append( - torch.as_tensor( - renumber_map[ - renumber_map_offsets[batch_id] : renumber_map_offsets[batch_id + 1] - ], - device="cuda", - ) + renumber_map[ + renumber_map_offsets[batch_id] : renumber_map_offsets[batch_id + 1] + ], ) return tensors_dict, renumber_map_list, mfg_sizes.tolist() From 30b346f2d01be62d75488089433b086020379bd8 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 3 Nov 2023 17:26:42 -0700 Subject: [PATCH 2/8] optimize_dgl_csc_codepath --- .../dataloading/utils/sampling_helpers.py | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index 00e33fe4958..98218c9f520 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -445,15 +445,11 @@ def _process_sampled_df_csc( destinations, respectively. """ # dropna - major_offsets = torch.as_tensor(df.major_offsets.dropna().values, device="cuda") - label_hop_offsets = torch.as_tensor( - df.label_hop_offsets.dropna().values, device="cuda" - ) - renumber_map_offsets = torch.as_tensor( - df.renumber_map_offsets.dropna().values, device="cuda" - ) - renumber_map = torch.as_tensor(df.map.dropna().values, device="cuda") - minors = torch.as_tensor(df.minors.dropna().values, device="cuda") + major_offsets = cast_to_tensor(df.major_offsets.dropna()) + label_hop_offsets = cast_to_tensor(df.label_hop_offsets.dropna()) + renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna()) + renumber_map = cast_to_tensor(df.map.dropna()) + minors = cast_to_tensor(df.minors.dropna()) n_batches = len(renumber_map_offsets) - 1 n_hops = int((len(label_hop_offsets) - 1) / n_batches) @@ -479,11 +475,8 @@ def _process_sampled_df_csc( # and int64 respectively. Since pylibcugraphops binding code doesn't # support distinct node and edge index type, we simply casting both # to int32 for now. - minors = torch.as_tensor(minors, device="cuda").int() - major_offsets = torch.as_tensor(major_offsets, device="cuda").int() - renumber_map = torch.as_tensor(renumber_map, device="cuda") - renumber_map_offsets = torch.as_tensor(renumber_map_offsets, device="cuda") - + minors = minors.int() + major_offsets = major_offsets.int() # Note: We transfer tensors to CPU here to avoid the overhead of # transferring them in each iteration of the for loop below. major_offsets_cpu = major_offsets.to("cpu").numpy() From a51ab9508c4d4105ff5c93f6b35faa993f3dad5f Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Sat, 4 Nov 2023 13:48:42 -0700 Subject: [PATCH 3/8] Add cugraph_dgl_benchmark --- .../scale-benchmarks/cugraph_dgl_benchmark.py | 152 ++++++++++++++++++ .../cugraph-dgl/scale-benchmarks/model.py | 4 + 2 files changed, 156 insertions(+) create mode 100644 benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py diff --git a/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py b/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py new file mode 100644 index 00000000000..8b82ffcdad5 --- /dev/null +++ b/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py @@ -0,0 +1,152 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +os.environ["LIBCUDF_CUFILE_POLICY"] = "KVIKIO" +os.environ["KVIKIO_NTHREADS"] = "64" +os.environ["RAPIDS_NO_INITIALIZE"] = "1" +import json +import pandas as pd +import os +import time +from rmm.allocators.torch import rmm_torch_allocator +import rmm +import torch +from cugraph_dgl.dataloading import HomogenousBulkSamplerDataset +from model import run_1_epoch +from argparse import ArgumentParser +from load_graph_feats import load_node_labels, load_node_features + + +def create_dataloader(sampled_dir, total_num_nodes, sparse_format, return_type): + print("Creating dataloader", flush=True) + st = time.time() + dataset = HomogenousBulkSamplerDataset( + total_num_nodes, + edge_dir="in", + sparse_format=sparse_format, + return_type=return_type, + ) + + dataset.set_input_files(sampled_dir) + dataloader = torch.utils.data.DataLoader( + dataset, collate_fn=lambda x: x, shuffle=False, num_workers=0, batch_size=None + ) + et = time.time() + print(f"Time to create dataloader = {et - st:.2f} seconds", flush=True) + return dataloader + + +def setup_common_pool(): + rmm.reinitialize(initial_pool_size=5e9, pool_allocator=True) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + +def main(args): + print( + f"Running cugraph-dgl dataloading benchmark with the following parameters:\n" + f"Dataset path = {args.dataset_path}\n" + f"Sampling path = {args.sampling_path}\n" + ) + with open(os.path.join(args.dataset_path, "meta.json"), "r") as f: + input_meta = json.load(f) + + sampled_dirs = [ + os.path.join(args.sampling_path, f) for f in os.listdir(args.sampling_path) + ] + + time_ls = [] + for sampled_dir in sampled_dirs: + with open(os.path.join(sampled_dir, "output_meta.json"), "r") as f: + sampled_meta_d = json.load(f) + + replication_factor = sampled_meta_d["replication_factor"] + feat_load_st = time.time() + label_data = load_node_labels( + args.dataset_path, replication_factor, input_meta + )["paper"]["y"] + feat_data = feat_data = load_node_features( + args.dataset_path, replication_factor, node_type="paper" + ) + print( + f"Feature and label data loading took = {time.time()-feat_load_st}", + flush=True, + ) + + r_time_ls = e2e_benchmark(sampled_dir, feat_data, label_data, sampled_meta_d) + [x.update({"replication_factor": replication_factor}) for x in r_time_ls] + [x.update({"num_edges": sampled_meta_d["total_num_edges"]}) for x in r_time_ls] + time_ls.extend(r_time_ls) + + print( + f"Benchmark completed for replication factor = {replication_factor}\n{'=' * 30}", + flush=True, + ) + + df = pd.DataFrame(time_ls) + df.to_csv("dgl_e2e_benchmark.csv", index=False) + print(f"Benchmark completed for all replication factors\n{'=' * 30}", flush=True) + + +def e2e_benchmark( + sampled_dir: str, feat: torch.Tensor, y: torch.Tensor, sampled_meta_d: dict +): + """ + Run the e2e_benchmark + Args: + sampled_dir: directory containing the sampled graph + feat: node features + y: node labels + sampled_meta_d: dictionary containing the sampled graph metadata + """ + time_ls = [] + + # TODO: Make this a parameter in bulk sampling script + sampled_meta_d["sparse_format"] = "csc" + sampled_dir = os.path.join(sampled_dir, "samples") + dataloader = create_dataloader( + sampled_dir, + sampled_meta_d["total_num_nodes"], + sampled_meta_d["sparse_format"], + return_type="cugraph_dgl.nn.SparseGraph", + ) + time_d = run_1_epoch( + dataloader, + feat, + y, + fanout=sampled_meta_d["fanout"], + batch_size=sampled_meta_d["batch_size"], + model_backend="cugraph_dgl", + ) + time_ls.append(time_d) + print("=" * 30) + return time_ls + + +def parse_arguments(): + parser = ArgumentParser() + parser.add_argument( + "--dataset_path", type=str, default="/raid/vjawa/ogbn_papers100M/" + ) + parser.add_argument( + "--sampling_path", + type=str, + default="/raid/vjawa/nov_1_bulksampling_benchmarks/", + ) + return parser.parse_args() + + +if __name__ == "__main__": + setup_common_pool() + arguments = parse_arguments() + main(arguments) diff --git a/benchmarks/cugraph-dgl/scale-benchmarks/model.py b/benchmarks/cugraph-dgl/scale-benchmarks/model.py index 08ae0e8b1ee..8f70c94c51e 100644 --- a/benchmarks/cugraph-dgl/scale-benchmarks/model.py +++ b/benchmarks/cugraph-dgl/scale-benchmarks/model.py @@ -119,6 +119,10 @@ def run_1_epoch(dataloader, feat, y, fanout, batch_size, model_backend): else: model = None opt = None + + # Warmup RUN + times = train_model(model, dataloader, opt, feat, y) + epoch_st = time.time() times = train_model(model, dataloader, opt, feat, y) epoch_time = time.time() - epoch_st From 9eb946f820a6fe20d472bbe371e11a0d133598c7 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Sat, 4 Nov 2023 15:04:17 -0700 Subject: [PATCH 4/8] optimize_dgl_csc_codepath --- .../scale-benchmarks/cugraph_dgl_benchmark.py | 2 +- benchmarks/cugraph-dgl/scale-benchmarks/model.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py b/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py index 8b82ffcdad5..85f43b97b90 100644 --- a/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py +++ b/benchmarks/cugraph-dgl/scale-benchmarks/cugraph_dgl_benchmark.py @@ -94,7 +94,7 @@ def main(args): ) df = pd.DataFrame(time_ls) - df.to_csv("dgl_e2e_benchmark.csv", index=False) + df.to_csv("cugraph_dgl_e2e_benchmark.csv", index=False) print(f"Benchmark completed for all replication factors\n{'=' * 30}", flush=True) diff --git a/benchmarks/cugraph-dgl/scale-benchmarks/model.py b/benchmarks/cugraph-dgl/scale-benchmarks/model.py index 8f70c94c51e..9a9dfe58f96 100644 --- a/benchmarks/cugraph-dgl/scale-benchmarks/model.py +++ b/benchmarks/cugraph-dgl/scale-benchmarks/model.py @@ -57,11 +57,11 @@ def create_model(feat_size, num_classes, num_layers, model_backend="dgl"): def train_model(model, dataloader, opt, feat, y): - times = {key: 0 for key in ["mfg_creation", "feature", "m_fwd", "m_bkwd"]} + times_d = {key: 0 for key in ["mfg_creation", "feature", "m_fwd", "m_bkwd"]} epoch_st = time.time() mfg_st = time.time() for input_nodes, output_nodes, blocks in dataloader: - times["mfg_creation"] += time.time() - mfg_st + times_d["mfg_creation"] += time.time() - mfg_st if feat is not None: fst = time.time() input_nodes = input_nodes.to("cpu") @@ -71,23 +71,24 @@ def train_model(model, dataloader, opt, feat, y): output_nodes = output_nodes["paper"] output_nodes = output_nodes.to(y.device) y_batch = y[output_nodes].to("cuda") - times["feature"] += time.time() - fst + times_d["feature"] += time.time() - fst m_fwd_st = time.time() y_hat = model(blocks, input_feat) - times["m_fwd"] += time.time() - m_fwd_st + times_d["m_fwd"] += time.time() - m_fwd_st m_bkwd_st = time.time() loss = F.cross_entropy(y_hat, y_batch) opt.zero_grad() loss.backward() opt.step() - times["m_bkwd"] += time.time() - m_bkwd_st + times_d["m_bkwd"] += time.time() - m_bkwd_st mfg_st = time.time() print(f"Epoch time = {time.time() - epoch_st:.2f} seconds") + print(f"Time to create MFG = {times_d['mfg_creation']:.2f} seconds") - return times + return times_d def analyze_time(dataloader, times, epoch_time, fanout, batch_size): From 6d41e3eae01315cae60bab9138a7639d8a2feec5 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Sun, 5 Nov 2023 11:42:24 -0800 Subject: [PATCH 5/8] Remove reset_index --- .../cugraph_dgl/dataloading/utils/sampling_helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index 98218c9f520..f674bece8be 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -75,8 +75,6 @@ def _get_renumber_map(df): renumber_map_batch_indices = map[1 : map_starting_offset - 1].reset_index(drop=True) renumber_map_batch_indices = renumber_map_batch_indices - map_starting_offset - renumber_map = renumber_map.reset_index(drop=True) - map_end_offset = map_starting_offset + len(renumber_map) # We only need to drop rows if the length of dataframe is determined by the map # that is if map_length > sampled edges length @@ -450,6 +448,7 @@ def _process_sampled_df_csc( renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna()) renumber_map = cast_to_tensor(df.map.dropna()) minors = cast_to_tensor(df.minors.dropna()) + n_batches = len(renumber_map_offsets) - 1 n_hops = int((len(label_hop_offsets) - 1) / n_batches) From 4e12e1bb39b59bb8013770568cc6aee42931bed0 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Mon, 6 Nov 2023 08:23:01 -0800 Subject: [PATCH 6/8] Add arguments for cugraph_dgl_csr_sampling --- .../bulk_sampling/cugraph_bulk_sampling.py | 46 +++++++++++++++---- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py index a8c0658767d..3ddfafe4c31 100644 --- a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py +++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py @@ -22,7 +22,6 @@ get_allocation_counts_dask_lazy, sizeof_fmt, get_peak_output_ratio_across_workers, - restart_client, start_dask_client, stop_dask_client, enable_spilling, @@ -187,10 +186,10 @@ def sample_graph( output_path, seed=42, batch_size=500, - seeds_per_call=200000, + seeds_per_call=400000, batches_per_partition=100, fanout=[5, 5, 5], - persist=False, + sampling_kwargs={}, ): cupy.random.seed(seed) @@ -204,6 +203,7 @@ def sample_graph( seeds_per_call=seeds_per_call, batches_per_partition=batches_per_partition, log_level=logging.INFO, + **sampling_kwargs, ) n_workers = len(default_client().scheduler_info()["workers"]) @@ -469,6 +469,7 @@ def benchmark_cugraph_bulk_sampling( batch_size, seeds_per_call, fanout, + sampling_target_framework, reverse_edges=True, dataset_dir=".", replication_factor=1, @@ -564,17 +565,30 @@ def benchmark_cugraph_bulk_sampling( output_sample_path = os.path.join(output_subdir, "samples") os.makedirs(output_sample_path) - batches_per_partition = 200_000 // batch_size + if sampling_target_framework == "cugraph_dgl_csr": + sampling_kwargs = { + "deduplicate_sources": True, + "prior_sources_behavior": "carryover", + "renumber": True, + "compression": "CSR", + "compress_per_hop": True, + "use_legacy_names": False, + "include_hop_column": False, + } + else: + sampling_kwargs = {} + + batches_per_partition = 400_000 // batch_size execution_time, allocation_counts = sample_graph( - G, - dask_label_df, - output_sample_path, + G=G, + label_df=dask_label_df, + output_path=output_sample_path, seed=seed, batch_size=batch_size, seeds_per_call=seeds_per_call, batches_per_partition=batches_per_partition, fanout=fanout, - persist=persist, + sampling_kwargs=sampling_kwargs, ) output_meta = { @@ -701,7 +715,13 @@ def get_args(): required=False, default=False, ) - + parser.add_argument( + "--sampling_target_framework", + type=str, + help="The target framework for sampling (i.e. cugraph_dgl_csr, cugraph_pyg_csc, ...)", + required=False, + default=None, + ) parser.add_argument( "--dask_worker_devices", type=str, @@ -738,6 +758,12 @@ def get_args(): logging.basicConfig() args = get_args() + if args.sampling_target_framework not in ["cugraph_dgl_csr", None]: + raise ValueError( + "sampling_target_framework must be one of cugraph_dgl_csr or None", + "Other frameworks are not supported at this time.", + ) + fanouts = [ [int(f) for f in fanout.split("_")] for fanout in args.fanouts.split(",") ] @@ -785,6 +811,7 @@ def get_args(): batch_size=batch_size, seeds_per_call=seeds_per_call, fanout=fanout, + sampling_target_framework=args.sampling_target_framework, dataset_dir=args.dataset_root, reverse_edges=args.reverse_edges, replication_factor=replication_factor, @@ -809,7 +836,6 @@ def get_args(): warnings.warn("An Exception Occurred!") print(e) traceback.print_exc() - restart_client(client) sleep(10) stats_df = pd.DataFrame( From 25412c079e39ed104ab318db98d8af0085ac87f0 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Mon, 6 Nov 2023 10:05:51 -0800 Subject: [PATCH 7/8] Update benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py Co-authored-by: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> --- .../standalone/bulk_sampling/cugraph_bulk_sampling.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py index 3ddfafe4c31..40d233424af 100644 --- a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py +++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py @@ -576,7 +576,16 @@ def benchmark_cugraph_bulk_sampling( "include_hop_column": False, } else: - sampling_kwargs = {} + # FIXME: Update these arguments when CSC mode is fixed in cuGraph-PyG (release 24.02) + sampling_kwargs = { + "deduplicate_sources": True, + "prior_sources_behavior": "exclude", + "renumber": True, + "compression": "COO", + "compress_per_hop": False, + "use_legacy_names": False, + "include_hop_column": True + } batches_per_partition = 400_000 // batch_size execution_time, allocation_counts = sample_graph( From e996ce99b0b9de313cca6e4d6361219681e24643 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Tue, 7 Nov 2023 14:17:46 -0800 Subject: [PATCH 8/8] Style fixes --- .../bulk_sampling/cugraph_bulk_sampling.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py index 40d233424af..1ca5d6db637 100644 --- a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py +++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py @@ -578,13 +578,13 @@ def benchmark_cugraph_bulk_sampling( else: # FIXME: Update these arguments when CSC mode is fixed in cuGraph-PyG (release 24.02) sampling_kwargs = { - "deduplicate_sources": True, - "prior_sources_behavior": "exclude", - "renumber": True, - "compression": "COO", - "compress_per_hop": False, - "use_legacy_names": False, - "include_hop_column": True + "deduplicate_sources": True, + "prior_sources_behavior": "exclude", + "renumber": True, + "compression": "COO", + "compress_per_hop": False, + "use_legacy_names": False, + "include_hop_column": True, } batches_per_partition = 400_000 // batch_size