rapidsai · rapids-bot · Feb 6, 2024 · Feb 5, 2024 · Feb 5, 2024 · Feb 5, 2024
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -181,7 +181,9 @@ def _write_samples_to_parquet_csr(
             [
                 cudf.Series(minors_array[results_start:results_end], name="minors"),
                 cudf.Series(
-                    renumber_map.map.values[renumber_map_start:renumber_map_end],
+                    renumber_map.renumber_map.values[
+                        renumber_map_start:renumber_map_end
+                    ],
                     name="map",
                 ),
                 label_hop_offsets_current_partition,
@@ -299,7 +301,7 @@ def _write_samples_to_parquet_coo(
             else:
                 renumber_map_end_ix = offsets_z.renumber_map_offsets.iloc[0]
 
-            renumber_map_p = renumber_map.map.iloc[
+            renumber_map_p = renumber_map.renumber_map.iloc[
                 renumber_map_start_ix:renumber_map_end_ix
             ]
 

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -120,9 +120,9 @@ def sample_neighbors(
             return self._get_edgeid_type_d(sampled_df)
         else:
             return (
-                sampled_df[src_n].values,
-                sampled_df[dst_n].values,
-                sampled_df["indices"].values,
+                sampled_df[src_n].astype("float").values,
+                sampled_df[dst_n].astype("float").values,
+                sampled_df["indices"].astype("float").values,
             )
 
     def _get_edgeid_type_d(self, df):

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -74,7 +74,7 @@ def sampling_results_from_cupy_array_dict(
         if renumber:
             renumber_df = cudf.DataFrame(
                 {
-                    "map": cupy_array_dict["renumber_map"],
+                    "renumber_map": cupy_array_dict["renumber_map"],
                 }
             )
 

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -222,7 +222,7 @@ def test_bulk_sampler_partitions(scratch_dir):
         ]
 
         recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
-        recovered_map = recovered_samples.map
+        recovered_map = recovered_samples["map"]
         recovered_samples = recovered_samples.drop("map", axis=1).dropna()
 
         for current_batch_id in range(start_batch_id, end_batch_id + 1):

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -166,7 +166,7 @@ def test_bulk_sampler_partitions(dask_client, scratch_dir, mg_input):
         ]
 
         recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
-        recovered_map = recovered_samples.map
+        recovered_map = recovered_samples["map"]
         recovered_samples = recovered_samples.drop("map", axis=1).dropna()
 
         for current_batch_id in range(start_batch_id, end_batch_id + 1):

@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -796,7 +796,9 @@ def test_uniform_neighbor_sample_renumber(hops):
         expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
 
         assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+            renumber_map.renumber_map[
+                0 : len(expected_renumber_map)
+            ].values_host.tolist()
         )
     assert (renumber_map.batch_id == 0).all()
 
@@ -854,7 +856,9 @@ def test_uniform_neighbor_sample_offset_renumber(hops):
         expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
 
         assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+            renumber_map.renumber_map[
+                0 : len(expected_renumber_map)
+            ].values_host.tolist()
         )
 
     renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
@@ -902,8 +906,8 @@ def test_uniform_neighbor_sample_csr_csc_global(hops, seed):
     minors = sampling_results["minors"].dropna()
     assert len(majors) == len(minors)
 
-    majors = renumber_map.map.iloc[majors]
-    minors = renumber_map.map.iloc[minors]
+    majors = renumber_map.renumber_map.iloc[majors]
+    minors = renumber_map.renumber_map.iloc[minors]
 
     for i in range(len(majors)):
         assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
@@ -952,8 +956,8 @@ def test_uniform_neighbor_sample_csr_csc_local(hops, seed):
         majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
         majors = majors.repeat(cupy.diff(major_offsets))
 
-        majors = renumber_map.map.iloc[majors]
-        minors = renumber_map.map.iloc[minors]
+        majors = renumber_map.renumber_map.iloc[majors]
+        minors = renumber_map.renumber_map.iloc[minors]
 
         for i in range(len(majors)):
             assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])

@@ -1015,7 +1015,7 @@ def test_uniform_neighbor_sample_renumber(dask_client, hops):
 
     assert (renumber_map.batch_id == 0).all()
     assert (
-        renumber_map.map.nunique()
+        renumber_map.renumber_map.nunique()
         == cudf.concat(
             [sources_hop_0, sampling_results_renumbered.destinations]
         ).nunique()
@@ -1091,7 +1091,9 @@ def test_uniform_neighbor_sample_offset_renumber(dask_client, hops):
         expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
 
         assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+            renumber_map.renumber_map[
+                0 : len(expected_renumber_map)
+            ].values_host.tolist()
         )
 
     renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
@@ -1153,8 +1155,8 @@ def test_uniform_neighbor_sample_csr_csc_global(dask_client, hops, seed):
     minors = sampling_results["minors"].dropna()
     assert len(majors) == len(minors)
 
-    majors = renumber_map.map.iloc[majors]
-    minors = renumber_map.map.iloc[minors]
+    majors = renumber_map.renumber_map.iloc[majors]
+    minors = renumber_map.renumber_map.iloc[minors]
 
     for i in range(len(majors)):
         assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
@@ -1221,8 +1223,8 @@ def test_uniform_neighbor_sample_csr_csc_local(dask_client, hops, seed):
         majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
         majors = majors.repeat(cupy.diff(major_offsets))
 
-        majors = renumber_map.map.iloc[majors]
-        minors = renumber_map.map.iloc[minors]
+        majors = renumber_map.renumber_map.iloc[majors]
+        minors = renumber_map.renumber_map.iloc[minors]
 
         for i in range(len(majors)):
             assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])