diff --git a/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py b/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py
index 6cdf0d793d4..f60f597cfae 100644
--- a/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py
+++ b/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1370,7 +1370,23 @@ def __get_graph_data_as_numpy_bytes(self, dataframe, null_replacement_value):
             # FIXME: should something other than a numpy type be serialized to
             # prevent a copy? (note: any other type required to be de-serialzed
             # on the client end could add dependencies on the client)
-            df_numpy = dataframe.to_numpy(na_value=n)
+            df_copy = dataframe.copy()
+            for col_name in df_copy.columns:
+                if df_copy[col_name].dtype == "category":
+                    cat_dt = df_copy.dtypes[col_name].categories.dtype
+                    if cat_dt == "object":
+                        new_cat = str(n)
+                    else:
+                        new_cat = n
+                    if new_cat not in df_copy.dtypes[col_name].categories:
+                        df_copy[col_name] = df_copy[col_name].cat.add_categories(
+                            new_cat
+                        )
+                    df_copy[col_name] = df_copy[col_name].fillna(new_cat)
+                else:
+                    df_copy[col_name] = df_copy[col_name].fillna(n)
+
+            df_numpy = df_copy.to_numpy()
             return df_numpy.dumps()
 
         except Exception:
diff --git a/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py b/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py
index 361226c8071..dbd75e6abd9 100644
--- a/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py
+++ b/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,12 +25,12 @@
 def create_graph_from_builtin_dataset(dataset_name, mg=False, server=None):
     dataset_obj = getattr(datasets, dataset_name)
     # FIXME: create an MG graph if server is mg?
-    return dataset_obj.get_graph(fetch=True)
+    return dataset_obj.get_graph(download=True)
 
 
 def create_property_graph_from_builtin_dataset(dataset_name, mg=False, server=None):
     dataset_obj = getattr(datasets, dataset_name)
-    edgelist_df = dataset_obj.get_edgelist(fetch=True)
+    edgelist_df = dataset_obj.get_edgelist(download=True)
 
     if mg and (server is not None) and server.is_multi_gpu:
         G = MGPropertyGraph()
diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py
index 513798f35f9..53c1bf778c7 100644
--- a/python/cugraph/cugraph/structure/property_graph.py
+++ b/python/cugraph/cugraph/structure/property_graph.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -123,6 +123,17 @@ class EXPERIMENTAL__PropertyGraph:
 
     _default_type_name = ""
 
+    _internal_col_names = set(
+        (
+            vertex_col_name,
+            src_col_name,
+            dst_col_name,
+            type_col_name,
+            edge_id_col_name,
+            weight_col_name,
+        )
+    )
+
     def __init__(self):
         # The dataframe containing the properties for each vertex.
         # Each vertex occupies a row, and individual properties are maintained
@@ -1380,6 +1391,15 @@ def fillna_vertices(self, val=0):
             Series is passed, the index or keys are the columns to fill and the
             values are the fill value for the corresponding column.
         """
+        # Omit internal columns if an object is passed in to be applied to the
+        # entire DataFrame and assume the intent is for users to fillna only on
+        # their data.
+        if not isinstance(val, (dict, self.__series_type)):
+            user_col_names = (
+                set(self.__vertex_prop_dataframe.columns) - self._internal_col_names
+            )
+            val = dict.fromkeys(user_col_names, val)
+
         self.__vertex_prop_dataframe.fillna(val, inplace=True)
 
     def fillna_edges(self, val=0):
@@ -1394,6 +1414,14 @@ def fillna_edges(self, val=0):
             Series is passed, the index or keys are the columns to fill and the
             values are the fill value for the corresponding column.
         """
+        # Omit internal columns if an object is passed in to be applied to the
+        # entire DataFrame and assume the intent is for users to fillna only on
+        # their data.
+        if not isinstance(val, (dict, self.__series_type)):
+            user_col_names = (
+                set(self.__edge_prop_dataframe.columns) - self._internal_col_names
+            )
+            val = dict.fromkeys(user_col_names, val)
         self.__edge_prop_dataframe.fillna(val, inplace=True)