diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py index 815192ef7b4..59f66f44f93 100644 --- a/python/cugraph/cugraph/structure/property_graph.py +++ b/python/cugraph/cugraph/structure/property_graph.py @@ -308,7 +308,7 @@ def add_vertex_data(self, # column in the incoming dataframe, since the initial merge may not # result in the same dtype. (see # https://github.com/rapidsai/cudf/issues/9981) - self.__update_dataframe_dtypes( + self.__vertex_prop_dataframe = self.__update_dataframe_dtypes( self.__vertex_prop_dataframe, {self.vertex_col_name: dataframe[vertex_col_name].dtype}) @@ -430,7 +430,7 @@ def add_edge_data(self, # column in the incoming dataframe, since the initial merge may not # result in the same dtype. (see # https://github.com/rapidsai/cudf/issues/9981) - self.__update_dataframe_dtypes( + self.__edge_prop_dataframe = self.__update_dataframe_dtypes( self.__edge_prop_dataframe, {self.src_col_name: dataframe[vertex_col_names[0]].dtype, self.dst_col_name: dataframe[vertex_col_names[1]].dtype, @@ -659,7 +659,7 @@ def extract_subgraph(self, # possibly had their dtypes converted in order to accommodate NaN # values. Restore the original dtypes in the resulting edges df prior # to creating a Graph. - self.__update_dataframe_dtypes(edges, self.__edge_prop_dtypes) + edges = self.__update_dataframe_dtypes(edges, self.__edge_prop_dtypes) return self.edge_props_to_graph( edges, @@ -727,7 +727,9 @@ def annotate_dataframe(self, df, G, edge_vertex_col_names): inplace=True) # restore the original dtypes - self.__update_dataframe_dtypes(new_df, self.__edge_prop_dtypes) + new_df = self.__update_dataframe_dtypes( + new_df, self.__edge_prop_dtypes + ) for col in df.columns: new_df[col] = new_df[col].astype(df[col].dtype) @@ -906,6 +908,7 @@ def __update_dataframe_dtypes(df, column_dtype_dict): This also handles converting standard integer dtypes to nullable integer dtypes, needed to accommodate NA values in columns. """ + update_cols = {} for (col, dtype) in column_dtype_dict.items(): # If the DataFrame is Pandas and the dtype is an integer type, # ensure a nullable integer array is used by specifying the correct @@ -919,7 +922,11 @@ def __update_dataframe_dtypes(df, column_dtype_dict): # Assigning to df[col] produces a (false?) warning with Pandas, # but assigning to df.loc[:,col] does not update the df in # cudf, so do one or the other based on type. - if type(df) is cudf.DataFrame: - df[col] = df[col].astype(dtype_str) - else: - df.loc[:, col] = df[col].astype(dtype_str) + update_cols[col] = df[col].astype(dtype_str) + if not update_cols: + return df + # Use df.assign to avoid assignment into df in case df is a view: + # https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html + # #returning-a-view-versus-a-copy + # Note that this requires all column names to be strings. + return df.assign(**update_cols) diff --git a/python/cugraph/cugraph/tests/test_property_graph.py b/python/cugraph/cugraph/tests/test_property_graph.py index d4d185dac18..5a91d9d577f 100644 --- a/python/cugraph/cugraph/tests/test_property_graph.py +++ b/python/cugraph/cugraph/tests/test_property_graph.py @@ -118,6 +118,21 @@ def setup_function(): # ============================================================================= # Pytest fixtures # ============================================================================= +@pytest.fixture(scope="function", autouse=True) +def raise_on_pandas_warning(): + """Raise when pandas gives SettingWithCopyWarning warning""" + # Perhaps we should put this in pytest.ini, pyproject.toml, or conftest.py + import warnings + + filters = list(warnings.filters) + warnings.filterwarnings( + "error", + category=pd.core.common.SettingWithCopyWarning + ) + yield + warnings.filters = filters + + df_types = [cudf.DataFrame, pd.DataFrame]