From 4d592d344440cdb5e634ae9d8189a75b7f2e9cb3 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Tue, 13 Aug 2024 14:41:11 -0700 Subject: [PATCH 1/3] Check for duplicate column names --- python/cugraph/cugraph/structure/hypergraph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/structure/hypergraph.py b/python/cugraph/cugraph/structure/hypergraph.py index add68cb6dac..23a37dbd0c9 100644 --- a/python/cugraph/cugraph/structure/hypergraph.py +++ b/python/cugraph/cugraph/structure/hypergraph.py @@ -440,6 +440,8 @@ def _create_hyper_edges( for key, col in events[columns].items(): cat = categories.get(key, key) fs = [EVENTID] + ([key] if drop_edge_attrs else edge_attrs) + fs = list(set(fs)) + # breakpoint() df = events[fs].dropna(subset=[key]) if dropna else events[fs] if len(df) == 0: continue @@ -464,7 +466,8 @@ def _create_hyper_edges( if not drop_edge_attrs: columns += edge_attrs - edges = cudf.concat(edges)[columns] + # breakpoint() + edges = cudf.concat(edges)[list(set(columns))] edges.reset_index(drop=True, inplace=True) return edges From a707fb275d9294eb909c698d07f0c11b228c3984 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Wed, 14 Aug 2024 08:05:47 -0700 Subject: [PATCH 2/3] Fixes for hypergraph bug --- python/cugraph/cugraph/structure/hypergraph.py | 8 +++----- .../cugraph/cugraph/tests/structure/test_hypergraph.py | 9 ++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/cugraph/cugraph/structure/hypergraph.py b/python/cugraph/cugraph/structure/hypergraph.py index 23a37dbd0c9..b52fef4dcfc 100644 --- a/python/cugraph/cugraph/structure/hypergraph.py +++ b/python/cugraph/cugraph/structure/hypergraph.py @@ -441,7 +441,6 @@ def _create_hyper_edges( cat = categories.get(key, key) fs = [EVENTID] + ([key] if drop_edge_attrs else edge_attrs) fs = list(set(fs)) - # breakpoint() df = events[fs].dropna(subset=[key]) if dropna else events[fs] if len(df) == 0: continue @@ -466,9 +465,7 @@ def
_create_hyper_edges( if not drop_edge_attrs: columns += edge_attrs - # breakpoint() - edges = cudf.concat(edges)[list(set(columns))] - edges.reset_index(drop=True, inplace=True) + edges = cudf.concat(edges, ignore_index=True)[list(set(columns))] return edges @@ -549,6 +546,7 @@ def _create_direct_edges( for key2, col2 in events[sorted(edge_shape[key1])].items(): cat2 = categories.get(key2, key2) fs = [EVENTID] + ([key1, key2] if drop_edge_attrs else edge_attrs) + fs = list(set(fs)) df = events[fs].dropna(subset=[key1, key2]) if dropna else events[fs] if len(df) == 0: continue @@ -576,7 +574,7 @@ def _create_direct_edges( if not drop_edge_attrs: columns += edge_attrs - edges = cudf.concat(edges)[columns] + edges = cudf.concat(edges)[list(set(columns))] edges.reset_index(drop=True, inplace=True) return edges diff --git a/python/cugraph/cugraph/tests/structure/test_hypergraph.py b/python/cugraph/cugraph/tests/structure/test_hypergraph.py index 848f31b940f..bcbc2f29073 100644 --- a/python/cugraph/cugraph/tests/structure/test_hypergraph.py +++ b/python/cugraph/cugraph/tests/structure/test_hypergraph.py @@ -171,7 +171,8 @@ def test_hyperedges(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_frame_equal(edges, h["edges"], check_dtype=False) + # check_like ignores the order of columns as long as all correct ones are present + assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True) for (k, v) in [("entities", 12), ("nodes", 15), ("edges", 12), ("events", 3)]: assert len(h[k]) == v @@ -266,7 +267,8 @@ def test_drop_edge_attrs(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_frame_equal(edges, h["edges"], check_dtype=False) + # check_like ignores the order of columns as long as all correct ones are present + assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True) for (k, v) in [("entities", 9), ("nodes", 12), ("edges", 9), 
("events", 3)]: assert len(h[k]) == v @@ -308,7 +310,8 @@ def test_drop_edge_attrs_direct(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_frame_equal(edges, h["edges"], check_dtype=False) + # check_like ignores the order of columns as long as all correct ones are present + assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True) for (k, v) in [("entities", 9), ("nodes", 9), ("edges", 6), ("events", 0)]: assert len(h[k]) == v From b7f7df765f8e97e31fad33bc42ad32b8c68781b1 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Wed, 14 Aug 2024 08:26:42 -0700 Subject: [PATCH 3/3] Style --- python/cugraph/cugraph/tests/structure/test_hypergraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/structure/test_hypergraph.py b/python/cugraph/cugraph/tests/structure/test_hypergraph.py index bcbc2f29073..f1dfc17a509 100644 --- a/python/cugraph/cugraph/tests/structure/test_hypergraph.py +++ b/python/cugraph/cugraph/tests/structure/test_hypergraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at