Use Datasets API to Update Docstring Examples (rapidsai#2441)

Closes rapidsai#2361.

Docstring examples now create graphs through the `datasets` API. This change cleans up the examples by eliminating the direct usage of `cuDF`.

Old docstring example:
```
>>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
...                   dtype=['int32', 'int32', 'float32'], header=None)
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(M, source='0', destination='1')
```

Updated docstring example:
```
>>> from cugraph.experimental.datasets import karate
>>> G = karate.get_graph(fetch=True)
```

Authors:
- Ralph Liu (https://github.com/oorliu)
- Dylan Chima-Sanchez (https://github.com/betochimas)

Approvers:
- Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#2441
oorliu · Aug 3, 2022 · 4dc286e · 4dc286e
1 parent d50622f
commit 4dc286e
Show file tree

Hide file tree

Showing 32 changed files with 90 additions and 207 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,4 +1,4 @@
 include python/versioneer.py
 include python/cugraph/_version.py
 include cugraph/experimental/datasets/*.yaml
-include cugraph/experimental/datasets/metadata/*.yaml
+include cugraph/experimental/datasets/metadata/*.yaml
diff --git a/python/cugraph/MANIFEST.in b/python/cugraph/MANIFEST.in
@@ -1,4 +1,4 @@
 include versioneer.py
 include cugraph/_version.py
 include cugraph/experimental/datasets/*.yaml
-include cugraph/experimental/datasets/metadata/*.yaml
+include cugraph/experimental/datasets/metadata/*.yaml
diff --git a/python/cugraph/cugraph/centrality/betweenness_centrality.py b/python/cugraph/cugraph/centrality/betweenness_centrality.py
@@ -106,10 +106,8 @@ def betweenness_centrality(
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> bc = cugraph.betweenness_centrality(G)
 
     """
@@ -235,11 +233,9 @@ def edge_betweenness_centrality(
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
-    >>> ebc = cugraph.edge_betweenness_centrality(G)
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
+    >>> ebc = cugraph.edge_betweenness_centrality(G)
 
     """
     if weight is not None:

diff --git a/python/cugraph/cugraph/centrality/degree_centrality.py b/python/cugraph/cugraph/centrality/degree_centrality.py
@@ -42,10 +42,8 @@ def degree_centrality(G, normalized=True):
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> dc = cugraph.degree_centrality(G)
 
     """

diff --git a/python/cugraph/cugraph/centrality/eigenvector_centrality.py b/python/cugraph/cugraph/centrality/eigenvector_centrality.py
@@ -68,10 +68,8 @@ def eigenvector_centrality(
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> ec = cugraph.eigenvector_centrality(G)
 
     """

diff --git a/python/cugraph/cugraph/centrality/katz_centrality.py b/python/cugraph/cugraph/centrality/katz_centrality.py
@@ -107,10 +107,8 @@ def katz_centrality(
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> kc = cugraph.katz_centrality(G)
 
     """

diff --git a/python/cugraph/cugraph/community/ecg.py b/python/cugraph/cugraph/community/ecg.py
@@ -61,11 +61,8 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16, weight=None):
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> parts = cugraph.ecg(G)
 
     """

diff --git a/python/cugraph/cugraph/community/egonet.py b/python/cugraph/cugraph/community/egonet.py
@@ -81,12 +81,8 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> ego_graph = cugraph.ego_graph(G, 1, radius=2)
 
     """
@@ -157,12 +153,8 @@ def batched_ego_graphs(
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> b_ego_graph, offsets = cugraph.batched_ego_graphs(G, seeds=[1,5],
     ...                                                   radius=2)
 

diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.py b/python/cugraph/cugraph/community/ktruss_subgraph.py
@@ -67,11 +67,8 @@ def k_truss(G, k):
 
     Examples
     --------
-    >>> import cudf # k_truss does not run on CUDA 11.5
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> k_subgraph = cugraph.k_truss(G, 3)
 
     """
@@ -150,11 +147,8 @@ def ktruss_subgraph(G, k, use_weights=True):
 
     Examples
     --------
-    >>> import cudf # ktruss_subgraph does not run on CUDA 11.5
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
 
     """

diff --git a/python/cugraph/cugraph/community/leiden.py b/python/cugraph/cugraph/community/leiden.py
@@ -66,12 +66,8 @@ def leiden(G, max_iter=100, resolution=1.):
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> parts, modularity_score = cugraph.leiden(G)
 
     """

diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py
@@ -65,12 +65,8 @@ def louvain(G, max_iter=100, resolution=1.):
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> parts, modularity_score = cugraph.louvain(G)
 
     """

diff --git a/python/cugraph/cugraph/community/spectral_clustering.py b/python/cugraph/cugraph/community/spectral_clustering.py
@@ -71,12 +71,8 @@ def spectralBalancedCutClustering(
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.spectralBalancedCutClustering(G, 5)
 
     """
@@ -158,12 +154,8 @@ def spectralModularityMaximizationClustering(
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.spectralModularityMaximizationClustering(G, 5)
 
     """
@@ -226,12 +218,8 @@ def analyzeClustering_modularity(G, n_clusters, clustering,
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.spectralBalancedCutClustering(G, 5)
     >>> score = cugraph.analyzeClustering_modularity(G, 5, df)
 
@@ -297,12 +285,8 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering,
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.spectralBalancedCutClustering(G, 5)
     >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df)
 
@@ -365,12 +349,8 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering,
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.spectralBalancedCutClustering(G, 5)
     >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df, 'vertex',
     ...                                             'cluster')

diff --git a/python/cugraph/cugraph/community/subgraph_extraction.py b/python/cugraph/cugraph/community/subgraph_extraction.py
@@ -43,12 +43,8 @@ def subgraph(G, vertices):
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                     delimiter = ' ',
-    ...                     dtype=['int32', 'int32', 'float32'],
-    ...                     header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> verts = np.zeros(3, dtype=np.int32)
     >>> verts[0] = 0
     >>> verts[1] = 1

diff --git a/python/cugraph/cugraph/community/triangle_count.py b/python/cugraph/cugraph/community/triangle_count.py
@@ -39,12 +39,8 @@ def triangles(G):
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                     delimiter = ' ',
-    ...                     dtype=['int32', 'int32', 'float32'],
-    ...                     header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> count = cugraph.triangles(G)
 
     """

diff --git a/python/cugraph/cugraph/components/connectivity.py b/python/cugraph/cugraph/components/connectivity.py
@@ -171,12 +171,8 @@ def weakly_connected_components(G,
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.weakly_connected_components(G)
 
     """
@@ -269,12 +265,8 @@ def strongly_connected_components(G,
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.strongly_connected_components(G)
 
     """
@@ -367,12 +359,8 @@ def connected_components(G,
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
-    ...                   delimiter = ' ',
-    ...                   dtype=['int32', 'int32', 'float32'],
-    ...                   header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.connected_components(G, connection="weak")
 
     """

diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py
@@ -58,10 +58,8 @@ def core_number(G, degree_type=None):
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> df = cugraph.core_number(G)
 
     """

diff --git a/python/cugraph/cugraph/cores/k_core.py b/python/cugraph/cugraph/cores/k_core.py
@@ -74,10 +74,8 @@ def k_core(G, k=None, core_number=None):
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> KCoreGraph = cugraph.k_core(G)
 
     """

diff --git a/python/cugraph/cugraph/experimental/datasets/metadata/__init__.py b/python/cugraph/cugraph/experimental/datasets/metadata/__init__.py
diff --git a/python/cugraph/cugraph/layout/force_atlas2.py b/python/cugraph/cugraph/layout/force_atlas2.py
@@ -123,11 +123,8 @@ def on_train_end(self, positions):
 
         Examples
         --------
-        >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-        ...                     dtype=['int32', 'int32', 'float32'],
-        ...                     header=None)
-        >>> G = cugraph.Graph()
-        >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+        >>> from cugraph.experimental.datasets import karate
+        >>> G = karate.get_graph(fetch=True)
         >>> pos = cugraph.force_atlas2(G)
 
     """

diff --git a/python/cugraph/cugraph/link_analysis/hits.py b/python/cugraph/cugraph/link_analysis/hits.py
@@ -79,10 +79,8 @@ def hits(
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> hits = cugraph.hits(G, max_iter = 50)
 
     """

diff --git a/python/cugraph/cugraph/link_analysis/pagerank.py b/python/cugraph/cugraph/link_analysis/pagerank.py
@@ -98,10 +98,8 @@ def pagerank(
 
     Examples
     --------
-    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
-    ...                     dtype=['int32', 'int32', 'float32'], header=None)
-    >>> G = cugraph.Graph()
-    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
+    >>> from cugraph.experimental.datasets import karate
+    >>> G = karate.get_graph(fetch=True)
     >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, tol = 1.0e-05)
 
     """