diff --git a/notebooks/algorithms/centrality/Betweenness.ipynb b/notebooks/algorithms/centrality/Betweenness.ipynb index 8860819b3ad..82b7b4bc29e 100644 --- a/notebooks/algorithms/centrality/Betweenness.ipynb +++ b/notebooks/algorithms/centrality/Betweenness.ipynb @@ -12,7 +12,8 @@ "| --------------|------------|------------------|-----------------|----------------|\n", "| Brad Rees | 04/24/2019 | created | 0.15 | GV100, CUDA 11.0\n", "| Brad Rees | 08/16/2020 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4\n", - "| Don Acosta | 07/05/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5" + "| Don Acosta | 07/05/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5" ] }, { @@ -111,7 +112,10 @@ "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -124,42 +128,6 @@ "import networkx as nx" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Some Prep" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define the path to the test data \n", - "datafile='../../data/karate-data.csv'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. 
However a third, `weight`, column is also possible" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -173,9 +141,8 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "# Create a graph using the imported Dataset object\n", + "G = karate.get_graph(fetch=True)" ] }, { @@ -256,6 +223,7 @@ "outputs": [], "source": [ "# Read the data, this also created a NetworkX Graph \n", + "datafile=\"../../data/karate-data.csv\"\n", "file = open(datafile, 'rb')\n", "Gnx = nx.read_edgelist(file)" ] @@ -321,7 +289,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -335,11 +303,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/centrality/Katz.ipynb b/notebooks/algorithms/centrality/Katz.ipynb index f3537fe75e7..b62cea2df82 100755 --- a/notebooks/algorithms/centrality/Katz.ipynb +++ b/notebooks/algorithms/centrality/Katz.ipynb @@ -12,7 +12,8 @@ "| --------------|------------|------------------|-----------------|----------------|\n", "| Brad Rees | 10/15/2019 | created | 0.14 | GV100, CUDA 10.2\n", "| Brad Rees | 08/16/2020 | tested / updated | 0.15.1 nightly | RTX 3090 CUDA 11.4\n", - "| Don Acosta | 07/05/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 
CUDA 11.5" + "| Don Acosta | 07/05/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5" ] }, { @@ -40,9 +41,9 @@ " this value is 0.0f, cuGraph will use the default value which is 0.00001. \n", " Setting too small a tolerance can lead to non-convergence due to numerical \n", " roundoff. Usually values between 0.01 and 0.00001 are acceptable.\n", - " nstart:cuDataFrame, GPU Dataframe containing the initial guess for katz centrality. \n", + " nstart: cuDataFrame, GPU Dataframe containing the initial guess for katz centrality. \n", " Default is None\n", - " normalized:bool, If True normalize the resulting katz centrality values. \n", + " normalized: bool, If True normalize the resulting katz centrality values. \n", " Default is True\n", "\n", "Returns:\n", @@ -106,7 +107,10 @@ "source": [ "# Import rapids libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -140,35 +144,6 @@ "tol = 0.00001 # tolerance" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define the path to the test data \n", - "datafile='../../data/karate-data.csv'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. 
However a third, `weight`, column is also possible" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -182,9 +157,8 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "# Create a graph using the imported Dataset object\n", + "G = karate.get_graph(fetch=True)" ] }, { @@ -275,6 +249,7 @@ "outputs": [], "source": [ "# Read the data, this also created a NetworkX Graph \n", + "datafile = \"../../data/karate-data.csv\"\n", "file = open(datafile, 'rb')\n", "Gnx = nx.read_edgelist(file)" ] @@ -348,7 +323,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -362,11 +337,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/community/ECG.ipynb b/notebooks/algorithms/community/ECG.ipynb index 28d44f5e3b2..829be21035c 100644 --- a/notebooks/algorithms/community/ECG.ipynb +++ b/notebooks/algorithms/community/ECG.ipynb @@ -13,6 +13,7 @@ "| | 08/16/2020 | updated | 0.15 | GV100, CUDA 10.2 |\n", "| | 08/05/2021 | tested/updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Don Acosta | 07/20/2022 | tested/updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "\n", "## 
Introduction\n", "\n", @@ -101,34 +102,17 @@ "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Read data using cuDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "datafile='../../data/karate-data.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read the data using cuDF\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "## Create an Edgelist" ] }, { @@ -137,6 +121,9 @@ "metadata": {}, "outputs": [], "source": [ + "# You can also just get the edgelist\n", + "gdf = karate.get_edgelist(fetch=True)\n", + "\n", "# The algorithm also requires that there are vertex weights. 
Just use 1.0 \n", "gdf[\"data\"] = 1.0" ] @@ -232,7 +219,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -246,11 +233,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/community/Louvain.ipynb b/notebooks/algorithms/community/Louvain.ipynb index 4786fb1e9dc..a8529483534 100755 --- a/notebooks/algorithms/community/Louvain.ipynb +++ b/notebooks/algorithms/community/Louvain.ipynb @@ -15,6 +15,7 @@ "| | 08/16/2020 | updated | 0.14 | GV100, CUDA 10.2 |\n", "| | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "\n", "\n", "\n", @@ -140,34 +141,17 @@ "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Read data using cuDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "datafile='../../data//karate-data.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read the data using cuDF\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "## Create an Edgelist" ] }, { @@ -176,6 +160,9 @@ "metadata": {}, "outputs": [], "source": [ + "# You 
can also just get the edgelist\n", + "gdf = karate.get_edgelist(fetch=True)\n", + "\n", "# The algorithm also requires that there are vertex weights. Just use 1.0 \n", "gdf[\"data\"] = 1.0" ] @@ -323,7 +310,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -337,11 +324,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/community/Spectral-Clustering.ipynb b/notebooks/algorithms/community/Spectral-Clustering.ipynb index a314861090c..2ac1b9e8c16 100755 --- a/notebooks/algorithms/community/Spectral-Clustering.ipynb +++ b/notebooks/algorithms/community/Spectral-Clustering.ipynb @@ -13,7 +13,8 @@ "| ---------------------------|------------|------------------|-----------------|-----------------------------|\n", "| Brad Rees and James Wyles | 08/01/2019 | created | 0.14 | GV100 32G, CUDA 10.2 |\n", "| | 08/16/2020 | updated | 0.15 | GV100 32G, CUDA 10.2 |\n", - "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" + "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" ] }, { @@ -140,48 +141,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import needed libraries\n", "import cugraph\n", "import cudf\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read the CSV datafile using cuDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "datafile='../../data/karate-data.csv'\n", + "import numpy as np\n", "\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Add Edge Weights" + "### Create Edgelist and Add Edge Weights" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ + "gdf = karate.get_edgelist(fetch=True)\n", + "\n", "# The algorithm requires that there are edge weights. In this case all the weights are being set to 1\n", "gdf[\"data\"] = cudf.Series(np.ones(len(gdf), dtype=np.float32))" ] @@ -219,7 +206,7 @@ "metadata": {}, "outputs": [], "source": [ - "# create a CuGraph \n", + "# create a Graph \n", "G = cugraph.Graph()\n", "G.from_cudf_edgelist(gdf, source='src', destination='dst', edge_attr='data')" ] @@ -390,7 +377,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.6.9 64-bit", "language": "python", "name": "python3" }, @@ -404,11 +391,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.6.9" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" } } }, diff --git a/notebooks/algorithms/community/Subgraph-Extraction.ipynb b/notebooks/algorithms/community/Subgraph-Extraction.ipynb index 88577d756ba..22c226fbb7a 100755 --- a/notebooks/algorithms/community/Subgraph-Extraction.ipynb +++ b/notebooks/algorithms/community/Subgraph-Extraction.ipynb @@ -13,7 +13,8 @@ "| --------------|------------|------------------|-----------------|-----------------------------|\n", "| Brad Rees | 10/16/2019 
| created | 0.13 | GV100 32G, CUDA 10.2 |\n", "| | 08/16/2020 | updated | 0.15 | GV100 32G, CUDA 10.2 |\n", - "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" + "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" ] }, { @@ -79,7 +80,10 @@ "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -89,26 +93,6 @@ "## Read data using cuDF" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "datafile='../../data//karate-data.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read the data using cuDF\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -123,19 +107,14 @@ "metadata": {}, "outputs": [], "source": [ + "# You can also just get the edgelist\n", + "gdf = karate.get_edgelist(fetch=True)\n", + "\n", "# The louvain algorithm requires that there are vertex weights. 
Just use 1.0 \n", - "gdf[\"data\"] = 1.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a Graph \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst', edge_attr='data')" + "gdf[\"data\"] = 1.0\n", + "\n", + "# Create a graph\n", + "G = cugraph.from_cudf_edgelist(gdf, source='src', destination='dst')" ] }, { @@ -275,7 +254,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -289,11 +268,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/community/Triangle-Counting.ipynb b/notebooks/algorithms/community/Triangle-Counting.ipynb index 74006ae9cda..0554fac1362 100755 --- a/notebooks/algorithms/community/Triangle-Counting.ipynb +++ b/notebooks/algorithms/community/Triangle-Counting.ipynb @@ -14,6 +14,7 @@ "| Brad Rees | 08/01/2019 | created | 0.13 | GV100 32G, CUDA 10.2 |\n", "| | 08/16/2020 | updated | 0.15 | GV100 32G, CUDA 10.2 |\n", "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 07/27/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "\n", "## Introduction\n", "Triangle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. 
\n", @@ -90,8 +91,11 @@ "# Import needed libraries\n", "import cugraph\n", "import cudf\n", + "from collections import OrderedDict\n", "from cugraph.experimental import triangle_count as experimental_triangles\n", - "from collections import OrderedDict" + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -106,23 +110,6 @@ "from scipy.io import mmread" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Some Prep" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define the path to the test data \n", - "datafile='../../data/karate.csv'" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -137,6 +124,7 @@ "metadata": {}, "outputs": [], "source": [ + "datafile= '../../data/karate.csv'\n", "# Read the data, this also created a NetworkX Graph \n", "file = open(datafile, 'rb')\n", "df = pd.read_csv(\n", @@ -211,36 +199,6 @@ "# cuGraph" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). A third, `weight`, column is also used in this example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "gdf = cudf.read_csv(\n", - " datafile,\n", - " delimiter=\" \",\n", - " header=None,\n", - " names=[\"0\", \"1\", \"weight\"],\n", - " dtype={\"0\": \"int32\", \"1\": \"int32\", \"weight\": \"float32\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -254,9 +212,8 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source=\"0\", destination=\"1\",edge_attr=\"weight\")" + "G = karate.get_graph()\n", + "G = G.to_undirected()" ] }, { @@ -330,7 +287,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.6.9 64-bit", "language": "python", "name": "python3" }, @@ -344,11 +301,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.6.9" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" } } }, diff --git a/notebooks/algorithms/community/ktruss.ipynb b/notebooks/algorithms/community/ktruss.ipynb index 20c14d76986..3c96f7ff5a7 100644 --- a/notebooks/algorithms/community/ktruss.ipynb +++ b/notebooks/algorithms/community/ktruss.ipynb @@ -14,6 +14,7 @@ "| | 08/16/2020 | updated | 0.15 | GV100, CUDA 10.2 |\n", "| | 08/05/2021 | tested/updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Don Acosta | 07/08/2022 | tested/updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "## Introduction\n", "\n", "Compute the k-truss of the graph G. 
A K-Truss is a relaxed cliques where every vertex is supported by at least k-2 triangle.\n", @@ -96,34 +97,17 @@ "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Read data using cuDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "datafile='../../data//karate-data.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read the data using cuDF\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "### Create a Graph" ] }, { @@ -132,9 +116,9 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "# Create a graph using the imported Dataset object\n", + "G = karate.get_graph(fetch=True)\n", + "G = G.to_undirected()" ] }, { @@ -260,7 +244,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -274,11 +258,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/components/ConnectedComponents.ipynb b/notebooks/algorithms/components/ConnectedComponents.ipynb index 0259c314ccf..5f18352647f 100755 --- a/notebooks/algorithms/components/ConnectedComponents.ipynb +++ b/notebooks/algorithms/components/ConnectedComponents.ipynb @@ -16,10 +16,11 
@@ "\n", "_Notebook Credits_\n", "\n", - "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", - "| --------------|------------|------------------|-----------------|-----------------------------|\n", - "| Kumar Aatish | 08/13/2019 | created | 0.15 | GV100, CUDA 10.2 |\n", - "| Brad Rees | 10/18/2021 | updated | 21.12 nightly | GV100, CUDA 11.4 |\n", + "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", + "| --------------|------------|------------------|-----------------|--------------------|\n", + "| Kumar Aatish | 08/13/2019 | created | 0.15 | GV100, CUDA 10.2 |\n", + "| Brad Rees | 10/18/2021 | updated | 21.12 nightly | GV100, CUDA 11.4 |\n", + "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5 |\n", "| Don Acosta | 07/22/2021 | updated | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", "\n", "\n", @@ -131,13 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 1. Read graph data from file\n", - "\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation on the GPU.\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The source to destination pairs is in what is known as Coordinate Format (COO).\n", - "\n", - "In this test case the data in the test file is expressed in three columns, source, destination and the edge weight. While edge weight is relevant in other algorithms, cuGraph connected component calls do not make use of it and hence that column can be discarded from the dataframe." + "### 1. Import a Built-In Dataset" ] }, { @@ -146,14 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Test file\n", - "datafile='../../data/netscience.csv'\n", - "\n", - "# the datafile contains three columns, but we only want to use the first two. 
\n", - "# We will use the \"usecols' feature of read_csv to ignore that column\n", - "\n", - "gdf = cudf.read_csv(datafile, delimiter=' ', names=['src', 'dst', 'wgt'], dtype=['int32', 'int32', 'float32'], usecols=['src', 'dst'])\n", - "gdf.head(5)" + "from cugraph.experimental.datasets import netscience" ] }, { @@ -169,9 +157,7 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe\n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = netscience.get_graph(fetch=True)" ] }, { @@ -362,7 +348,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -376,11 +362,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/cores/core-number.ipynb b/notebooks/algorithms/cores/core-number.ipynb index 64b8eada7ef..06fe570d390 100755 --- a/notebooks/algorithms/cores/core-number.ipynb +++ b/notebooks/algorithms/cores/core-number.ipynb @@ -16,6 +16,7 @@ "| --------------|------------|------------------|-----------------|--------------------|\n", "| Brad Rees | 10/28/2019 | created | 0.13 | GV100, CUDA 10.2 |\n", "| Don Acosta | 07/21/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", "\n", "## Introduction\n", "\n", @@ -77,34 +78,17 @@ "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# import a built-in dataset\n", + "from cugraph.experimental.datasets import 
karate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Read data using cuDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test file \n", - "datafile='../../data/karate-data.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read the data using cuDF\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "### Create a Graph" ] }, { @@ -113,9 +97,8 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph(fetch=True)\n", + "G = G.to_undirected()" ] }, { @@ -160,7 +143,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -174,11 +157,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/algorithms/cores/kcore.ipynb b/notebooks/algorithms/cores/kcore.ipynb index 432a46834a1..065f02ffd98 100755 --- a/notebooks/algorithms/cores/kcore.ipynb +++ b/notebooks/algorithms/cores/kcore.ipynb @@ -17,6 +17,7 @@ "| Brad Rees | 10/28/2019 | created | 0.13 | GV100, CUDA 10.2 |\n", "| Brad Rees | 08/16/2020 | created | 0.15 | GV100, CUDA 10.2 |\n", "| Don Acosta | 07/21/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", + "| Ralph Liu | 07/26/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", "\n", "## Introduction\n", "\n", @@ -71,58 +72,50 @@ }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Import needed libraries\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Read data using cuDF" + "### Create a Graph" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# Test file \n", - "datafile='../../data/karate-data.csv'" + "G = karate.get_graph(fetch=True)\n", + "G = G.to_undirected()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [ - "# read the data using cuDF\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a Graph \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Main Graph\n", + "\tNumber of Vertices: 34\n", + "\tNumber of Edges: 156\n" + ] + } + ], "source": [ "print(\"Main Graph\")\n", "print(\"\\tNumber of Vertices: \" + str(G.number_of_vertices()))\n", @@ -138,9 +131,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "non-success value returned from cugraph_core_number: CUGRAPH_UNKNOWN_ERROR", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + 
"\u001b[1;32m/home/nfs/ralphl/datasets-api/notebooks/algorithms/cores/kcore.ipynb Cell 10\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39m# Call k-cores on the graph\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m kcg \u001b[39m=\u001b[39m cugraph\u001b[39m.\u001b[39;49mk_core(G)\n", + "File \u001b[0;32m~/miniconda3/envs/cugraph_dev/lib/python3.9/site-packages/cugraph-22.2.0a0+366.gabd2f0ef-py3.9-linux-x86_64.egg/cugraph/cores/k_core.py:103\u001b[0m, in \u001b[0;36mk_core\u001b[0;34m(G, k, core_number)\u001b[0m\n\u001b[1;32m 99\u001b[0m core_number \u001b[39m=\u001b[39m G\u001b[39m.\u001b[39madd_internal_vertex_id(core_number, \u001b[39m'\u001b[39m\u001b[39mvertex\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[1;32m 100\u001b[0m cols)\n\u001b[1;32m 102\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 103\u001b[0m core_number \u001b[39m=\u001b[39m _call_plc_core_number(G)\n\u001b[1;32m 104\u001b[0m core_number \u001b[39m=\u001b[39m core_number\u001b[39m.\u001b[39mrename(\n\u001b[1;32m 105\u001b[0m columns\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mcore_number\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mvalues\u001b[39m\u001b[39m\"\u001b[39m}, copy\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m\n\u001b[1;32m 106\u001b[0m )\n\u001b[1;32m 108\u001b[0m \u001b[39mif\u001b[39;00m k \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/envs/cugraph_dev/lib/python3.9/site-packages/cugraph-22.2.0a0+366.gabd2f0ef-py3.9-linux-x86_64.egg/cugraph/cores/k_core.py:27\u001b[0m, in \u001b[0;36m_call_plc_core_number\u001b[0;34m(G)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_call_plc_core_number\u001b[39m(G):\n\u001b[1;32m 26\u001b[0m vertex, core_number \u001b[39m=\u001b[39m \\\n\u001b[0;32m---> 27\u001b[0m pylibcugraph_core_number(\n\u001b[1;32m 28\u001b[0m resource_handle\u001b[39m=\u001b[39;49mResourceHandle(),\n\u001b[1;32m 29\u001b[0m 
graph\u001b[39m=\u001b[39;49mG\u001b[39m.\u001b[39;49m_plc_graph,\n\u001b[1;32m 30\u001b[0m degree_type\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 31\u001b[0m do_expensive_check\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m\n\u001b[1;32m 32\u001b[0m )\n\u001b[1;32m 34\u001b[0m df \u001b[39m=\u001b[39m cudf\u001b[39m.\u001b[39mDataFrame()\n\u001b[1;32m 35\u001b[0m df[\u001b[39m\"\u001b[39m\u001b[39mvertex\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m vertex\n", + "File \u001b[0;32mcore_number.pyx:124\u001b[0m, in \u001b[0;36mpylibcugraph.core_number.core_number\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mutils.pyx:51\u001b[0m, in \u001b[0;36mpylibcugraph.utils.assert_success\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: non-success value returned from cugraph_core_number: CUGRAPH_UNKNOWN_ERROR" + ] + } + ], "source": [ "# Call k-cores on the graph\n", "kcg = cugraph.k_core(G) " @@ -267,7 +276,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.13 ('cugraph_dev')", "language": "python", "name": "python3" }, @@ -285,7 +294,7 @@ }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "8a663d26f441a9657bbd22051a1abab57e1b3709a9f7822414e6eae68c6232e8" } } }, diff --git a/notebooks/algorithms/layout/Force-Atlas2.ipynb b/notebooks/algorithms/layout/Force-Atlas2.ipynb index 90c39294cca..00fb9318790 100644 --- a/notebooks/algorithms/layout/Force-Atlas2.ipynb +++ b/notebooks/algorithms/layout/Force-Atlas2.ipynb @@ -20,6 +20,7 @@ "| -----------------|------------|------------------|-----------------|----------------|\n", "| Hugo Linsenmaier | 11/16/2020 | created | 0.17 | GV100, CUDA 11.0\n", "| Brad Rees | 01/11/2022 | tested / updated | 22.02 nightly | RTX A6000 CUDA 11.5\n", + "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5\n", " " ] }, @@ -49,7 +50,6 @@ 
"outputs": [], "source": [ "# Import RAPIDS libraries\n", - "\n", "import cudf\n", "import cugraph\n", "import time" @@ -94,8 +94,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the path to the test data \n", - "datafile = '../../data/netscience.csv'" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import netscience" ] }, { @@ -127,18 +127,7 @@ "metadata": {}, "outputs": [], "source": [ - "edges_gdf = cudf.read_csv(datafile, names=[\"source\", \"destination\", \"weights\"],\n", - " delimiter=' ', dtype=[\"int32\", \"int32\", \"float32\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(edges_gdf, renumber=False)\n", + "G = netscience.get_graph()\n", "G.number_of_nodes(), G.number_of_edges()" ] }, @@ -187,6 +176,8 @@ "metadata": {}, "outputs": [], "source": [ + "edges_gdf = netscience.get_edgelist()\n", + "\n", "connected = calc_connected_edges(pos_gdf,\n", " edges_gdf,\n", " node_x=\"x\",\n", @@ -194,8 +185,8 @@ " node_x_dtype=\"float32\",\n", " node_y_dtype=\"float32\",\n", " node_id=\"vertex\",\n", - " edge_source=\"source\",\n", - " edge_target=\"destination\",\n", + " edge_source=\"src\",\n", + " edge_target=\"dst\",\n", " edge_aggregate_col=None,\n", " edge_render_type=\"direct\",\n", " )" @@ -234,7 +225,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('cugraph_dev')", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -248,11 +239,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.7" }, "vscode": { "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" } } }, diff --git a/notebooks/applications/CostMatrix.ipynb b/notebooks/applications/CostMatrix.ipynb deleted 
file mode 100644 index 687b1526069..00000000000 --- a/notebooks/applications/CostMatrix.ipynb +++ /dev/null @@ -1,641 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to compute a _Cost Matrix_ by replicating data\n", - "# Skip notebook test\n", - "\n", - "### Approach\n", - "A simple approach to creating a cost matrix is to run All-Source Shortest Path (ASSP), however cuGraph currently does not have an All-Source Shortest Path (ASSP) algorithm. One is on the roadmap, based on Floyd-Warshall, but that doesn't help us today. Luckily there is a work around if the graph to be processed is small. The hack is to run ASSP by creating a lot of copies of the graph and running the Single Source Shortest Path (SSSP) on one seed per graph copy. Since each SSSP run within its own disjoint component, there is no issue with path collisions between seeds. \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Notebook Organization\n", - "The first portion of the notebook discusses each step independently. It gives insight into what is going on and how fast each step takes.\n", - "\n", - "The second section puts it all the steps together in a single function and times how long with would take to compute the matrix\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Data\n", - "\n", - "In this notebook we will use the email-Eu-core\n", - "\n", - "* Number of Vertices: 1,005\n", - "* Number of Edges: 25,571\n", - "\n", - "We are using this dataset since it is small with a few communities, meaning that there are paths to be found." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Notebook Revisions\n", - "\n", - "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", - "| --------------|------------|------------------|-----------------|----------------|\n", - "| Brad Rees | 06/21/2022 | created | 22.08 | V100 w 32 GB, CUDA 11.5\n", - "| Don Acosta | 06/28/2022 | modified | 22.08 | V100 w 32 GB, CUDA 11.5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### References\n", - "\n", - "* https://www.sciencedirect.com/topics/mathematics/cost-matrix\n", - "* https://en.wikipedia.org/wiki/Shortest_path_problem\n", - "\n", - "Dataset\n", - "* Hao Yin, Austin R. Benson, Jure Leskovec, and David F. Gleich. Local Higher-order Graph Clustering. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017.\n", - "\n", - "* J. Leskovec, J. Kleinberg and C. Faloutsos. Graph Evolution: Densification and Shrinking Diameters. ACM Transactions on Knowledge Discovery from Data (ACM TKDD), 1(1), 2007. 
http://www.cs.cmu.edu/~jure/pubs/powergrowth-tkdd.pdf\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# system and other\n", - "import time\n", - "from time import perf_counter\n", - "import math\n", - "\n", - "# rapids\n", - "import cugraph\n", - "import cudf" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "-----\n", - "# Reading the data\n", - "\n", - "Let's start with data read" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# simple function to read in the CSV data file\n", - "def read_data_cudf(datafile):\n", - " gdf = cudf.read_csv(datafile,\n", - " delimiter=\" \",\n", - " header=None,\n", - " names=['src','dst', 'wt'])\n", - " return gdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# function to determine the number of nodes in the dataset\n", - "def find_number_of_nodes(df):\n", - " node = cudf.concat([df['src'], df['dst']])\n", - " node = node.unique()\n", - " return len(node)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read the data and verify that it is zero based (e.g. first vertex is 0)\n", - "**IMPORTANT:** The node numbering must be zero based. We use the starting index on the replicated graph to be one larger than the number of vertices. If the starting index is not zero, then the graph copies will overlap in index space and not be independent (disjoint). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t1 = perf_counter()\n", - "gdf = read_data_cudf('../data/email-Eu-core.csv')\n", - "read_t = perf_counter() - t1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(f\" read {len(gdf)} edges in {read_t} seconds\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# verify that the starting ID is zero\n", - "min([gdf['src'].min(), gdf['dst'].min()])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# check the max ID\n", - "max([gdf['src'].max(), gdf['dst'].max()])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# the number of nodes should be one greater than the max ID\n", - "# that is the ID that we start the next instance of the data at\n", - "offset = find_number_of_nodes(gdf)\n", - "print(offset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Now let's dive into how to replicate the data\n", - "We will use a model that doubles the data at each pass. That is a lot faster \n", - "than adding one copy at a time. 
\n", - "The number of disjoint versions of the data will be a power of 2.\n", - "Although the power of 2 replication results in faster data set growth and Graph building, the simple order one replication is shown here for illustration purposes.\n", - "\n", - "\n", - "![Data Duplicated](../../notebooks/img/graph_after_replication.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This function creates additional version of the data \n", - "\n", - "def make_data(base_df, N):\n", - " id = find_number_of_nodes(base_df)\n", - " _d = base_df\n", - "\n", - " for x in range(N):\n", - " tmp = _d.copy()\n", - " tmp['src'] += id\n", - " tmp['dst'] += id\n", - " _d = cudf.concat([_d,tmp])\n", - " id = id * 2\n", - " return _d" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "_ = make_data(gdf, 3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "gdf2 = make_data(gdf, 3)\n", - "print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# simple print to show tha there is not a lot more data\n", - "# print # of Edges and # of Nodes\n", - "print(f\"Old {len(gdf)} {find_number_of_nodes(gdf)}\")\n", - "print(f\"New {len(gdf2)} {find_number_of_nodes(gdf2)}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Build the ghost node connection set\n", - "A ghost node is an artificially added node to parallelize/simulate the all-points shortest path algorithm which is not yet supported.\n", - "After the ghost node is added, the 2nd hop is actually the all points shortest path.\n", - "The Ghost node is later removed after the Shortest path algorithms are run.\n", - "\n", - "![Ghost Node](../../notebooks/img/graph_after_ghost.png)\n", - "\n", - "The Ghost Node is 
connected to a different corresponding node in each replication so all sources are covered.\n", - "\n", - "In this simple example of a four-node 'square' graph after complete replication and adding the ghost node, the graph looks like this:\n", - "\n", - "![Ghost Node](../../notebooks/img/Full-four_node_replication.png)\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def add_ghost_node(_df, N):\n", - " # get the size of the graph. That number will be the ghost node ID\n", - " ghost_node_id = find_number_of_nodes(_df)\n", - " \n", - " num_copies = math.floor(math.pow(2, N))\n", - "\n", - " seeds = cudf.DataFrame()\n", - " seeds['dst'] = [((offset * x) + x) for x in range(num_copies)]\n", - " seeds['src'] = ghost_node_id\n", - " \n", - " _d = cudf.concat([_df, seeds])\n", - " \n", - " return _d, ghost_node_id" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "_, _ = add_ghost_node(gdf2, 10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf_with_ghost, ghost_id = add_ghost_node(gdf2, 10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create an Empty directed Graph" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "G = cugraph.Graph(directed=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Populate the new graph with an edgelist containing\n", - "* The original Data\n", - "* The replicated data copies\n", - "* Each replication connected to the Ghost Node by a single edge from a different node\n", - "in each copy of the graph." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%time\n", - "G.from_cudf_edgelist(gdf_with_ghost, source='src', destination='dst', renumber=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%time\n", - "G.number_of_edges()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run Single Source Shortest Path (SSSP) from the ghost node\n", - "The single Ghost node source becomes a all-source shortest path after one hop since all the\n", - "replicated data is connected through that node. This will include extraneous ghost node related data which will be removed in later steps." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "X = cugraph.sssp(G, ghost_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X = cugraph.sssp(G, ghost_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This result will contain a ghost node like the simple example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Now reset vertex IDs and convert to a cost matrix\n", - "All edges with the ghost node as a source are removed here." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# drop the ghost node which doesnt exist so remove from matrix.\n", - "X = X[X['predecessor'] != ghost_id]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Apply the CuGraph filter which removes all nodes not encountered during the graph traversal. In this case the SSSP." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# drop unreachable\n", - "X = cugraph.filter_unreachable(X)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Remove the path cost that was incurred by going to the single seed in each copy from the ghost node." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# adjust distances so that they don't go to the ghost node\n", - "X['distance'] -= 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Now the Ghost node and tangential edges are removed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Calculate the seed for each copy. This is where it is critical that the original graph node numbering is zero based." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# add a new column for the seed\n", - "# since each seed was a different component with a different offset amount, exploit that to determine the seed number\n", - "X['seed'] = (X['vertex'] / offset).astype(int)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now adjust all vertices to be in the correct range\n", - "# resets the seed number to the\n", - "X['v2'] = X['vertex'] - (X['seed'] * offset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Finally just pull out the cost matrix\n", - "cost = X.drop(columns=['vertex', 'predecessor'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cost.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup \n", - "del G\n", - "del X\n", - "del gdf_with_ghost\n", - "del gdf2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Section 2: Do it all in a single function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Set the number of replications - 10 will produce 1,024 graphs\n", - "N = 10" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def build_cost_matrix(_gdf):\n", - " data = make_data(_gdf, N)\n", - " gdf_with_ghost, ghost_id = add_ghost_node(data, N)\n", - " \n", - " G = cugraph.Graph(directed=True)\n", - " G.from_cudf_edgelist(gdf_with_ghost, source='src', destination='dst', renumber=False)\n", - " \n", - " X = cugraph.sssp(G, 
ghost_id)\n", - " \n", - " X = X[X['predecessor'] != ghost_id]\n", - " X = cugraph.filter_unreachable(X)\n", - " X['distance'] -= 1\n", - " X['seed'] = (X['vertex'] / offset).astype(int)\n", - " X['v2'] = X['vertex'] - (X['seed'] * offset)\n", - " cost = X.drop(columns=['vertex', 'predecessor'])\n", - " \n", - " return cost" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "CM = build_cost_matrix(gdf)\n", - "CM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "CM = build_cost_matrix(gdf)\n", - "CM.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "___\n", - "Copyright (c) 2022, NVIDIA CORPORATION.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License.\n", - "___" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "cugraph_dev", - "language": "python", - "name": "cugraph_dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "cee8a395f2f0c5a5bcf513ae8b620111f4346eff6dc64e1ea99c951b2ec68604" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/centrality/Centrality.ipynb b/notebooks/centrality/Centrality.ipynb new file mode 100644 index 00000000000..dbc2a0e18a8 --- /dev/null +++ b/notebooks/centrality/Centrality.ipynb @@ -0,0 +1,386 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Centrality" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we will compute vertex centrality scores using the various cuGraph algorithms. We will then compare the similarities and differences.\n", + "\n", + "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", + "| --------------|------------|------------------|-----------------|----------------|\n", + "| Brad Rees | 04/16/2021 | created | 0.19 | GV100, CUDA 11.0\n", + "| | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4\n", + "| Ralph Liu | 06/22/2022 | test/update | 22.08 | T100, Cuda 11.5\n", + "\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Centrality is measure of how important, or central, a node or edge is within a graph. It is useful for identifying influencer in social networks, key routing nodes in communication/computer network infrastructures, \n", + "\n", + "The seminal paper on centrality is: Freeman, L. C. (1978). 
Centrality in social networks conceptual clarification. Social networks, 1(3), 215-239.\n", + "\n", + "\n", + "__Degree centrality__ – _done but needs an API_
\n", + "Degree centrality is based on the notion that whoever has the most connections must be important. \n", + "\n", + "
\n", + " Cd(v) = degree(v)\n", + "
\n", + "\n", + "cuGraph currently does not have a Degree Centrality function call. However, since Degree Centrality is just the degree of a node, we can use _G.degree()_ function.\n", + "Degree Centrality for a Directed graph can be further divided in _indegree centrality_ and _outdegree centrality_ and can be obtained using _G.degrees()_\n", + "\n", + "\n", + "___Closeness centrality – coming soon___
\n", + "Closeness is a measure of the shortest path to every other node in the graph. A node that is close to every other node can reach every other node in the fewest number of hops, which means that it has greater influence on the network versus a node that is not close.\n", + "\n", + "__Betweenness Centrality__<br>
\n", + "Betweenness is a measure of the number of shortest paths that cross through a node, or over an edge. A node with high betweenness means that it had a greater influence on the flow of information. \n", + "\n", + "Betweenness centrality of a node 𝑣 is the sum of the fraction of all-pairs shortest paths that pass through 𝑣\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "To speedup runtime of betweenness centrailty, the metric can be computed on a limited number of nodes (randomly selected) and then used to estimate the other scores. For this example, the graphs are relatively small (under 5,000 nodes) so betweenness on every node will be computed.\n", + "\n", + "___Eigenvector Centrality - coming soon___
\n", + "Eigenvectors can be thought of as the balancing points of a graph, or center of gravity of a 3D object. High centrality means that more of the graph is balanced around that node.\n", + "\n", + "__Katz Centrality__
\n", + "Katz is a variant of degree centrality and of eigenvector centrality. \n", + "Katz centrality is a measure of the relative importance of a node within the graph based on measuring the influence across the total number of walks between vertex pairs. \n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "See:\n", + "* [Katz on Wikipedia](https://en.wikipedia.org/wiki/Katz_centrality) for more details on the algorithm.\n", + "* https://www.sci.unich.it/~francesc/teaching/network/katz.html\n", + "\n", + "__PageRank__
\n", + "PageRank is classified as both a Link Analysis tool and a centrality measure. PageRank is based on the assumption that important nodes point (directed edge) to other important nodes. From a social network perspective, the question is who do you seek for an answer and then who does that person seek. PageRank is good when there is implied importance in the data, for example a citation network, web page linkages, or trust networks. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Data\n", + "We will be using the Zachary Karate club dataset \n", + "*W. W. Zachary, An information flow model for conflict and fission in small groups, Journal of\n", + "Anthropological Research 33, 452-473 (1977).*\n", + "\n", + "\n", + "![Karate Club](../img/zachary_black_lines.png)\n", + "\n", + "\n", + "Because the test data has vertex IDs starting at 1, the auto-renumber feature of cuGraph (mentioned above) will be used so the starting vertex ID is zero for maximum efficiency. The resulting data will then be auto-unrenumbered, making the entire renumbering process transparent to users." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the modules\n", + "import cugraph\n", + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd \n", + "from IPython.display import display_html " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Functions\n", + "using underscore variable names to avoid collisions. 
\n", + "non-underscore names are expected to be global names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute Centrality\n", + "# the centrality calls are very straightforward with the graph being the primary argument\n", + "# we are using the default argument values for all centrality functions\n", + "\n", + "def compute_centrality(_graph) :\n", + " # Compute Degree Centrality\n", + " _d = _graph.degree()\n", + " \n", + " # Compute the Betweenness Centrality\n", + " _b = cugraph.betweenness_centrality(_graph)\n", + "\n", + " # Compute Katz Centrality\n", + " _k = cugraph.katz_centrality(_graph)\n", + " \n", + " # Compute PageRank Centrality\n", + " _p = cugraph.pagerank(_graph)\n", + " \n", + " return _d, _b, _k, _p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print function\n", + "# being lazy and requiring that the dataframe names are not changed versus passing them in\n", + "def print_centrality(_n):\n", + " dc_top = dc.sort_values(by='degree', ascending=False).head(_n).to_pandas()\n", + " bc_top = bc.sort_values(by='betweenness_centrality', ascending=False).head(_n).to_pandas()\n", + " katz_top = katz.sort_values(by='katz_centrality', ascending=False).head(_n).to_pandas()\n", + " pr_top = pr.sort_values(by='pagerank', ascending=False).head(_n).to_pandas()\n", + " \n", + " df1_styler = dc_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Degree').hide_index()\n", + " df2_styler = bc_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Betweenness').hide_index()\n", + " df3_styler = katz_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Katz').hide_index()\n", + " df4_styler = pr_top.style.set_table_attributes(\"style='display:inline'\").set_caption('PageRank').hide_index()\n", + "\n", + " 
display_html(df1_styler._repr_html_()+df2_styler._repr_html_()+df3_styler._repr_html_()+df4_styler._repr_html_(), raw=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a graph using the imported Dataset object\n", + "G = karate.get_graph(fetch=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compute Centrality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc, bc, katz, pr = compute_centrality(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "Typically, analysts just look at the top 10% of results. Basically just those vertices that are the most central or important. \n", + "The karate data has 32 vertices, so let's round a little and look at the top 5 vertices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_centrality(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A Different Dataset\n", + "The Karate dataset is not that large or complex, which makes it a perfect test dataset since it is easy to visually verify results. 
Let's look at a larger dataset with a lot more edges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import a different dataset object\n", + "from cugraph.experimental.datasets import netscience" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "G = netscience.get_graph(fetch=True)\n", + "(G.number_of_nodes(), G.number_of_edges())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc, bc, katz, pr = compute_centrality(G)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_centrality(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now see a larger discrepancy between the centrality scores and which nodes rank highest.\n", + "Which centrality measure to use is left to the analyst to decide and does require insight into the difference algorithms and graph structure." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### And One More Dataset\n", + "Let's look at a Cyber dataset. 
The vertex ID are IP addresses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import a different dataset object\n", + "from cugraph.experimental.datasets import cyber" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the edgelist\n", + "gdf = cyber.get_edgelist(fetch=True)\n", + "\n", + "# Create a Graph\n", + "G = cugraph.Graph()\n", + "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(G.number_of_nodes(), G.number_of_edges())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc, bc, katz, pr = compute_centrality(G)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_centrality(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are differences in how each centrality measure ranks the nodes. In some cases, every algorithm returns similar results, and in others, the results are different. Understanding how the centrality measure is computed and what edge represent is key to selecting the right centrality metric." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "Copyright (c) 2019-2021, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.6.9 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/img/Full-four_node_replication.png b/notebooks/img/Full-four_node_replication.png deleted file mode 100644 index 8cbc3cd1dca..00000000000 Binary files a/notebooks/img/Full-four_node_replication.png and /dev/null differ diff --git a/notebooks/img/graph_after_ghost.png b/notebooks/img/graph_after_ghost.png deleted file mode 100644 index 256ac9b5425..00000000000 Binary files a/notebooks/img/graph_after_ghost.png and /dev/null differ diff --git a/notebooks/img/graph_after_replication.png b/notebooks/img/graph_after_replication.png deleted file mode 100644 index 3fba876e899..00000000000 Binary files a/notebooks/img/graph_after_replication.png and /dev/null differ diff --git a/notebooks/link_analysis/HITS.ipynb b/notebooks/link_analysis/HITS.ipynb index b564acb565e..91a78473dae 100755 --- a/notebooks/link_analysis/HITS.ipynb +++ b/notebooks/link_analysis/HITS.ipynb @@ -12,13 +12,13 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Created: 06/09/2020\n", - "* Updated: 08/16/2020\n", + "* Updated: 06/22/2022\n", "\n", "RAPIDS Versions: 0.15 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, CUDA 10.0\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "\n", "## Introduction\n", @@ -139,8 +139,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the path to the test data 
\n", - "datafile='../data/karate-data.csv'" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -158,6 +158,7 @@ "outputs": [], "source": [ "# Read the data, this also created a NetworkX Graph \n", + "datafile = \"../data/karate-data.csv\"\n", "file = open(datafile, 'rb')\n", "Gnx = nx.read_edgelist(file)" ] @@ -197,26 +198,6 @@ "# cuGraph" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. However a third, `weight`, column is also possible" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Read the data \n", - "gdf = cudf.read_csv(datafile, names=[\"src\", \"dst\"], delimiter='\\t', dtype=[\"int32\", \"int32\"] )" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -230,9 +211,7 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph(fetch=True)" ] }, { @@ -379,9 +358,9 @@ ], "metadata": { "kernelspec": { - "display_name": "cugraph_dev", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "cugraph_dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -393,7 +372,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git 
a/notebooks/link_analysis/Pagerank.ipynb b/notebooks/link_analysis/Pagerank.ipynb index 2ee8ca045c3..ebd8d32905b 100755 --- a/notebooks/link_analysis/Pagerank.ipynb +++ b/notebooks/link_analysis/Pagerank.ipynb @@ -13,13 +13,13 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Created: 08/13/2019\n", - "* Updated: 04/06/2022\n", + "* Updated: 06/22/2022\n", "\n", - "RAPIDS Versions: 22.04 \n", + "RAPIDS Versions: 22.08 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, CUDA 11.5\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "\n", "## Introduction\n", @@ -129,8 +129,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the path to the test data \n", - "datafile='../data/karate-data.csv'" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -147,7 +147,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Read the data, this also creates a NetworkX Graph \n", + "# Read the data, this also creates a NetworkX Graph\n", + "datafile = \"../data/karate-data.csv\"\n", "file = open(datafile, 'rb')\n", "Gnx = nx.read_edgelist(file)" ] @@ -187,26 +188,6 @@ "# cuGraph" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in the data - GPU\n", - "cuGraph graphs can be created from cuDF, dask_cuDF and Pandas dataframes\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. 
However a third, `weight`, column is also possible" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Read the data \n", - "gdf = cudf.read_csv(datafile, names=[\"src\", \"dst\"], delimiter='\\t', dtype=[\"int32\", \"int32\"] )" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -220,8 +201,7 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.from_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph(fetch=True)" ] }, { @@ -446,7 +426,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -461,6 +441,11 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index e3c6e7fa4cc..1c94ec2a023 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -17,12 +17,12 @@ "\n", " Original Authors: Brad Rees\n", " Created: 10/14/2019\n", - " Last Edit: 08/16/2020\n", + " Last Edit: 06/22/2022\n", "\n", - "RAPIDS Versions: 0.14\n", + "RAPIDS Versions: 22.08\n", "\n", "Test Hardware\n", - "* GV100 32G, CUDA 10.2\n" + "* Tesla V100 32G, CUDA 11.5\n" ] }, { @@ -221,8 +221,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Read the CSV datafile using cuDF\n", - "data file is actually _tab_ separated, so we need to set the delimiter" + "### Create an Edgelist" ] }, { @@ -231,10 +230,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Test file \n", - "datafile='../data/karate-data.csv'\n", - "\n", - "gdf = 
cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "from cugraph.experimental.datasets import karate\n", + "gdf = karate.get_edgelist()" ] }, { @@ -271,8 +268,8 @@ "outputs": [], "source": [ "# create a Graph \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph()\n", + "G = G.to_undirected()" ] }, { @@ -472,20 +469,25 @@ "---\n", "### It's that easy with cuGraph\n", "\n", - "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License.\n", "___" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "cugraph_dev", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "cugraph_dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -497,7 +499,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/link_prediction/Overlap-Similarity.ipynb b/notebooks/link_prediction/Overlap-Similarity.ipynb index d71078ed061..5b3633f4014 100755 --- a/notebooks/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/link_prediction/Overlap-Similarity.ipynb @@ -16,7 +16,8 @@ "| --------------|------------|------------------|-----------------|--------------------|\n", "| Brad Rees | 10/14/2019 | created | 0.08 | GV100, CUDA 10.0 |\n", "| | 08/16/2020 | upadted | 0.12 | GV100, CUDA 10.0 |\n", - "| | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n" + "| | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", + "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5 |\n" ] }, { @@ -239,8 +240,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Read the CSV datafile using cuDF\n", - "data file is actually _tab_ separated, so we need to set the delimiter" + "### Import a Dataset Object" ] }, { @@ -249,10 +249,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Test file \n", - "datafile='../data/karate-data.csv'\n", - "\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "from cugraph.experimental.datasets import karate\n", + "gdf = karate.get_edgelist(fetch=True)" ] }, 
{ @@ -289,8 +287,8 @@ "outputs": [], "source": [ "# create a Graph \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph()\n", + "G = G.to_undirected()" ] }, { @@ -582,9 +580,9 @@ ], "metadata": { "kernelspec": { - "display_name": "cugraph_dev", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "cugraph_dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -596,7 +594,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/sampling/RandomWalk.ipynb b/notebooks/sampling/RandomWalk.ipynb index afceff5378d..caacf909259 100644 --- a/notebooks/sampling/RandomWalk.ipynb +++ b/notebooks/sampling/RandomWalk.ipynb @@ -9,9 +9,10 @@ "In this notebook, we will compute the Random Walk from a set of seeds using cuGraph. \n", "\n", "\n", - "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", - "| --------------|------------|--------------|-----------------|----------------|\n", - "| Brad Rees | 04/20/2021 | created | 0.19 | GV100, CUDA 11.0\n", + "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", + "| --------------|------------|----------------|-----------------|----------------|\n", + "| Brad Rees | 04/20/2021 | created | 0.19 | GV100, CUDA 11.0\n", + "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5\n", "\n", "Currently NetworkX does not have a random walk function. There is code on StackOverflow that generats a random walk by getting a vertice and then randomly selection a neighbor and then repeating the process. 
" ] @@ -40,7 +41,10 @@ "source": [ "# Import the modules\n", "import cugraph\n", - "import cudf" + "import cudf\n", + "\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate" ] }, { @@ -49,11 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Read The Data\n", - "# Define the path to the test data \n", - "datafile='../data/karate-data.csv'\n", - "\n", - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "gdf = karate.get_edgelist(fetch=True)" ] }, { @@ -156,7 +156,7 @@ "metadata": {}, "source": [ "-----\n", - "Copyright (c) 2021, NVIDIA CORPORATION.\n", + "Copyright (c) 2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -166,9 +166,9 @@ ], "metadata": { "kernelspec": { - "display_name": "cugraph_dev", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "cugraph_dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -180,7 +180,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb index a26becbb99f..03858b3e52a 100755 --- a/notebooks/structure/Renumber-2.ipynb +++ b/notebooks/structure/Renumber-2.ipynb @@ -20,13 +20,13 @@ "| Brad Rees | 08/13/2019 | created |\n", "| Brad Rees | 07/08/2020 | updated |\n", "| Ralph Liu | 06/01/2022 | docs & code change |\n", + "| | 06/22/2022 | update |\n", "\n", - "RAPIDS Versions: 0.13 \n", - "cuGraph Version: 22.06 \n", + "RAPIDS Versions: 22.08 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, 
CUDA 11.5\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "\n", "## Introduction\n", @@ -83,12 +83,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Read the data\n", - "# the file contains an index column that will be ignored\n", - "\n", - "datafile='../data/cyber.csv'\n", - "\n", - "gdf = cudf.read_csv(datafile, delimiter=',', names=['idx','srcip','dstip'], dtype=['int32','str', 'str'], skiprows=1, usecols=['srcip', 'dstip'] )" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import cyber\n", + "gdf = cyber.get_edgelist(fetch=True)" ] }, { @@ -104,6 +101,9 @@ "metadata": {}, "outputs": [], "source": [ + "# trim\n", + "gdf = gdf[1:]\n", + "\n", "# take a peek at the data\n", "gdf.head()" ] @@ -115,8 +115,8 @@ "outputs": [], "source": [ "# Since IP columns are strings, we first need to convert them to integers\n", - "gdf['src_ip'] = gdf['srcip'].str.ip2int()\n", - "gdf['dst_ip'] = gdf['dstip'].str.ip2int()" + "gdf['src_ip'] = gdf['src'].str.ip2int()\n", + "gdf['dst_ip'] = gdf['dst'].str.ip2int()" ] }, { @@ -225,7 +225,7 @@ "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -235,11 +235,8 @@ } ], "metadata": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - }, "kernelspec": { - "display_name": "Python 3.6.9 64-bit", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -253,7 +250,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb index 8c7b4e615eb..903ed9df389 100755 --- a/notebooks/structure/Renumber.ipynb +++ b/notebooks/structure/Renumber.ipynb @@ -16,13 +16,13 @@ "Notebook Credits\n", "* Original Authors: Chuck Hastings and Bradley Rees\n", "* Created: 08/13/2019\n", - "* Updated: 07/08/2020\n", + "* Updated: 06/22/2022\n", "\n", - "RAPIDS Versions: 0.15 \n", + "RAPIDS Versions: 22.08 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, CUDA 10.2\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "## Introduction\n", "\n", @@ -63,7 +63,7 @@ "import pandas as pd\n", "import numpy as np\n", "import networkx as nx\n", - "from cugraph.structure import NumberMap\n" + "from cugraph.structure import NumberMap" ] }, { @@ -331,7 +331,7 @@ "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -342,7 +342,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -356,7 +356,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/structure/Symmetrize.ipynb b/notebooks/structure/Symmetrize.ipynb index 30b9d5dc618..f0c57baf070 100755 --- a/notebooks/structure/Symmetrize.ipynb +++ b/notebooks/structure/Symmetrize.ipynb @@ -11,13 +11,13 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Created: 08/13/2019\n", - "* Updated: 03/02/2020\n", + "* Updated: 06/22/2022\n", "\n", - "RAPIDS Versions: 0.13 \n", + "RAPIDS Versions: 22.08 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, CUDA 10.2\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "\n", "## Introduction\n", @@ -80,9 +80,11 @@ "metadata": {}, "outputs": [], "source": [ - "# load the full symmetrized dataset for comparison\n", - "datafile='../data/karate-data.csv'\n", - "test_gdf = cudf.read_csv(datafile, names=[\"src\", \"dst\"], delimiter='\\t', dtype=[\"int32\", \"int32\"] )" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate\n", + "\n", + "# This is the symmetrized dataset\n", + "test_gdf = karate.get_edgelist(fetch=True)" ] }, { @@ -162,7 +164,7 @@ "metadata": {}, "source": [ "---\n", - "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -173,9 +175,9 @@ ], "metadata": { "kernelspec": { - "display_name": "cugraph_dev", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "cugraph_dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -187,7 +189,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/traversal/BFS.ipynb b/notebooks/traversal/BFS.ipynb index ce768967eb9..b9409e2f821 100755 --- a/notebooks/traversal/BFS.ipynb +++ b/notebooks/traversal/BFS.ipynb @@ -10,13 +10,13 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Feature available since 0.6\n", - "* Last Edit: 08/16/2020\n", + "* Last Edit: 06/22/2022\n", "\n", - "RAPIDS Versions: 0.14.0 \n", + "RAPIDS Versions: 22.08 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, CUDA 10.0\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "\n", "\n", @@ -94,7 +94,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Read the data using cuDF" + "# Create an Edgelist" ] }, { @@ -103,10 +103,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Read the data file\n", - "datafile='../data/karate-data.csv'\n", + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate\n", "\n", - "gdf = cudf.read_csv(datafile, names=[\"src\", \"dst\"], delimiter='\\t', dtype=[\"int32\", \"int32\"] )" + "gdf = karate.get_edgelist(fetch=True)" ] }, { @@ -257,7 +257,7 @@ "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -268,9 +268,9 @@ ], "metadata": { "kernelspec": { - "display_name": "cugraph_dev", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "cugraph_dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -282,7 +282,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4, diff --git a/notebooks/traversal/SSSP.ipynb b/notebooks/traversal/SSSP.ipynb index 3d0ce1c4234..a5e981f38c2 100755 --- a/notebooks/traversal/SSSP.ipynb +++ b/notebooks/traversal/SSSP.ipynb @@ -11,14 +11,14 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* available since release 0.6\n", - "* Last Edit: 08/16/2020\n", + "* Last Edit: 06/22/2022\n", "\n", "\n", - "RAPIDS Versions: 0.12.0 \n", + "RAPIDS Versions: 22.08 \n", "\n", "Test Hardware\n", "\n", - "* GV100 32G, CUDA 10.0\n", + "* Tesla V100 32G, CUDA 11.5\n", "\n", "\n", "\n", @@ -79,7 +79,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Read the data and adjust the vertex IDs" + "### Create an Edgelist" ] }, { @@ -88,17 +88,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Test file - using the classic Karate club dataset. 
\n", - "datafile='../data/karate-data.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf = cudf.read_csv(datafile, names=[\"src\", \"dst\"], delimiter='\\t', dtype=[\"int32\", \"int32\"])" + "# Import a built-in dataset\n", + "from cugraph.experimental.datasets import karate\n", + "\n", + "gdf = karate.get_edgelist(fetch=True)" ] }, { @@ -173,7 +166,7 @@ "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2020, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -184,7 +177,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, @@ -198,7 +191,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51" + } } }, "nbformat": 4,