From 260ad5d9d0475b0718782fdb2fe5657dcb6bf181 Mon Sep 17 00:00:00 2001 From: Oliver Alvarado Rodriguez Date: Mon, 22 Jan 2024 12:24:20 -0500 Subject: [PATCH] update sample to notebook, removed warnings chapel 1.32 and 1.33, updated release metadata --- README.md | 4 +- arachne/README.md | 8 +- arachne/arachne_sample.ipynb | 919 ++++++++++++++++++++++++ arachne/arachne_sample.py | 197 ----- arachne/client/setup.py | 4 +- arachne/server/Aggregators.chpl | 10 +- arachne/server/BreadthFirstSearch.chpl | 8 +- arachne/server/SquareCount.chpl | 2 +- arachne/server/SubgraphIsomorphism.chpl | 6 +- 9 files changed, 938 insertions(+), 220 deletions(-) create mode 100644 arachne/arachne_sample.ipynb delete mode 100644 arachne/arachne_sample.py diff --git a/README.md b/README.md index 59903c65..9690203f 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ This is an external repository to build functionality for [Arkouda](https://github.com/Bears-R-Us/Arkouda) with a focus on advanced graph processing. It is built with the same structure as [arkouda-contrib](https://github.com/Bears-R-Us/arkouda-contrib) to manage modules and easily swap between the production (`arachne`) and development (`arachne_development`) directories. ## Prerequisites -1. Download and build [Chapel](https://chapel-lang.org/download.html). **Use version 1.31.0. There are some [reported performance issues](https://github.com/chapel-lang/chapel/issues/23680) with 1.32.0 that are being looked at.** -2. Download but **do not build** [Arkouda](https://github.com/Bears-R-Us/Arkouda). **We recommend using the most recent release v2023.10.06.** +1. Download and build [Chapel](https://chapel-lang.org/download.html). **Chapel versions 1.32.0 and 1.33.0 are supported.** +2. Download but **do not build** [Arkouda](https://github.com/Bears-R-Us/Arkouda). **We recommend using the most recent release v2023.11.15.** 3. 
Follow instructions to activate the Arkouda environment and install all [prerequisites](https://github.com/Bears-R-Us/arkouda#prerequisites-toc). **We recommend using `Anaconda` to manage all dependencies.** ## Installation diff --git a/arachne/README.md b/arachne/README.md index a77ec833..30a68786 100644 --- a/arachne/README.md +++ b/arachne/README.md @@ -11,14 +11,10 @@ python3 module_configuration.py --ak_loc=/complete/path/to/arkouda/ --pkg_path=/ ``` ## Usage -To see an example on how to run and use Arachne, please use `arkouda-njit/arachne/arachne_sample.py` to build a random graph and run breadth-first search on it. This assumes that you have started an Arkouda server using `./arkouda_server` in the Arkouda home directory. The file is executed as follows: -```bash -python3 arachne_sample.py node port n m x y -``` -Where `n` is the number of vertices in the graph, `m` is the number of edges, `host` is the locale that the Arkouda server is running on, and `port` is where the Arkouda server is listening on for messages. Further, the graph is populated with `x` labels and `y` relationships. +To see an example of how to run and use Arachne, please use `arkouda-njit/arachne/arachne_sample.ipynb` to build a random property graph and run queries. This assumes that you have started an Arkouda server using `./arkouda_server` in the Arkouda home directory. 
## Testing The Arachne tests are executed from the arkouda-njit/arachne directory as follows with pytest: ```bash -python3 -m pytest test/algorithm_test.py test/class_test.py +python3 -m pytest test/algorithm_test.py test/class_test.py test/prop_graph_test.py ``` diff --git a/arachne/arachne_sample.ipynb b/arachne/arachne_sample.ipynb new file mode 100644 index 00000000..0716d5e2 --- /dev/null +++ b/arachne/arachne_sample.ipynb @@ -0,0 +1,919 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _ _ _ \n", + " / \\ _ __| | _____ _ _ __| | __ _ \n", + " / _ \\ | '__| |/ / _ \\| | | |/ _` |/ _` |\n", + " / ___ \\| | | < (_) | |_| | (_| | (_| |\n", + "/_/ \\_\\_| |_|\\_\\___/ \\__,_|\\__,_|\\__,_|\n", + " \n", + "\n", + "Client Version: v2023.11.15\n" + ] + } + ], + "source": [ + "import arkouda as ak\n", + "import arachne as ar" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "connected to arkouda server tcp://*:5555\n" + ] + } + ], + "source": [ + "# NOTE: Make sure to change the server to the appropriate name.\n", + "ak.connect(\"n51\", 5555)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "78d1993d", + "metadata": {}, + "outputs": [], + "source": [ + "n = 1_000_000\n", + "m = 1_000_000_000\n", + "k = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5d0712b5", + "metadata": {}, + "outputs": [], + "source": [ + "src_array = ak.randint(0, n, m, dtype=ak.dtype('int64'), seed=2)\n", + "dst_array = ak.randint(0, n, m, dtype=ak.dtype('int64'), seed=4)\n", + "int_array = ak.randint(-1, k, m, dtype=ak.dtype('int64'), seed=6)\n", + "uint_array = ak.randint(0, k, m, dtype=ak.dtype('uint64'), seed=8)\n", + "real_array = ak.randint(0, k, m, dtype=ak.dtype('float64'), seed=10)\n", + "bool_array = ak.randint(0, k, 
m, dtype=ak.dtype('bool'), seed=12)\n", + "strings_array = ak.random_strings_uniform(0, k, m, characters=\"abcdefghijklmonpqrstuvwxyz\", seed=14)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9c9b38b7", + "metadata": {}, + "outputs": [], + "source": [ + "prop_graph = ar.PropGraph()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "af0f2c50", + "metadata": {}, + "outputs": [], + "source": [ + "test_edge_dict = {\n", + " \"src\":src_array,\n", + " \"dst\":dst_array,\n", + " \"data1\":int_array,\n", + " \"data2\":uint_array,\n", + " \"data3\":real_array,\n", + " \"data4\":bool_array,\n", + " \"data5\":strings_array\n", + "}\n", + "test_edge_df = ak.DataFrame(test_edge_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ca0bbe6e", + "metadata": {}, + "outputs": [], + "source": [ + "prop_graph.load_edge_attributes(test_edge_df, source_column=\"src\", destination_column=\"dst\", relationship_columns=[\"data5\", \"data1\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8b8d7246", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
srcdstdata1data2data3data4data5
00266100.640015True0
101013-101.057109True0
202806100.569387False9
303646001.829369False4
403686111.348952False0
........................
999499340999999994687111.390592True0
999499341999999995598001.099502True4
999499342999999996090100.252629False0
999499343999999996867-100.460681True6
999499344999999999426001.787682True0
\n", + "

999499345 rows x 7 columns

" + ], + "text/plain": [ + " src dst data1 data2 data3 data4 data5\n", + "0 0 266 1 0 0.640015 True 0\n", + "1 0 1013 -1 0 1.057109 True 0\n", + "2 0 2806 1 0 0.569387 False 9\n", + "3 0 3646 0 0 1.829369 False 4\n", + "4 0 3686 1 1 1.348952 False 0\n", + "... ... ... ... ... ... ... ...\n", + "999499340 999999 994687 1 1 1.390592 True 0\n", + "999499341 999999 995598 0 0 1.099502 True 4\n", + "999499342 999999 996090 1 0 0.252629 False 0\n", + "999499343 999999 996867 -1 0 0.460681 True 6\n", + "999499344 999999 999426 0 0 1.787682 True 0 (999499345 rows x 7 columns)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prop_graph.get_edge_attributes()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b6c3ae3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
srcdstdata5data1
0026601
1010130-1
20280691
30364640
40368601
...............
99949934099999999468701
99949934199999999559840
99949934299999999609001
9994993439999999968676-1
99949934499999999942600
\n", + "

999499345 rows x 4 columns

" + ], + "text/plain": [ + " src dst data5 data1\n", + "0 0 266 0 1\n", + "1 0 1013 0 -1\n", + "2 0 2806 9 1\n", + "3 0 3646 4 0\n", + "4 0 3686 0 1\n", + "... ... ... ... ...\n", + "999499340 999999 994687 0 1\n", + "999499341 999999 995598 4 0\n", + "999499342 999999 996090 0 1\n", + "999499343 999999 996867 6 -1\n", + "999499344 999999 999426 0 0 (999499345 rows x 4 columns)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prop_graph.get_edge_relationships()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e94bf24e", + "metadata": {}, + "outputs": [], + "source": [ + "m = len(prop_graph)\n", + "k = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5dc37703", + "metadata": {}, + "outputs": [], + "source": [ + "int_array = ak.randint(-1, k, m, dtype=ak.dtype('int64'), seed=6)\n", + "uint_array = ak.randint(0, k, m, dtype=ak.dtype('uint64'), seed=8)\n", + "real_array = ak.randint(0, k, m, dtype=ak.dtype('float64'), seed=10)\n", + "bool_array = ak.randint(0, k, m, dtype=ak.dtype('bool'), seed=12)\n", + "strings_array = ak.random_strings_uniform(0, k, m, characters=\"abcdefghijklmonpqrstuvwxyz\", seed=14)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d849d4ce", + "metadata": {}, + "outputs": [], + "source": [ + "test_node_dict = {\n", + " \"nodes\":prop_graph.nodes(),\n", + " \"data1\":int_array,\n", + " \"data2\":uint_array,\n", + " \"data3\":real_array,\n", + " \"data4\":bool_array,\n", + " \"data5\":strings_array\n", + "}\n", + "test_node_df = ak.DataFrame(test_node_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "56581839", + "metadata": {}, + "outputs": [], + "source": [ + "prop_graph.load_node_attributes(test_node_df, node_column=\"nodes\", label_columns=[\"data5\", \"data2\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "acdeada4", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nodesdata1data2data3data4data5
00000.638154False2
11001.366654False12
22010.887981True14
33-101.219599False0
44101.191131True17
.....................
999995999995101.663987False0
999996999996-100.781876True9
999997999997101.614944False0
999998999998100.226886True0
999999999999010.014557True0
\n", + "

1000000 rows x 6 columns

" + ], + "text/plain": [ + " nodes data1 data2 data3 data4 data5\n", + "0 0 0 0 0.638154 False 2\n", + "1 1 0 0 1.366654 False 12\n", + "2 2 0 1 0.887981 True 14\n", + "3 3 -1 0 1.219599 False 0\n", + "4 4 1 0 1.191131 True 17\n", + "... ... ... ... ... ... ...\n", + "999995 999995 1 0 1.663987 False 0\n", + "999996 999996 -1 0 0.781876 True 9\n", + "999997 999997 1 0 1.614944 False 0\n", + "999998 999998 1 0 0.226886 True 0\n", + "999999 999999 0 1 0.014557 True 0 (1000000 rows x 6 columns)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prop_graph.get_node_attributes()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b193b125", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nodesdata5data2
0020
11120
22141
3300
44170
............
99999599999500
99999699999690
99999799999700
99999899999800
99999999999901
\n", + "

1000000 rows x 3 columns

" + ], + "text/plain": [ + " nodes data5 data2\n", + "0 0 2 0\n", + "1 1 12 0\n", + "2 2 14 1\n", + "3 3 0 0\n", + "4 4 17 0\n", + "... ... ... ...\n", + "999995 999995 0 0\n", + "999996 999996 9 0\n", + "999997 999997 0 0\n", + "999998 999998 0 0\n", + "999999 999999 0 1 (1000000 rows x 3 columns)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prop_graph.get_node_labels()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7091cde5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prop_graph.label_mapper[\"data5\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2343803e", + "metadata": {}, + "outputs": [], + "source": [ + "def node_filter(node_attributes):\n", + " return node_attributes[\"data2\"] == 0" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "94b1b23a", + "metadata": {}, + "outputs": [], + "source": [ + "def edge_filter(edge_attributes):\n", + " return edge_attributes[\"data1\"] > -1" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "ce31b434", + "metadata": {}, + "outputs": [], + "source": [ + "subgraph_nodes = prop_graph.subgraph_view(filter_node=node_filter)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2461eb43", + "metadata": {}, + "outputs": [], + "source": [ + "subgraph_edges = prop_graph.subgraph_view(filter_edge=edge_filter)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "3623966a", + "metadata": {}, + "outputs": [], + "source": [ + "subgraph_together = prop_graph.subgraph_view(filter_node=node_filter, filter_edge=edge_filter)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 26, + "id": "9a8f70ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Subgraph generated with edge size: 750442115\n" + ] + } + ], + "source": [ + "print(f\"Subgraph generated with edge size: {subgraph_nodes.size()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "3a608027", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Subgraph generated with edge size: 666325452\n" + ] + } + ], + "source": [ + "print(f\"Subgraph generated with edge size: {subgraph_edges.size()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "ecc5c635", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Subgraph generated with edge size: 167128950\n" + ] + } + ], + "source": [ + "print(f\"Subgraph generated with edge size: {subgraph_together.size()}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "arkouda-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/arachne/arachne_sample.py b/arachne/arachne_sample.py deleted file mode 100644 index b7fbb277..00000000 --- a/arachne/arachne_sample.py +++ /dev/null @@ -1,197 +0,0 @@ -"""Sample Arachne Script - -This script provides an example on how a graph is built in Arachne from two Arkouda arrays and then -analyzed using Arachne functions. The graphs are randomly generated by using the ak.randint function -with the range of the vertex names being picked from 0..