Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a notebook comparing nx and cuGraph using synthetic data #3135

Merged
merged 18 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions notebooks/applications/gen_550M.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gc\n",
"import os\n",
"from time import perf_counter\n",
"import numpy as np\n",
"import random\n",
"import math\n",
"\n",
"# rapids\n",
"import cugraph\n",
"import cudf\n",
"\n",
"# NetworkX libraries\n",
"import networkx as nx\n",
"\n",
"# RMAT data generator\n",
"from cugraph.generators import rmat\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_data(scale, edgefactor=16):\n",
" _gdf = rmat(\n",
" scale,\n",
" (2 ** scale) * edgefactor,\n",
" 0.57,\n",
" 0.19,\n",
" 0.19,\n",
" 42,\n",
" clip_and_flip=False,\n",
" scramble_vertex_ids=True,\n",
" create_using=None, # return edgelist instead of Graph instance\n",
" mg=False\n",
" )\n",
" print('Generating a dataframe of ' + str(len(_gdf)) + '...')\n",
" return _gdf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def gen_times(count, start_date, end_date):\n",
" range_start = start_date.timestamp()\n",
" range_end = int(end_date.timestamp())\n",
" random_list = []\n",
" for i in range(count):\n",
" random_list.append(random.randint(range_start,range_end))\n",
" return cudf.Series(random_list,name='Date', dtype=int)\n",
"# return [datetime.fromtimestamp(i) for i in random_list]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def gen_amounts(count,value_range):\n",
" random_list = []\n",
" for i in range(count):\n",
" random_list.append(random.randint(0,value_range*100))\n",
" return cudf.Series(random_list,name='amount', dtype=float).divide(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"start_time = '1/1/2022 01:00:00 AM'\n",
"end_time = '7/1/2022 01:00:00 AM'\n",
"amount_range = 25000\n",
"d1 = datetime.strptime(start_time, '%m/%d/%Y %I:%M:%S %p')\n",
"d2 = datetime.strptime(end_time, '%m/%d/%Y %I:%M:%S %p')\n",
"\n",
"df = generate_data(15)\n",
"\n",
"dates = gen_times(len(df),d1, d2)\n",
"amounts = gen_amounts(len(df),amount_range)\n",
"df['amounts'] = amounts\n",
"df['date'] = dates\n",
"len(df)\n",
"df.head(4)\n",
"df.to_csv('../data/data_500m.csv') #append mode"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cudfdev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "587ff963ecd34554a9da41c94362e2baa062d9a57502e220f049e10816826984"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading