Skip to content
This repository has been archived by the owner on Apr 30, 2020. It is now read-only.

Commit

Permalink
Commit @mzargham's exploratory work to master (#24)
Browse files Browse the repository at this point in the history
This is a rescue/recreation of #20, which unfortunately got stuck in
rebsae hell.
  • Loading branch information
teamdandelion authored Apr 18, 2019
1 parent 0cd41b7 commit d0f2d02
Show file tree
Hide file tree
Showing 32 changed files with 14,647 additions and 0 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'import_graph'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-9b2e3b4647d3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpage_ranker\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mimport_graph\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'import_graph'"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx\n",
"import json\n",
"import os\n",
"import page_ranker as pr\n",
"import import_graph as ig\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx\n",
"import json\n",
"import os\n",
"import page_ranker as pr\n",
"import import_graph as ig\n",
"import inspect_subgraph as isg\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"cwd = os.getcwd()\n",
"\n",
"rel_path = \"sample_graphs/sourcecred_sourcecred.json\"\n",
"abs_file_path = os.path.abspath(os.path.join(cwd, '..', rel_path))\n",
"\n",
"with open(abs_file_path) as json_file: \n",
" data = json.load(json_file)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"G = ig.jsonToMultiDiGraph(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"iterations = 50\n",
"\n",
"\n",
"alpha = 0\n",
"seed = {n:1/len(G.nodes) for n in G.nodes}\n",
"self_loop_wt = 1/1000\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"r, df, g = pr.pageRanker(G,\n",
" alpha,\n",
" iterations,\n",
" seed=seed,\n",
" initial_value = seed,\n",
" lazy=False,\n",
" self_loop_wt=self_loop_wt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([2.000e+00, 2.000e+00, 1.000e+00, 5.100e+01, 2.051e+03, 1.723e+03,\n",
" 9.860e+02, 5.520e+02, 5.390e+02, 2.100e+01, 1.000e+00, 0.000e+00,\n",
" 0.000e+00, 2.000e+00, 1.000e+00]),\n",
" array([-6.62636919, -6.2259882 , -5.82560721, -5.42522622, -5.02484523,\n",
" -4.62446424, -4.22408325, -3.82370226, -3.42332126, -3.02294027,\n",
" -2.62255928, -2.22217829, -1.8217973 , -1.42141631, -1.02103532,\n",
" -0.62065432]),\n",
" <a list of 15 Patch objects>)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAD8CAYAAACRkhiPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAExRJREFUeJzt3X+sZPV53/H3pzhGadKIdbm4690lF1tLVKDuxr4hSJYjUhpYoOJHJLegyFDH0hoLqlhK1CxGKpYREo1NkGhTorVYGSoXQkUwK4Njr2lsFCnYXJwtPwyEBa/NZbdwA6ntiohq8dM/5twygbk/dmbunb183y9pNHOe+Z4zz2GFPvd8z5k5qSokSW36B5NuQJI0OYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWHvmHQDyzn++ONrenp60m1I0rrxyCOP/E1VTa1k7FEfAtPT08zOzk66DUlaN5L8YKVjnQ6SpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSGHfXfGNZkTe+8b6zbO3DD+WPdnqTRLHskkGRLkj9P8mSSJ5L8Tld/V5K9SZ7pnjd09SS5Ocn+JI8m+UDfti7vxj+T5PLV2y1J0kqsZDroMPC7VfVPgTOAK5OcAuwEHqiqrcAD3TLAucDW7rEDuAV6oQFcC/wqcDpw7UJwSJImY9kQqKpDVfXd7vVPgCeBTcCFwG3dsNuAi7rXFwK3V89DwHFJNgLnAHur6pWq+ltgL7B9rHsjSToiR3RiOMk08MvAt4F3V9Uh6AUFcEI3bBPwfN9qc11tsbokaUJWHAJJfh64G/hUVf14qaEDarVEfdBn7Ugym2R2fn5+pS1Kko7QikIgyc/QC4AvVdWfduUXu2keuueXuvocsKVv9c3AwSXqb1FVu6pqpqpmpqZWdF8ESdIQVnJ1UIBbgSer6g/73toDLFzhczlwb1/9su4qoTOAH3XTRV8Dzk6yoTshfHZXkyRNyEq+J/Ah4KPAY0n2dbVPAzcAdyX5OPBD4CPde/cD5wH7gVeBjwFU1StJrgMe7sZ9tqpeGcteSJKGsmwIVNVfMHg+H+CsAeMLuHKRbe0Gdh9Jg5Kk1ePPRkhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSGGQKS1DBDQJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGraS20vuTvJSksf7an+SZF/3OLBwx7Ek00n+ru+9P+5b54NJHkuyP8nN3W0rJUkTtJLbS34R+M/A7QuFqvo3C6+T3Aj8qG/8s1W1bcB2bgF2AA/RuwXlduCrR96yJGlclj0SqKoHgYH3Au7+mv/XwB1LbSPJRuAXquovu9tP3g5cdOTtSpLGadRzAh8GXqyqZ/pqJyX5qyTfSvLhrrYJmOsbM9fVJEkTtJLpoKVcyt8/CjgEnFhVLyf5IPDlJKcy+Eb1tdhGk+ygN3XEiSeeOGKLkqTFDH0kkOQdwG8Cf7JQq6rXqurl7vUjwLPAyfT+8t/ct/pm4OBi266qXVU1U1UzU1NTw7YoSVrGKNNB/xJ4qqr+/zRPkqkkx3Sv3wtsBZ6rqkPAT5Kc0Z1HuAy4d4TPliSNwbLTQUnuAM4Ejk8yB1xbVbcCl/DWE8K/Bnw2yWHgdeCKqlo4qfxJelca/Sy9q4K8MqhB0zvvG/s2D9xw/ti3KbVi2RCoqksXqf/bAbW7gbsXGT8LnHaE/UmSVpHfGJakhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSGLRsCSXYneSnJ4321zyR5Icm+7nFe33tXJ9mf5Okk5/TVt3e1/Ul2jn9XJElHaiVHAl8Etg+o31RV27rH/QBJTqF37+FTu3X+S5JjupvP/xFwLnAKcGk3VpI0QSu5x/CDSaZXuL0LgTur6jXg+0n2A6d37+2vqucAktzZjf3eEXcsSRqbUc4JXJXk0W66aENX2wQ83zdmrqstVh8oyY4ks0lm5+fnR2hRkrSUYUPgFuB9wDbgEHBjV8+AsbVEfaCq2lVVM1U1MzU1NWSLkqTlLDsdNEhVvbjwOskXgK90i3PAlr6hm4GD3evF6pKkCRnqSCDJxr7Fi4GFK4f2AJckOTbJScBW4DvAw8DWJCcleSe9k8d7hm9bkjQOyx4JJLkDOBM4PskccC1wZpJt9KZ0DgCfAKiqJ5LcRe+E72Hgyqp6vdvOVcDXgGOA3VX1xNj3RpJ0RFZyddClA8q3LjH+euD6AfX7gfuPqDtJ0qryG8OS1DBDQJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYtGwJJdid5KcnjfbXPJXkqyaNJ7klyXFefTvJ3SfZ1jz/uW+eDSR5Lsj/JzUkG3XxekrSGVnIk8EVg+5tqe4HTqur9wF8DV/e992xVbeseV/TVbwF20Lvv8NYB25QkrbFlQ6CqHgReeVPt61V1uFt8CNi81Da6G9P/QlX9ZVUVcDtw0XAtS5LGZRznBH4b+Grf8klJ/irJt5J8uKttAub6xsx1NUnSBC17o/mlJLkGOAx8qSsdAk6sqpeTfBD4cpJTgUHz/7XEdnfQmzrixBNPHKVFSdIShj4SSHI58K+A3+qmeKiq16rq5e71I8CzwMn0/vLvnzLaDBxcbNtVtauqZqpqZmpqatgWJUnLGCoEkmwHfh+4oKpe7atPJTmme/1eeieAn6uqQ8BPkpzRXRV0GXDvyN1Lkkay7HRQkjuAM4Hjk8wB19K7GuhYYG93pedD3ZVAvwZ8Nslh4HXgiqpaOKn8SXpXGv0svXMI/ecRJEkTsGwIVNWlA8q3LjL2buDuRd6bBU47ou4kSavKbwxLUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYZApLUMENAkhq2ohBIsjvJS0ke76u9K8neJM90zxu6epLcnGR/kkeTfKBvncu78c909yiWJE3QSo8Evghsf1NtJ/BAVW0FHuiWAc6ld2/hrcAO4BbohQa9W1P+KnA6cO1CcEiSJmNFIVBVDwKvvKl8IXBb9/o24KK++u3V8xBwXJKNwDnA3qp6par+FtjLW4NFkrSGRjkn8O6qOgTQPZ/Q1TcBz/eNm+tqi9UlSROyGieGM6BWS9TfuoFkR5LZJLPz8/NjbU6S9IZ3jLDui0k2VtWhbrrnpa4+B2zpG7cZONjVz3xT/ZuDNlxVu4BdADMzMwODQlowvfO+sW7vwA3nj3V70tFslCOBPcDCFT6XA/f21S/rrhI6A/hRN130NeDsJBu6E8JndzVJ0oSs6EggyR30/oo/Pskcvat8bgDuSvJx4IfAR7rh9wPnAfuBV4GPAVTVK0muAx7uxn22qt58slmStIZWFAJVdekib501YGwBVy6ynd3A7hV3J0laVX5jWJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYZApLUsKFDIMkvJdnX9/hxkk8l+UySF/rq5/Wtc3WS/UmeTnLOeHZBkjSsoW80X1VPA9sAkhwDvADcQ+92kjdV1ef7xyc5BbgEOBV4D/CNJCdX1evD9iBJGs24poPOAp6tqh8sMeZC4M6qeq2qvk/vHsSnj+nzJUlDGFcIXALc0bd8VZJHk+xOsqGrbQKe7xsz19UkSRMycggkeSdwAfDfu9ItwPvoTRUdAm5cGDpg9VpkmzuSzCaZnZ+fH7VFSdIixnEkcC7w3ap6EaCqXqyq16vqp8AXeGPKZw7Y0rfeZuDgoA1W1a6qmqmqmampqTG0KEkaZBwhcCl9U0FJNva9dzHwePd6D3BJkmOTnARsBb4zhs+XJA1p6KuDAJL8Q+A3gE/0lf8gyTZ6Uz0HFt6rqieS3AV8DzgMXOmVQZI0WSOFQFW9CvzjN9U+usT464HrR/lMSdL4+I1hSWqYISBJDTMEJKlhhoAkNcwQkKSGGQKS1DBDQJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGjbS/QSkt6PpnfeNdXsHbjh/rNuTxskQkFaZoaKj2cjTQUkOJHksyb4ks13tXUn2Jnmme97Q1ZPk5iT7kzya5AOjfr4kaXjjOifw61W1rapmuuWdwANVtRV4oFsGOJfeDea3AjuAW8b0+ZKkIazWieELgdu617cBF/XVb6+eh4DjkmxcpR4kScsYRwgU8PUkjyTZ0dXeXVWHALrnE7r6JuD5vnXnutrfk2RHktkks/Pz82NoUZI0yDhODH+oqg4mOQHYm+SpJcZmQK3eUqjaBewCmJmZecv7kqTxGPlIoKoOds8vAfcApwMvLkzzdM8vdcPngC19q28GDo7agyRpOCOFQJKfS/KPFl4DZwOPA3uAy7thlwP3dq/3AJd1VwmdAfxoYdpIkrT2Rp0OejdwT5KFbf23qvqzJA8DdyX5OPBD4CPd+PuB84D9wKvAx0b8fEnSCEYKgap6DvjnA+ovA2cNqBdw5SifKUkaH387SJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYZApLUsKFDIMmWJH+e5MkkTyT5na7+mSQvJNnXPc7rW+fqJPuTPJ3knHHsgCRpeKPcWeww8LtV9d3uPsOPJNnbvXdTVX2+f3CSU4BLgFOB9wDfSHJyVb0+Qg+SpBEMfSRQVYeq6rvd658ATwKblljlQuDOqnqtqr5P7z7Dpw/7+ZKk0Y3lnECSaeCXgW93pauSPJpkd5INXW0T8HzfanMsHRqSpFU2cggk+XngbuBTVfVj4BbgfcA24BBw48LQAavXItvckWQ2yez8/PyoLUqSFjFSCCT5GXoB8KWq+lOAqnqxql6vqp8CX+CNKZ85YEvf6puBg4O2W1W7qmqmqmampqZGaVGStIRRrg4KcCvwZFX9YV99Y9+wi4HHu9d7gEuSHJvkJGAr8J1hP1+SNLpRrg76EPBR4LEk+7rap4FLk2yjN9VzAPgEQFU9keQu4Hv0riy60iuDJGmyhg6BqvoLBs/z37/EOtcD1w/7mZKk8fIbw5LUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSGGQKS1DBDQJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDVslJvK6Cg0vfO+SbcgaR1Z8yOBJNuTPJ1kf5Kda/35kqQ3rGkIJDkG+CPgXOAUereiPGUte5AkvWGtp4NOB/ZX1XMASe4ELqR332FJK7AaU34Hbjh/7NvU+rDW00GbgOf7lue6miRpAtb6SGDQjenrLYOSHcCObvH/JHl6VbtaPccDfzPpJsbg7bIf4L4MlP84jq0MzX+T8fvFlQ5c6xCYA7b0LW8GDr55UFXtAnatVVOrJclsVc1Muo9RvV32A9yXo9HbZT9gfe7LWk8HPQxsTXJSkncClwB71rgHSVJnTY8EqupwkquArwHHALur6om17EGS9IY1/7JYVd0P3L/Wnzsh635Kq/N22Q9wX45Gb5f9gHW4L6l6y3lZSVIj/O0gSWqYIbDKkvy77mcynkjyB5PuZ1hJPpPkhST7usd5k+5pVEl+L0klOX7SvQwjyXVJHu3+Pb6e5D2T7mlYST6X5Kluf+5JctykexpWko90/7//NMlRf6WQIbCKkvw6vW9Ev7+qTgU+P+GWRnVTVW3rHuv6vE6SLcBvAD+cdC8j+FxVvb+qtgFfAf7DpBsawV7gtKp6P/DXwNUT7mcUjwO/CTw46UZWwhBYXZ8Ebqiq1wCq6qUJ96M33AT8ewZ8WXG9qKof9y3+HOt7X75eVYe7xYfofYdoXaqqJ6tq3XzB1RBYXScDH07y7STfSvIrk25oRFd1h+u7k2yYdDPDSnIB8EJV/c9J9zKqJNcneR74Ldb3kUC/3wa+OukmWuH9BEaU5BvAPxnw1jX0/vtuAM4AfgW4K8l76yi9JGuZfbkFuI7eX5vXATfS+5/1qLTMvnwaOHttOxrOUvtRVfdW1TXANUmuBq4Crl3TBo/AcvvSjbkGOAx8aS17O1Ir2Zf1wktEV1GSP6M3HfTNbvlZ4Iyqmp9oYyNKMg18papOm3ArRyzJPwMeAF7tSgs/XXJ6Vf2viTU2oiS/CNy3Hv9NFiS5HLgCOKuqXl1u/NEuyTeB36uq2Un3shSng1bXl4F/AZDkZOCdHB0/LnXEkmzsW7yY3smvdaeqHquqE6pquqqm6f2e1QfWYwAk2dq3eAHw1KR6GVWS7cDvAxe8HQJgPfFIYBV1v4+0G9gG/F96fxX8j8l2NZwk/5XefhRwAPhEVR2aaFNjkOQAMFNV6y6ck9wN/BLwU+AHwBVV9cJkuxpOkv3AscDLXemhqrpigi0NLcnFwH8CpoD/DeyrqnMm29XiDAFJapjTQZLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSG/T+idk1/cr/U1QAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.hist(np.log10(r), bins = 15)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.apply(np.log10).plot(legend=False, figsize=(20,10),alpha=.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"top25 = list(df.T.reset_index().sort_values(iterations).tail(25).index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"isg.inspectSubGraph(G, top25, expand=False, verbose=False, label=True,pos = \"kk\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.sum(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.hist([np.log10(v) for v in list(nx.get_node_attributes(G,'total_wt').values())])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Large diffs are not rendered by default.

Loading

0 comments on commit d0f2d02

Please sign in to comment.