Skip to content

Commit

Permalink
added SQL Analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanildoBarauna committed Apr 22, 2024
1 parent 54a7073 commit 027fd24
Showing 1 changed file with 139 additions and 41 deletions.
180 changes: 139 additions & 41 deletions notebooks/data_explorer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 84,
"metadata": {},
"outputs": [
{
Expand All @@ -18,7 +18,7 @@
"210"
]
},
"execution_count": 53,
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -61,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 85,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -170,7 +170,7 @@
"138 2024-04-22 01:02:43 "
]
},
"execution_count": 57,
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -188,7 +188,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 86,
"metadata": {},
"outputs": [
{
Expand All @@ -212,88 +212,186 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>create_date</th>\n",
" <th>extracted_at</th>\n",
" <th>code</th>\n",
" <th>symbol</th>\n",
" <th>codein</th>\n",
" <th>name</th>\n",
" <th>ask</th>\n",
" <th>bid</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>varBid</th>\n",
" <th>pctChange</th>\n",
" <th>bid</th>\n",
" <th>ask</th>\n",
" <th>timestamp</th>\n",
" <th>create_date</th>\n",
" <th>symbol</th>\n",
" <th>extracted_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>2024-04-19 17:59:58</td>\n",
" <td>2024-04-22 01:02:43</td>\n",
" <td>BRL</td>\n",
" <td>BRL-MAD</td>\n",
" <td>MAD</td>\n",
" <td>Real Brasileiro/Dirham Marroquino</td>\n",
" <td>1.9502</td>\n",
" <td>1.9489</td>\n",
" <td>1.9569</td>\n",
" <td>1.9213</td>\n",
" <td>0.0204</td>\n",
" <td>1.05</td>\n",
" <td>1.9489</td>\n",
" <td>1.9502</td>\n",
" <td>1713560398</td>\n",
" <td>2024-04-19 17:59:58</td>\n",
" <td>BRL-MAD</td>\n",
" <td>2024-04-22 01:02:43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>2024-04-21 22:02:11</td>\n",
" <td>2024-04-22 01:02:43</td>\n",
" <td>BRL</td>\n",
" <td>BRL-JPY</td>\n",
" <td>JPY</td>\n",
" <td>Real Brasileiro/Iene Japonês</td>\n",
" <td>29.5810</td>\n",
" <td>29.5650</td>\n",
" <td>29.585</td>\n",
" <td>29.549</td>\n",
" <td>0.0103</td>\n",
" <td>0.03</td>\n",
" <td>29.5650</td>\n",
" <td>29.5810</td>\n",
" <td>1713747731</td>\n",
" <td>2024-04-21 22:02:11</td>\n",
" <td>BRL-JPY</td>\n",
" <td>2024-04-22 01:02:43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>2024-04-21 19:41:23</td>\n",
" <td>2024-04-22 01:02:43</td>\n",
" <td>BRL</td>\n",
" <td>BRL-SAR</td>\n",
" <td>SAR</td>\n",
" <td>Real Brasileiro/Riyal Saudita</td>\n",
" <td>0.7203</td>\n",
" <td>0.7201</td>\n",
" <td>0.7204</td>\n",
" <td>0.7202</td>\n",
" <td>-0.0001</td>\n",
" <td>-0.01</td>\n",
" <td>0.7201</td>\n",
" <td>0.7203</td>\n",
" <td>1713739283</td>\n",
" <td>2024-04-21 19:41:23</td>\n",
" <td>BRL-SAR</td>\n",
" <td>2024-04-22 01:02:43</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" create_date extracted_at code symbol \\\n",
"105 2024-04-19 17:59:58 2024-04-22 01:02:43 BRL BRL-MAD \n",
"68 2024-04-21 22:02:11 2024-04-22 01:02:43 BRL BRL-JPY \n",
"138 2024-04-21 19:41:23 2024-04-22 01:02:43 BRL BRL-SAR \n",
" code codein name high low varBid \\\n",
"105 BRL MAD Real Brasileiro/Dirham Marroquino 1.9569 1.9213 0.0204 \n",
"68 BRL JPY Real Brasileiro/Iene Japonês 29.585 29.549 0.0103 \n",
"138 BRL SAR Real Brasileiro/Riyal Saudita 0.7204 0.7202 -0.0001 \n",
"\n",
" name ask bid varBid pctChange \n",
"105 Real Brasileiro/Dirham Marroquino 1.9502 1.9489 0.0204 1.05 \n",
"68 Real Brasileiro/Iene Japonês 29.5810 29.5650 0.0103 0.03 \n",
"138 Real Brasileiro/Riyal Saudita 0.7203 0.7201 -0.0001 -0.01 "
" pctChange bid ask timestamp create_date symbol \\\n",
"105 1.05 1.9489 1.9502 1713560398 2024-04-19 17:59:58 BRL-MAD \n",
"68 0.03 29.5650 29.5810 1713747731 2024-04-21 22:02:11 BRL-JPY \n",
"138 -0.01 0.7201 0.7203 1713739283 2024-04-21 19:41:23 BRL-SAR \n",
"\n",
" extracted_at \n",
"105 2024-04-22 01:02:43 \n",
"68 2024-04-22 01:02:43 \n",
"138 2024-04-22 01:02:43 "
]
},
"execution_count": 56,
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## Reorder columns in dataframe\n",
"df = allFiles[['create_date', 'extracted_at', 'code', 'symbol', 'name', 'ask', 'bid', 'varBid', 'pctChange' ]]\n",
"\n",
"# Change data types\n",
"df = df.astype({'ask': float, 'bid': float, 'varBid': float, 'pctChange': float})\n",
"\n",
"# Filter dataframe by code\n",
"df = df[df['code'] == 'BRL']\n",
"df = allFiles.astype({'ask': float, 'bid': float, 'varBid': float, 'pctChange': float})\n",
"\n",
"# Show the dataframe\n",
"df.head(3)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Using SQL for Data Exploration\n",
"### 3.1 Which currency has the highest ask value?"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>symbol</th>\n",
" <th>name</th>\n",
" <th>max_ask</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BRL-LBP</td>\n",
" <td>Real Brasileiro/Libra Libanesa</td>\n",
" <td>17206.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" symbol name max_ask\n",
"0 BRL-LBP Real Brasileiro/Libra Libanesa 17206.94"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pandasql import sqldf\n",
"\n",
"query = \"\"\"\n",
" SELECT symbol, name, max(ask) max_ask FROM df \n",
" where code = 'BRL' \n",
" group by symbol, name\n",
" order by 3 desc limit 1\n",
"\"\"\"\n",
"\n",
"newDf = sqldf(query, locals())\n",
"\n",
"newDf\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit 027fd24

Please sign in to comment.