-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
36bcd72
commit a00d25c
Showing
11 changed files
with
227 additions
and
285 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,62 +1,70 @@ | ||
import dask.dataframe as dd | ||
import plotly.express as px | ||
import time | ||
|
||
import pandas as pd | ||
import streamlit as st | ||
|
||
from pipeline.settings import RESULTS_DIR | ||
|
||
|
||
@st.cache_data | ||
def get_data(region, part_type): | ||
return dd.read_parquet( | ||
RESULTS_DIR / region / part_type.upper() / "*.parquet" | ||
).compute() | ||
|
||
def get_data(segment): | ||
return pd.read_parquet(RESULTS_DIR / f"{segment.lower()}.snappy.parquet") | ||
|
||
description = """ | ||
### Recommended Suppliers | ||
_Some text that explains the business problem being addressed..._ | ||
|
||
This query finds which supplier should be selected to place an order for a given part in a given region. | ||
st.markdown( | ||
""" | ||
### Top Unshipped Orders | ||
_Top 50 unshipped orders with the highest revenue._ | ||
""" | ||
st.markdown(description) | ||
regions = list(map(str.title, ["EUROPE", "AFRICA", "AMERICA", "ASIA", "MIDDLE EAST"])) | ||
region = st.selectbox( | ||
"Region", | ||
regions, | ||
index=None, | ||
placeholder="Please select a region...", | ||
) | ||
part_types = list(map(str.title, ["COPPER", "BRASS", "TIN", "NICKEL", "STEEL"])) | ||
part_type = st.selectbox( | ||
"Part Type", | ||
part_types, | ||
|
||
SEGMENTS = ["automobile", "building", "furniture", "machinery", "household"] | ||
|
||
|
||
def files_exist():
    """Return True once every segment's result file is present.

    The dashboard reads one ``<segment>.snappy.parquet`` file per entry in
    ``SEGMENTS`` directly from ``RESULTS_DIR`` (see ``get_data``), so those
    exact paths are checked. Counting every matching file under the
    directory would never succeed if a stray or stale file were present,
    and could succeed with the wrong set of files.
    """
    return all(
        (RESULTS_DIR / f"{segment.lower()}.snappy.parquet").exists()
        for segment in SEGMENTS
    )
|
||
|
||
with st.spinner("Waiting for data..."): | ||
while not files_exist(): | ||
time.sleep(5) | ||
|
||
segments = list( | ||
map(str.title, ["automobile", "building", "furniture", "machinery", "household"]) | ||
) | ||
segment = st.selectbox( | ||
"Segment", | ||
segments, | ||
index=None, | ||
placeholder="Please select a part type...", | ||
placeholder="Please select a product segment...", | ||
) | ||
if region and part_type: | ||
df = get_data(region, part_type) | ||
if segment: | ||
df = get_data(segment) | ||
df = df.drop(columns="o_shippriority") | ||
df["l_orderkey"] = df["l_orderkey"].map(lambda x: f"{x:09}") | ||
df["revenue"] = df["revenue"].round(2) | ||
df = df.rename( | ||
columns={ | ||
"n_name": "Country", | ||
"s_name": "Supplier", | ||
"s_acctbal": "Balance", | ||
"p_partkey": "Part ID", | ||
"l_orderkey": "Order ID", | ||
"o_order_time": "Date Ordered", | ||
"revenue": "Revenue", | ||
} | ||
) | ||
maxes = df.groupby("Country").Balance.idxmax() | ||
data = df.loc[maxes] | ||
figure = px.choropleth( | ||
data, | ||
locationmode="country names", | ||
locations="Country", | ||
featureidkey="Supplier", | ||
color="Balance", | ||
color_continuous_scale="viridis", | ||
hover_data=["Country", "Supplier", "Balance"], | ||
|
||
df = df.set_index("Order ID") | ||
st.dataframe( | ||
df.style.format({"Revenue": "${:,}"}), | ||
column_config={ | ||
"Date Ordered": st.column_config.DateColumn( | ||
"Date Ordered", | ||
format="MM/DD/YYYY", | ||
help="Date order was placed", | ||
), | ||
"Revenue": st.column_config.NumberColumn( | ||
"Revenue (in USD)", | ||
help="Total revenue of order", | ||
), | ||
}, | ||
) | ||
st.plotly_chart(figure, theme="streamlit", use_container_width=True) | ||
on = st.toggle("Show data") | ||
if on: | ||
st.write( | ||
df[["Country", "Supplier", "Balance", "Part ID"]], use_container_width=True | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import os | ||
import shlex | ||
import subprocess | ||
|
||
import coiled | ||
import requests | ||
from prefect import flow | ||
from rich import print | ||
|
||
from .settings import DASHBOARD_FILE, LOCAL, REGION | ||
|
||
port = 8080 | ||
name = "etl-tpch-dashboard" | ||
subdomain = "etl-tpch" | ||
|
||
|
||
def deploy():
    """Start the Streamlit dashboard, locally or on a Coiled VM.

    When ``LOCAL`` is truthy the dashboard is launched as a background child
    process on this machine. Otherwise ``coiled run`` is invoked to start it
    on a detached VM, forwarding the AWS credentials found in the current
    environment.

    Raises:
        KeyError: in non-local mode, if ``AWS_ACCESS_KEY_ID`` or
            ``AWS_SECRET_ACCESS_KEY`` is not set in the environment.
    """
    print("[green]Deploying dashboard...[/green]")
    # Build argument vectors directly instead of formatting a string and
    # re-tokenising it, so paths or credentials containing whitespace or
    # quote characters cannot be split incorrectly.
    streamlit_cmd = [
        "streamlit",
        "run",
        str(DASHBOARD_FILE),
        "--server.port",
        str(port),
        "--server.headless",
        "true",
    ]
    if LOCAL:
        # Nothing ever reads the child's stdout. With a PIPE the OS buffer
        # would eventually fill and block the long-running Streamlit
        # process, so the output is discarded instead.
        subprocess.Popen(streamlit_cmd, stdout=subprocess.DEVNULL)
    else:
        # NOTE(review): the credentials are passed as command-line arguments
        # and are therefore visible in process listings on this machine.
        coiled_cmd = [
            "coiled",
            "run",
            "--region",
            str(REGION),
            "--vm-type",
            "t3.medium",
            "-f",
            "dashboard.py",
            "-f",
            "pipeline",
            "--subdomain",
            subdomain,
            "--port",
            str(port),
            "-e",
            f"AWS_ACCESS_KEY_ID={os.environ['AWS_ACCESS_KEY_ID']}",
            "-e",
            f"AWS_SECRET_ACCESS_KEY={os.environ['AWS_SECRET_ACCESS_KEY']}",
            "--detach",
            "--keepalive",
            "520 weeks",
            "--name",
            name,
            "--",
            *streamlit_cmd,
        ]
        subprocess.run(coiled_cmd)
    print(f"Dashboard is available at [blue]{get_address()}[/blue] :rocket:")
|
||
|
||
def get_address():
    """Return the URL at which the dashboard is expected to be served.

    In local mode this is port ``port`` on ``0.0.0.0``. Otherwise the host
    is built from ``subdomain`` and the default account reported by the
    Coiled client.
    """
    if not LOCAL:
        with coiled.Cloud() as client:
            workspace = client.default_account
            return f"http://{subdomain}.{workspace}.dask.host:{port}"
    return f"http://0.0.0.0:{port}"
|
||
|
||
@flow(log_prints=True)
def deploy_dashboard():
    """Deploy the dashboard unless it already answers a health check.

    A GET request is sent to the dashboard address. Any request failure
    (connection error, timeout, or non-success HTTP status) is treated as
    "not running" and triggers ``deploy()``.
    """
    address = get_address()
    try:
        # Without a timeout an unresponsive host could stall the flow
        # indefinitely; a bounded wait lets it fall through to deploy().
        response = requests.get(address, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        deploy()
    else:
        print("Dashboard is healthy")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.