-
Notifications
You must be signed in to change notification settings - Fork 1
/
dashboard.py
70 lines (57 loc) · 1.7 KB
/
dashboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import time
import pandas as pd
import streamlit as st
from pipeline.settings import RESULTS_DIR
@st.cache_data
def get_data(segment):
    """Load the results table for one product segment.

    The segment name is lower-cased and used as the stem of a
    ``<segment>.snappy.parquet`` file located directly inside
    ``RESULTS_DIR``. The function is decorated with ``st.cache_data`` so
    repeated calls for the same segment can reuse the loaded data.

    Args:
        segment: Segment name in any letter case (e.g. ``"Furniture"``).

    Returns:
        The result of ``pd.read_parquet`` for that segment's file.
    """
    parquet_name = f"{segment.lower()}.snappy.parquet"
    return pd.read_parquet(RESULTS_DIR / parquet_name)
# Markdown shown at the top of the page: title plus a one-line description.
PAGE_HEADER = """
### Top Unshipped Orders
_Top 50 unshipped orders with the highest revenue._
"""
st.markdown(PAGE_HEADER)

# Lower-case product segment names; each one corresponds to a
# "<segment>.snappy.parquet" results file read by get_data().
SEGMENTS = [
    "automobile",
    "building",
    "furniture",
    "machinery",
    "household",
]
def files_exist():
    """Return True once every segment's results file is present.

    Looks for ``RESULTS_DIR / "<segment>.snappy.parquet"`` for each entry in
    ``SEGMENTS`` -- the same path ``get_data`` reads. Checking the exact
    files (rather than comparing a count of every parquet file found under
    ``RESULTS_DIR``) avoids waiting forever when extra parquet files are
    present, and avoids reporting success when the expected ones are not.

    Returns:
        bool: True if all expected files exist, False otherwise.
    """
    return all(
        (RESULTS_DIR / f"{name}.snappy.parquet").is_file() for name in SEGMENTS
    )
# Hold the page on a spinner until the results files are available,
# re-checking every five seconds.
with st.spinner("Waiting for data..."):
    while True:
        if files_exist():
            break
        time.sleep(5)
# Title-cased display names for the dropdown. Derived from SEGMENTS so the
# options cannot drift from the files the dashboard waits for and loads.
segments = [name.title() for name in SEGMENTS]

# index=None starts with no selection, so nothing is loaded until the user
# picks a segment; the placeholder text is shown in the meantime.
segment = st.selectbox(
    "Segment",
    segments,
    index=None,
    placeholder="Please select a product segment...",
)
# Render the orders table only after a segment has been chosen.
if segment:
    df = get_data(segment)
    # Shipping priority is not displayed in the dashboard.
    df = df.drop(columns="o_shippriority")
    # Show order keys as fixed-width, zero-padded 9-character IDs.
    df["l_orderkey"] = df["l_orderkey"].map(lambda x: f"{x:09}")
    df["revenue"] = df["revenue"].round(2)
    # Swap raw column names for human-readable headers.
    df = df.rename(
        columns={
            "l_orderkey": "Order ID",
            "o_order_time": "Date Ordered",
            "revenue": "Revenue",
        }
    )
    df = df.set_index("Order ID")
    st.dataframe(
        # Always show two decimals: the previous "${:,}" format dropped
        # trailing zeros (e.g. "$1,234.5"), which reads wrong for currency.
        df.style.format({"Revenue": "${:,.2f}"}),
        column_config={
            "Date Ordered": st.column_config.DateColumn(
                "Date Ordered",
                format="MM/DD/YYYY",
                help="Date order was placed",
            ),
            "Revenue": st.column_config.NumberColumn(
                "Revenue (in USD)",
                help="Total revenue of order",
            ),
        },
    )