-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2024-01-22-plot-airlines-triturators.py
116 lines (82 loc) · 3.16 KB
/
2024-01-22-plot-airlines-triturators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import ast
def time_to_float(time):
    """Convert an ``HH:MM:SS`` string into a fractional number of hours.

    Leading and trailing whitespace (including a trailing newline) is
    stripped before parsing.

    Args:
        time: Text in ``%H:%M:%S`` format, e.g. ``"02:30:00"``.

    Returns:
        The value expressed in hours (``"02:30:00"`` -> ``2.5``), or ``0``
        when the text cannot be parsed. Parse failures are reported on
        stdout rather than raised, so one bad value does not abort the run.
    """
    time = time.strip()
    try:
        # Only the parse can fail; keep the try body minimal.
        # NOTE: ``%H`` accepts 00-23 only, so values of 24 hours or more are
        # treated as unparseable and fall through to the 0 fallback.
        parsed = dt.datetime.strptime(time, "%H:%M:%S")
    except ValueError as e:
        # strptime signals a malformed string with ValueError; anything else
        # is a genuine bug and should propagate instead of being swallowed.
        print(f"conversion of time {time} failed: {e}")
        return 0
    return parsed.hour + parsed.minute / 60 + parsed.second / 3600
def _triturator_status(nation, prime_airline):
    """Return the triturator label for a flight's nation and lead airline."""
    # The order of these checks matters: the Canada rule wins over every
    # airline rule, and the airline rules win over the generic non-US rule.
    if nation == "Canada":
        return "Unknown"
    if prime_airline in ("United Airlines", "American Airlines"):
        return "American Airlines\nTriturator"
    if prime_airline in (
        "JetBlue Airways",
        "Delta Air Lines",
        "Southwest Airlines",
    ):
        return "Swissport\nTriturator"
    if nation != "United States":
        return "Swissport\nTriturator"
    return "Unknown"


def return_flights():
    """Load ``all_flights.tsv`` and label every flight with a triturator.

    The file is read from the current working directory. Its first line is
    treated as a header and skipped; every other non-blank line must hold
    exactly ten tab-separated fields. The "Airline" field is parsed with
    ``ast.literal_eval`` and its first element becomes "Prime Airline".

    Returns:
        A DataFrame with the ten input fields (as strings) plus
        "Prime Airline" and "Triturator Status". A file with no data rows
        yields an empty DataFrame that still has all twelve columns.

    Raises:
        ValueError: If a non-blank data line does not have ten fields.
    """
    columns = [
        "Origin",
        "Origin Code",
        "Date",
        "Terminal",
        "Equipment",
        "Flight",
        "Airline",
        "Nation",
        "State",
        "Flight Time",
        "Prime Airline",
        "Triturator Status",
    ]
    rows = []
    with open("all_flights.tsv") as infile:
        next(infile, None)  # skip the header line (tolerates an empty file)
        for line in infile:
            # Drop the line terminator so it does not end up in "Flight Time".
            line = line.rstrip("\r\n")
            if not line.strip():
                # Blank lines (e.g. a trailing newline at EOF) carry no data.
                continue
            (
                origin,
                origin_code,
                date,
                terminal,
                equipment,
                flight,
                airline,
                nation,
                state,
                flight_time,
            ) = line.split("\t")
            # The airline field holds a Python literal whose first element
            # is used as the flight's lead airline.
            prime_airline = ast.literal_eval(airline)[0]
            rows.append(
                [
                    origin,
                    origin_code,
                    date,
                    terminal,
                    equipment,
                    flight,
                    airline,
                    nation,
                    state,
                    flight_time,
                    prime_airline,
                    _triturator_status(nation, prime_airline),
                ]
            )
    # Passing columns= to the constructor keeps the schema intact even when
    # there are no rows; assigning .columns afterwards fails on an empty frame.
    return pd.DataFrame(rows, columns=columns)
def return_plotting_df():
    """Build the airline-by-triturator table of summed flight hours.

    Loads the flights via ``return_flights``, converts each "Flight Time" to
    hours with ``time_to_float``, and sums the hours per
    ("Prime Airline", "Triturator Status") pair.

    Returns:
        A DataFrame indexed by "Prime Airline" with one column per
        "Triturator Status", ordered by each airline's total hours across
        all columns, largest first.
    """
    flights = return_flights()
    flights["Flight Hours"] = flights["Flight Time"].apply(time_to_float)

    # Long form: one row per (airline, triturator) pair with its hour sum.
    grouped = flights.groupby(["Prime Airline", "Triturator Status"])
    hours_long = grouped.agg({"Flight Hours": "sum"}).reset_index()

    # Wide form: airlines down the index, triturator labels across columns.
    hours_wide = hours_long.pivot(
        index="Prime Airline",
        columns="Triturator Status",
        values="Flight Hours",
    )

    # "Total" exists only to rank the airlines; it is removed before returning.
    hours_wide["Total"] = hours_wide.sum(axis=1)
    ranked = hours_wide.sort_values(by="Total", ascending=False)
    return ranked.drop(columns="Total")
def return_destination_trit_plot():
    """Draw the stacked flight-hours bar chart and save it as a PNG.

    Takes the first 50 rows of ``return_plotting_df()`` and plots them as
    stacked horizontal bars, one segment per triturator column. The figure
    is written to ``triturator_airline_flight_hours.png`` at 600 dpi.
    Nothing is returned.
    """
    top_airlines = return_plotting_df().head(50)

    fig, ax = plt.subplots(figsize=(8, 7))
    top_airlines.plot.barh(stacked=True, ax=ax, width=0.8)

    # Keep the legend box but blank out its title.
    legend = ax.legend()
    legend.set_title("")

    # Hide the right and top borders of the plot area.
    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    # Remove the tick marks on the y axis.
    ax.tick_params(axis="y", which="both", left=False, right=False)
    ax.set_ylabel("")
    ax.set_xlabel("Total Flight Hours")
    ax.set_title("Total Flight Hours per Airline and Triturator (Top 50)")

    fig.tight_layout()
    fig.savefig("triturator_airline_flight_hours.png", dpi=600)
def start():
    """Script entry point: generate and save the triturator chart."""
    return_destination_trit_plot()
# Only generate the chart when run as a script, not when imported.
if __name__ == "__main__":
    start()