-
Notifications
You must be signed in to change notification settings - Fork 1
/
eda.py
85 lines (71 loc) · 1.88 KB
/
eda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
# Locations of the three CSV inputs read by this script.
TRAIN_CSV = './data/ace/ace_1999.csv'
TEST_CSV = './data/test.csv'
LABEL_CSV = './data/kp_index.csv'

# Load each table and report its (rows, columns) shape right after reading,
# so a failed read is reported before any later table is touched.
train_1999 = pd.read_csv(TRAIN_CSV)
train_shape = train_1999.shape
print("train shape: ", train_shape)

test = pd.read_csv(TEST_CSV)
test_shape = test.shape
print("test shape", test_shape)

label = pd.read_csv(LABEL_CSV)
label_shape = label.shape
print("label shape", label_shape)
def process_na(dataset, threshold=-9999):
    """Replace sentinel "missing" readings with NaN, column by column.

    Every value strictly below ``threshold`` in a numeric column is
    overwritten with ``np.nan``. Columns whose dtype is not numeric
    (text, datetimes, ...) are left untouched, because comparing their
    values with a number would raise ``TypeError``.

    Args:
        dataset: DataFrame to clean. It is modified in place.
        threshold: Cutoff below which a value is treated as missing.
            Defaults to -9999, the cutoff this script has always used.

    Returns:
        The same ``dataset`` object, returned for call-chaining convenience.
    """
    for feature in dataset.columns:
        column = dataset[feature]
        if not pd.api.types.is_numeric_dtype(column):
            continue
        # Vectorized replacement: mask() writes NaN wherever the condition
        # holds and keeps every other value (including existing NaN) as is.
        dataset[feature] = column.mask(column < threshold)
    return dataset
# Turn sentinel readings into NaN so matplotlib leaves gaps instead of spikes.
train_1999 = process_na(train_1999)

# One x value per row, spread evenly over [1999, 2013].
# NOTE(review): the frame is read from ace_1999.csv, yet the axis runs to
# 2013 -- confirm the intended time span.
time = np.linspace(1999, 2013, train_1999.shape[0])
Np = train_1999['Np']
Tp = train_1999['Tp']
Vp = train_1999['Vp']

# Folder receiving the PNG files; created up front so that saving a figure
# cannot fail merely because the folder is missing.
OUTPUT_DIR = './eda_output_img'
os.makedirs(OUTPUT_DIR, exist_ok=True)


def _save_time_plot(x, named_series, output_dir=OUTPUT_DIR):
    """Plot one or more series against ``x`` and save the figure as a PNG.

    Args:
        x: Values for the horizontal axis, shared by every series.
        named_series: Sequence of ``(name, values)`` pairs, drawn in order.
            The names build the y-label and title (joined with ", ") and
            the file name (joined with ",", followed by "_time.png").
        output_dir: Directory the PNG is written to.
    """
    names = [name for name, _ in named_series]
    joined = ', '.join(names)

    fig, ax = plt.subplots(figsize=(8, 6))
    for name, values in named_series:
        # The label is what lets legend() show an entry for this line.
        ax.plot(x, values, label=name)
    ax.set_ylabel(joined)
    ax.set_xlabel('year')
    ax.set_title(joined + ' vs. Year')
    ax.grid(True)
    if len(names) > 1:
        # A legend is only useful when several series share the axes.
        ax.legend(loc='best')
    fig.savefig(os.path.join(output_dir, ','.join(names) + '_time.png'))
    # Release the figure; otherwise every plot stays alive in pyplot's registry.
    plt.close(fig)


# Np against time
_save_time_plot(time, [('Np', Np)])
# Tp against time
_save_time_plot(time, [('Tp', Tp)])
# Vp against time
_save_time_plot(time, [('Vp', Vp)])
# Np, Vp against time
_save_time_plot(time, [('Np', Np), ('Vp', Vp)])
# Np, Tp, Vp against time
_save_time_plot(time, [('Np', Np), ('Tp', Tp), ('Vp', Vp)])