-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.py
125 lines (90 loc) · 3.15 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
# coding: utf-8
######################################
# Multiple Linear Regression Model #
######################################
# Installations
# !pip install keras
# !pip install tensorflow
# !pip install scikit-learn --upgrade
# !pip install joblib
#####################
# Score the Model #
#####################
def score_model(array):
    """Fit a linear regression of ``hdi`` on the given indicator columns.

    The data is read from ``static/primary_pivot.csv`` (a path relative to
    the current working directory). Features and target are split into
    train/test sets, standardized with scalers fitted on the training
    portion only, and a ``LinearRegression`` is trained on the scaled
    training data.

    Args:
        array: Sequence of column names in the CSV to use as features.

    Returns:
        A tuple ``(linear, X_scaler, y_scaler, r2, mse)``: the fitted model,
        the fitted feature scaler, the fitted target scaler, and the R^2 and
        mean squared error on the test split. Both metrics are computed on
        the *standardized* target, not on raw HDI units.

    Raises:
        ValueError: If ``array`` is empty.
        KeyError: If a requested column (or ``hdi``) is missing in the CSV.

    Side effects:
        Installs a process-wide "ignore" warnings filter and reseeds NumPy's
        global random generator with 42 on every call.
    """
    # Fail fast: with zero features the original only failed later, inside
    # scikit-learn's input validation, after already reading the CSV.
    indicators = list(array)
    if not indicators:
        raise ValueError(
            "score_model requires at least one indicator column name"
        )

    import warnings
    warnings.simplefilter('ignore')

    # Load dependencies
    import os
    import pandas as pd

    # Set the same starting seed number for numpy.random so the results
    # are reproducible
    from numpy.random import seed
    seed(42)

    #########################
    #  Basic Data Cleaning  #
    #########################
    filepath = os.path.join('static', 'primary_pivot.csv')
    pivot_df = pd.read_csv(filepath)

    # Select all indicator columns at once instead of looping cell by cell;
    # the result is the same (n_rows, n_indicators) feature matrix.
    X = pivot_df[indicators].to_numpy()
    # The scalers expect a 2-D target, hence the single-column reshape.
    y = pivot_df["hdi"].to_numpy().reshape(-1, 1)

    ###############################
    #  Data Preprocessing for ML  #
    ###############################
    # Split into Test and Train data
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    # Fit the scalers on the training data only, so no information from the
    # test split leaks into the preprocessing step.
    from sklearn.preprocessing import StandardScaler
    X_scaler = StandardScaler().fit(X_train)
    y_scaler = StandardScaler().fit(y_train)

    # Transform the training and test data
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    y_train_scaled = y_scaler.transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)

    ########################
    #  Creating the Model  #
    ########################
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error

    # Create and train the model
    linear = LinearRegression()
    linear.fit(X_train_scaled, y_train_scaled)

    # Use our model to make predictions on the held-out data
    predictions = linear.predict(X_test_scaled)

    ########################
    #   Score the Model    #
    ########################
    r2 = linear.score(X_test_scaled, y_test_scaled)
    mse = mean_squared_error(y_test_scaled, predictions)

    return linear, X_scaler, y_scaler, r2, mse
##########################
# Predicting HDI value #
##########################
def predict_hdi(array, values):
    """Predict an HDI value from one set of indicator values.

    A model is trained via ``score_model(array)`` on every call; the input
    is standardized with the returned feature scaler, run through the
    model, and the prediction is mapped back to the original HDI scale with
    the returned target scaler.

    Args:
        array: Sequence of indicator column names used as model features.
        values: Sequence of feature values, one per entry in ``array`` and
            in the same order.

    Returns:
        The predicted HDI as a scalar.

    Raises:
        ValueError: If ``values`` and ``array`` differ in length.
    """
    # Check the shapes before training: otherwise the mismatch only
    # surfaces after the CSV has been read and the model fitted.
    if len(values) != len(array):
        raise ValueError(
            f"expected {len(array)} value(s), one per indicator, "
            f"got {len(values)}"
        )

    # Create the model
    linear, X_scaler, y_scaler, _, _ = score_model(array)

    # Scale input to the model; wrapping in a list makes it a single-row
    # 2-D sample, as the scaler requires.
    X_scaled = X_scaler.transform([values])
    y_scaled = linear.predict(X_scaled)

    # Undo the target standardization to get HDI in its original units
    predicted_y = y_scaler.inverse_transform(y_scaled)
    predicted_HDI = predicted_y[0][0]
    return predicted_HDI