-
Notifications
You must be signed in to change notification settings - Fork 0
/
bitcoin.py
executable file
·162 lines (121 loc) · 5.02 KB
/
bitcoin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# This project has been created by:
# Name: Aditya Bhardwaj
# Unity ID: abhardw2
# Project 4: Predicting Bitcoin Price Variations using Bayesian Regression
import statsmodels.formula.api as smf
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import math
import sys
# Usage: bitcoin.py <path to data folder>
# Exactly one command-line argument (the data folder) is accepted; anything
# else prints the usage line and exits with status 1.
if len(sys.argv) == 2:
    data_path = sys.argv[1]
else:
    print('bitcoin.py <path to data folder>')
    sys.exit(1)


def _read_frame(file_name):
    """Load one CSV file from the data folder into a DataFrame."""
    return pd.read_csv(data_path + '/' + file_name)


# Reading the vectors from the given csv files.
# train1_*: reference vectors that computeDelta compares every query against.
train1_90 = _read_frame('train1_90.csv')
train1_180 = _read_frame('train1_180.csv')
train1_360 = _read_frame('train1_360.csv')
# train2_*: query vectors used to fit the linear model.
train2_90 = _read_frame('train2_90.csv')
train2_180 = _read_frame('train2_180.csv')
train2_360 = _read_frame('train2_360.csv')
# test_*: query vectors used to evaluate the fitted model.
test_90 = _read_frame('test_90.csv')
test_180 = _read_frame('test_180.csv')
test_360 = _read_frame('test_360.csv')
# Function to Calculate Equation Number 9 from the paper
def similarityFunction(a, b):
    """Return the similarity of two vectors (Equation 9 of the paper).

    The value is the sum of the products of the mean-centred elements,
    divided by ``len(a) * std(a) * std(b)`` using NumPy's default
    (population) standard deviation.

    Parameters
    ----------
    a, b : array-like of numbers
        Equal-length 1-D sequences (list, NumPy array or pandas Series).
        Elements are paired by position; Series index labels are ignored.

    Returns
    -------
    float
        The similarity between ``a`` and ``b``.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` do not have the same length.
    ZeroDivisionError
        If either vector has zero standard deviation.
    """
    # Convert once to plain arrays so elements are addressed by position.
    # Indexing a label-indexed pandas Series with an integer key (a[i]) is a
    # deprecated fallback and fails on newer pandas versions.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    # np.dot rejects mismatched lengths instead of silently broadcasting.
    similar = np.dot(a - a.mean(), b - b.mean())
    # Python floats keep the ZeroDivisionError for constant vectors.
    return float(similar) / float(len(a) * np.std(a) * np.std(b))
def computeDelta(wt, X, Xi):
    """
    This function computes equation 6 of the paper, but with the euclidean distance
    replaced by the similarity function given in Equation 9.

    The result is a weighted average of the last-column values of ``Xi``:
    each row's weight is ``exp(wt * similarity)``, where the similarity is
    taken between the leading entries of ``X`` and the leading entries of
    that row.

    Parameters
    ----------
    wt : int
        This is the constant c at the top of the right column on page 4.
    X : A row of Panda Dataframe
        Corresponds to (x, y) in Equation 6. Only the entries before the
        last one are used; the last entry is ignored.
    Xi : Panda Dataframe
        Corresponds to a dataframe of (xi, yi) in Equation 6. The last
        column is read as yi, all preceding columns as xi.

    Returns
    -------
    float
        The output of equation 6, a prediction of the average price change.

    Raises
    ------
    ZeroDivisionError
        If ``Xi`` has no rows (the sum of weights is zero).
    """
    # Convert to plain arrays once so that every element is addressed by
    # position. Integer keys on a label-indexed Series (row[-1]) are a
    # deprecated fallback in pandas, and building a Series per row is slow.
    query = np.asarray(X, dtype=float)[:-1]
    reference = Xi.to_numpy(dtype=float)

    y1, y0 = 0.0, 0.0
    for row in reference:
        row_weight = math.exp(wt * similarityFunction(query, row[:-1]))
        y1 += row[-1] * row_weight
        y0 += row_weight
    return float(y1) / float(y0)
# Open question:
# Why is weight = 2? (The paper does not specify this constant.)
# Perform the Bayesian Regression to predict the average price change for each
# dataset of train2 using train1 as input. These predictions are used to
# estimate the coefficients (w0, w1, w2, and w3) in equation 8.
weight = 2  # This constant was not specified in the paper, but we will use 2.

# Each train2_* row is a query and the matching train1_* frame is the reference
# set. Iterate over the train2_* rows themselves rather than a row count taken
# from train1_*, so every train2 row gets exactly one prediction even when the
# two sets differ in size. Building a list first avoids repeated np.append copies.
trainDeltaP90 = np.array(
    [computeDelta(weight, row, train1_90) for _, row in train2_90.iterrows()],
    dtype=float)
trainDeltaP180 = np.array(
    [computeDelta(weight, row, train1_180) for _, row in train2_180.iterrows()],
    dtype=float)
trainDeltaP360 = np.array(
    [computeDelta(weight, row, train1_360) for _, row in train2_360.iterrows()],
    dtype=float)

# Actual deltaP values for the train2 data (1-D array of the 'Yi' column).
trainDeltaP = np.asarray(train2_360['Yi'])

# Combine all the training data
d = {'deltaP': trainDeltaP,
     'deltaP90': trainDeltaP90,
     'deltaP180': trainDeltaP180,
     'deltaP360': trainDeltaP360}
trainData = pd.DataFrame(d)

# Fit the linear model of equation 8 with the statsmodels ols function:
# deltaP is regressed on the three predictions (deltaP90, deltaP180, deltaP360).
model = smf.ols(formula='deltaP ~ deltaP90 + deltaP180 + deltaP360', data=trainData).fit()

# Print the weights from the model
print(model.params)
# Perform the Bayesian Regression to predict the average price change for each
# dataset of test using train1 as input.
# Each test_* row is a query and the matching train1_* frame is the reference
# set. Iterate over the test_* rows themselves rather than a row count taken
# from train1_*, so every test row gets exactly one prediction even when the
# test set and train1 differ in size. Building a list first avoids repeated
# np.append copies.
testDeltaP90 = np.array(
    [computeDelta(weight, row, train1_90) for _, row in test_90.iterrows()],
    dtype=float)
testDeltaP180 = np.array(
    [computeDelta(weight, row, train1_180) for _, row in test_180.iterrows()],
    dtype=float)
testDeltaP360 = np.array(
    [computeDelta(weight, row, train1_360) for _, row in test_360.iterrows()],
    dtype=float)

# Actual deltaP values for test data (1-D array of the 'Yi' column).
testDeltaP = np.asarray(test_360['Yi'])

# Combine all the test data
d = {'deltaP': testDeltaP,
     'deltaP90': testDeltaP90,
     'deltaP180': testDeltaP180,
     'deltaP360': testDeltaP360}
testData = pd.DataFrame(d)
# Apply the fitted linear model to the test-set predictions.
result = model.predict(testData)

# Put the actual and predicted price changes side by side.
compare = {'Actual': testDeltaP, 'Predicted': result}
compareDF = pd.DataFrame(data=compare)

# Mean squared error between the two columns, then report it.
actual_column = compareDF['Actual']
predicted_column = compareDF['Predicted']
MSE = sm.mean_squared_error(actual_column, predicted_column)
print("The MSE is %f" % (MSE,))