-
Notifications
You must be signed in to change notification settings - Fork 4
/
Reverse data.py
101 lines (73 loc) · 3.63 KB
/
Reverse data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import tensorflow as tf
import matplotlib as mplt
mplt.use('agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
import csv
from itertools import zip_longest
class RNNConfig:
    """Container for the mutable state shared between the functions in this script."""

    # Call counter: append_data_csv adds 1 to it on every call and treats the
    # value 1 as "this is the first chunk".
    iterator = 0


# The single shared instance that the functions below read and update.
config = RNNConfig()
def reverse():
    """Start a fresh ``processed_train.csv`` and feed it ``train.csv`` chunk by chunk.

    A header row is written to ``processed_train.csv`` first. ``train.csv`` is
    then read ten rows at a time; the rows of each chunk are put in reverse
    order and the chunk is handed to ``append_data_csv``.

    Note that only the rows *inside* each ten-row chunk are reversed here; what
    happens to the chunk afterwards is up to ``append_data_csv``.

    Returns:
        None. The result is the side effect on ``processed_train.csv``.
    """
    fields = ["Store", "DayOfWeek", "Date", "Sales", "Customers", "Open",
              "Promo", "StateHoliday", "SchoolHoliday"]

    # append_data_csv decides "first chunk or not" from this shared counter, so
    # start it over; otherwise a second run in the same process would never
    # take the first-chunk path.
    config.iterator = 0

    # mode='w' (not 'a'): the header must be the first line of the file. In
    # append mode a re-run would drop a second header row into the middle of
    # the previous output.
    # newline='': required by the csv module so the writer controls line
    # endings itself (avoids blank rows on Windows).
    with open('processed_train.csv', mode='w', newline='') as stock_file:
        writer = csv.writer(stock_file, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(fields)

    for chunk in pd.read_csv("train.csv", chunksize=10):
        # Flip the row order of this chunk by reindexing on the reversed index.
        store_data = chunk.reindex(index=chunk.index[::-1])
        append_data_csv(store_data)
def append_data_csv(store_data):
    """Process one chunk of store rows against ``processed_train.csv``.

    Every call increments the shared ``config.iterator`` counter.

    * First call (counter == 1): the nine known columns of every row in
      ``store_data`` are appended to ``processed_train.csv``.
    * Later calls: the current ``processed_train.csv`` is loaded, the chunk is
      rebuilt with the same column order as the file, the chunk is stacked on
      top of the file contents and the combined frame is printed.

    Args:
        store_data: pandas DataFrame holding at least the columns Store,
            DayOfWeek, Date, Sales, Customers, Open, Promo, StateHoliday and
            SchoolHoliday.

    Returns:
        None.
    """
    columns = ['Store', 'DayOfWeek', 'Date', 'Sales', 'Customers', 'Open',
               'Promo', 'StateHoliday', 'SchoolHoliday']

    config.iterator += 1

    if config.iterator == 1:
        # newline='' is required by the csv module so the writer controls the
        # line endings itself (avoids blank rows on Windows).
        with open('processed_train.csv', mode='a', newline='') as store_file:
            writer = csv.writer(store_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerows(
                [row[name] for name in columns]
                for _, row in store_data.iterrows()
            )
        return

    # No append handle is held here: this path only reads the file.
    read_data = pd.read_csv('processed_train.csv')
    records = [
        {name: row[name] for name in columns}
        for _, row in store_data.iterrows()
    ]
    tempframe = pd.DataFrame(records)
    # DataFrame.reindex_axis no longer exists in pandas >= 1.0;
    # reindex(columns=...) is the equivalent call.
    tempframe = tempframe.reindex(columns=read_data.columns)
    f = pd.concat([tempframe, read_data], sort=True)
    # NOTE(review): the combined frame is only printed, never written back to
    # processed_train.csv -- confirm whether it is meant to be persisted.
    print(f)
def tryit(source='store285_test.csv', target='store285_2.csv'):
    """Copy a store CSV to a new file with Year, Month and Day columns appended.

    The ``Date`` column of ``source`` is parsed with pandas; every data row of
    ``source`` is then written to ``target`` unchanged, followed by the year,
    month and day of its date. The header row gets the three matching column
    names so header and data rows have the same number of fields.

    Args:
        source: path of the input CSV; must have a header row and a ``Date``
            column. Defaults to the file name the script has always used.
        target: path of the CSV to create (overwritten if it exists).

    Returns:
        None.

    NOTE(review): commented-out code that used to live here built the input by
    selecting ``Store == 165`` from processed_train.csv, although the file is
    named ``store285`` -- confirm which store the file is supposed to hold.
    """
    stock_data = pd.read_csv(source)
    dates = pd.to_datetime(stock_data['Date'])

    # newline='' on both handles is what the csv module requires; without it
    # the writer emits blank rows on Windows.
    with open(source, 'r', newline='') as f, open(target, 'w', newline='') as g:
        fr = csv.reader(f)
        gw = csv.writer(g)

        # Extend the header too, otherwise every data row is three fields
        # longer than the header.
        gw.writerow(next(fr) + ['Year', 'Month', 'Day'])

        # pandas skips blank lines while csv.reader yields [] for them; drop
        # them here so each row stays paired with its own date parts.
        data_rows = (row for row in fr if row)

        gw.writerows(
            a + [b] + [c] + [d]
            for a, b, c, d in zip_longest(
                data_rows, dates.dt.year, dates.dt.month, dates.dt.day,
                fillvalue=[0],
            )
        )
# Script driver: these statements run whenever the module is executed.
# The train.csv reversal step is currently switched off:
# reverse()
# Only the date-expansion step runs. tryit() has no return statement, so f is
# bound to None.
f = tryit()