-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathprocess_data.py
114 lines (92 loc) · 2.98 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
# @Time : 2019/3/5 16:29
# @Author : wb
# @File : process_data.py
# Preprocess the data
# Process the data
# Start with the user_balance_table table
import csv
# Read the user balance (Yu'e Bao purchase/redemption) CSV into a list of
# dicts, one dict per data row, then sort the records.

# Columns 1..13 of each row, copied into the record unchanged (as strings).
# Column 0 is user_id and is handled separately below.
_VALUE_COLUMNS = (
    'report_date',
    'tBalance',
    'yBalance',
    'total_purchase_amt',
    'direct_purchase_amt',
    'purchase_bal_amt',
    'purchase_bank_amt',
    'total_redeem_amt',
    'consume_amt',
    'transfer_amt',
    'tftobal_amt',
    'tftocard_amt',
    'share_amt',
)
# Columns 14..17 of each row; an empty cell is replaced by the integer 0.
_CATEGORY_COLUMNS = ('category1', 'category2', 'category3', 'category4')

# All records; each element is the dict built from one CSV row.
users = []
# newline='' lets the csv module do its own newline handling, as its
# documentation requires; the with-block closes the file even if a row
# fails to parse.
with open("data/user_balance_table.csv", "r", newline='') as userbalancetable:
    reader = csv.reader(userbalancetable)
    # Skip the header row.
    next(reader, None)
    for item in reader:
        # csv.reader yields [] for a blank line; there is nothing to parse.
        if not item:
            continue
        # Keys are inserted in column order; the writer later relies on this
        # order for the output header and values.
        # user_id is parsed as int so that the sort below is numeric.
        user_balance = {'user_id': int(item[0])}
        for offset, column in enumerate(_VALUE_COLUMNS, start=1):
            user_balance[column] = item[offset]
        first_category = len(_VALUE_COLUMNS) + 1
        for offset, column in enumerate(_CATEGORY_COLUMNS, start=first_category):
            cell = item[offset]
            user_balance[column] = cell if cell != '' else 0
        users.append(user_balance)

# Multi-level sort: by numeric user_id first, then by report_date (compared
# as the raw string read from the file).
users = sorted(users, key=lambda e: (e['user_id'], e['report_date']))
# The int form was only needed for sorting; store user_id as a string again.
for user in users:
    user['user_id'] = str(user['user_id'])
# Load the user profile table (this step is currently commented out)
# userprofiletable = open("data/user_profile_table.csv", "r")
# reader = csv.reader(userprofiletable)
#
# for item in reader:
#     # Skip the first (header) row
#     if reader.line_num == 1:
#         continue
#
#     # Merge the information from the two tables by user_id
#     for i in users:
#         if item[0] == i['user_id']:
#             i['sex'] = item[1]
#             i['city'] = item[2]
#             i['constellation'] = item[3]
#
# userprofiletable.close()
# print(len(users))
# print(len(user_pros))
# for user in users[:10]:
#     print(user.values())
# Write the sorted records to a new CSV file.

# Header row: the column names, taken from the keys of the first record.
# With no records there is no header to derive, so it is left empty.
fileHeader = list(users[0].keys()) if users else []

# In Python 3 the csv module wants a text-mode file opened with newline='';
# without it, the '\r\n' line endings the writer emits can be translated
# again on some platforms, producing an extra blank line after every row.
# The with-block closes the file even if a write fails.
with open("data/new_user_balance.csv", "w", newline='') as csvFile:
    writer = csv.writer(csvFile)
    # Each row is passed to the writer as a sequence of cell values.
    if fileHeader:
        writer.writerow(fileHeader)
    # Every record's values are written in its key insertion order, which is
    # the same order the header was taken from.
    writer.writerows(user.values() for user in users)