-
Notifications
You must be signed in to change notification settings - Fork 2
/
txt导入数据库.py
63 lines (55 loc) · 2.14 KB
/
txt导入数据库.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import sys
import re
import os,time
import pymysql
# Module-level MySQL connection and cursor; WordCreate.OperationSql reads
# both names as globals. `password` / `database` are used instead of the
# deprecated `passwd` / `db` aliases.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    password='newbegin',
    database='lsj',
    charset='utf8',
)
# DictCursor is requested explicitly, as in the original setup.
cur = conn.cursor(cursor=pymysql.cursors.DictCursor)
class WordCreate():
    """Parse a digit-delimited text file and write its fragments to MySQL.

    ``__init__`` builds ``self.sc_list``: one inner list per entry of the
    file, holding the non-empty fragments of that entry.
    ``OperationSql`` writes those fragments into numbered columns of the
    ``poetry`` table through the module-level ``cur`` / ``conn``.
    """

    def __init__(self, path):
        """Read the UTF-8 file at ``path`` and split it into ``self.sc_list``.

        Processing steps:
          1. every ``(...)`` span (ASCII parentheses, within one line) is
             removed;
          2. the remaining text is split at every digit, so each digit acts
             as an entry delimiter; empty entries are dropped;
          3. each entry is split on the characters 、 。 ; ! ? *, newlines
             and whitespace; empty fragments are dropped.

        Args:
            path: path of the text file to parse.
        """
        with open(path, encoding="utf-8") as file:
            content = file.read()

        # Echo the parenthesised annotations that are about to be stripped.
        print(re.findall(r'\((.*?)\)', content))
        # One substitution removes each span as a whole. A character class
        # such as [(|(] would treat "|" as a literal delimiter and leave
        # spans like "(a|b)" in the text.
        # NOTE(review): only ASCII parentheses are handled; confirm whether
        # the input files also use full-width parentheses.
        content = re.sub(r'\(.*?\)', '', content)

        entries = [chunk for chunk in re.split(r'\d', content) if chunk != '']
        self.sc_list = [
            [piece for piece in re.split(r'[、。;!?\n\s*]', entry) if piece != '']
            for entry in entries
        ]

    def OperationSql(self, start_id=130, column_prefix='yiwen', index_offset=0):
        """Write every parsed entry into the ``poetry`` table.

        Entry ``k`` (0-based) of ``self.sc_list`` goes to the row whose
        ``id`` is ``start_id + k``. For ``j`` in 1..61 the column
        ``{column_prefix}_{j}`` receives fragment ``j - 1 + index_offset``
        of that entry. The statements are UPDATEs, so the target rows must
        already exist; a first-time import needs INSERTs instead.

        Each entry is padded in place with the string ``'NULL'`` so that
        every column has a value, and the transaction is committed once
        per entry.

        Args:
            start_id: ``id`` of the row that receives the first entry.
            column_prefix: column name prefix; the defaults write the
                ``yiwen_N`` columns. For the ``sentense_N`` columns use
                ``column_prefix='sentense', index_offset=1``.
            index_offset: number of leading fragments to skip in each entry.

        Raises:
            ValueError: if ``column_prefix`` is not a plain identifier or
                ``index_offset`` is negative.
        """
        # Column names cannot be bound as parameters, so the prefix is
        # validated before being formatted into the statement.
        if not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', column_prefix):
            raise ValueError('column_prefix must be a plain identifier')
        if index_offset < 0:
            raise ValueError('index_offset must be non-negative')

        column_count = 61
        # 62 is the historical padded length; grow it if the offset needs more.
        padded_len = max(62, column_count + index_offset)

        for row_id, fragments in enumerate(self.sc_list, start=start_id):
            # NOTE: the filler is the four-character string 'NULL', not SQL NULL.
            fragments.extend(['NULL'] * (padded_len - len(fragments)))
            for col in range(1, column_count + 1):
                sql = "UPDATE poetry SET {}_{}=%s WHERE id=%s".format(column_prefix, col)
                params = (fragments[col - 1 + index_offset], row_id)
                print(sql, params)
                # Values are passed as parameters so that quotes inside the
                # text cannot break or alter the statement.
                cur.execute(sql, params)
            conn.commit()
def main(path=r'D:\Mydesk\诗词库-2-赏析.txt'):
    """Parse the text file at ``path`` and write its entries to the database.

    Args:
        path: file to import. The default is a machine-specific location,
            so pass a suitable path when running elsewhere. It is written
            as a raw string so the backslashes are not read as (invalid)
            escape sequences.
    """
    generator = WordCreate(path)
    generator.OperationSql()
# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    main()