-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCh9_4.py
44 lines (35 loc) · 943 Bytes
/
Ch9_4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import requests
import datetime
import multiprocessing as mp
def crawl(url, data, timeout=10):  # fetch a page
    """Send a GET request and return the response body as text.

    Args:
        url: Address to request.
        data: Mapping sent as the query-string parameters.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Defaults to 10.

    Returns:
        The response body decoded to ``str`` (``Response.text``).
    """
    # Per the requests documentation, a call without a timeout can wait
    # forever on a server that never answers, so always pass one.
    response = requests.get(url=url, params=data, timeout=timeout)
    return response.text
def func(page):  # run the crawl for one page
    """Request one page of the comments/hot listing and report it.

    Args:
        page: Value sent as the ``p`` query parameter.
    """
    comments_url = "https://book.douban.com/subject/4117922/comments/hot"
    query = {"p": page}
    # The response text is fetched but not used by this function.
    crawl(comments_url, query)
    print("Crawling : page No.{}".format(page))
if __name__ == '__main__':
    # Record the start time; the elapsed time is printed at the end.
    start = datetime.datetime.now()
    start_page = 1
    end_page = 15  # exclusive bound: range() below stops at end_page - 1

    # Multi-process crawl (disabled alternative to the loop below):
    # pages = [i for i in range(start_page, end_page)]
    # p = mp.Pool()
    # p.map_async(func, pages)
    # p.close()
    # p.join()

    # Single-process crawl: call func() for each page instead of
    # repeating its URL/parameter construction and progress message here.
    for page in range(start_page, end_page):
        func(page)

    end = datetime.datetime.now()
    print("Time\t: ", end - start)