-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtimeStartDaily.py
41 lines (36 loc) · 1.61 KB
/
timeStartDaily.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File: timerStartDaily.py
# Daily launcher for CoinDetailSpider.
#
# Polls for the marker file the spider creates while it runs (isRunning.txt).
# Once the previous crawl has finished, the script wipes the JOBDIR resume
# directory ("crawls") and starts a fresh crawl. If the previous crawl is
# still running after 24 hours of waiting, the script gives up and exits.
from scrapy import cmdline
from datetime import datetime
import time
import shutil
import os

recoderDir = r"crawls"      # JOBDIR where Scrapy persists state so a crawl can resume
checkFile = "isRunning.txt" # marker file: present while the spider is running

startTime = datetime.now()
print(f"startTime = {startTime}")

miniter = 0  # total minutes spent waiting for the previous crawl to finish
while True:
    isRunning = os.path.isfile(checkFile)
    if not isRunning:  # previous crawl finished (or never ran) — start a new one
        # Clear the old JOBDIR so the new crawl starts from a clean state.
        isExist = os.path.isdir(recoderDir)
        print(f"CoinDetailSpider not running, ready to start. dir exists:{isExist}")
        if isExist:
            # shutil.rmtree() returns None, so there is no result to report;
            # just confirm the deletion happened.
            shutil.rmtree(recoderDir)  # remove the resume dir and everything in it
            print(f"At time:{datetime.now()}, removed dir:{recoderDir}")
        else:
            print(f"At time:{datetime.now()}, Dir:{recoderDir} does not exist.")
        time.sleep(20)  # brief grace period before launching the crawl
        crawlerTime = datetime.now()
        waitTime = crawlerTime - startTime
        print(f"At time:{crawlerTime}, start crawler: CoinDetailSpider !!!, waitTime:{waitTime}")
        # NOTE: cmdline.execute() does not return — Scrapy takes over the
        # process — so the break below is only a safety net.
        cmdline.execute('scrapy crawl CoinDetailSpider -s JOBDIR=crawls/storeMyRequest'.split())
        break  # exit the script once the crawl has been launched
    else:
        print(f"At time:{datetime.now()}, CoinDetailSpider is running, sleep to wait.")
        time.sleep(600)  # re-check every 10 minutes
        miniter += 10
        if miniter >= 1440:  # waited a full 24 hours — stop monitoring and exit
            break