-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDownLoadImg.py
70 lines (59 loc) · 2.87 KB
/
DownLoadImg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import re
import pymongo
import requests
from fake_useragent import UserAgent
# Browser-like headers sent with every image request.  The 'user-agent'
# entry is only a fallback value; the downloader substitutes a random one.
DEFAULT_REQUEST_HEADERS = {
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7',
    'cache-control': 'max-age=0',
    'upgrade-insecure-requests': '1',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.66 Safari/537.36 Edg/87.0.664.41'
    ),
}

# Example of the image path template the downloader builds from its
# directory argument: r'Test/Test/img/%s.%s'

# Default (female) avatar image that is downloaded in addition to the
# per-user images.
# NOTE(review): the query string carries Expires/ssig parameters, so this
# link presumably stops working after that timestamp -- confirm.
DEFAULT_URL = (
    'https://tvax1.sinaimg.cn/default/images/default_avatar_female_50.gif'
    '?KID=imgbed,tva&Expires=1608232859&ssig=kqbMa6HGNF'
)
class downloadimg():
    """Download the images referenced by a local MongoDB ``user`` collection.

    Each document's ``src_url`` is inspected; URLs recognised as ``.jpg`` or
    ``.gif`` images are fetched through a proxy handed out by a local proxy
    service and written into ``dic_path``.  A file name is the last ten
    characters of the URL (with ``%`` replaced by ``PC``) followed by the
    extension.
    """

    # Seconds to wait on any single HTTP request, so that one dead proxy or
    # unresponsive host cannot stall the whole run.
    REQUEST_TIMEOUT = 10

    # URL shapes that are downloaded.  The dot before the extension is
    # escaped so that e.g. '.../abcjpg' is no longer treated as an image.
    _JPG_PATTERN = re.compile(r'[0-9]{3}/(.+\.jpg)')
    _GIF_PATTERN = re.compile(r'/images/(.+\.gif)')

    def __init__(self, dbname, dic_path):
        """Connect to the local MongoDB and remember the target directory.

        Args:
            dbname: Name of the database whose ``user`` collection is read.
            dic_path: Directory the downloaded images are written to.
        """
        self.myclient = pymongo.MongoClient("mongodb://localhost:27017/")
        self.db = self.myclient[dbname]['user']
        # '%s.%s' is filled with (file stem, extension) for every download.
        self.img_path = dic_path + '/%s.%s'
        self.dic_path = dic_path

    def get_proxy_local(self):
        """Ask the local proxy service for a proxy.

        Returns:
            A ``requests``-style proxies mapping with a single ``'http'`` key.

        NOTE(review): only the 'http' scheme is mapped while the image URLs
        seen in this file are https -- confirm the proxy is actually applied
        to those requests.
        """
        reply = requests.get(
            "http://127.0.0.1:5010/get/", timeout=self.REQUEST_TIMEOUT
        ).json()
        return {'http': reply['proxy']}

    def run(self):
        """Download every recognised image of the collection, then the default avatar.

        Documents without a usable ``src_url`` and URLs matching neither
        image pattern are skipped.  A failed download is reported and does
        not abort the remaining ones.
        """
        if self.dic_path:
            # Opening the target files fails if the directory is missing.
            os.makedirs(self.dic_path, exist_ok=True)

        # Built once; '.random' still yields a fresh user agent per request.
        user_agents = UserAgent(verify_ssl=False)

        for item in self.db.find():
            src_url = item.get('src_url')
            extension = self._extension_for(src_url)
            if extension is None:
                continue
            self._download(src_url, extension, user_agents)

        # NOTE(review): assumed to run once after the loop rather than once
        # per document (the file's indentation was lost) -- confirm.
        self._download(DEFAULT_URL, 'gif', user_agents)

    def replace_img(self):
        """Rename files in ``dic_path`` so that every ``%`` becomes ``PC``.

        Files whose name contains no ``%`` are left alone and not reported.
        """
        for name in os.listdir(self.dic_path):
            fixed_name = name.replace('%', 'PC')
            if fixed_name == name:
                continue
            oldname = os.path.join(self.dic_path, name)
            newname = os.path.join(self.dic_path, fixed_name)
            os.rename(oldname, newname)
            print(oldname, '======>', newname)

    @classmethod
    def _extension_for(cls, url):
        """Return 'jpg' or 'gif' for a recognised image URL, else None."""
        if not isinstance(url, str):
            return None
        if cls._JPG_PATTERN.search(url):
            return 'jpg'
        if cls._GIF_PATTERN.search(url):
            return 'gif'
        return None

    def _download(self, url, extension, user_agents):
        """Fetch *url* through a fresh proxy and save it; return the path or None."""
        target = self.img_path % (url[-10:].replace('%', 'PC'), extension)

        # Work on a copy so the shared module-level headers are never mutated.
        headers = dict(DEFAULT_REQUEST_HEADERS)
        headers['user-agent'] = user_agents.random

        proxies = self.get_proxy_local()
        try:
            response = requests.get(
                url,
                headers=headers,
                proxies=proxies,
                timeout=self.REQUEST_TIMEOUT,
            )
            # Do not store an HTTP error page under an image file name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print('download failed:', url, exc)
            return None

        with open(target, 'wb') as f:
            f.write(response.content)
        print(target)
        return target
# if __name__ == '__main__':
# di = downloadimg('weibo9',dic_path = r'Test/Test/img')
# di.run()
# di.replace_img()