-
Notifications
You must be signed in to change notification settings - Fork 0
/
1.images_scraper.py
63 lines (49 loc) · 1.66 KB
/
1.images_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import requests
import os
from glob import glob
from collections import Counter
from PIL import Image
from config import IMG_URL, TMP_DIR, MEMBERS, LEGISLATURA
def download_and_save_img(member_id: int, legislatura_id: int, timeout: float = 30.0) -> None:
    """Download one member's image into ``TMP_DIR`` unless it already exists.

    The image is stored as ``{TMP_DIR}/{member_id}_{legislatura_id}.jpg``.
    Nothing is written when the server answers with a status other than 200.

    Args:
        member_id: Member identifier substituted into ``IMG_URL``.
        legislatura_id: Legislature identifier substituted into ``IMG_URL``.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so a stalled connection cannot hang the run.
    """
    path = f"{TMP_DIR}/{member_id}_{legislatura_id}.jpg"
    if os.path.isfile(path):
        # Already downloaded on a previous run; skip the network round trip.
        return

    # The context manager releases the connection even on early return.
    with requests.get(IMG_URL % (member_id, legislatura_id), timeout=timeout) as r:
        if r.status_code != 200:
            return
        # ``content`` undoes any gzip/deflate transfer encoding; reading
        # ``r.raw`` directly would store the still-compressed bytes.
        img = r.content

    with open(path, 'wb') as f:
        f.write(img)
def uniform_images(path: str):
    """Resize every ``*.jpg`` directly inside ``path`` to one common size, in place.

    The target width and height are picked independently: the most frequent
    width and the most frequent height among the images, each then passed
    through ``_find_16_divisible``. Images that do not have exactly three
    bands are converted to RGB before resizing. Every file is overwritten
    with the resized JPEG.

    Args:
        path: Directory containing the ``.jpg`` files to normalise.

    Returns:
        None. When the directory holds no ``.jpg`` files a note is printed
        and nothing else happens.
    """
    # Glob once so both passes operate on the same list of files.
    files = glob(f"{path}/*.jpg")
    if not files:
        # With no images the counters are empty and there is no size to pick.
        print(f'No .jpg images found in {path}')
        return

    widths = Counter()
    heights = Counter()
    for file in files:
        # Only the header is needed for the size; close the handle right away.
        with Image.open(file) as img:
            w, h = img.size
        widths[w] += 1
        heights[h] += 1

    most_common_size = (
        widths.most_common(1)[0][0],
        heights.most_common(1)[0][0],
    )
    print(f'Original most_common_size: {most_common_size}')

    most_common_size = (
        _find_16_divisible(most_common_size[0]),
        _find_16_divisible(most_common_size[1]),
    )
    print(f'Adapted most_common_size: {most_common_size}')

    for file in files:
        with Image.open(file) as src:
            rgb = src if len(src.getbands()) == 3 else src.convert('RGB')
            resized = rgb.resize(most_common_size, Image.BICUBIC)
        # Save after the source handle is released: the same path is overwritten.
        resized.save(file, "JPEG")
def _find_16_divisible(x):
return int(x/16) * 16
if __name__ == "__main__":
    # Script entry point: download every member image, then normalise sizes.

    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs(TMP_DIR, exist_ok=True)

    print(f'Downloading diputados images in {TMP_DIR}')
    # Ids passed to the downloader run from 1 to MEMBERS inclusive.
    for member_id in range(1, MEMBERS + 1):
        download_and_save_img(member_id, LEGISLATURA)

    print('Rescaling images to most common size')
    uniform_images(TMP_DIR)