philomena-dl.py
# Import libraries
import json
import urllib.parse
import urllib.request
import math
import os
import pathvalidate
import time
# Load config (using a context manager so the file handle is closed)
with open('config.json') as config_file:
    config = json.load(config_file)
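# For reference, config.json is expected to provide at least 'site' and 'key',
# since those are the only fields read below. A minimal sketch with placeholder
# values (the URL and key here are assumptions, not real credentials):
# {
#     "site": "https://your-philomena-instance.example",
#     "key": "your-api-key"
# }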
# Get search string
search_string = input('Enter your search query: ')
# Search URL
search_url = config['site'] + '/api/v1/json/search/images?key=' + config['key'] + '&perpage=50&q=' + urllib.parse.quote(search_string)
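# For illustration, the line above produces a URL of this shape (the site, key,
# and query shown are placeholders):
# https://your-philomena-instance.example/api/v1/json/search/images?key=your-api-key&perpage=50&q=fluffy%20cat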
# Generate search request
search_request = urllib.request.Request(search_url, data=None, headers={'User-Agent': 'Philomena Bulk Image Downloader'})
# Get result
search_response = urllib.request.urlopen(search_request).read()
search_data = json.loads(search_response)
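# Based on how the response is used below, it is expected to contain a 'total'
# count and an 'images' list, where each image provides 'id', 'format', and a
# 'representations' dict with a 'full' download URL.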
# Determine results count
results_count = search_data['total']
results_pages = math.ceil(results_count/50)
# Exit script if no results
if results_count == 0:
    print('No results found for your query, sorry')
    exit()
# Inform user of results count
print('Found ' + str(results_count) + ' images, producing ' + str(results_pages) + ' pages of results')
# Create results folders
folder_name = pathvalidate.sanitize_filename(search_string)
downloads_folder = './downloads/'
downloads_json_folder = downloads_folder + 'json/'
try:
    os.makedirs(downloads_folder)
except FileExistsError:
    print("Downloads directory already exists.")
try:
    os.makedirs(downloads_json_folder)
except FileExistsError:
    print("JSON directory already exists.")
# Loop through each page
counter = 1
for page in range(0, results_pages):
    # Generate a search URL for this page of results
    page_search_url = config['site'] + '/api/v1/json/search/images?key=' + config['key'] + '&perpage=50&page=' + str(page + 1) + '&q=' + urllib.parse.quote(search_string)
    # Generate search request for page
    page_search_request = urllib.request.Request(page_search_url, data=None, headers={'User-Agent': 'Philomena Bulk Image Downloader'})
    # Get responses for page
    page_search_response = urllib.request.urlopen(page_search_request).read()
    page_search_data = json.loads(page_search_response)
    # Brief pause between page requests to avoid hammering the API
    time.sleep(.1)
    # Loop through each image
    for image in page_search_data['images']:
        # Output status
        print('Downloading ' + str(image['id']) + ' (' + str(counter) + ' of ' + str(results_count) + ')')
        # Set file paths
        if image['format'] == 'svg':
            image_file_path = downloads_folder + str(image['id']) + '.png'
        else:
            image_file_path = downloads_folder + str(image['id']) + '.' + image['format']
        image_json_path = downloads_json_folder + str(image['id']) + '.json'
        # Download the image
        urllib.request.urlretrieve(image['representations']['full'], image_file_path)
        # Output metadata
        with open(image_json_path, 'w') as outfile:
            json.dump(image, outfile)
        # Brief pause between downloads
        time.sleep(.1)
        # Increment the image counter
        counter += 1
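# Usage sketch (assumed): place config.json next to this script, run it with
# Python 3 (e.g. `python3 philomena-dl.py`), and enter a search query when
# prompted. Images are saved to ./downloads/ and per-image metadata JSON to
# ./downloads/json/.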