rip.cgi · executable file · 291 lines (260 loc) · 7.79 KB
#!/usr/bin/python
from sys import argv
from os import remove, path, stat, utime, SEEK_END
from stat import ST_ATIME, ST_MTIME
from time import strftime
from urllib import unquote
from json import dumps
# import argparse
from sites.site_deviantart import deviantart
from sites.site_flickr import flickr
from sites.site_imagearn import imagearn
from sites.site_imagebam import imagebam
from sites.site_imagefap import imagefap
from sites.site_imgur import imgur
from sites.site_instagram import instagram
from sites.site_photobucket import photobucket
# from sites.site_tumblr import tumblr
from sites.site_twitter import twitter
from sites.site_xhamster import xhamster
from sites.site_getgonewild import getgonewild
from sites.site_anonib import anonib
from sites.site_motherless import motherless
from sites.site_4chan import fourchan
from sites.site_minus import minus
from sites.site_gifyo import gifyo
from sites.site_five00px import five00px
from sites.site_cghub import cghub
from sites.site_chickupload import chickupload
from sites.site_teenplanet import teenplanet
from sites.site_chansluts import chansluts
from sites.site_buttoucher import buttoucher
from sites.site_pichunter import pichunter
from sites.site_soupio import soupio
from sites.site_imgbox import imgbox
from sites.site_reddit import reddit
from sites.site_gallerydump import gallerydump
from sites.site_fapdu import fapdu
# No longer supported
from sites.site_occ import occ
from sites.site_gonearch import gonearch
""" Print error in JSON format """
def print_error(text):
    print dumps( { 'error' : text } )

if len(argv) <= 1:
    print("\nError: No URL was provided\n")
    exit()

# Where the magic happens.
# Prints JSON response to query.
def main():
    # Keys are the query that's passed to the rip script, ex:
    # ./rip.cgi?url=http://x.com&start=true&cached=false
    # The dict would be { url : http://x.com, start: true, cached: false }
    keys = get_keys()
    if 'start'     in keys and \
       'url'       in keys and \
       'cached'    in keys and \
       'urls_only' in keys:
        cached = True # Default to cached
        if 'cached' in keys and keys['cached'] == 'false':
            cached = False
        urls_only = False # Default to false
        if 'urls_only' in keys and keys['urls_only'] == 'true':
            urls_only = True
        rip(keys['url'], cached, urls_only)
    elif 'check' in keys and \
         'url'   in keys:
        urls_only = False
        if 'urls_only' in keys and keys['urls_only'] == 'true':
            urls_only = True
        check(keys['url'], urls_only)
    elif 'recent' in keys:
        lines = 10
        if 'lines' in keys:
            lines = int(keys['lines'])
        recent(lines)
    else:
        print_error('invalid request')
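
# Illustrative invocation (hypothetical URL, based on the positional arguments
# that get_keys() below parses):
#   ./rip.cgi 'http://imgur.com/a/abcde' false false
# get_keys() then returns {'start': 'true', 'url': 'http://imgur.com/a/abcde',
# 'cached': 'false', 'urls_only': 'false'}, so main() takes the 'start' branch
# and calls rip() with cached=False and urls_only=False.
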
# Gets ripper, checks for existing rip, rips and zips as needed.
def rip(url, cached, urls_only):
    url = unquote(url).replace(' ', '%20')
    try:
        # Get domain-specific ripper for URL
        ripper = get_ripper(url, urls_only)
    except Exception as e:
        print_error(str(e))
        return

    # Check if there's already a zip for the album
    if ripper.existing_zip_path() is not None:
        # If user specified the uncached version, remove the zip
        if not cached:
            remove(ripper.existing_zip_path())
        else:
            # Mark the file as recently-accessed (top of FIFO queue)
            update_file_modified(ripper.existing_zip_path())
            #add_recent(url)
            print dumps( {
                'zip'  : ripper.existing_zip_path(),
                'size' : ripper.get_size(ripper.existing_zip_path())
            } )
            return

    if ripper.is_downloading():
        print_error("album rip is in progress. check back later")
        return

    # Rip it
    try:
        ripper.download()
    except Exception as e:
        print_error('download failed: %s' % str(e))
        return

    # If ripper fails silently, it will remove the directory of images
    if not path.exists(ripper.working_dir):
        print_error('unable to download album (empty? 404?)')
        return

    # Zip it
    try:
        ripper.zip()
    except Exception as e:
        print_error('zip failed: %s' % str(e))
        return

    # Add to recently-downloaded list
    add_recent(url)

    # Print it
    response = {}
    response['zip'] = ripper.existing_zip_path()
    response['size'] = ripper.get_size(ripper.existing_zip_path())
    response['image_count'] = ripper.image_count
    if ripper.hit_image_limit():
        response['limit'] = ripper.max_images
    print dumps(response)
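
# Shapes of the JSON that rip() prints (values are illustrative placeholders,
# not real output):
#   {"zip": "...", "size": "...", "image_count": ..., "limit": ...}   # 'limit' only when the image cap was hit
#   {"zip": "...", "size": "..."}                                     # an already-cached zip
#   {"error": "album rip is in progress. check back later"}
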
"""
Checks status of rip. Returns zip/size if finished, otherwise
returns the last log line from the rip.
"""
def check(url, urls_only):
    url = unquote(url).replace(' ', '%20')
    try:
        ripper = get_ripper(url, urls_only)
    except Exception as e:
        print_error(str(e))
        return

    # Check if there's already a zip for the album
    if ripper.existing_zip_path() is not None:
        # Return link to zip
        print dumps( {
            'zip'  : ripper.existing_zip_path(),
            'size' : ripper.get_size(ripper.existing_zip_path())
        } )
    else:
        # Print last log line ("status")
        lines = ripper.get_log(tail_lines=1)
        print dumps( {
            'log' : '\\n'.join(lines)
        } )
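
# check() therefore answers with either {"zip": "...", "size": "..."} when the
# album is finished, or {"log": "<last log line>"} while it is still ripping
# (placeholders shown, not real output).
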
""" Returns an appropriate ripper for a URL, or throws exception """
def get_ripper(url, urls_only):
    sites = [
        deviantart,
        flickr,
        imagearn,
        imagebam,
        imagefap,
        imgur,
        instagram,
        photobucket,
        # tumblr,
        twitter,
        xhamster,
        getgonewild,
        anonib,
        motherless,
        fourchan,
        occ,
        minus,
        gifyo,
        five00px,
        chickupload,
        cghub,
        teenplanet,
        chansluts,
        buttoucher,
        pichunter,
        soupio,
        imgbox,
        reddit,
        gallerydump,
        fapdu]
    for site in sites:
        try:
            ripper = site(url, urls_only)
            return ripper
        except Exception as e:
            # Rippers that aren't made for the URL throw blank Exception
            error = str(e)
            if error == '': continue
            # If Exception isn't blank, then it's the right ripper but an error occurred
            raise e
    raise Exception('Ripper can not rip given URL')
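
# Convention that get_ripper() relies on, sketched with a hypothetical ripper
# (not a real module in sites/): a ripper's constructor raises a blank
# Exception when the URL is not for its site, and a descriptive Exception for
# genuine failures.
#
#   class examplesite(object):
#       def __init__(self, url, urls_only):
#           if 'examplesite.com/' not in url:
#               raise Exception('')  # blank => wrong site, get_ripper tries the next one
#           ...
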
""" Updates system 'modified time' for file to current time. """
def update_file_modified(f):
    st = stat(f)
    atime = int(strftime('%s'))
    mtime = int(strftime('%s'))
    utime(f, (atime, mtime))
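
# Note: strftime('%s') depends on the platform's C library accepting the
# non-standard %s directive (it does on Linux/glibc); time.time() is the
# portable way to get the current epoch time.
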
""" Retrieves key/value pairs from query, puts in dict """
def get_keys():
keys = {}
keys['start'] = 'true'
keys['url'] = argv[1]
keys['cached'] = argv[2]
keys['urls_only'] = argv[3]
return keys
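
# e.g. running `./rip.cgi 'http://host/album'` with no extra arguments yields
# {'start': 'true', 'url': 'http://host/album', 'cached': 'true', 'urls_only': 'false'}
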
"""
Returns recently-downloaded zips
"""
def recent(lines):
    recents = []
    try:
        f = open('recent_rips.lst', 'r')
        recents = tail(f, lines=lines)
        f.close()
    except: pass
    print dumps( {
        'recent' : recents
    } )
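
# recent() prints {"recent": [...]} with the newest URLs first, or an empty
# list if recent_rips.lst does not exist yet.
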
""" Tail a file and get X lines from the end """
def tail(f, lines=1, _buffer=4098):
    lines_found = []
    block_counter = -1
    while len(lines_found) < lines:
        try:
            f.seek(block_counter * _buffer, SEEK_END)
        except IOError: # either file is too small, or too many lines requested
            f.seek(0)
            lines_found = f.readlines()
            break
        lines_found = f.readlines()
        if len(lines_found) > lines:
            break
        block_counter -= 1
    result = [word.strip() for word in lines_found[-lines:]]
    result.reverse()
    return result
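
# e.g. tail(open('recent_rips.lst'), lines=3) returns the last three lines of
# the file, stripped of whitespace and ordered newest-first.
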
""" Adds url to list of recently-downloaded albums """
def add_recent(url):
    if '.ru/' in url: return
    if path.exists('recent_rips.lst'):
        already_added = False
        f = open('recent_rips.lst', 'r')
        if url in tail(f, lines=10): already_added = True
        f.close()
        if already_added: return
    f = open('recent_rips.lst', 'a')
    f.write('%s\n' % url)
    f.close()
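
# recent_rips.lst is a plain text file with one URL per line, newest appended
# last; tail() above reads it back newest-first.
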
""" Entry point. Print leading/trailing characters, executes main() """
if __name__ == '__main__':
print "Content-Type: application/json"
print ""
main()
print "\n"