forked from manojmj92/subtitle-downloader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
subtitle-downloader.py
executable file
·124 lines (112 loc) · 4.64 KB
/
subtitle-downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python
#-------------------------------------------------------------------------------
# Name : subtitle downloader
# Purpose : One step subtitle download
#
# Authors : manoj m j, arun shivaram p, Valentin Vetter, niroyb
# Edited by : Valentin Vetter
# Created :
# Copyright : (c) www.manojmj.com
# Licence : GPL v3
#-------------------------------------------------------------------------------
# TODO: use another DB if subs are not found on subDB
import hashlib
import os
import sys
import logging
import requests,time,re,zipfile
from bs4 import BeautifulSoup
# Major version of the running interpreter. The SubDB download code branches
# on this value to pick between the Python 2 and Python 3 urllib modules.
PY_VERSION = sys.version_info[0]
if PY_VERSION == 2:
    import urllib2
elif PY_VERSION == 3:
    import urllib.request
def get_hash(file_path):
    """Return the hash used to look up *file_path* on SubDB.

    The hash is the hexadecimal MD5 digest of the first 64 KiB of the file
    followed by its last 64 KiB.

    Files shorter than 64 KiB are supported: the tail read is clamped to the
    start of the file (so the content is simply hashed twice) instead of
    seeking to a negative offset, which raises an error.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    read_size = 64 * 1024
    with open(file_path, 'rb') as f:
        head = f.read(read_size)
        # Find the file length, then step back at most `read_size` bytes;
        # seeking before the start of the file is an error.
        f.seek(0, os.SEEK_END)
        tail_start = max(f.tell() - read_size, 0)
        f.seek(tail_start)
        tail = f.read(read_size)
    return hashlib.md5(head + tail).hexdigest()
def sub_downloader(file_path):
    """Download an English subtitle for *file_path* from SubDB.

    Files whose extension is not a known video extension are skipped, as are
    videos that already have a sibling ``.srt`` file. On success the subtitle
    is written next to the video as ``<name>.srt``.

    If anything goes wrong while talking to SubDB, the failure is logged and
    the lookup is retried on subscene via ``sub_downloader2``, so a single
    bad file never aborts a batch run.

    Returns None.
    """
    video_extensions = (".avi", ".mp4", ".mkv", ".mpg", ".mpeg", ".mov",
                        ".rm", ".vob", ".wmv", ".flv", ".3gp", ".3g2")
    root, extension = os.path.splitext(file_path)
    # Skip this file if it is not a video. Compare case-insensitively so
    # that names such as "MOVIE.MKV" are recognised as well.
    if extension.lower() not in video_extensions:
        return
    # Nothing to do when a subtitle is already present.
    if os.path.exists(root + ".srt"):
        return
    try:
        headers = {'User-Agent': 'SubDB/1.0 (subtitle-downloader/1.0; http://github.com/manojmj92/subtitle-downloader)'}
        url = "http://api.thesubdb.com/?action=download&hash=" + get_hash(file_path) + "&language=en"
        if PY_VERSION == 2:
            # The data argument must be None: any other value (even an
            # empty string) makes urllib2 issue a POST instead of a GET.
            req = urllib2.Request(url, None, headers)
            response = urllib2.urlopen(req).read()
        else:
            req = urllib.request.Request(url, None, headers)
            response = urllib.request.urlopen(req).read()
        with open(root + ".srt", "wb") as subtitle:
            subtitle.write(response)
        logging.info("Subtitle successfully downloaded for " + file_path)
    except Exception as exc:
        # Only real errors are caught here, so Ctrl-C still stops the
        # program. Download subs from subscene if not found in SubDB.
        logging.warning("SubDB lookup failed for %s (%s); trying subscene",
                        file_path, exc)
        sub_downloader2(file_path)
def sub_downloader2(file_path):
    """Fallback: fetch an English subtitle for *file_path* from subscene.

    Non-video files (by extension) and videos that already have a sibling
    ``.srt`` file are skipped. Otherwise the video's base name is used as a
    release search on subscene, the English result link is followed to its
    download page, and the subtitle archive is downloaded and extracted into
    the directory that contains the video.

    Every failure is reported on stdout and in the log and then swallowed so
    that the caller can continue with the next file.

    Returns None.
    """
    import tempfile  # only needed by this fallback path

    video_extensions = (".avi", ".mp4", ".mkv", ".mpg", ".mpeg", ".mov",
                        ".rm", ".vob", ".wmv", ".flv", ".3gp", ".3g2")
    request_timeout = 30  # seconds; avoids hanging forever on a dead server
    root, extension = os.path.splitext(file_path)
    if extension.lower() not in video_extensions:
        return
    if os.path.exists(root + ".srt"):
        return
    # Use os.path instead of scanning for "\\" so that the directory and the
    # release name are separated correctly on every platform.
    directory, release_name = os.path.split(root)
    archive_path = None
    try:
        # Let requests build the query string so that spaces, '&', '#', ...
        # in the file name are encoded properly.
        r = requests.get("http://subscene.com/subtitles/release",
                         params={"q": release_name}, timeout=request_timeout)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, "lxml")
        href = ""
        # A result link contains exactly two <span> elements, the first one
        # naming the language. The last English link on the page wins.
        for anchor in soup.find_all("a"):
            spans = anchor.find_all("span")
            if len(spans) == 2 and spans[0].get_text().strip() == "English":
                href = (anchor.get("href") or "").strip()
        if not href:
            logging.info("No English subtitle found on subscene for " + file_path)
            return

        r = requests.get("http://subscene.com" + href, timeout=request_timeout)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, "lxml")
        button = soup.find('a', attrs={'id': 'downloadButton'})
        if button is None or not button.get("href"):
            logging.info("No download link found on subscene for " + file_path)
            return

        r = requests.get("http://subscene.com" + button.get("href"),
                         timeout=request_timeout)
        r.raise_for_status()
        # Download into a uniquely named temporary file so that no existing
        # file next to the video can be overwritten or deleted.
        fd, archive_path = tempfile.mkstemp(suffix=".zip")
        with os.fdopen(fd, 'wb') as subfile:
            for chunk in r.iter_content(100000):
                subfile.write(chunk)
        # NOTE(review): pause kept from the original implementation;
        # presumably it throttles requests to subscene - confirm it is needed.
        time.sleep(1)
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(directory or os.curdir)
    except Exception:
        # Ignore exception and continue
        print("Error in fetching subtitle for " + file_path)
        print("Error", sys.exc_info())
        logging.error("Error in fetching subtitle for " + file_path + str(sys.exc_info()))
    finally:
        # Never leave the downloaded archive behind, even on failure.
        if archive_path is not None and os.path.exists(archive_path):
            os.unlink(archive_path)
def main():
    """Command-line entry point: fetch subtitles for every path argument.

    Logging goes to a ``.log`` file named after the script (``sys.argv[0]``
    with its extension replaced). Each argument may be a single file or a
    directory; directories are walked recursively and every file found is
    handed to ``sub_downloader``, which decides whether it is a video.

    Exits with status 1 when no path argument is given.
    """
    root, _ = os.path.splitext(sys.argv[0])
    logging.basicConfig(filename=root + '.log', level=logging.INFO)
    logging.info("Started with params " + str(sys.argv))

    if len(sys.argv) == 1:
        print("This program requires at least one parameter")
        sys.exit(1)

    # sys.argv[0] is the script itself, not a target, so start at index 1.
    for path in sys.argv[1:]:
        if os.path.isdir(path):
            # Iterate the root directory recursively using os.walk and for
            # each video file present get the subtitle.
            for dir_path, _, file_names in os.walk(path):
                for filename in file_names:
                    sub_downloader(os.path.join(dir_path, filename))
        else:
            sub_downloader(path)


if __name__ == '__main__':
    main()