-
Notifications
You must be signed in to change notification settings - Fork 0
/
mkv-subtitle-management-tool.py
175 lines (136 loc) · 7.42 KB
/
mkv-subtitle-management-tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from enum import Enum
import shutil
import subprocess
import sys
import asstosrt
import json
import os
import argparse
from pathlib import Path
from deep_translator import GoogleTranslator
# Command-line interface. Every option is optional for the parser itself;
# which ones are actually read depends on the selected --mode.
parser = argparse.ArgumentParser(
    description="Convert, translate and merge subtitle tracks of .mkv files.",
)
parser.add_argument(
    '--mode',
    type=int,
    help="operation to run: 1 = generate .mkv with SRT tracks (default), "
         "2 = convert .ass to .srt, 3 = translate .srt, "
         "4 = merge .srt into .mkv (not supported yet)",
)
parser.add_argument('--mkv_file', type=str, help="path of the .mkv file (mode 1)")
parser.add_argument('--ass_file', type=str, help="path of the .ass file to convert (mode 2)")
parser.add_argument(
    '--source_language',
    type=str,
    help="language of the input subtitles (modes 2 and 3); "
         "in mode 2 it also names the generated .srt file",
)
parser.add_argument('--srt_file', type=str, help="path of the .srt file to translate (mode 3)")
parser.add_argument(
    '--target_language',
    type=str,
    help="language to translate into (mode 3); also names the generated .srt file",
)
args = parser.parse_args()
class Modes(Enum):
    """Operations this script can perform, keyed by their integer code."""

    # Extract the subtitle tracks of an .mkv and re-mux it with SRT tracks.
    GENERATE_MKV_WITH_SRT = 1
    # Convert a standalone .ass file to .srt.
    CONVERT_ASS_TO_SRT = 2
    # Translate a standalone .srt file into another language.
    TRANSLATE_SRT = 3
    # Reserved: merging an .srt into an .mkv.
    MERGE_SRT_INTO_MKV = 4
def get_mkv_file_info(file_name):
    """Return the parsed JSON that ``mkvmerge --identify`` prints for *file_name*.

    The command's standard output is captured as text and decoded with
    ``json.loads``; its exit status is not inspected.
    """
    identify_command = [
        'mkvmerge',
        '--identification-format', 'json',
        '--identify', file_name,
    ]
    completed = subprocess.run(identify_command, capture_output=True, text=True)
    identification_json = completed.stdout
    return json.loads(identification_json)
def generate_mkv_temp_file(file_name):
    """Move *file_name* to the fixed temporary name ``input.mkv``.

    The temporary name is relative, so it resolves against the current
    working directory.
    """
    temp_mkv_name = "input.mkv"
    print("---------- GENERATING .MKV TEMP ----------")
    shutil.move(file_name, temp_mkv_name)
def merge_subtitles_in_mkv(file_name, new_subtitle_files):
    """Mux ``input.mkv`` together with extra subtitle files into *file_name*.

    Args:
        file_name: Path of the .mkv file that mkvmerge writes.
        new_subtitle_files: Subtitle file names as a single
            whitespace-separated string; shell-style quoting inside it is
            honoured when the string is split.

    Raises:
        subprocess.CalledProcessError: If mkvmerge exits with a non-zero status.
    """
    import shlex  # local import: only this function needs it

    print("---------- GENERATING NEW .MKV FILE WITH SRT SUBTITLES ----------")
    # Pass an argument list instead of a shell string so that quotes, spaces
    # or shell metacharacters in the file names cannot break the command.
    create_new_mkv_command = [
        'mkvmerge',
        '-o', file_name,
        'input.mkv',
        *shlex.split(new_subtitle_files),
    ]
    subprocess.check_call(create_new_mkv_command, stdout=sys.stdout, stderr=subprocess.STDOUT)
def delete_temp_mkv_file():
    """Remove the temporary ``input.mkv`` from the working directory, if present."""
    leftover = Path('input.mkv')
    if not leftover.exists():
        # Nothing to clean up.
        return
    leftover.unlink()
def update_mkv_subtitle_tracks_info(file_name, track_language, language_track_id):
    """Set language and name of one track via mkvpropedit, then clean up.

    Args:
        file_name: Path of the .mkv file to edit in place.
        track_language: Language value written to the track; also the prefix
            of the intermediate subtitle files that get removed.
        language_track_id: Track entry in the form ``"<id>"`` or
            ``"<id>-<name>"``; whitespace around the hyphen is ignored. The
            name may itself contain hyphens. Without a name part the track
            name is set to the empty string.

    Raises:
        subprocess.CalledProcessError: If mkvpropedit exits with a non-zero status.
        ValueError: If the id part is not an integer.
    """
    print(f"---------- UPDATING '{track_language} - {language_track_id}' SRT TRACK ----------")

    # Split on the FIRST hyphen only so hyphenated track names survive, and
    # strip the padding of the "<id> - <name>" form; otherwise the id keeps a
    # trailing space and the cleanup paths below never match a real file.
    id_part, _, name_part = str(language_track_id).partition("-")
    track_id = id_part.strip()
    language_track_name = name_part.strip()

    # mkvpropedit's "track:N" selector counts from 1, hence the +1
    # (presumably the ids handled here are 0-based mkvmerge ids — confirm).
    language_track_number = int(track_id) + 1
    track_selector = f"track:{language_track_number}"

    # Argument lists instead of shell strings: quotes or spaces in the file
    # name or track name cannot break the command.
    set_language_command = [
        'mkvpropedit', file_name,
        '--edit', track_selector,
        '--set', f'language={track_language}',
    ]
    subprocess.check_call(set_language_command, stdout=sys.stdout, stderr=subprocess.STDOUT)
    set_track_name = [
        'mkvpropedit', file_name,
        '--edit', track_selector,
        '--set', f'name={language_track_name}',
    ]
    subprocess.check_call(set_track_name, stdout=sys.stdout, stderr=subprocess.STDOUT)

    # Remove the intermediate subtitle files for this track, if they exist.
    # NOTE(review): the .ass file is looked up under the same id as the .srt;
    # confirm that this matches the name used when the track was extracted.
    for suffix in ('.srt', '.ass'):
        leftover = Path(f'{track_language}-{track_id}{suffix}')
        if leftover.exists():
            leftover.unlink()
def extract_mkv_ass_track(file_name, track_id, track_language):
    """Extract one track of *file_name* to ``<track_language>-<track_id>.ass``.

    Args:
        file_name: Path of the source .mkv file.
        track_id: Id of the track to extract, as passed to mkvextract.
        track_language: Language used as prefix of the output file name.

    Raises:
        subprocess.CalledProcessError: If mkvextract exits with a non-zero status.
    """
    output_name = f"{track_language}-{track_id}.ass"
    # Argument list rather than a shell string, so file names containing
    # quotes or spaces are passed through unchanged.
    extract_command = ['mkvextract', 'tracks', file_name, f"{track_id}:{output_name}"]
    subprocess.check_call(extract_command, stdout=sys.stdout, stderr=subprocess.STDOUT)
def convert_ass_to_srt(ass_file_name, track_language, latest_track_number):
    """Convert an .ass subtitle file to .srt in the current directory.

    Args:
        ass_file_name: Path of the .ass file to read.
        track_language: Prefix of the generated file name.
        latest_track_number: When not ``None``, the output is named
            ``<track_language>-<latest_track_number + 1>.srt``; otherwise
            ``<track_language>.srt``.
    """
    print(f"---------- CONVERTING '{ass_file_name}' IN .SRT ----------")
    # Context managers guarantee both handles are closed, even if the
    # conversion or the write raises.
    with open(ass_file_name) as ass_file:
        srt_str = asstosrt.convert(ass_file)

    if latest_track_number is not None:
        target_file_name = f'{track_language}-{int(latest_track_number) + 1}.srt'
    else:
        target_file_name = f'{track_language}.srt'

    with open(target_file_name, 'w') as srt_file:
        srt_file.write(srt_str)
def translate_srt(file_path, original_language, new_language):
    """Translate the text lines of an .srt file and write ``<new_language>.srt``.

    Lines that are empty, consist only of digits, or contain ``-->`` are
    copied unchanged; every other line is sent to the translator on its own.

    Args:
        file_path: Path of the UTF-8 encoded .srt file to read.
        original_language: Source language passed to ``GoogleTranslator``.
        new_language: Target language; also the stem of the output file name.
    """
    print(f"---------- GENERATING '{new_language}' SUBTITLES ----------")
    with open(file_path, encoding="utf-8") as f:
        content = f.read()
    print(f"Reading {file_path} file")
    lines = content.split("\n")

    def needs_translation(line):
        return line != "" and not line.isdigit() and "-->" not in line

    total = sum(1 for line in lines if needs_translation(line))
    # One translator instance is enough; it does not depend on the line.
    translator = GoogleTranslator(source=original_language, target=new_language)

    # Rebuild the file line by line. Replacing each line's text inside the
    # whole content would also rewrite substrings of other lines and text
    # that had already been translated.
    translated_lines = []
    counter = 0
    for line in lines:
        if not needs_translation(line):
            translated_lines.append(line)
            continue
        counter += 1
        translation = translator.translate(line)
        print(f"[{counter}/{total}] \n - Original: {line}\n - Translation: {translation}\n")
        translated_lines.append("" if translation is None else translation)

    with open(f"{new_language}.srt", "w", encoding="utf-8") as output_srt:
        output_srt.write("\n".join(translated_lines))
def update_new_subtitles_files(new_subtitle_files, track_language, language_track_id):
    """Append ``<track_language>-<language_track_id>.srt`` to a file-list string.

    Args:
        new_subtitle_files: Space-separated list of file names built so far.
        track_language: Language prefix of the file name to add.
        language_track_id: Track id suffix of the file name to add.

    Returns:
        The extended string; the new name is preceded by a single space.
    """
    import shlex  # local import: only this function needs it

    # The resulting string ends up on a command line, so quote the name.
    # Names made only of ordinary characters come back unchanged.
    subtitle_name = f'{track_language}-{language_track_id}.srt'
    return f'{new_subtitle_files} {shlex.quote(subtitle_name)}'
def parse_mkv_subtitle_track(track, file_name, latest_track_number, language_track_id, track_language_dict):
    """Extract one subtitle track and record it under its language.

    Args:
        track: One entry of the ``tracks`` list from mkvmerge's JSON
            identification; ``id``, ``codec`` and ``properties`` are read.
        file_name: Path of the .mkv file the track belongs to.
        latest_track_number: Forwarded to ``convert_ass_to_srt`` to name the
            generated .srt file.
        language_track_id: Id recorded for a converted SubStationAlpha track.
        track_language_dict: Mapping ``language -> list of entries``; it is
            updated in place and also returned.

    Returns:
        *track_language_dict*, with an entry appended for a handled track.
        Tracks with any other codec leave it untouched.

    Raises:
        subprocess.CalledProcessError: If mkvextract exits with a non-zero status.
    """
    track_id = track["id"]
    codec = track["codec"]

    if "SubStationAlpha" in codec:
        properties = track["properties"]
        track_language = properties["language"]
        # Fall back to the language when the track carries no name.
        track_name = properties.get("track_name", track_language)
        extract_mkv_ass_track(file_name, track_id, track_language)
        convert_ass_to_srt(f'{track_language}-{track_id}.ass', track_language, latest_track_number)
        language_entry = f"{language_track_id} - {track_name}"
        track_language_dict.setdefault(track_language, []).append(language_entry)

    # TODO: handling of tracks that are already SubRip is incomplete.
    if "SubRip/SRT" in codec:
        track_language = track["properties"]["language"]
        # Argument list rather than a shell string, so quotes or spaces in
        # the file name cannot break the command.
        extract_command = ['mkvextract', 'tracks', file_name, f"{track_id}:{track_language}.srt"]
        subprocess.check_call(extract_command, stdout=sys.stdout, stderr=subprocess.STDOUT)
        language_track_id = int(track_id) + 1
        # Keep every value a list of entries: a bare string would be walked
        # character by character by code that iterates the entries, and
        # overwriting would drop entries already recorded for this language.
        # NOTE(review): the id stored here is track_id + 1 — confirm it is
        # the id the consumer of these entries expects.
        track_language_dict.setdefault(track_language, []).append(str(language_track_id))

    return track_language_dict
# ---------------------------------------------------------------------------
# Script body: pick the operation from --mode and run it.
# NOTE: a debug print of deep_translator's language table was dropped from
# this spot; only GoogleTranslator is imported from that package, so the
# statement could do nothing but raise NameError.
# ---------------------------------------------------------------------------
mode = Modes(args.mode) if args.mode is not None else Modes.GENERATE_MKV_WITH_SRT

if mode == Modes.GENERATE_MKV_WITH_SRT:
    if not args.mkv_file:
        parser.error("--mkv_file is required for this mode")
    full_path = args.mkv_file
    file_name = Path(full_path).name
    folder_path = os.path.dirname(full_path)
    # A bare file name has no directory part, and os.chdir("") would fail.
    if folder_path:
        os.chdir(folder_path)
    mkv_json_info = get_mkv_file_info(file_name)
    new_subtitle_files = ""
    track_language_dict = {}

    print("---------- PARSING CURRENT .MKV FILE ----------")
    tracks = mkv_json_info["tracks"]
    latest_track_number = tracks[-1]["id"]
    for track in tracks:
        codec = track["codec"]
        if "SubStationAlpha" in codec or "SubRip/SRT" in codec:
            # Each handled subtitle track gets the next free id after the
            # last one seen so far.
            language_track_id = int(latest_track_number) + 1
            track_language_dict = parse_mkv_subtitle_track(track, file_name, latest_track_number, language_track_id, track_language_dict)
            # NOTE(review): for tracks that are already SubRip the extractor
            # writes '<language>.srt', not the name added here — confirm the
            # intended handling.
            new_subtitle_files = update_new_subtitles_files(new_subtitle_files, track["properties"]["language"], language_track_id)
            latest_track_number += 1

    print(json.dumps(track_language_dict))
    generate_mkv_temp_file(file_name)
    merge_subtitles_in_mkv(file_name, new_subtitle_files)
    delete_temp_mkv_file()
    print(json.dumps(track_language_dict))

    for track_language, language_track_id_list in track_language_dict.items():
        # Tolerate a single entry stored as a plain string; iterating it
        # directly would yield one character at a time.
        if isinstance(language_track_id_list, str):
            language_track_id_list = [language_track_id_list]
        for language_track_id in language_track_id_list:
            update_mkv_subtitle_tracks_info(file_name, track_language, language_track_id)

elif mode == Modes.CONVERT_ASS_TO_SRT:
    if not args.ass_file or not args.source_language:
        parser.error("--ass_file and --source_language are required for this mode")
    convert_ass_to_srt(args.ass_file, args.source_language, None)

elif mode == Modes.TRANSLATE_SRT:
    if not args.srt_file or not args.source_language or not args.target_language:
        parser.error("--srt_file, --source_language and --target_language are required for this mode")
    translate_srt(args.srt_file, args.source_language, args.target_language)

elif mode == Modes.MERGE_SRT_INTO_MKV:
    raise Exception("Mode not supported yet.")