Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes and Improvement parsing #84

Merged
merged 1 commit into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
* [v3.17.0](https://github.com/newt-sc/a4kSubtitles/releases/tag/service.subtitles.a4ksubtitles%2Fservice.subtitles.a4ksubtitles-3.17.0):
* Fix: TV year being pulled incorrectly
* Fix: File name issues in both temp and media directories
* Fix: Subtitle file missing extension
* Fix: Incorrect episode selection when downloaded archive contains multiple subtitles
* Fix: Auto-download not working after the first selected episode in the playlist (A4K only worked for the first media item)
* Fix: Subtitle import issue due to "illegal characters"
* Improve: Determination of subtitle episodes
* Improve: Results parsing
* Feature: Auto-download now copies subtitles next to the video or to a custom location, based on Kodi's subtitle storage mode path
* SubSource: Now supports series in "absolute order", as used by some anime websites
* SubSource: Fixed issue of duplicated subtitle IDs with different names

* [v3.16.1](https://github.com/newt-sc/a4kSubtitles/releases/tag/service.subtitles.a4ksubtitles%2Fservice.subtitles.a4ksubtitles-3.16.1):
* Fix addons.xml.crc

Expand Down
53 changes: 45 additions & 8 deletions a4kSubtitles/download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# -*- coding: utf-8 -*-
# Primary subtitle file extensions.
subtitles_exts = [
    '.srt',
    '.sub',
]

# Secondary subtitle file extensions.
subtitles_exts_secondary = [
    '.smi',
    '.ssa',
    '.aqt',
    '.jss',
    '.ass',
    '.rt',
    '.txt',
]

# Every extension treated as a subtitle file: primary entries first, then secondary.
subtitles_exts_all = subtitles_exts + subtitles_exts_secondary

def __download(core, filepath, request):
request['stream'] = True
Expand All @@ -7,6 +10,10 @@ def __download(core, filepath, request):
core.shutil.copyfileobj(r.raw, f)

def __extract_gzip(core, archivepath, filename):
if not any(filename.lower().endswith(ext) for ext in subtitles_exts_all):
# For now, we will use 'srt' to mark unknown file extensions as subtitles.
filename = filename + ".srt"
newt-sc marked this conversation as resolved.
Show resolved Hide resolved

filepath = core.os.path.join(core.utils.temp_dir, filename)

if core.utils.py2:
Expand All @@ -25,8 +32,8 @@ def __extract_gzip(core, archivepath, filename):
return filepath

def __extract_zip(core, archivepath, filename, episodeid):
sub_exts = ['.srt', '.sub']
sub_exts_secondary = ['.smi', '.ssa', '.aqt', '.jss', '.ass', '.rt', '.txt']
sub_exts = subtitles_exts
sub_exts_secondary = subtitles_exts_secondary

try:
using_libvfs = False
Expand All @@ -39,9 +46,13 @@ def __extract_zip(core, archivepath, filename, episodeid):
(dirs, files) = core.kodi.xbmcvfs.listdir('archive://%s' % archivepath_)
namelist = [file.decode(core.utils.default_encoding) if core.utils.py2 else file for file in files]

subfile = core.utils.find_file_in_archive(core, namelist, sub_exts, episodeid)
if not subfile:
subfile = core.utils.find_file_in_archive(core, namelist, sub_exts_secondary, episodeid)
subfile = core.utils.find_file_in_archive(core, namelist, sub_exts + sub_exts_secondary, episodeid)

if subfile:
# Add the subtitle file extension.
subfilename_and_ext = subfile.rsplit(".", 1)
if len(subfilename_and_ext) > 1:
filename = filename + "." + subfilename_and_ext[-1]

dest = core.os.path.join(core.utils.temp_dir, filename)
if not subfile:
Expand All @@ -67,9 +78,15 @@ def __extract_zip(core, archivepath, filename, episodeid):
return dest

def __insert_lang_code_in_filename(core, filename, lang_code):
    """Return ``filename`` with ``lang_code`` added as a dotted component.

    The name is first cleaned with
    ``core.utils.strip_non_ascii_and_unprintable``. The result then depends
    on whether the cleaned name already ends with a subtitle extension:

    * ends with an entry of ``subtitles_exts_all`` (compared
      case-insensitively): the code goes in front of the extension,
      giving ``<name>.<lang_code>.<ext>``;
    * anything else: the code is appended, giving ``<name>.<lang_code>``.

    :param core: add-on core object; only ``core.utils`` is used here
    :param filename: subtitle or release name, with or without an extension
    :param lang_code: language code to embed in the name
    :return: the cleaned name including the language code
    """
    name = core.utils.strip_non_ascii_and_unprintable(filename)
    stem, dot, ext = name.rpartition(".")

    # This can be called for "raw" subtitles whose name already carries a
    # subtitle extension; keep that extension last. The check is done on the
    # lower-cased extension so "Movie.SRT" is handled like "Movie.srt", the
    # same way __extract_gzip tests file names.
    if dot and (dot + ext).lower() in subtitles_exts_all:
        return ".".join([stem, lang_code, ext])

    # No known subtitle extension: return "<name>.<lang_code>"; the zip
    # extraction step adds the extension of the extracted subtitle later.
    return "{0}.{1}".format(name, lang_code)

def __postprocess(core, filepath, lang_code):
try:
Expand Down Expand Up @@ -111,6 +128,24 @@ def __postprocess(core, filepath, lang_code):
f.write(text.encode(core.utils.default_encoding))
except: pass

def __copy_sub_local(core, subfile):
    """Copy a downloaded subtitle to the location selected in Kodi's settings.

    The copy is named ``<media name>.<lang_code>.<extension>``, where the
    media name is the currently playing file name without its extension and
    the language code and extension are taken from ``subfile``'s own name.

    The destination folder depends on the ``subtitles.storagemode`` setting:

    * ``0``: the folder reported by ``Player.Folderpath``;
    * ``1``: the folder from the ``subtitles.custompath`` setting;
    * any other value: nothing is copied.

    Nothing is copied when the ``A4KSUBTITLES_TESTRUN`` environment variable
    is ``'true'``, when ``subfile`` is not named
    ``<name>.<lang_code>.<extension>``, or when the destination folder does
    not exist.

    :param core: add-on core object giving access to ``os`` and ``kodi``
    :param subfile: path of the subtitle file to copy
    :return: None
    """
    if core.os.getenv('A4KSUBTITLES_TESTRUN') == 'true':
        return

    # The language code and extension are the last two dotted components.
    name_parts = core.os.path.basename(subfile).rsplit(".", 2)
    if len(name_parts) != 3:
        # Without both components the copy cannot be named; skip it rather
        # than fail the download with an unpacking error.
        return
    lang_code, extension = name_parts[1], name_parts[2]

    # Read the setting once and resolve the destination folder from it.
    storage_mode = core.kodi.get_kodi_setting("subtitles.storagemode")
    if storage_mode == 0:
        folder_dest = core.kodi.xbmc.getInfoLabel('Player.Folderpath')
    elif storage_mode == 1:
        folder_dest = core.kodi.get_kodi_setting("subtitles.custompath")
    else:
        return

    playing_file = core.kodi.xbmc.getInfoLabel('Player.Filename')
    media_name = core.os.path.splitext(core.os.path.basename(playing_file))[0]
    file_dest = core.os.path.join(folder_dest, ".".join([media_name, lang_code, extension]))

    if core.kodi.xbmcvfs.exists(folder_dest):
        core.kodi.xbmcvfs.copy(subfile, file_dest)

def download(core, params):
core.logger.debug(lambda: core.json.dumps(params, indent=2))

Expand All @@ -120,6 +155,7 @@ def download(core, params):
actions_args = params['action_args']
lang_code = core.utils.get_lang_id(actions_args['lang'], core.kodi.xbmc.ISO_639_2)
filename = __insert_lang_code_in_filename(core, actions_args['filename'], lang_code)
filename = core.utils.slugify_filename(filename)
archivepath = core.os.path.join(core.utils.temp_dir, 'sub.zip')

service_name = params['service_name']
Expand All @@ -140,6 +176,7 @@ def download(core, params):
__postprocess(core, filepath, lang_code)

if core.api_mode_enabled:
__copy_sub_local(core, filepath)
newt-sc marked this conversation as resolved.
Show resolved Hide resolved
return filepath

listitem = core.kodi.xbmcgui.ListItem(label=filepath, offscreen=True)
Expand Down
1 change: 1 addition & 0 deletions a4kSubtitles/lib/kodi_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
xbmc.ENGLISH_NAME = 'name'

__player = lambda: None
__player.isPlayingVideo = lambda: None
__player.getPlayingFile = lambda: ''
__player.getAvailableSubtitleStreams = lambda: []
__player.setSubtitles = lambda s: None
Expand Down
51 changes: 49 additions & 2 deletions a4kSubtitles/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def strip_non_ascii_and_unprintable(text):
result = ''.join(char for char in text if char in string.printable)
return result.encode('ascii', errors='ignore').decode('ascii', errors='ignore')

def slugify_filename(text):
    """Replace characters that are not allowed in file names with ``_``.

    The replaced characters are: backslash, slash, asterisk, question mark,
    colon, double quote, angle brackets and pipe. Everything else is kept.

    :param text: file name to sanitise
    :return: ``text`` with every listed character replaced by an underscore
    """
    illegal_chars = re.compile(r'[\\/*?:"<>|]')
    return illegal_chars.sub('_', text)

def get_lang_id(language, lang_format):
try:
return get_lang_ids([language], lang_format)[0]
Expand Down Expand Up @@ -173,15 +176,16 @@ def get_json(path, filename):
with open_file_wrapper(json_path)() as json_result:
return json.load(json_result)

def find_file_in_archive(core, namelist, exts, part_of_filename=''):
def find_file_in_archive(core, namelist, exts, episode_number=''):
first_ext_match = None
exact_file = None
for file in namelist:
file_lower = file.lower()
if any(file_lower.endswith(ext) for ext in exts):
sub_meta = extract_season_episode(file_lower, True)
if not first_ext_match:
first_ext_match = file
if (part_of_filename == '' or part_of_filename in file_lower):
if (episode_number == '' or sub_meta.episode == episode_number):
exact_file = file
break

Expand Down Expand Up @@ -212,3 +216,46 @@ def extract_zipfile_member(zipfile, filename, dest):
except:
filename = filename.encode(default_encoding).decode(py3_zip_missing_utf8_flag_fallback_encoding)
return zipfile.extract(filename, dest)

def extract_season_episode(filename, episode_fallback=False, zfill=3):
    """Extract season, episode and episode-range information from a name.

    Date-like fragments are removed from ``filename`` first. Season and
    episode are then looked up via their own markers; a combined
    "season + separator + number" match fills in whichever of the two is
    still missing. When several episode ranges ("<start>-<end>" or
    "<start>~<end>") are found, the last one is used.

    :param filename: file or release name to inspect
    :param episode_fallback: when true and no episode was found, use the
        last standalone number in the name as the episode
    :param zfill: width that season and episode are zero-padded to after
        their leading zeros are stripped
    :return: ``DictAsObject`` with ``season`` and ``episode`` (strings, empty
        when nothing was found) and ``episodes_range`` (a ``range``, empty
        when no range was found)
    """
    episode_pattern = r'(?:e|ep.?|episode.?)(\d{1,5})'
    season_pattern = r'(?:s|season.?)(\d{1,5})'
    combined_pattern = r'\b(?:s|season)(\d{1,5})\s?[x|\-|\_|\s]\s?[a-z]?(\d{1,5})\b'
    range_episodes_pattern = r'\b(?:.{1,4}e|ep|eps|episodes|\s)?(\d{1,5}?)(?:v.?)?\s?[\-|\~]\s?(\d{1,5})(?:v.?)?\b'
    date_pattern = r'\b(\d{2,4}-\d{1,2}-\d{2,4})\b'

    def _pad(number):
        # Strip leading zeros, then left-pad with zeros up to the requested width.
        return number.lstrip("0").zfill(zfill) if number else ""

    # Remove date-like fragments before any of the number patterns are applied.
    filename = re.sub(date_pattern, "", filename)

    season = None
    found = re.search(season_pattern, filename, re.IGNORECASE)
    if found:
        season = found.group(1)

    episode = None
    found = re.search(episode_pattern, filename, re.IGNORECASE)
    if found:
        episode = found.group(1)

    # The combined form only supplies values the dedicated patterns missed.
    found = re.search(combined_pattern, filename, re.IGNORECASE)
    if found:
        season = season or found.group(1)
        episode = episode or found.group(2)

    episodes_range = range(0)
    ranges_found = re.findall(range_episodes_pattern, filename, re.IGNORECASE)
    if ranges_found:
        # NOTE(review): range() excludes its end value, so the last number of
        # the matched span is not contained in episodes_range; confirm intended.
        first, last = (int(part) for part in ranges_found[-1])
        episodes_range = range(first, last)

    if episode_fallback and not episode:
        # Turn separators into spaces so standalone numbers sit on word boundaries.
        spaced = re.sub(r'[\s\.\:\;\(\)\[\]\{\}\\\/\&\€\'\`\#\@\=\$\?\!\%\+\-\_\*\^]', " ", filename)
        candidates = re.compile(r'\bE?P?(\d{1,5})v?\d?\b', re.IGNORECASE).findall(spaced)
        if candidates:
            # Assume the last number in the name is the episode number.
            episode = candidates[-1]

    return DictAsObject(
        {
            "season": _pad(season),
            "episode": _pad(episode),
            "episodes_range": episodes_range
        }
    )
17 changes: 7 additions & 10 deletions a4kSubtitles/lib/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ def __update_info_from_imdb(core, meta, pagination_token=''):
meta.episode = str(result['series']['episodeNumber']['episodeNumber'])
else:
meta.tvshow = result['titleText']['text']
meta.tvshow_year = str(result['releaseDate']['year'])
if meta.tvshow_year == '':
meta.tvshow_year = str(result['releaseDate']['year'])

episodes = result['episodes']['result']['edges']
s_number = int(meta.season)
Expand Down Expand Up @@ -316,15 +317,11 @@ def __get_basic_info():
if regex_result:
meta.imdb_id = regex_result.group(1)

if meta.season == '':
regex_result = re.search(r'.*season=(\d{1,}).*', filename_and_path, re.IGNORECASE)
if regex_result:
meta.season = regex_result.group(1)

if meta.episode == '':
regex_result = re.search(r'.*episode=(\d{1,}).*', filename_and_path, re.IGNORECASE)
if regex_result:
meta.episode = regex_result.group(1)
if meta.season == '' or meta.episode == '':
filename_info = utils.extract_season_episode(meta.filename, zfill=0)
filename_path_info = utils.extract_season_episode(filename_and_path, zfill=0)
meta.season = meta.season or filename_path_info.season or filename_info.season
meta.episode = meta.episode or filename_path_info.episode or filename_info.episode

return meta

Expand Down
Loading
Loading