diff --git a/medusa/providers/generic_provider.py b/medusa/providers/generic_provider.py
index 8782394384..65890cddfb 100644
--- a/medusa/providers/generic_provider.py
+++ b/medusa/providers/generic_provider.py
@@ -238,9 +238,33 @@ def remove_duplicate_mappings(items, pk='link'):
             ))
         )
 
+    def search_results_in_cache(self, episodes):
+        """
+        Search the provider cache for the given episodes.
+
+        Search the cache (db) for this provider.
+        :param episodes: List of Episode objects
+
+        :return: A dict of search results, keyed by episode number
+        """
+        return self.cache.find_episodes(episodes)
+
     def find_search_results(self, series, episodes, search_mode, forced_search=False,
                             download_current_quality=False, manual_search=False,
                             manual_search_type='episode'):
-        """Search episodes based on param."""
+        """
+        Search the provider for episodes.
+
+        Search the provider using HTTP queries.
+        :param series: Series object
+        :param episodes: List of Episode objects
+        :param search_mode: 'eponly' or 'sponly'
+        :param forced_search: Flag indicating whether the search was triggered by a forced search
+        :param download_current_quality: Flag indicating whether to include an already downloaded quality in the new search
+        :param manual_search: Flag indicating whether the search was triggered by a manual search
+        :param manual_search_type: How the manual search was started: for example, 'episode' or 'season'
+
+        :return: A dict of search results, keyed by episode number.
+        """
         self._check_auth()
         self.series = series
 
@@ -249,18 +273,6 @@ def find_search_results(self, series, episodes, search_mode, forced_search=False
         season_search = (len(episodes) > 1 or manual_search_type == 'season') and search_mode == 'sponly'
 
         for episode in episodes:
-            if not manual_search:
-                cache_results = self.cache.find_needed_episodes(
-                    episode, forced_search=forced_search, down_cur_quality=download_current_quality
-                )
-                if cache_results:
-                    for episode_no in cache_results:
-                        if episode_no not in results:
-                            results[episode_no] = cache_results[episode_no]
-                        else:
-                            results[episode_no] += cache_results[episode_no]
-                    continue
-
             search_strings = []
             if season_search:
                 search_strings = self._get_season_search_strings(episode)
@@ -471,6 +483,7 @@ def find_search_results(self, series, episodes, search_mode, forced_search=False
                         ', '.join(map(str, search_result.parsed_result.episode_numbers)),
                         search_result.name, search_result.url)
 
+
                 if episode_number not in results:
                     results[episode_number] = [search_result]
                 else:
diff --git a/medusa/search/core.py b/medusa/search/core.py
index a39d9614f5..85ae574101 100644
--- a/medusa/search/core.py
+++ b/medusa/search/core.py
@@ -647,6 +647,8 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
     manual_search_results = []
     multi_results = []
     single_results = []
+    cache_multi_results = []
+    cache_single_results = []
 
     # build name cache for show
     name_cache.build_name_cache(series_obj)
@@ -667,14 +669,15 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
         threading.currentThread().name = original_thread_name
 
     for cur_provider in providers:
-        threading.currentThread().name = original_thread_name + u' :: [' + cur_provider.name + u']'
+        threading.currentThread().name = '{original_thread_name} :: [{provider}]'.format(
+            original_thread_name=original_thread_name, provider=cur_provider.name
+        )
 
         if cur_provider.anime_only and not series_obj.is_anime:
             log.debug(u'{0} is not an anime, skipping', series_obj.name)
             continue
 
         found_results[cur_provider.name] = {}
-
         search_count = 0
         search_mode = cur_provider.search_mode
 
@@ -694,24 +697,41 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
                     log.info(u'Performing season pack search for {0}', series_obj.name)
 
             try:
-                search_results = cur_provider.find_search_results(series_obj, episodes, search_mode, forced_search,
-                                                                  down_cur_quality, manual_search, manual_search_type)
+                search_results = []
+                cache_search_results = []
+                cache_multi = []
+                cache_single = []
+
+                if not manual_search:
+                    cache_search_results = cur_provider.search_results_in_cache(episodes)
+                    if cache_search_results:
+                        # From our provider's multi_episode and single_episode results, collect candidates.
+                        found_cache_results = list_results_for_provider(cache_search_results, found_results, cur_provider)
+                        # We're passing the empty lists because we don't want to include previous candidates.
+                        cache_multi, cache_single = collect_candidates(found_cache_results, cur_provider, cache_multi,
+                                                                       cache_single, series_obj, down_cur_quality)
+
+                # Check if we got any candidates from cache and add them to the list.
+                # If we found candidates in cache, we don't need to search the provider.
+                if cache_multi:
+                    cache_multi_results += cache_multi
+                if cache_single:
+                    cache_single_results += cache_single
+
+                # For now we only search if we didn't get any results back from cache,
+                # but we might want to check if there was something useful in cache.
+                if not (cache_multi or cache_single):
+                    log.debug(u'Could not find any candidates in cache, searching provider.')
+                    search_results = cur_provider.find_search_results(series_obj, episodes, search_mode, forced_search,
+                                                                      down_cur_quality, manual_search, manual_search_type)
+                    # Update the found_results dict
+                    found_results = list_results_for_provider(search_results, found_results, cur_provider)
+
             except AuthException as error:
-                log.error(u'Authentication error: {0}', ex(error))
+                log.error(u'Authentication error: {0!r}', error)
                 break
 
-            if search_results:
-                # make a list of all the results for this provider
-                for cur_ep in search_results:
-                    if cur_ep in found_results[cur_provider.name]:
-                        found_results[cur_provider.name][cur_ep] += search_results[cur_ep]
-                    else:
-                        found_results[cur_provider.name][cur_ep] = search_results[cur_ep]
-
-                    # Sort the list by seeders if possible
-                    if cur_provider.provider_type == u'torrent' or getattr(cur_provider, u'torznab', None):
-                        found_results[cur_provider.name][cur_ep].sort(key=lambda d: int(d.seeders), reverse=True)
-
+            if search_results or cache_search_results:
                 break
             elif not cur_provider.search_fallback or search_count == 2:
                 break
@@ -747,16 +767,13 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
             # Continue because we don't want to pick best results as we are running a manual search by user
             continue
 
-        # Collect candidates for multi-episode or season results
-        candidates = (candidate for result, candidate in iteritems(found_results[cur_provider.name])
-                      if result in (SEASON_RESULT, MULTI_EP_RESULT))
-        candidates = list(itertools.chain(*candidates))
-        if candidates:
-            multi_results += collect_multi_candidates(candidates, series_obj, episodes, down_cur_quality)
-
-        # Collect candidates for single-episode results
-        single_results = collect_single_candidates(found_results[cur_provider.name],
-                                                   single_results)
+        # From our provider's multi_episode and single_episode results, collect candidates.
+        # Only collect the candidates if we didn't get any from cache.
+        if not (cache_multi_results or cache_single_results):
+            multi_results, single_results = collect_candidates(found_results, cur_provider, multi_results,
+                                                               single_results, series_obj, down_cur_quality)
+        else:
+            multi_results, single_results = cache_multi_results, cache_single_results
 
     # Remove provider from thread name before return results
     threading.currentThread().name = original_thread_name
@@ -768,6 +785,43 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
     return combine_results(multi_results, single_results)
 
 
+def collect_candidates(found_results, provider, multi_results, single_results, series_obj, down_cur_quality):
+    """Collect candidates for episode, multi-episode or season results."""
+    candidates = (candidate for result, candidate in iteritems(found_results[provider.name])
+                  if result in (SEASON_RESULT, MULTI_EP_RESULT))
+    candidates = list(itertools.chain(*candidates))
+    if candidates:
+        multi_results += collect_multi_candidates(candidates, series_obj, down_cur_quality)
+
+    # Collect candidates for single-episode results
+    single_results = collect_single_candidates(found_results[provider.name], single_results)
+
+    return multi_results, single_results
+
+
+def list_results_for_provider(search_results, found_results, provider):
+    """
+    Add the search results of one provider to the found_results dict.
+
+    The structure is based on [provider_name][episode_number][search_result]
+    :param search_results: New dictionary with search results for this provider
+    :param found_results: Dictionary with existing per provider search results
+    :param provider: Provider object
+    :return: Updated dict found_results
+    """
+    for cur_ep in search_results:
+        if cur_ep in found_results[provider.name]:
+            found_results[provider.name][cur_ep] += search_results[cur_ep]
+        else:
+            found_results[provider.name][cur_ep] = search_results[cur_ep]
+
+        # Sort the list by seeders if possible
+        if provider.provider_type == u'torrent' or getattr(provider, u'torznab', None):
+            found_results[provider.name][cur_ep].sort(key=lambda d: int(d.seeders), reverse=True)
+
+    return found_results
+
+
 def collect_single_candidates(candidates, results):
     """Collect single-episode result candidates."""
     single_candidates = list(results)
@@ -798,7 +852,7 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
     return single_candidates + new_candidates
 
 
-def collect_multi_candidates(candidates, series_obj, episodes, down_cur_quality):
+def collect_multi_candidates(candidates, series_obj, down_cur_quality):
     """Collect mutli-episode and season result candidates."""
     multi_candidates = []
 
diff --git a/medusa/tv/cache.py b/medusa/tv/cache.py
index 15eb097303..7e8ab2367d 100644
--- a/medusa/tv/cache.py
+++ b/medusa/tv/cache.py
@@ -9,6 +9,7 @@ import traceback
 
 from builtins import object
 from builtins import str
+from collections import defaultdict
 from time import time
 
 from medusa import (
@@ -31,7 +32,7 @@ from medusa.show import naming
 from medusa.show.show import Show
 
-from six import text_type
+from six import text_type, viewitems
 
 log = BraceAdapter(logging.getLogger(__name__))
 log.logger.addHandler(logging.NullHandler())
 
@@ -454,38 +455,101 @@ def item_in_cache(self, url):
             'WHERE url=?'.format(provider=self.provider_id),
             [url]
         )[0]['count']
 
-    def find_needed_episodes(self, episode, forced_search=False,
-                             down_cur_quality=False):
-        """Find needed episodes."""
-        needed_eps = {}
+    def find_needed_episodes(self, episodes, forced_search=False, down_cur_quality=False):
+        """
+        Search the cache for needed episodes.
+
+        NOTE: This is currently only used by the Daily Search.
+        The following checks are performed on the cache results:
+        * Use the episode's current quality / wanted quality to decide if we want it
+        * Filter on ignored/required words and non-TV junk
+        * Filter out non-anime results on anime-only providers
+        * Check if the series is still in our library
+
+        :param episodes: Single or list of episode object(s)
+        :param forced_search: Flag to mark that the search was triggered by a forced search
+        :param down_cur_quality: Flag to mark that we want to include the episode(s)' current quality
+
+        :return: dict(episode number: [list of SearchResult objects]).
+        """
+        results = defaultdict(list)
+        cache_results = self.find_episodes(episodes)
+
+        for episode_number, search_results in viewitems(cache_results):
+            for search_result in search_results:
+
+                # ignored/required words, and non-tv junk
+                if not naming.filter_bad_releases(search_result.name):
+                    continue
+
+                all_wanted = True
+                for cur_ep in search_result.actual_episodes:
+                    # if the show says we want that episode then add it to the list
+                    if not search_result.series.want_episode(search_result.actual_season, cur_ep, search_result.quality,
+                                                             forced_search, down_cur_quality):
+                        log.debug('Ignoring {0} because one or more episodes are unwanted', search_result.name)
+                        all_wanted = False
+                        break
+
+                if not all_wanted:
+                    continue
+
+                log.debug(
+                    '{id}: Using cached results from {provider} for series {show_name!r} episode {ep}', {
+                        'id': search_result.series.series_id,
+                        'provider': self.provider.name,
+                        'show_name': search_result.series.name,
+                        'ep': episode_num(search_result.episodes[0].season, search_result.episodes[0].episode),
+                    }
+                )
+
+                # FIXME: Should be changed to search_result.search_type
+                search_result.forced_search = forced_search
+                search_result.download_current_quality = down_cur_quality
+
+                # add it to the list
+                results[episode_number].append(search_result)
+
+        return results
+
+    def find_episodes(self, episodes):
+        """
+        Search the cache for episodes.
+
+        NOTE: This is currently only used by the Backlog/Forced Search, as we determine the candidates there.
+        The following checks are performed on the cache results:
+        * Filter out non-anime results on anime-only providers
+        * Check if the series is still in our library
+        :param episodes: Single or list of episode object(s)
+
+        :return: dict(episode number: [list of SearchResult objects]).
+        """
+        cache_results = defaultdict(list)
         results = []
 
         cache_db_con = self._get_db()
-        if not episode:
+        if not episodes:
             sql_results = cache_db_con.select(
                 'SELECT * FROM [{name}]'.format(name=self.provider_id))
-        elif not isinstance(episode, list):
+        elif not isinstance(episodes, list):
             sql_results = cache_db_con.select(
                 'SELECT * FROM [{name}] '
-                'WHERE indexer = ? AND'
-                ' indexerid = ? AND'
-                ' season = ? AND'
-                ' episodes LIKE ?'.format(name=self.provider_id),
-                [episode.series.indexer, episode.series.series_id, episode.season,
-                 '%|{0}|%'.format(episode.episode)]
+                'WHERE indexer = ? AND '
+                'indexerid = ? AND '
+                'season = ? AND '
+                'episodes LIKE ?'.format(name=self.provider_id),
+                [episodes.series.indexer, episodes.series.series_id, episodes.season,
+                 '%|{0}|%'.format(episodes.episode)]
             )
         else:
-            for ep_obj in episode:
+            for ep_obj in episodes:
                 results.append([
                     'SELECT * FROM [{name}] '
                     'WHERE indexer = ? AND '
-                    ' indexerid = ? AND'
-                    ' season = ? AND'
-                    ' episodes LIKE ? AND '
-                    ' quality IN ({qualities})'.format(
-                        name=self.provider_id,
-                        qualities=','.join((str(x)
-                                            for x in ep_obj.wanted_quality))
+                    'indexerid = ? AND '
+                    'season = ? AND '
+                    'episodes LIKE ?'.format(
+                        name=self.provider_id
                     ),
                     [ep_obj.series.indexer, ep_obj.series.series_id, ep_obj.season,
                      '%|{0}|%'.format(ep_obj.episode)]]
@@ -499,10 +563,10 @@ def find_needed_episodes(self, episode, forced_search=False,
             sql_results = []
             log.debug(
                 '{id}: No cached results in {provider} for series {show_name!r} episode {ep}', {
-                    'id': episode[0].series.series_id,
+                    'id': episodes[0].series.series_id,
                     'provider': self.provider.name,
-                    'show_name': episode[0].series.name,
-                    'ep': episode_num(episode[0].season, episode[0].episode),
+                    'show_name': episodes[0].series.name,
+                    'ep': episode_num(episodes[0].season, episodes[0].episode),
                 }
             )
 
@@ -515,10 +579,6 @@ def find_needed_episodes(self, episode, forced_search=False,
 
             search_result = self.provider.get_result()
 
-            # ignored/required words, and non-tv junk
-            if not naming.filter_bad_releases(cur_result['name']):
-                continue
-
             # get the show, or ignore if it's not one of our shows
             series_obj = Show.find_by_id(app.showList, int(cur_result['indexer']), int(cur_result['indexerid']))
             if not series_obj:
@@ -535,49 +595,32 @@ def find_needed_episodes(self, episode, forced_search=False,
             search_result.version = cur_result['version']
             search_result.name = cur_result['name']
             search_result.url = cur_result['url']
-            search_result.season = int(cur_result['season'])
-            search_result.actual_season = search_result.season
+            search_result.actual_season = int(cur_result['season'])
 
-            sql_episodes = cur_result['episodes'].strip('|')
             # TODO: Add support for season results
+            sql_episodes = cur_result['episodes'].strip('|')
             # Season result
             if not sql_episodes:
-                ep_objs = series_obj.get_all_episodes(search_result.season)
+                ep_objs = series_obj.get_all_episodes(search_result.actual_season)
+                if not ep_objs:
+                    # We couldn't get any episodes for this season, which is odd, so skip the result.
+                    log.debug("We couldn't get any episodes for season {0} of {1}, skipping",
+                              search_result.actual_season, search_result.name)
+                    continue
                 actual_episodes = [ep.episode for ep in ep_objs]
                 episode_number = SEASON_RESULT
             # Multi or single episode result
             else:
                 actual_episodes = [int(ep) for ep in sql_episodes.split('|')]
-                ep_objs = [series_obj.get_episode(search_result.season, ep) for ep in actual_episodes]
+                ep_objs = [series_obj.get_episode(search_result.actual_season, ep) for ep in actual_episodes]
                 if len(actual_episodes) == 1:
                     episode_number = actual_episodes[0]
                 else:
                     episode_number = MULTI_EP_RESULT
 
-            all_wanted = True
-            for cur_ep in actual_episodes:
-                # if the show says we want that episode then add it to the list
-                if not series_obj.want_episode(search_result.season, cur_ep, search_result.quality,
-                                               forced_search, down_cur_quality):
-                    log.debug('Ignoring {0} because one or more episodes are unwanted', cur_result['name'])
-                    all_wanted = False
-                    break
-
-            if not all_wanted:
-                continue
-
             search_result.episodes = ep_objs
             search_result.actual_episodes = actual_episodes
 
-            log.debug(
-                '{id}: Using cached results from {provider} for series {show_name!r} episode {ep}', {
-                    'id': search_result.episodes[0].series.series_id,
-                    'provider': self.provider.name,
-                    'show_name': search_result.episodes[0].series.name,
-                    'ep': episode_num(search_result.episodes[0].season, search_result.episodes[0].episode),
-                }
-            )
-
             # Map the remaining attributes
             search_result.series = series_obj
             search_result.seeders = cur_result['seeders']
@@ -587,17 +630,10 @@ def find_needed_episodes(self, episode, forced_search=False,
             search_result.proper_tags = cur_result['proper_tags'].split('|') if cur_result['proper_tags'] else ''
             search_result.content = None
 
-            # FIXME: Should be changed to search_result.search_type
-            search_result.forced_search = forced_search
-            search_result.download_current_quality = down_cur_quality
-
             # add it to the list
-            if episode_number not in needed_eps:
-                needed_eps[episode_number] = [search_result]
-            else:
-                needed_eps[episode_number].append(search_result)
+            cache_results[episode_number].append(search_result)
 
         # datetime stamp this search so cache gets cleared
         self.searched = time()
 
-        return needed_eps
+        return cache_results
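
With the cache lookup pulled out of `find_search_results()`, the flow in `search_providers()` becomes: consult the provider cache first via `search_results_in_cache()` (a thin wrapper around `TVCache.find_episodes()`), and only fall back to an HTTP search when the cache yields no candidates. A minimal sketch of that control flow, for illustration only and not part of the patch; `provider`, `series`, `episodes`, and `search_mode` stand in for the real objects passed around the search code:

```python
def search_with_cache_first(provider, series, episodes, search_mode):
    """Consult the provider cache before searching the provider itself."""
    # Cached results come back as a dict keyed by episode number (or a
    # season / multi-episode sentinel), mapping to lists of SearchResult.
    cache_results = provider.search_results_in_cache(episodes)
    if cache_results:
        return cache_results
    # Nothing usable in the cache: fall back to an HTTP search.
    return provider.find_search_results(series, episodes, search_mode)
```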
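`find_episodes()` now returns the raw cache hits grouped per episode, leaving the wanted-quality filtering to `find_needed_episodes()`. A hedged sketch of that grouping, using placeholder sentinel values (the real `SEASON_RESULT` and `MULTI_EP_RESULT` constants are defined elsewhere in Medusa) and plain row dicts in place of `SearchResult` objects:

```python
from collections import defaultdict

# Placeholder sentinels for illustration; Medusa defines the real constants.
SEASON_RESULT = 'season'
MULTI_EP_RESULT = 'multi'


def group_cache_rows(rows):
    """Group cache rows the way find_episodes() keys its results."""
    cache_results = defaultdict(list)
    for row in rows:
        sql_episodes = row['episodes'].strip('|')
        if not sql_episodes:
            # An empty episodes column marks a season pack.
            episode_number = SEASON_RESULT
        else:
            numbers = [int(ep) for ep in sql_episodes.split('|')]
            # One number keys the episode itself; several key MULTI_EP_RESULT.
            episode_number = numbers[0] if len(numbers) == 1 else MULTI_EP_RESULT
        # The real code builds a SearchResult here; we keep the raw row.
        cache_results[episode_number].append(row)
    return cache_results


print(dict(group_cache_rows([
    {'episodes': '|3|'}, {'episodes': '|3|4|'}, {'episodes': ''},
])))
```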