Skip to content

Commit

Permalink
Check cache for candidates when searching using backlog search. (#5816)
Browse files Browse the repository at this point in the history
* Check cache for candidates when searching using backlog search.

* Clean up the docstrings and method names.

* Removed superfluous check.
Fixed some flake comments.

* Only get the candidates if we didn't already get them from cache.

* Restructured the cache_multi_results and cache_single_results a little,
so they're not erased with a second loop on season back episode fallback.

* Assign them separately.

* Created new method cache.find_results().
As I don't like the existing find_needed_episodes.
As I think that method does too much.

* Remove episode parameter.

* Fix docstring.

* Made cache.find_needed_eps more DRY.

* Added debug log.

* Small changes to improve clarity

* Small changes
  • Loading branch information
p0psicles authored and medariox committed Dec 8, 2018
1 parent 0d186a0 commit 3ee452a
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 104 deletions.
39 changes: 26 additions & 13 deletions medusa/providers/generic_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,9 +238,33 @@ def remove_duplicate_mappings(items, pk='link'):
))
)

def search_results_in_cache(self, episodes):
    """
    Search the provider cache (db) for the given episodes.

    :param episodes: List of Episode objects
    :return: A dict of search results, keyed by episode number
    """
    # Delegate entirely to the provider's cache object.
    cached_results = self.cache.find_episodes(episodes)
    return cached_results

def find_search_results(self, series, episodes, search_mode, forced_search=False, download_current_quality=False,
manual_search=False, manual_search_type='episode'):
"""Search episodes based on param."""
"""
Search episodes based on param.
Search the provider using http queries.
:param series: Series object
:param episodes: List of Episode objects
:param search_mode: 'eponly' or 'sponly'
:param forced_search: Flag if the search was triggered by a forced search
:param download_current_quality: Flag if we want to include an already downloaded quality in the new search
:param manual_search: Flag if the search was triggered by a manual search
:param manual_search_type: How the manual search was started: For example an 'episode' or 'season'
:return: A dict of search results, ordered by episode number.
"""
self._check_auth()
self.series = series

Expand All @@ -249,18 +273,6 @@ def find_search_results(self, series, episodes, search_mode, forced_search=False
season_search = (len(episodes) > 1 or manual_search_type == 'season') and search_mode == 'sponly'

for episode in episodes:
if not manual_search:
cache_results = self.cache.find_needed_episodes(
episode, forced_search=forced_search, down_cur_quality=download_current_quality
)
if cache_results:
for episode_no in cache_results:
if episode_no not in results:
results[episode_no] = cache_results[episode_no]
else:
results[episode_no] += cache_results[episode_no]
continue

search_strings = []
if season_search:
search_strings = self._get_season_search_strings(episode)
Expand Down Expand Up @@ -471,6 +483,7 @@ def find_search_results(self, series, episodes, search_mode, forced_search=False
', '.join(map(str, search_result.parsed_result.episode_numbers)),
search_result.name,
search_result.url)

if episode_number not in results:
results[episode_number] = [search_result]
else:
Expand Down
110 changes: 82 additions & 28 deletions medusa/search/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,8 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
manual_search_results = []
multi_results = []
single_results = []
cache_multi_results = []
cache_single_results = []

# build name cache for show
name_cache.build_name_cache(series_obj)
Expand All @@ -667,14 +669,15 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
threading.currentThread().name = original_thread_name

for cur_provider in providers:
threading.currentThread().name = original_thread_name + u' :: [' + cur_provider.name + u']'
threading.currentThread().name = '{original_thread_name} :: [{provider}]'.format(
original_thread_name=original_thread_name, provider=cur_provider.name
)

if cur_provider.anime_only and not series_obj.is_anime:
log.debug(u'{0} is not an anime, skipping', series_obj.name)
continue

found_results[cur_provider.name] = {}

search_count = 0
search_mode = cur_provider.search_mode

Expand All @@ -694,24 +697,41 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
log.info(u'Performing season pack search for {0}', series_obj.name)

try:
search_results = cur_provider.find_search_results(series_obj, episodes, search_mode, forced_search,
down_cur_quality, manual_search, manual_search_type)
search_results = []
cache_search_results = []
cache_multi = []
cache_single = []

if not manual_search:
cache_search_results = cur_provider.search_results_in_cache(episodes)
if cache_search_results:
# From our provider multi_episode and single_episode results, collect candidates.
found_cache_results = list_results_for_provider(cache_search_results, found_results, cur_provider)
# We're passing the empty lists, because we don't want to include previous candidates
cache_multi, cache_single = collect_candidates(found_cache_results, cur_provider, cache_multi,
cache_single, series_obj, down_cur_quality)

# Check if we got any candidates from cache and add them to the list.
# If we found candidates in cache, we don't need to search the provider.
if cache_multi:
cache_multi_results += cache_multi
if cache_single:
cache_single_results += cache_single

# For now we only search if we didn't get any results back from cache,
# but we might wanna check if there was something useful in cache.
if not (cache_multi or cache_single):
log.debug(u'Could not find any candidates in cache, searching provider.')
search_results = cur_provider.find_search_results(series_obj, episodes, search_mode, forced_search,
down_cur_quality, manual_search, manual_search_type)
# Update the list found_results
found_results = list_results_for_provider(search_results, found_results, cur_provider)

except AuthException as error:
log.error(u'Authentication error: {0}', ex(error))
log.error(u'Authentication error: {0!r}', error)
break

if search_results:
# make a list of all the results for this provider
for cur_ep in search_results:
if cur_ep in found_results[cur_provider.name]:
found_results[cur_provider.name][cur_ep] += search_results[cur_ep]
else:
found_results[cur_provider.name][cur_ep] = search_results[cur_ep]

# Sort the list by seeders if possible
if cur_provider.provider_type == u'torrent' or getattr(cur_provider, u'torznab', None):
found_results[cur_provider.name][cur_ep].sort(key=lambda d: int(d.seeders), reverse=True)

if search_results or cache_search_results:
break
elif not cur_provider.search_fallback or search_count == 2:
break
Expand Down Expand Up @@ -747,16 +767,13 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
# Continue because we don't want to pick best results as we are running a manual search by user
continue

# Collect candidates for multi-episode or season results
candidates = (candidate for result, candidate in iteritems(found_results[cur_provider.name])
if result in (SEASON_RESULT, MULTI_EP_RESULT))
candidates = list(itertools.chain(*candidates))
if candidates:
multi_results += collect_multi_candidates(candidates, series_obj, episodes, down_cur_quality)

# Collect candidates for single-episode results
single_results = collect_single_candidates(found_results[cur_provider.name],
single_results)
# From our providers multi_episode and single_episode results, collect candidates.
# Only collect the candidates if we didn't get any from cache.
if not (cache_multi_results or cache_single_results):
multi_results, single_results = collect_candidates(found_results, cur_provider, multi_results,
single_results, series_obj, down_cur_quality)
else:
multi_results, single_results = cache_multi_results, cache_single_results

# Remove provider from thread name before return results
threading.currentThread().name = original_thread_name
Expand All @@ -768,6 +785,43 @@ def search_providers(series_obj, episodes, forced_search=False, down_cur_quality
return combine_results(multi_results, single_results)


def collect_candidates(found_results, provider, multi_results, single_results, series_obj, down_cur_quality):
    """
    Collect candidates for episode, multi-episode or season results.

    :param found_results: Dict of per-provider search results
    :param provider: Provider object whose results are inspected
    :param multi_results: List of previously collected multi-episode/season candidates
    :param single_results: List of previously collected single-episode candidates
    :param series_obj: Series object, passed through to collect_multi_candidates
    :param down_cur_quality: Flag, passed through to collect_multi_candidates
    :return: Tuple (multi_results, single_results) with the new candidates added
    """
    provider_results = found_results[provider.name]

    # Season packs and multi-episode results live under the special
    # SEASON_RESULT / MULTI_EP_RESULT keys; flatten their candidate lists.
    season_or_multi = [
        candidate
        for result, candidates in iteritems(provider_results)
        if result in (SEASON_RESULT, MULTI_EP_RESULT)
        for candidate in candidates
    ]
    if season_or_multi:
        multi_results += collect_multi_candidates(season_or_multi, series_obj, down_cur_quality)

    # Merge this provider's single-episode candidates with those collected so far.
    single_results = collect_single_candidates(provider_results, single_results)

    return multi_results, single_results


def list_results_for_provider(search_results, found_results, provider):
    """
    Add results for this provider to the found_results dict.

    The structure is based on [provider_name][episode_number][search_result]

    :param search_results: New dictionary with search results for this provider
    :param found_results: Dictionary with existing per provider search results
    :param provider: Provider object
    :return: Updated dict found_results
    """
    # Hoist loop invariants: the per-provider bucket and whether results
    # carry a meaningful seeder count (torrent and torznab providers).
    provider_results = found_results[provider.name]
    sort_by_seeders = provider.provider_type == u'torrent' or getattr(provider, u'torznab', None)

    for cur_ep in search_results:
        if cur_ep in provider_results:
            provider_results[cur_ep] += search_results[cur_ep]
        else:
            provider_results[cur_ep] = search_results[cur_ep]

        # Sort the list by seeders if possible
        if sort_by_seeders:
            provider_results[cur_ep].sort(key=lambda d: int(d.seeders), reverse=True)

    return found_results


def collect_single_candidates(candidates, results):
"""Collect single-episode result candidates."""
single_candidates = list(results)
Expand Down Expand Up @@ -798,7 +852,7 @@ def collect_single_candidates(candidates, results):
return single_candidates + new_candidates


def collect_multi_candidates(candidates, series_obj, episodes, down_cur_quality):
def collect_multi_candidates(candidates, series_obj, down_cur_quality):
"""Collect mutli-episode and season result candidates."""
multi_candidates = []

Expand Down
Loading

0 comments on commit 3ee452a

Please sign in to comment.