Skip to content

Commit

Permalink
Don't check one URL multiple times
Browse files: browse the repository at this point in the history
  • Loading branch information
PetrDlouhy committed Jan 25, 2017
1 parent a3afe52 commit 5d94848
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions linkcheck/cache/urlqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,19 @@ def _put (self, url_data):
log.debug(LOG_CACHE, "queueing %s", url_data.url)
key = url_data.cache_url
cache = url_data.aggregate.result_cache
if url_data.has_result or cache.has_result(key):
self.queue.appendleft(url_data)
else:
assert key is not None, "no result for None key: %s" % url_data
if self.max_allowed_urls is not None:
self.max_allowed_urls -= 1
self.num_puts += 1
if self.num_puts >= NUM_PUTS_CLEANUP:
self.cleanup()
self.queue.append(url_data)
self.unfinished_tasks += 1
if not cache.has_result(key):
if url_data.has_result:
self.queue.appendleft(url_data)
else:
assert key is not None, "no result for None key: %s" % url_data
if self.max_allowed_urls is not None:
self.max_allowed_urls -= 1
self.num_puts += 1
if self.num_puts >= NUM_PUTS_CLEANUP:
self.cleanup()
self.queue.append(url_data)
self.unfinished_tasks += 1
cache.add_result(key, None) # add none value to cache to prevent checking this url multiple times

def cleanup(self):
"""Move cached elements to top."""
Expand Down

0 comments on commit 5d94848

Please sign in to comment.