Skip to content

Commit

Permalink
Add retry for HTTP failures and network connectivity issues
Browse files Browse the repository at this point in the history
In the HTTP backend, we create a urllib3 retry policy and attach it to a python-requests session.

We test this by monkeypatching the code to retry on 404 errors (which we normally do not).  We attempt to fetch a file that's not present, but start a thread that creates the file after a short delay.  It should initially not find the file, and then eventually succeed.

Fixes spacetx/starfish#1277
  • Loading branch information
Tony Tung committed May 13, 2019
1 parent ac21c56 commit f76db39
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
15 changes: 14 additions & 1 deletion slicedimage/backends/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@
import requests
from io import BytesIO

from requests.adapters import HTTPAdapter
from urllib3.util import retry

from slicedimage.urlpath import pathjoin
from ._base import Backend, verify_checksum


RETRY_STATUS_CODES = frozenset({500, 502, 503, 504})


class HttpBackend(Backend):
def __init__(self, baseurl):
self._baseurl = baseurl
Expand All @@ -23,7 +29,14 @@ def __init__(self, url, checksum_sha256):
self.handle = None

def __enter__(self):
resp = requests.get(self.url)
session = requests.Session()
retry_policy = retry.Retry(
connect=10, read=10, status=10, backoff_factor=0.1, status_forcelist=RETRY_STATUS_CODES)
adapter = HTTPAdapter(max_retries=retry_policy)
session.mount("http://", adapter)
session.mount("https://", adapter)

resp = session.get(self.url)
resp.raise_for_status()
self.handle = BytesIO(resp.content)
verify_checksum(self.handle, self.checksum_sha256)
Expand Down
31 changes: 31 additions & 0 deletions tests/io_/v0_1_0/test_http_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import os
import sys
import tempfile
import threading
import time

import pytest
import requests
from requests import HTTPError

from slicedimage.backends import ChecksumValidationError, HttpBackend
from slicedimage.backends import _http
from tests.utils import (
ContextualChildProcess,
TemporaryDirectory,
Expand Down Expand Up @@ -121,3 +123,32 @@ def test_error(http_server):
with pytest.raises(ChecksumValidationError):
with http_backend.read_contextmanager("tileset.json") as cm:
cm.read()


def test_retry(monkeypatch, http_server):
    """
    Verifies that the retry logic is reasonable. Since we normally only retry on http connectivity
    issues, or 50x errors, we monkeypatch the list of HTTP status codes we retry on to {404}, and
    induce a 404 error. We also start a thread that creates the file we are looking for.
    Then we attempt to fetch the file. It should fail a few times, and then successfully return the
    file.

    The test passes if the fetch completes without raising; an exhausted retry budget surfaces as
    an exception from the context manager.
    """
    tempdir, port = http_server
    http_backend = HttpBackend("http://0.0.0.0:{port}".format(port=port))

    def sleep_and_make_file():
        # Delay long enough that the first request(s) see a 404 before the file shows up.
        time.sleep(5.0)
        data = os.urandom(1024)
        # os.urandom returns bytes, so the file must be opened in binary mode; text mode ("w")
        # raises TypeError on Python 3 and the file would never be populated.
        with open(os.path.join(tempdir, "tileset.json"), "wb") as fh:
            fh.write(data)

    thread = threading.Thread(target=sleep_and_make_file)
    # Daemonize so a failing test does not block interpreter shutdown on the sleeping thread.
    # (The `daemon` attribute replaces the deprecated `setDaemon()` call.)
    thread.daemon = True
    thread.start()

    with monkeypatch.context() as mc:
        # The backend reads the module-level constant when the request is made, so patching it
        # here changes which status codes trigger a retry.
        mc.setattr(_http, "RETRY_STATUS_CODES", frozenset({404}))
        with http_backend.read_contextmanager("tileset.json") as cm:
            cm.read()

0 comments on commit f76db39

Please sign in to comment.