diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d64312..01efd54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ - `ImageFeatures` with `valid_rowcol=False` are no longer supported for training. For now they are still supported for classification. +- S3 downloads are now always performed in the main thread, to prevent `RuntimeError: cannot schedule new futures after interpreter shutdown`. + ## 0.7.0 - `TrainClassifierMsg` labels arguments have changed. Instead of `train_labels` and `val_labels`, it now takes a single argument `labels`, which is a `TrainingTaskLabels` object (basically a set of 3 `ImageLabels` objects: training set, reference set, and validation set). diff --git a/spacer/storage.py b/spacer/storage.py index 79aa40c..9910a12 100644 --- a/spacer/storage.py +++ b/spacer/storage.py @@ -15,6 +15,7 @@ import urllib.request import botocore.exceptions +from boto3.s3.transfer import TransferConfig from PIL import Image from sklearn.calibration import CalibratedClassifierCV from sklearn.linear_model import SGDClassifier @@ -99,6 +100,10 @@ class S3Storage(Storage): def __init__(self, bucketname: str): self.bucketname = bucketname + # Prevent `RuntimeError: cannot schedule new futures after + # interpreter shutdown`. + # Based on https://github.com/etianen/django-s3-storage/pull/136 + self.transfer_config = TransferConfig(use_threads=False) def store(self, key: str, stream: BytesIO): s3 = config.get_s3_conn() @@ -107,7 +112,8 @@ def store(self, key: str, stream: BytesIO): def load(self, key: str): s3 = config.get_s3_conn() stream = BytesIO() - s3.Object(self.bucketname, key).download_fileobj(stream) + s3.Object(self.bucketname, key).download_fileobj( + stream, Config=self.transfer_config) return stream def delete(self, key: str) -> None: