From 245cf6e304d25686d3286c7dd20f3028595b9bb7 Mon Sep 17 00:00:00 2001 From: Wilfred Tyler Gee Date: Fri, 19 Jan 2018 09:56:57 +1100 Subject: [PATCH] Update google storage utils (#386) * Use service auth token (#384, see also #385) * Test basic upload * Remove download abilities until we need them --- .codecov.yml | 1 - pocs/tests/conftest.py | 4 + pocs/tests/utils/test_google_storage.py | 77 +++++++++++++++ pocs/utils/error.py | 5 + pocs/utils/google/storage.py | 123 +++++++++++------------- requirements.txt | 4 +- 6 files changed, 144 insertions(+), 70 deletions(-) create mode 100644 pocs/tests/utils/test_google_storage.py diff --git a/.codecov.yml b/.codecov.yml index 5c1c78095..53be06386 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,6 +1,5 @@ ignore: - "pocs/utils/data.py" - - "pocs/utils/google/*" - "pocs/camera/canon_gphoto2.py" - "pocs/camera/sbig.py" - "pocs/camera/sbigudrv.py" diff --git a/pocs/tests/conftest.py b/pocs/tests/conftest.py index df2a14f3c..11b6af67b 100644 --- a/pocs/tests/conftest.py +++ b/pocs/tests/conftest.py @@ -17,6 +17,10 @@ def pytest_addoption(parser): "List items can include: mount, camera, weather, or all") parser.addoption("--solve", action="store_true", default=False, help="If tests that require solving should be run") + parser.addoption("--test-cloud-storage", action="store_true", default=False, + dest="test_cloud_storage", + help="Tests cloud strorage functions." + + "Requires $PANOPTES_CLOUD_KEY to be set to path of valid json service key") def pytest_collection_modifyitems(config, items): diff --git a/pocs/tests/utils/test_google_storage.py b/pocs/tests/utils/test_google_storage.py new file mode 100644 index 000000000..58c0a249b --- /dev/null +++ b/pocs/tests/utils/test_google_storage.py @@ -0,0 +1,77 @@ +import pytest +import os + +from pocs.utils.error import GoogleCloudError +from pocs.utils.google.storage import PanStorage + + +pytestmark = pytest.mark.skipif( + not pytest.config.option.test_cloud_storage, + reason="Needs --test-cloud-storage to run" +) + + +def test_key_exists(): + assert os.environ['PANOPTES_CLOUD_KEY'] + assert os.path.exists(os.environ['PANOPTES_CLOUD_KEY']) + + +def test_bad_bucket(): + with pytest.raises(AssertionError): + PanStorage('fake-bucket') + + auth_key_path = os.environ['PANOPTES_CLOUD_KEY'] + with pytest.raises(GoogleCloudError): + PanStorage('fake-bucket', auth_key=auth_key_path) + + +@pytest.fixture(scope="function") +def storage(): + auth_key_path = os.environ['PANOPTES_CLOUD_KEY'] + return PanStorage('panoptes-test-bucket', auth_key=auth_key_path) + + +def test_unit_id(storage): + assert storage.unit_id is not None + # TODO(wtgee)Verify the unit id better after #384 is done. + assert storage.unit_id.startswith('PAN'), storage.logger.error( + "Must have valid PAN_ID. Please change your conf_files/pocs_local.yaml") + + +def test_bucket_exists(storage): + assert storage.bucket.exists() + + +def test_file_upload_no_prepend(storage): + temp_fn = 'ping.txt' + with open(temp_fn, 'w') as f: + f.write('Hello World') + + remote_path = storage.upload(temp_fn) + assert remote_path == '{}/{}'.format(storage.unit_id, temp_fn) + assert storage.bucket.blob(remote_path).exists() + os.unlink(temp_fn) + + +def test_file_upload_prepend_remote_path(storage): + temp_fn = 'pong.txt'.format(storage.unit_id) + with open(temp_fn, 'w') as f: + f.write('Hello World') + + remote_path = '{}/{}'.format(storage.unit_id, temp_fn) + returned_remote_path = storage.upload(temp_fn, remote_path=remote_path) + assert remote_path == returned_remote_path + assert storage.bucket.blob(returned_remote_path).exists() + os.unlink(temp_fn) + + +def test_delete(storage): + """ + Note: The util wrappers don't provide a way to delete because we generally + will not want people to delete things. However it's good to test and we + want to remove the above files + """ + remote_path = '{}/pong.txt'.format(storage.unit_id) + assert storage.bucket.blob(remote_path).exists() + storage.bucket.blob(remote_path).delete() + assert storage.bucket.blob(remote_path).exists() is False diff --git a/pocs/utils/error.py b/pocs/utils/error.py index f729d9f33..61b57ed19 100644 --- a/pocs/utils/error.py +++ b/pocs/utils/error.py @@ -122,3 +122,8 @@ class TheSkyXKeyError(TheSkyXError): class TheSkyXTimeout(TheSkyXError): """ Errors from TheSkyX because bad key passed """ pass + + +class GoogleCloudError(PanError): + """ Errors related to google cloud """ + pass diff --git a/pocs/utils/google/storage.py b/pocs/utils/google/storage.py index bbffe288a..c6f4425ea 100644 --- a/pocs/utils/google/storage.py +++ b/pocs/utils/google/storage.py @@ -1,100 +1,87 @@ import os -import warnings from gcloud import storage +from gcloud.exceptions import Forbidden -import pocs.utils.logger +from pocs.utils import error +from pocs.utils.logger import get_root_logger +from pocs.utils.config import load_config class PanStorage(object): """ Class for interacting with Google Cloud Platform """ - def __init__(self, project_id='panoptes-survey', bucket_name=None, prefix=None): - assert bucket_name is not None, warnings.warn( - "A valid bucket name is required.") + def __init__(self, bucket_name, auth_key=None, project_id='panoptes-survey'): + """Create an object that can interact easily with storage buckets. + + Args: + bucket_name (str): Name of bucket to use. + auth_key (str, optional): Path to valid json authorization token. + project_id (str, optional): Project id hosting the bucket. Default 'panoptes-survey' + + Raises: + error.GoogleCloudError: Error raised if valid connection cannot be formed for + given project, bucket, and authorization. + """ + self.logger = get_root_logger() + assert auth_key is not None and os.path.exists(auth_key), self.logger.error( + "Cannot use google storage without PANOPTES_CLOUD_KEY variable set.") + super(PanStorage, self).__init__() - self.logger = pocs.utils.logger.get_root_logger() + self.unit_id = load_config()['PAN_ID'] self.project_id = project_id - self.prefix = prefix - - self.client = storage.Client(self.project_id) self.bucket_name = bucket_name - self.bucket = self.client.get_bucket(bucket_name) - def list_remote(self, prefix=None): - """Return a list of blobs in the remote bucket with the given prefix.""" - if not prefix: - prefix = self.prefix + self.client = storage.Client.from_service_account_json( + auth_key, + project=self.project_id + ) + + try: + self.bucket = self.client.get_bucket(bucket_name) + except Forbidden as e: + raise error.GoogleCloudError( + "Storage bucket does not exist or no permissions. " + + "Ensure that the PANOPTES_CLOUD_KEY variable is properly set" + ) - blobs = self.bucket.list_blobs(prefix=prefix) - files = [] - for blob in blobs: - files.append(blob.name) - return files + self.logger.info("Connected to storage bucket {}", self.bucket_name) def upload(self, local_path, remote_path=None): - """Upload the given file to the Google Cloud Storage bucket.""" - assert self.project_id and os.path.exists(local_path) + """Upload the given file to the Google Cloud Storage bucket. - self.logger.debug('Building upload request...') + Note: + The name of the current unit will be prepended to the path + so that all files will be placed in a "subdirectory" according + to unit. + + Args: + local_path (str): Path to local file to be uploaded + remote_path (str, optional): Destination path in bucket. + + Returns: + str: Remote path of uploaded object + """ + assert os.path.exists(local_path), self.logger.warning( + "Local path does not exist, can't upload: {}", local_path) if remote_path is None: remote_path = local_path - self.logger.debug('Uploading file: %s to bucket: %s object: %s '.format( - local_path, self.bucket.name, remote_path)) + if not remote_path.startswith(self.unit_id): + remote_path = '{}/{}'.format(self.unit_id, remote_path) + + self.logger.debug('Uploading file: {} to bucket: {} object: {} ', + local_path, self.bucket.name, remote_path) try: self.bucket.blob(remote_path).upload_from_filename( filename=local_path) - self.logger.debug('Upload complete!') + self.logger.debug('Upload complete') except Exception as err: self.logger.warning( 'Problem uploading file {}: {}'.format(local_path, err)) return remote_path - - def download(self, remote_path, local_path=None): - """Download the given file from the Google Cloud Storage bucket.""" - if local_path is None: - local_path = '{}/temp/{}'.format(os.getenv('PANDIR'), remote_path) - - os.makedirs(os.path.dirname(local_path), exist_ok=True) - - try: - self.bucket.get_blob(remote_path).download_to_filename( - filename=local_path) - self.logger.debug('Download complete!') - except Exception as err: - self.logger.warning( - 'Problem downloading {}: {}'.format(remote_path, err)) - - return local_path - - def upload_string(self, data, remote_path): - """Upload the given data string to the Google Cloud Storage bucket.""" - if remote_path in self.list_remote(): - try: - self.bucket.get_blob(remote_path).upload_from_string(data) - self.logger.debug('String upload complete!') - except Exception as err: - self.logger.warning('Problem uploading string: {}'.format(err)) - else: - try: - self.bucket.blob(remote_path).upload_from_string(data) - self.logger.debug('String upload complete!') - except Exception as err: - self.logger.warning('Problem uploading string: {}'.format(err)) - return remote_path - - def download_string(self, remote_path): - """Download the given file as a string from the Google Cloud Storage bucket.""" - try: - data = self.bucket.get_blob(remote_path).download_as_string() - self.logger.debug('String download complete!') - except Exception as err: - self.logger.warning( - 'Problem downloading {}: {}'.format(remote_path, err)) - return data diff --git a/requirements.txt b/requirements.txt index d2f473551..fd2c2aea7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,6 @@ ffmpy google-cloud-storage dateparser coveralls -mocket \ No newline at end of file +mocket +google-cloud-storage +gcloud \ No newline at end of file