Skip to content

Commit

Permalink
Update google storage utils (#386)
Browse files Browse the repository at this point in the history
* Use service auth token (#384, see also #385)
* Test basic upload
* Remove download abilities until we need them
  • Loading branch information
wtgee authored Jan 18, 2018
1 parent e6d5299 commit 245cf6e
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 70 deletions.
1 change: 0 additions & 1 deletion .codecov.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
ignore:
- "pocs/utils/data.py"
- "pocs/utils/google/*"
- "pocs/camera/canon_gphoto2.py"
- "pocs/camera/sbig.py"
- "pocs/camera/sbigudrv.py"
Expand Down
4 changes: 4 additions & 0 deletions pocs/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ def pytest_addoption(parser):
"List items can include: mount, camera, weather, or all")
parser.addoption("--solve", action="store_true", default=False,
help="If tests that require solving should be run")
parser.addoption("--test-cloud-storage", action="store_true", default=False,
dest="test_cloud_storage",
help="Tests cloud strorage functions." +
"Requires $PANOPTES_CLOUD_KEY to be set to path of valid json service key")


def pytest_collection_modifyitems(config, items):
Expand Down
77 changes: 77 additions & 0 deletions pocs/tests/utils/test_google_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import pytest
import os

from pocs.utils.error import GoogleCloudError
from pocs.utils.google.storage import PanStorage


# Module-wide marker: skip every test in this file unless the
# --test-cloud-storage command line option was given.
# NOTE(review): this reads the option through the global `pytest.config`
# object, which newer pytest releases no longer provide -- confirm the pinned
# pytest version before relying on it.
pytestmark = pytest.mark.skipif(
not pytest.config.option.test_cloud_storage,
reason="Needs --test-cloud-storage to run"
)


def test_key_exists():
    """The PANOPTES_CLOUD_KEY variable must be set and name an existing path."""
    key_path = os.environ['PANOPTES_CLOUD_KEY']
    assert key_path
    assert os.path.exists(key_path)


def test_bad_bucket():
    """Constructing PanStorage for 'fake-bucket' must fail in both modes."""
    # Without an auth key an AssertionError is expected.
    with pytest.raises(AssertionError):
        PanStorage('fake-bucket')

    # With the configured key a GoogleCloudError is expected instead.
    key_path = os.environ['PANOPTES_CLOUD_KEY']
    with pytest.raises(GoogleCloudError):
        PanStorage('fake-bucket', auth_key=key_path)


@pytest.fixture(scope="function")
def storage():
    """Build a PanStorage for 'panoptes-test-bucket' using the key path
    taken from the PANOPTES_CLOUD_KEY environment variable."""
    return PanStorage(
        'panoptes-test-bucket',
        auth_key=os.environ['PANOPTES_CLOUD_KEY'],
    )


def test_unit_id(storage):
    """The storage object must expose a unit id that starts with 'PAN'."""
    assert storage.unit_id is not None
    # TODO(wtgee): Verify the unit id better after #384 is done.
    # Use a plain string as the assertion message so a failure actually shows
    # the explanation (the result of a logger call is not a useful message).
    assert storage.unit_id.startswith('PAN'), (
        "Must have valid PAN_ID. Please change your conf_files/pocs_local.yaml")


def test_bucket_exists(storage):
    """The bucket held by the storage fixture must report that it exists."""
    bucket = storage.bucket
    assert bucket.exists()


def test_file_upload_no_prepend(storage):
    """Upload a file without giving a remote path.

    The returned remote path is expected to be '<unit_id>/<file name>' and the
    corresponding blob must exist in the bucket afterwards.
    """
    temp_fn = 'ping.txt'
    with open(temp_fn, 'w') as f:
        f.write('Hello World')

    try:
        remote_path = storage.upload(temp_fn)
        assert remote_path == '{}/{}'.format(storage.unit_id, temp_fn)
        assert storage.bucket.blob(remote_path).exists()
    finally:
        # Remove the local scratch file even when an assertion above fails.
        os.unlink(temp_fn)


def test_file_upload_prepend_remote_path(storage):
    """Upload a file with an explicit remote path already under the unit id.

    The path returned by `upload` must equal the one passed in, and the blob
    must exist afterwards. The remote 'pong.txt' object is intentionally left
    in the bucket by this test.
    """
    # The file name has no placeholders, so no string formatting is needed.
    temp_fn = 'pong.txt'
    with open(temp_fn, 'w') as f:
        f.write('Hello World')

    try:
        remote_path = '{}/{}'.format(storage.unit_id, temp_fn)
        returned_remote_path = storage.upload(temp_fn, remote_path=remote_path)
        assert remote_path == returned_remote_path
        assert storage.bucket.blob(returned_remote_path).exists()
    finally:
        # Remove the local scratch file even when an assertion above fails.
        os.unlink(temp_fn)


def test_delete(storage):
    """
    Note: The util wrappers don't provide a way to delete because we generally
    will not want people to delete things. However it's good to test and we
    want to remove the above files
    """
    remote_path = '{}/pong.txt'.format(storage.unit_id)
    blob = storage.bucket.blob(remote_path)

    # The object must be present, then gone once it has been deleted.
    assert blob.exists()
    blob.delete()
    still_there = storage.bucket.blob(remote_path).exists()
    assert still_there is False
5 changes: 5 additions & 0 deletions pocs/utils/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,8 @@ class TheSkyXKeyError(TheSkyXError):
class TheSkyXTimeout(TheSkyXError):
    """ Errors from TheSkyX caused by a timeout """
    # NOTE(review): wording inferred from the class name; the earlier text
    # ("bad key passed") looked copied from the key-error class -- confirm.
    pass


class GoogleCloudError(PanError):
    """Error type for problems related to Google Cloud."""
123 changes: 55 additions & 68 deletions pocs/utils/google/storage.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,87 @@
import os
import warnings

from gcloud import storage
from gcloud.exceptions import Forbidden

import pocs.utils.logger
from pocs.utils import error
from pocs.utils.logger import get_root_logger
from pocs.utils.config import load_config


class PanStorage(object):
    """ Class for interacting with Google Cloud Platform """

    def __init__(self, bucket_name, auth_key=None, project_id='panoptes-survey', prefix=None):
        """Create an object that can interact easily with storage buckets.

        Args:
            bucket_name (str): Name of bucket to use.
            auth_key (str, optional): Path to valid json authorization token.
            project_id (str, optional): Project id hosting the bucket. Default 'panoptes-survey'
            prefix (str, optional): Default blob-name prefix used by `list_remote`
                when it is called without one.

        Raises:
            AssertionError: If `auth_key` is missing or is not an existing path.
            error.GoogleCloudError: Error raised if valid connection cannot be formed for
                given project, bucket, and authorization.
        """
        self.logger = get_root_logger()

        # Raised explicitly (rather than via `assert`) so the check is kept
        # under `python -O` and the exception carries a readable message,
        # while callers still see the same exception type.
        if auth_key is None or not os.path.exists(auth_key):
            message = "Cannot use google storage without PANOPTES_CLOUD_KEY variable set."
            self.logger.error(message)
            raise AssertionError(message)

        super(PanStorage, self).__init__()

        self.unit_id = load_config()['PAN_ID']
        self.project_id = project_id
        self.prefix = prefix
        self.bucket_name = bucket_name

        self.client = storage.Client.from_service_account_json(
            auth_key,
            project=self.project_id
        )

        try:
            self.bucket = self.client.get_bucket(bucket_name)
        except Forbidden as e:
            # NOTE(review): only `Forbidden` is translated here; other lookup
            # failures propagate unchanged -- confirm that is intended.
            raise error.GoogleCloudError(
                "Storage bucket does not exist or no permissions. " +
                "Ensure that the PANOPTES_CLOUD_KEY variable is properly set"
            ) from e

        self.logger.info("Connected to storage bucket {}", self.bucket_name)

    def list_remote(self, prefix=None):
        """Return a list of blobs in the remote bucket with the given prefix.

        Args:
            prefix (str, optional): Blob-name prefix to list. Falls back to
                `self.prefix` when empty or not given.

        Returns:
            list: Names of the blobs returned by the bucket listing.
        """
        if not prefix:
            prefix = self.prefix

        return [blob.name for blob in self.bucket.list_blobs(prefix=prefix)]

    def upload(self, local_path, remote_path=None):
        """Upload the given file to the Google Cloud Storage bucket.

        Note:
            The name of the current unit will be prepended to the path
            so that all files will be placed in a "subdirectory" according
            to unit.

        Args:
            local_path (str): Path to local file to be uploaded
            remote_path (str, optional): Destination path in bucket.

        Returns:
            str: Remote path of uploaded object

        Raises:
            AssertionError: If `local_path` does not exist.
        """
        if not os.path.exists(local_path):
            self.logger.warning(
                "Local path does not exist, can't upload: {}", local_path)
            raise AssertionError(
                "Local path does not exist, can't upload: {}".format(local_path))

        if remote_path is None:
            remote_path = local_path

        if not remote_path.startswith(self.unit_id):
            remote_path = '{}/{}'.format(self.unit_id, remote_path)

        self.logger.debug('Uploading file: {} to bucket: {} object: {} ',
                          local_path, self.bucket.name, remote_path)

        # Best effort: a failed upload is logged and the intended remote path
        # is still returned.
        try:
            self.bucket.blob(remote_path).upload_from_filename(
                filename=local_path)
            self.logger.debug('Upload complete')

        except Exception as err:
            self.logger.warning(
                'Problem uploading file {}: {}'.format(local_path, err))

        return remote_path

    def download(self, remote_path, local_path=None):
        """Download the given file from the Google Cloud Storage bucket.

        Args:
            remote_path (str): Path of the object in the bucket.
            local_path (str, optional): Destination file. Defaults to
                '<$PANDIR>/temp/<remote_path>'.

        Returns:
            str: The local path the download was directed to. Failures are
                logged as warnings and the path is returned regardless.
        """
        if local_path is None:
            local_path = '{}/temp/{}'.format(os.getenv('PANDIR'), remote_path)

        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        try:
            self.bucket.get_blob(remote_path).download_to_filename(
                filename=local_path)
            self.logger.debug('Download complete!')
        except Exception as err:
            self.logger.warning(
                'Problem downloading {}: {}'.format(remote_path, err))

        return local_path

    def upload_string(self, data, remote_path):
        """Upload the given data string to the Google Cloud Storage bucket.

        Args:
            data (str): Content to store.
            remote_path (str): Destination path in bucket.

        Returns:
            str: The `remote_path` that was passed in. Failures are logged as
                warnings.
        """
        # The upload call is the same whether or not the object already
        # exists, so there is no need to list the bucket first.
        try:
            self.bucket.blob(remote_path).upload_from_string(data)
            self.logger.debug('String upload complete!')
        except Exception as err:
            self.logger.warning('Problem uploading string: {}'.format(err))

        return remote_path

    def download_string(self, remote_path):
        """Download the given file as a string from the Google Cloud Storage bucket.

        Args:
            remote_path (str): Path of the object in the bucket.

        Returns:
            The downloaded content, or None when the download failed (the
            failure is logged as a warning).
        """
        # Pre-set the result so a failed download returns None instead of
        # hitting an unbound local variable at the return statement.
        data = None
        try:
            data = self.bucket.get_blob(remote_path).download_as_string()
            self.logger.debug('String download complete!')
        except Exception as err:
            self.logger.warning(
                'Problem downloading {}: {}'.format(remote_path, err))

        return data
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ ffmpy
google-cloud-storage
dateparser
coveralls
mocket
gcloud

0 comments on commit 245cf6e

Please sign in to comment.