Skip to content

Commit

Permalink
storage initializer: fix s3 download
Browse files Browse the repository at this point in the history
When downloading a specified file, keep the name of the file itself.
When downloading a specified folder, keep the name of the folder itself.

Signed-off-by: JimmyYang20 <[email protected]>
  • Loading branch information
JimmyYang20 committed Oct 29, 2021
1 parent 1c5c537 commit 337dcd4
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 21 deletions.
8 changes: 6 additions & 2 deletions scripts/storage-initializer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@ python3 download.py s3://models/classification/model.tar.gz /tmp/models/
export S3_ENDPOINT_URL=https://play.min.io
export ACCESS_KEY_ID=Q3AM3UQ867SPQQA43P2F
export SECRET_ACCESS_KEY=zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG
python3 download.py s3://datasets/mnist /tmp/mnist
# we then download the content of the mnist directory into /tmp/mnist/

python3 download.py s3://datasets/mnist/1.jpg /tmp
# we then download the file 1.jpg into /tmp, and the result is /tmp/1.jpg.

python3 download.py s3://datasets/mnist /tmp
# we then download the folder mnist into /tmp, and the result is /tmp/mnist.

```
3. http server:
Expand Down
64 changes: 45 additions & 19 deletions scripts/storage-initializer/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,31 +175,57 @@ def download_s3_with_multi_files(download_files,
total_count, base_uri, base_out_dir)


def _download_s3(client, s3_url, out_dir):
    """
    Download the specified file or folder from s3 into a local directory.

    This function supports:
    1. when downloading the specified file, keep the name of the file itself.
    2. when downloading the specified folder, keep the name of the folder
       itself.

    Parameters:
        client: s3 client (minio-style: list_objects/fget_object)
        s3_url(string): url in s3, e.g. file url: s3://dev/data/data.txt,
            directory url: s3://dev/data
        out_dir(string): local directory address, e.g. /tmp/data/
    Returns:
        int: number of files downloaded from s3_url
    """
    # Strip the s3:// scheme, then split "<bucket>/<path-in-bucket>".
    bucket_args = s3_url.replace(_S3_PREFIX, "", 1).split("/", 1)
    bucket_name = bucket_args[0]
    # normpath drops any trailing "/" so a folder url and a folder url with
    # a trailing slash behave identically; empty path stays "".
    bucket_path = len(bucket_args) > 1 and os.path.normpath(bucket_args[1]) or ""

    # Non-recursive listing: if s3_url points at a single object, the first
    # entry is that object (is_dir is False); if it points at a folder, the
    # first entry is the folder itself (is_dir is True).
    objects = client.list_objects(bucket_name,
                                  prefix=bucket_path,
                                  use_api_v1=True)
    for o in objects:
        if not o.is_dir:
            # Case 1: a single file -- keep the file's own base name.
            client.fget_object(
                bucket_name, o.object_name,
                os.path.join(out_dir, os.path.basename(o.object_name)))
            return 1

        # Case 2: a folder -- list it recursively and mirror it under
        # out_dir, keeping the folder's own name as the top-level entry.
        count = 0
        objects = client.list_objects(bucket_name,
                                      prefix=bucket_path,
                                      recursive=True,
                                      use_api_v1=True)
        # Parent of the requested folder: making object names relative to
        # it preserves the folder name itself in the local path.
        root_path, _ = os.path.split(os.path.normpath(bucket_path))
        for obj in objects:
            if not obj.is_dir:
                object_file_path = os.path.join(
                    out_dir, os.path.relpath(obj.object_name, root_path))
                # fget_object creates local directories as needed.
                client.fget_object(bucket_name, obj.object_name,
                                   object_file_path)
                count += 1

        return count

    # Nothing matched the prefix.
    return 0


# download the specified file in s3
def _download_file_s3(client, bucket_name, bucket_path, object_dir):
    """
    Download the object(s) under bucket_path into object_dir, keeping only
    each object's base file name (no sub-directory structure is recreated).

    Parameters:
        client: s3 client (minio-style: list_objects/fget_object)
        bucket_name(string): s3 bucket name
        bucket_path(string): object key (prefix) within the bucket
        object_dir(string): local directory to place the downloaded file(s)
    Returns:
        int: number of files downloaded
    """
    objects = client.list_objects(bucket_name, prefix=bucket_path,
                                  recursive=True, use_api_v1=True)

    count = 0
    for obj in objects:
        if not obj.is_dir:
            local_file = os.path.join(object_dir,
                                      os.path.basename(obj.object_name))
            # fget_object handles local directory creation if it does
            # not exist.
            client.fget_object(bucket_name, obj.object_name, local_file)
            count += 1

    return count
Expand Down

0 comments on commit 337dcd4

Please sign in to comment.