Skip to content

Commit

Permalink
storage initializer: fix s3 download
Browse files Browse the repository at this point in the history
when downloading the specified file, keep the name of the file itself.
when downloading the specified folder, keep the name of the folder itself.

Signed-off-by: JimmyYang20 <[email protected]>
  • Loading branch information
JimmyYang20 committed Oct 28, 2021
1 parent 1c5c537 commit 4675eaa
Showing 1 changed file with 26 additions and 17 deletions.
43 changes: 26 additions & 17 deletions scripts/storage-initializer/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,31 +175,40 @@ def download_s3_with_multi_files(download_files,
total_count, base_uri, base_out_dir)


def _download_s3(client, uri, out_dir):
bucket_args = uri.replace(_S3_PREFIX, "", 1).split("/", 1)
# Download everything (files and folders) under the given S3 URL.
# When downloading a specified file, keep the name of the file itself.
# When downloading a specified folder, keep the name of the folder itself.
def _download_s3(client, s3_url, out_dir):
    """Download the file or folder addressed by ``s3_url`` into ``out_dir``.

    The URL is split into a bucket name and an object prefix. The prefix is
    listed one level deep (non-recursively). Each directory entry is
    downloaded into ``out_dir`` joined with the entry's path relative to the
    parent of the requested prefix, so a requested folder keeps its own name.
    Each plain object is downloaded directly into ``out_dir``.

    Args:
        client: Object-store client exposing ``list_objects``; it is also
            passed through to ``_download_file_s3``.
        s3_url: URL starting with ``_S3_PREFIX`` followed by
            ``bucket[/path]``.
        out_dir: Local destination directory.

    Returns:
        The total number of files downloaded, summed from the values
        returned by ``_download_file_s3``.
    """
    bucket_args = s3_url.replace(_S3_PREFIX, "", 1).split("/", 1)
    bucket_name = bucket_args[0]
    bucket_path = bucket_args[1] if len(bucket_args) > 1 else ""

    # Only the top level is listed here; the contents of each directory are
    # fetched by _download_file_s3, which lists recursively.
    objects = client.list_objects(bucket_name,
                                  prefix=bucket_path,
                                  recursive=False,
                                  use_api_v1=True)

    # Parent of the requested path: directory entries are placed relative to
    # it so that the requested folder's own name is preserved locally.
    root_path = os.path.dirname(os.path.normpath(bucket_path))

    count = 0
    for obj in objects:
        if obj.is_dir:
            object_dir = os.path.join(
                out_dir, os.path.relpath(obj.object_name, root_path))
            count += _download_file_s3(
                client, bucket_name, obj.object_name, object_dir)
        else:
            count += _download_file_s3(
                client, bucket_name, obj.object_name, out_dir)

    return count


# Download all files under the specified S3 folder,
# or the single specified S3 file.
def _download_file_s3(client, bucket_name, bucket_path, object_dir):
    """Download every object under ``bucket_path`` into ``object_dir``.

    The prefix is listed recursively, so ``bucket_path`` may name either a
    single object or a folder. Every non-directory object is saved as
    ``object_dir/<basename of the object key>`` (nested sub-folders are
    therefore flattened) and then handed to ``_extract_compress``.

    Args:
        client: Object-store client exposing ``list_objects`` and
            ``fget_object``.
        bucket_name: Name of the bucket to read from.
        bucket_path: Object key or key prefix inside the bucket.
        object_dir: Local directory that receives the downloaded files.

    Returns:
        The number of objects downloaded.
    """
    objects = client.list_objects(bucket_name,
                                  prefix=bucket_path,
                                  recursive=True,
                                  use_api_v1=True)

    count = 0
    for obj in objects:
        if obj.is_dir:
            continue

        local_file = os.path.join(object_dir,
                                  os.path.basename(obj.object_name))
        # fget_object handles directory creation if does not exist
        client.fget_object(bucket_name, obj.object_name, local_file)
        # Extract next to the downloaded file. The previous code passed
        # `out_dir`, which is not a name defined in this function.
        _extract_compress(local_file, object_dir)

        count += 1

    return count
Expand Down

0 comments on commit 4675eaa

Please sign in to comment.