From ccb066a3df16bb6cb88cc3f33b42d7bfe5fa8c4a Mon Sep 17 00:00:00 2001
From: JimmyYang20 <yangjin39@huawei.com>
Date: Thu, 28 Oct 2021 17:30:55 +0800
Subject: [PATCH] storage initializer: fix s3 download

When downloading a specified file, keep the name of the file itself.
When downloading a specified folder, keep the name of the folder itself.

Signed-off-by: JimmyYang20 <yangjin39@huawei.com>
---
 scripts/storage-initializer/README.md   |  8 +++-
 scripts/storage-initializer/download.py | 63 ++++++++++++++-----------
 2 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/scripts/storage-initializer/README.md b/scripts/storage-initializer/README.md
index 6f4278208..a2133d533 100644
--- a/scripts/storage-initializer/README.md
+++ b/scripts/storage-initializer/README.md
@@ -12,8 +12,12 @@ python3 download.py s3://models/classification/model.tar.gz /tmp/models/
 export S3_ENDPOINT_URL=https://play.min.io
 export ACCESS_KEY_ID=Q3AM3UQ867SPQQA43P2F
 export SECRET_ACCESS_KEY=zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG
-python3 download.py s3://datasets/mnist /tmp/mnist
-# we then download the content of mnist directory into /tmp/mnist/
+
+python3 download.py s3://datasets/mnist/1.jpg /tmp
+# This downloads the file 1.jpg into /tmp; the result is /tmp/1.jpg.
+
+python3 download.py s3://datasets/mnist /tmp
+# This downloads the folder mnist into /tmp; the result is /tmp/mnist.
 
 ```
 3. http server:
diff --git a/scripts/storage-initializer/download.py b/scripts/storage-initializer/download.py
index 8d86196e5..5fa17e36c 100644
--- a/scripts/storage-initializer/download.py
+++ b/scripts/storage-initializer/download.py
@@ -175,35 +175,44 @@ def download_s3_with_multi_files(download_files,
              total_count, base_uri, base_out_dir)
 
 
-def _download_s3(client, uri, out_dir):
-    bucket_args = uri.replace(_S3_PREFIX, "", 1).split("/", 1)
+def _download_s3(client, s3_url, out_dir):
+    """
+    Download the specified S3 file or folder into a local directory.
+    Naming rules:
+    1. When downloading a file, the file keeps its own name.
+    2. When downloading a folder, the folder keeps its own name.
+
+    Parameters:
+    client: s3 client
+    s3_url(string): url in s3, e.g. file url: s3://dev/data/data.txt, directory url: s3://dev/data
+    out_dir(string): local directory path, e.g. /tmp/data/
+
+    Returns:
+    int: number of files downloaded from s3_url
+    """
+
+    bucket_args = s3_url.replace(_S3_PREFIX, "", 1).split("/", 1)
     bucket_name = bucket_args[0]
-    bucket_path = len(bucket_args) > 1 and bucket_args[1] or ""
-
-    objects = client.list_objects(bucket_name,
-                                  prefix=bucket_path,
-                                  recursive=True,
-                                  use_api_v1=True)
-    count = 0
-
-    for obj in objects:
-        # Replace any prefix from the object key with out_dir
-        subdir_object_key = obj.object_name[len(bucket_path):].strip("/")
-        # fget_object handles directory creation if does not exist
-        if not obj.is_dir:
-            local_file = os.path.join(
-                out_dir,
-                subdir_object_key or os.path.basename(obj.object_name)
-            )
-            LOG.debug("downloading count:%d, file:%s",
-                      count, subdir_object_key)
-            client.fget_object(bucket_name, obj.object_name, local_file)
-            _extract_compress(local_file, out_dir)
-
-            count += 1
-
-    return count
+    bucket_path = len(bucket_args) > 1 and os.path.normpath(bucket_args[1]) or ""
 
+    objects = client.list_objects(bucket_name, prefix=bucket_path, use_api_v1=True)
+    for o in objects:
+        if not o.is_dir:
+            client.fget_object(bucket_name, o.object_name, os.path.join(out_dir, os.path.basename(o.object_name)))
+            return 1
+        else:
+            count = 0
+            objects = client.list_objects(bucket_name, prefix=bucket_path, recursive=True, use_api_v1=True)
+            root_path, _ = os.path.split(os.path.normpath(bucket_path))
+            for obj in objects:
+                if not obj.is_dir:
+                    object_file_path = os.path.join(out_dir, os.path.relpath(obj.object_name, root_path))
+                    client.fget_object(bucket_name, obj.object_name, object_file_path)
+                    count += 1
+
+            return count
+
+    return 0
 
 def download_local(uri, out_dir=None):
     local_path = uri.replace(_LOCAL_PREFIX, "/", 1)