diff --git a/paddlenlp/datasets/experimental/imdb.py b/paddlenlp/datasets/experimental/imdb.py index 2f040eff643946..89c4e04b83acb6 100644 --- a/paddlenlp/datasets/experimental/imdb.py +++ b/paddlenlp/datasets/experimental/imdb.py @@ -21,7 +21,7 @@ import numpy as np from paddle.dataset.common import md5file -from paddle.utils.download import get_path_from_url +from paddlenlp.utils.downloader import get_path_from_url from paddlenlp.utils.env import DATA_HOME from . import DatasetBuilder diff --git a/paddlenlp/utils/downloader.py b/paddlenlp/utils/downloader.py index 04c8858d2930bf..9d141db145407e 100644 --- a/paddlenlp/utils/downloader.py +++ b/paddlenlp/utils/downloader.py @@ -293,27 +293,23 @@ def _uncompress_file_zip(filepath): def _uncompress_file_tar(filepath, mode="r:*"): files = tarfile.open(filepath, mode) file_list = files.getnames() - file_dir = os.path.dirname(filepath) if _is_a_single_file(file_list): rootpath = file_list[0] uncompressed_path = os.path.join(file_dir, rootpath) - for item in file_list: - files.extract(item, file_dir) + files.extractall(file_dir, files.getmembers()) elif _is_a_single_dir(file_list): rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1] uncompressed_path = os.path.join(file_dir, rootpath) - for item in file_list: - files.extract(item, file_dir) + files.extractall(file_dir, files.getmembers()) else: rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1] uncompressed_path = os.path.join(file_dir, rootpath) if not os.path.exists(uncompressed_path): os.makedirs(uncompressed_path) - for item in file_list: - files.extract(item, os.path.join(file_dir, rootpath)) + files.extractall(os.path.join(file_dir, rootpath), files.getmembers()) files.close() diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 00000000000000..7842e011aad47d --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,2 @@ +sentencepiece +regex