From 1fe0da96f55e318f86156ec170baf3f468b962fa Mon Sep 17 00:00:00 2001 From: Zongheng Yang Date: Wed, 7 Jun 2023 09:07:16 -0700 Subject: [PATCH 1/2] Make path_size_megabytes() more robust. --- sky/backends/backend_utils.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 33e1269241c..5341b4b4f8e 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -276,21 +276,36 @@ def _optimize_file_mounts(yaml_path: str) -> None: def path_size_megabytes(path: str) -> int: - """Returns the size of 'path' (directory or file) in megabytes.""" + """Returns the size of 'path' (directory or file) in megabytes. + + Returns: + If successful: the size of 'path' in megabytes, rounded down. Otherwise, + -1. + """ resolved_path = pathlib.Path(path).expanduser().resolve() git_exclude_filter = '' if (resolved_path / command_runner.GIT_EXCLUDE).exists(): # Ensure file exists; otherwise, rsync will error out. git_exclude_filter = command_runner.RSYNC_EXCLUDE_OPTION.format( str(resolved_path / command_runner.GIT_EXCLUDE)) - rsync_output = str( - subprocess.check_output( - f'rsync {command_runner.RSYNC_DISPLAY_OPTION} ' - f'{command_runner.RSYNC_FILTER_OPTION} ' - f'{git_exclude_filter} --dry-run {path!r}', - shell=True).splitlines()[-1]) - total_bytes = rsync_output.split(' ')[3].replace(',', '') - return int(float(total_bytes)) // 10**6 + rsync_output = '' + try: + rsync_output = str( + subprocess.check_output( + f'rsync {command_runner.RSYNC_DISPLAY_OPTION} ' + f'{command_runner.RSYNC_FILTER_OPTION} ' + f'{git_exclude_filter} --dry-run {path!r}', + shell=True)) + except subprocess.CalledProcessError: + return -1 + match = re.search(r'total size is (\d+)', rsync_output) + if match is not None: + try: + total_bytes = int(float(match.group(1))) + return total_bytes // (1024**2) + except ValueError: + pass # Maybe different rsync versions have different output. + return -1 class FileMountHelper(object): From 0981822a2c29f8cc4930309c63d4cc33891a2e12 Mon Sep 17 00:00:00 2001 From: Zongheng Yang Date: Wed, 7 Jun 2023 21:47:54 -0700 Subject: [PATCH 2/2] Address comments. --- sky/backends/backend_utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 5341b4b4f8e..c4708b5327a 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -288,22 +288,28 @@ def path_size_megabytes(path: str) -> int: # Ensure file exists; otherwise, rsync will error out. git_exclude_filter = command_runner.RSYNC_EXCLUDE_OPTION.format( str(resolved_path / command_runner.GIT_EXCLUDE)) + rsync_command = (f'rsync {command_runner.RSYNC_DISPLAY_OPTION} ' + f'{command_runner.RSYNC_FILTER_OPTION} ' + f'{git_exclude_filter} --dry-run {path!r}') rsync_output = '' try: - rsync_output = str( - subprocess.check_output( - f'rsync {command_runner.RSYNC_DISPLAY_OPTION} ' - f'{command_runner.RSYNC_FILTER_OPTION} ' - f'{git_exclude_filter} --dry-run {path!r}', - shell=True)) + rsync_output = str(subprocess.check_output(rsync_command, shell=True)) except subprocess.CalledProcessError: + logger.debug('Command failed, proceeding without estimating size: ' + f'{rsync_command}') return -1 - match = re.search(r'total size is (\d+)', rsync_output) + # 3.2.3: + # total size is 250,957,728 speedup is 330.19 (DRY RUN) + # 2.6.9: + # total size is 212627556 speedup is 2437.41 + match = re.search(r'total size is ([\d,]+)', rsync_output) if match is not None: try: - total_bytes = int(float(match.group(1))) + total_bytes = int(float(match.group(1).replace(',', ''))) return total_bytes // (1024**2) except ValueError: + logger.debug('Failed to find "total size" in rsync output. Inspect ' + f'output of the following command: {rsync_command}') pass # Maybe different rsync versions have different output. return -1