From 48d4a84865821b468d0a608ef4860d7f352859f0 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 21 Nov 2022 17:32:50 -0800 Subject: [PATCH 1/8] Robustify the user hash to avoid empty string --- sky/usage/usage_lib.py | 2 +- sky/utils/common_utils.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sky/usage/usage_lib.py b/sky/usage/usage_lib.py index 7b43aa9464d..0201cdf8aab 100644 --- a/sky/usage/usage_lib.py +++ b/sky/usage/usage_lib.py @@ -35,7 +35,7 @@ def _get_current_timestamp_ns() -> int: def _get_user_hash(): """Returns a unique user-machine specific hash as a user id for logging.""" user_id = os.getenv(constants.USAGE_USER_ENV) - if user_id and len(user_id) == 8: + if user_id and len(user_id) == common_utils.USER_HASH_LENGTH: return user_id return common_utils.get_user_hash() diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 48075be17c5..18d8cbf380c 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -18,6 +18,7 @@ from sky import sky_logging _USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash') +USER_HASH_LENGTH = 8 _PAYLOAD_PATTERN = re.compile(r'(.*)') _PAYLOAD_STR = '{}' @@ -44,10 +45,12 @@ def get_user_hash() -> str: """Returns a unique user-machine specific hash as a user id.""" if os.path.exists(_USER_HASH_FILE): with open(_USER_HASH_FILE, 'r') as f: - return f.read() + user_hash = f.read() + if user_hash and len(user_hash) == USER_HASH_LENGTH: + return user_hash hash_str = user_and_hostname_hash() - user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:8] + user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH] os.makedirs(os.path.dirname(_USER_HASH_FILE), exist_ok=True) with open(_USER_HASH_FILE, 'w') as f: f.write(user_hash) From fe91299305e9dddd762f19ac9e17efccb245183a Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 21 Nov 2022 17:36:48 -0800 Subject: [PATCH 2/8] fix --- sky/utils/common_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 18d8cbf380c..e274fef833d 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -46,7 +46,7 @@ def get_user_hash() -> str: if os.path.exists(_USER_HASH_FILE): with open(_USER_HASH_FILE, 'r') as f: user_hash = f.read() - if user_hash and len(user_hash) == USER_HASH_LENGTH: + if len(user_hash) == USER_HASH_LENGTH: return user_hash hash_str = user_and_hostname_hash() From 35476eacf7c89a137ffdb9655c24e2dc74df8143 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 21 Nov 2022 20:40:21 -0800 Subject: [PATCH 3/8] Check valid user hash with hexdecimal --- sky/usage/usage_lib.py | 4 +--- sky/utils/common_utils.py | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/sky/usage/usage_lib.py b/sky/usage/usage_lib.py index 0201cdf8aab..df6842870e2 100644 --- a/sky/usage/usage_lib.py +++ b/sky/usage/usage_lib.py @@ -35,9 +35,7 @@ def _get_current_timestamp_ns() -> int: def _get_user_hash(): """Returns a unique user-machine specific hash as a user id for logging.""" user_id = os.getenv(constants.USAGE_USER_ENV) - if user_id and len(user_id) == common_utils.USER_HASH_LENGTH: - return user_id - return common_utils.get_user_hash() + return common_utils.get_user_hash(default_value=user_id) class MessageType(enum.Enum): diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index e274fef833d..b4106dc16b5 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -41,16 +41,30 @@ def get_usage_run_id() -> str: return _usage_run_id -def get_user_hash() -> str: +def get_user_hash(default_value: Optional[str] = None) -> str: """Returns a unique user-machine specific hash as a user id.""" + def _is_valid_user_hash(user_hash: Optional[str]) -> bool: + try: + int(user_hash, 16) + except ValueError: + return False + return len(user_hash) == USER_HASH_LENGTH + + user_hash = default_value + if _is_valid_user_hash(user_hash): + return user_hash + if os.path.exists(_USER_HASH_FILE): + # Read from cached user hash file. with open(_USER_HASH_FILE, 'r') as f: - user_hash = f.read() - if len(user_hash) == USER_HASH_LENGTH: - return user_hash + # Remove invalid characters. + user_hash = f.read().strip() + if _is_valid_user_hash(user_hash): + return user_hash hash_str = user_and_hostname_hash() user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH] + assert _is_valid_user_hash(user_hash), user_hash os.makedirs(os.path.dirname(_USER_HASH_FILE), exist_ok=True) with open(_USER_HASH_FILE, 'w') as f: f.write(user_hash) From fb130aa0aee183871a86982dcbc5da5111f2a7a3 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 21 Nov 2022 20:41:57 -0800 Subject: [PATCH 4/8] format --- sky/utils/common_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index b4106dc16b5..9e1d34b6c00 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -43,6 +43,7 @@ def get_usage_run_id() -> str: def get_user_hash(default_value: Optional[str] = None) -> str: """Returns a unique user-machine specific hash as a user id.""" + def _is_valid_user_hash(user_hash: Optional[str]) -> bool: try: int(user_hash, 16) From eb132b3805b27618e4fe0c92c09933d9da9c9e1a Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 21 Nov 2022 20:47:55 -0800 Subject: [PATCH 5/8] fix --- sky/utils/common_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 9e1d34b6c00..ab9fc63c769 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -47,7 +47,7 @@ def get_user_hash(default_value: Optional[str] = None) -> str: def _is_valid_user_hash(user_hash: Optional[str]) -> bool: try: int(user_hash, 16) - except ValueError: + except (TypeError, ValueError): return False return len(user_hash) == USER_HASH_LENGTH From a854d8cb363e1dde4e9139dd3a884781e6fcaf1e Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 21 Nov 2022 20:58:28 -0800 Subject: [PATCH 6/8] Add fallback --- sky/utils/common_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index ab9fc63c769..6474232515b 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -65,7 +65,9 @@ def _is_valid_user_hash(user_hash: Optional[str]) -> bool: hash_str = user_and_hostname_hash() user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH] - assert _is_valid_user_hash(user_hash), user_hash + if not _is_valid_user_hash(user_hash): + # A fallback in case the hash is invalid. + user_hash = uuid.uuid4().hex[:USER_HASH_LENGTH] os.makedirs(os.path.dirname(_USER_HASH_FILE), exist_ok=True) with open(_USER_HASH_FILE, 'w') as f: f.write(user_hash) From aba1e3d702274e89d299788c82768b9a7a3e40a5 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 22 Nov 2022 13:08:04 -0800 Subject: [PATCH 7/8] Add comment --- sky/utils/common_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 6474232515b..23cf9c9b7e1 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -42,7 +42,11 @@ def get_usage_run_id() -> str: def get_user_hash(default_value: Optional[str] = None) -> str: - """Returns a unique user-machine specific hash as a user id.""" + """Returns a unique user-machine specific hash as a user id. + + We cache the user hash in a file to avoid potential user_name or + hostname changes causing a new user hash to be generated. + """ def _is_valid_user_hash(user_hash: Optional[str]) -> bool: try: From 7efcbac4b967b90ebd5b6bb6a3fa1582a1868749 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 22 Nov 2022 13:31:17 -0800 Subject: [PATCH 8/8] lint --- sky/utils/common_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 23cf9c9b7e1..fe320800a7e 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -43,7 +43,7 @@ def get_usage_run_id() -> str: def get_user_hash(default_value: Optional[str] = None) -> str: """Returns a unique user-machine specific hash as a user id. - + We cache the user hash in a file to avoid potential user_name or hostname changes causing a new user hash to be generated. """