Skip to content

Commit

Permalink
[Usage] Robustify the user hash to avoid empty string (#1442)
Browse files Browse the repository at this point in the history
* Robustify the user hash to avoid empty string

* fix

* Check valid user hash with hexdecimal

* format

* fix

* Add fallback

* Add comment

* lint
  • Loading branch information
Michaelvll authored and concretevitamin committed Dec 1, 2022
1 parent 3c46480 commit 40252d9
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
4 changes: 1 addition & 3 deletions sky/usage/usage_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def _get_current_timestamp_ns() -> int:
def _get_user_hash():
"""Returns a unique user-machine specific hash as a user id for logging."""
user_id = os.getenv(constants.USAGE_USER_ENV)
if user_id and len(user_id) == 8:
return user_id
return common_utils.get_user_hash()
return common_utils.get_user_hash(default_value=user_id)


class MessageType(enum.Enum):
Expand Down
32 changes: 28 additions & 4 deletions sky/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sky import sky_logging

_USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash')
USER_HASH_LENGTH = 8

_PAYLOAD_PATTERN = re.compile(r'<sky-payload>(.*)</sky-payload>')
_PAYLOAD_STR = '<sky-payload>{}</sky-payload>'
Expand All @@ -40,14 +41,37 @@ def get_usage_run_id() -> str:
return _usage_run_id


def get_user_hash() -> str:
"""Returns a unique user-machine specific hash as a user id."""
def get_user_hash(default_value: Optional[str] = None) -> str:
"""Returns a unique user-machine specific hash as a user id.
We cache the user hash in a file to avoid potential user_name or
hostname changes causing a new user hash to be generated.
"""

def _is_valid_user_hash(user_hash: Optional[str]) -> bool:
try:
int(user_hash, 16)
except (TypeError, ValueError):
return False
return len(user_hash) == USER_HASH_LENGTH

user_hash = default_value
if _is_valid_user_hash(user_hash):
return user_hash

if os.path.exists(_USER_HASH_FILE):
# Read from cached user hash file.
with open(_USER_HASH_FILE, 'r') as f:
return f.read()
# Remove invalid characters.
user_hash = f.read().strip()
if _is_valid_user_hash(user_hash):
return user_hash

hash_str = user_and_hostname_hash()
user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:8]
user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH]
if not _is_valid_user_hash(user_hash):
# A fallback in case the hash is invalid.
user_hash = uuid.uuid4().hex[:USER_HASH_LENGTH]
os.makedirs(os.path.dirname(_USER_HASH_FILE), exist_ok=True)
with open(_USER_HASH_FILE, 'w') as f:
f.write(user_hash)
Expand Down

0 comments on commit 40252d9

Please sign in to comment.