Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Usage] Robustify the user hash to avoid empty string #1442

Merged
merged 8 commits into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions sky/usage/usage_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def _get_current_timestamp_ns() -> int:
 def _get_user_hash():
     """Returns a unique user-machine specific hash as a user id for logging."""
     user_id = os.getenv(constants.USAGE_USER_ENV)
-    if user_id and len(user_id) == 8:
-        return user_id
-    return common_utils.get_user_hash()
+    return common_utils.get_user_hash(default_value=user_id)


 class MessageType(enum.Enum):
Expand Down
32 changes: 28 additions & 4 deletions sky/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
 from sky import sky_logging

 _USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash')
+USER_HASH_LENGTH = 8

 _PAYLOAD_PATTERN = re.compile(r'<sky-payload>(.*)</sky-payload>')
 _PAYLOAD_STR = '<sky-payload>{}</sky-payload>'
Expand All @@ -40,14 +41,37 @@ def get_usage_run_id() -> str:
     return _usage_run_id


-def get_user_hash() -> str:
-    """Returns a unique user-machine specific hash as a user id."""
+def get_user_hash(default_value: Optional[str] = None) -> str:
+    """Returns a unique user-machine specific hash as a user id.
+
+    We cache the user hash in a file to avoid potential user_name or
+    hostname changes causing a new user hash to be generated.
+    """
+
+    def _is_valid_user_hash(user_hash: Optional[str]) -> bool:
+        try:
+            int(user_hash, 16)
+        except (TypeError, ValueError):
+            return False
+        return len(user_hash) == USER_HASH_LENGTH
+
+    user_hash = default_value
+    if _is_valid_user_hash(user_hash):
+        return user_hash
+
     if os.path.exists(_USER_HASH_FILE):
         # Read from cached user hash file.
         with open(_USER_HASH_FILE, 'r') as f:
-            return f.read()
+            # Remove invalid characters.
+            user_hash = f.read().strip()
+        if _is_valid_user_hash(user_hash):
+            return user_hash
+
     hash_str = user_and_hostname_hash()
-    user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:8]
+    user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH]
+    if not _is_valid_user_hash(user_hash):
+        # A fallback in case the hash is invalid.
+        user_hash = uuid.uuid4().hex[:USER_HASH_LENGTH]
     os.makedirs(os.path.dirname(_USER_HASH_FILE), exist_ok=True)
     with open(_USER_HASH_FILE, 'w') as f:
         f.write(user_hash)
Expand Down