From 968912578d3209b4a5109e1348763079902263b3 Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 21:51:18 -0400 Subject: [PATCH 1/9] Support body process protection across multiple users --- body/stretch_body/hello_utils.py | 74 ++++++++++++++++++++++++- body/stretch_body/robot.py | 26 ++------- tools/bin/stretch_free_robot_process.py | 25 ++------- 3 files changed, 83 insertions(+), 42 deletions(-) diff --git a/body/stretch_body/hello_utils.py b/body/stretch_body/hello_utils.py index b0aa70c6..bf5315dd 100644 --- a/body/stretch_body/hello_utils.py +++ b/body/stretch_body/hello_utils.py @@ -2,14 +2,19 @@ import yaml import math import os +import pwd import time import logging import numpy as np import sys +import signal +import pathlib import numbers import subprocess import pyrealsense2 as rs import cv2 +from filelock import FileLock, Timeout + def print_stretch_re_use(): print("For use with S T R E T C H (R) from Hello Robot Inc.") @@ -645,4 +650,71 @@ def get_video_device_port(camera_name): print(f"Found Camera={k} at port={camera_device} ") return camera_device print('ERROR: Did not find the specified camera_name = ' + str(camera_name)) - return camera_device \ No newline at end of file + return camera_device + +BODY_FILE = '/tmp/stretch_pid_dir/stretch_body_robot_pid.txt' +BODY_FILELOCK = f'{BODY_FILE}.lock' + +def acquire_body_filelock(): + whoami = pwd.getpwuid(os.getuid()).pw_name + pid_file = pathlib.Path(BODY_FILE) + filelock_path = pathlib.Path(BODY_FILELOCK) + # 1. If the '/tmp/stretch_pid_dir' does not exist, make it. Note, it's important we create + # these files within a subdirectory instead of /tmp directly because /tmp is a "sticky" + # directory (i.e. only the user that created the file can edit it). + if not pid_file.parent.is_dir(): + pid_file.parent.mkdir(parents=True, exist_ok=True) + file_lock = FileLock(BODY_FILELOCK) + try: + file_lock.acquire(timeout=1) + # 2. If we acquire the lock as the file's owner, the lock's permissions will have changed + # to limit write privileges. We use chmod to enable all users to write to the file. + if filelock_path.owner() == whoami: + filelock_path.chmod(0o777) + # 3. Write this process's PID to a file so this process can be freed by others. + with open(str(pid_file), 'w') as f: + f.write(str(os.getpid())) + if pid_file.owner() == whoami: + pid_file.chmod(0o777) + except Timeout: + # 4. If we failed to acquire the lock as the file's owner, the lock's permissions will have + # changed to limit write privileges. We use chmod to enable all users to write to the file. + if filelock_path.owner() == whoami: + filelock_path.chmod(0o777) + print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"') + return False + return True + +def free_body_filelock(): + whoami = pwd.getpwuid(os.getuid()).pw_name + pid_file = pathlib.Path(BODY_FILE) + filelock_path = pathlib.Path(BODY_FILELOCK) + # 1. If the '/tmp/stretch_pid_dir' does not exist, no robot process has created it. + if not pid_file.parent.is_dir(): + return True + file_lock = FileLock(BODY_FILELOCK) + try: + file_lock.acquire(timeout=1) + file_lock.release() + # 2. If we acquire the lock as the file's owner, the lock's permissions will have changed + # to limit write privileges. We use chmod to enable all users to write to the file. + if filelock_path.owner() == whoami: + filelock_path.chmod(0o777) + except Timeout: + # 3. If we failed to acquire the lock as the file's owner, the lock's permissions will have + # changed to limit write privileges. We use chmod to enable all users to write to the file. + if filelock_path.owner() == whoami: + filelock_path.chmod(0o777) + with open(pid_file, 'r') as f: + tokill_pid = int(f.read()) + # 4. Send SIGTERM a few times because some processes (e.g. ipython) try to stall on exit. + try: + os.kill(tokill_pid, signal.SIGTERM) + time.sleep(0.2) + os.kill(tokill_pid, signal.SIGTERM) + time.sleep(0.2) + os.kill(tokill_pid, signal.SIGTERM) + except: + # 5. os.kill will fail to kill PIDs not owned by this user. Root user can kill anything. + return False + return True diff --git a/body/stretch_body/robot.py b/body/stretch_body/robot.py index ad0da735..a59fe422 100644 --- a/body/stretch_body/robot.py +++ b/body/stretch_body/robot.py @@ -4,10 +4,6 @@ import signal import importlib import asyncio -import os -import sys -from IPython import get_ipython -from filelock import FileLock, Timeout import traceback from stretch_body.device import Device @@ -206,22 +202,6 @@ class Robot(Device): """ def __init__(self): Device.__init__(self, 'robot') - - # TODO: Move filelocking to the startup() method after dxl - # devices move init usb comm out of their __init__ methods. - # https://github.com/hello-robot/stretch_body/issues/217 - pid_file = "/tmp/stretch_body_robot_pid.txt" - self._file_lock = FileLock(f"{pid_file}.lock") - try: - self._file_lock.acquire(timeout=1) - with open(pid_file, 'w') as f: - f.write(str(os.getpid())) - except Timeout: - print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"') - if get_ipython(): - raise - sys.exit(1) - self.monitor = RobotMonitor(self) self.trace = RobotTrace(self) self.collision = RobotCollisionMgmt(self) @@ -307,6 +287,11 @@ def startup(self,start_non_dxl_thread=True,start_dxl_thread=True,start_sys_mon_t bool true if startup of robot succeeded """ + did_acquire = hello_utils.acquire_body_filelock() + if not did_acquire: + print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"') + return False + self.logger.debug('Starting up Robot {0} of batch {1}'.format(self.params['serial_no'], self.params['batch_name'])) success = True for k in self.devices: @@ -371,7 +356,6 @@ def stop(self): Cleanly stops down motion and communication """ self.logger.debug('---- Shutting down robot ----') - self._file_lock.release() if self.non_dxl_thread: if self.non_dxl_thread.running: self.non_dxl_thread.shutdown_flag.set() diff --git a/tools/bin/stretch_free_robot_process.py b/tools/bin/stretch_free_robot_process.py index fb7eb8eb..5b36ad2b 100755 --- a/tools/bin/stretch_free_robot_process.py +++ b/tools/bin/stretch_free_robot_process.py @@ -1,23 +1,8 @@ #!/usr/bin/env python3 -import os -import signal -import time -from filelock import FileLock, Timeout +from stretch_body.hello_utils import free_body_filelock -pid_file = "/tmp/stretch_body_robot_pid.txt" -file_lock = FileLock(f"{pid_file}.lock") -try: - file_lock.acquire(timeout=1) - file_lock.release() -except Timeout: - with open(pid_file, 'r') as f: - tokill_pid = int(f.read()) - # send SIGTERM a few times some processes (e.g. ipython) - # try to stall on exit - os.kill(tokill_pid, signal.SIGTERM) - time.sleep(0.2) - os.kill(tokill_pid, signal.SIGTERM) - time.sleep(0.2) - os.kill(tokill_pid, signal.SIGTERM) -finally: +did_free = free_body_filelock() +if did_free: print('Done!') +else: + print('Failed because robot is being used by another user. To force kill the process, try running "sudo -E env PATH=$PATH stretch_free_robot_process.py"') From 09970deb10c2ca2dc9656087aaea117d647e68aa Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:15:03 -0400 Subject: [PATCH 2/9] Retain reference to file lock to avoid garbage collection --- body/stretch_body/hello_utils.py | 5 ++--- body/stretch_body/robot.py | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/body/stretch_body/hello_utils.py b/body/stretch_body/hello_utils.py index bf5315dd..3a69953a 100644 --- a/body/stretch_body/hello_utils.py +++ b/body/stretch_body/hello_utils.py @@ -681,9 +681,8 @@ def acquire_body_filelock(): # changed to limit write privileges. We use chmod to enable all users to write to the file. if filelock_path.owner() == whoami: filelock_path.chmod(0o777) - print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"') - return False - return True + return False, file_lock + return True, file_lock def free_body_filelock(): whoami = pwd.getpwuid(os.getuid()).pw_name diff --git a/body/stretch_body/robot.py b/body/stretch_body/robot.py index a59fe422..3f63997b 100644 --- a/body/stretch_body/robot.py +++ b/body/stretch_body/robot.py @@ -287,7 +287,7 @@ def startup(self,start_non_dxl_thread=True,start_dxl_thread=True,start_sys_mon_t bool true if startup of robot succeeded """ - did_acquire = hello_utils.acquire_body_filelock() + did_acquire, self._file_lock = hello_utils.acquire_body_filelock() if not did_acquire: print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"') return False @@ -356,6 +356,7 @@ def stop(self): Cleanly stops down motion and communication """ self.logger.debug('---- Shutting down robot ----') + self._file_lock.release() if self.non_dxl_thread: if self.non_dxl_thread.running: self.non_dxl_thread.shutdown_flag.set() From 8f43de556e44da0948d41efe6914acbdf0ca0cb5 Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:21:00 -0400 Subject: [PATCH 3/9] Cleaner exits from other CLI when robot fails to start --- tools/bin/stretch_robot_home.py | 1 - tools/bin/stretch_system_check.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/bin/stretch_robot_home.py b/tools/bin/stretch_robot_home.py index c53ac5b2..183d6f55 100755 --- a/tools/bin/stretch_robot_home.py +++ b/tools/bin/stretch_robot_home.py @@ -113,7 +113,6 @@ def get_latest_github_commit(url): r=robot.Robot() if not r.startup(): - r.logger.error('Failed to startup connection to robot') sys.exit(1) if r.pimu.status['runstop_event']: r.logger.error('Cannot home while run-stopped') diff --git a/tools/bin/stretch_system_check.py b/tools/bin/stretch_system_check.py index b088f2b4..002093d5 100755 --- a/tools/bin/stretch_system_check.py +++ b/tools/bin/stretch_system_check.py @@ -6,6 +6,7 @@ import os import sh import re +import sys import apt import git import yaml @@ -68,8 +69,11 @@ def val_in_range(val_name, val,vmin, vmax): print(Fore.LIGHTBLUE_EX + 'Batch = ' + Fore.CYAN + stretch_batch) print(Fore.LIGHTBLUE_EX + 'Serial Number = ' + Fore.CYAN + stretch_serial_no) # create robot instance +print(Style.RESET_ALL) r=robot.Robot() -r.startup() +if not r.startup(): + sys.exit(1) + r.monitor.logger.setLevel('WARN') # ################### HARDWARE ###################### From 60261dfbfb09a5edc5bf98378bd36dcf505177df Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:26:20 -0400 Subject: [PATCH 4/9] os.kill(pid) can throw two types of exceptions It can throw a PermissionError and a ProcessLookupError. The former likely means this user doesnt have the permission to kill the PID. The later likely means that a previous call to os.kill() already succeeded. Therefore, we only need to catch the former. --- body/stretch_body/hello_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/body/stretch_body/hello_utils.py b/body/stretch_body/hello_utils.py index 3a69953a..86b796a0 100644 --- a/body/stretch_body/hello_utils.py +++ b/body/stretch_body/hello_utils.py @@ -713,7 +713,7 @@ def free_body_filelock(): os.kill(tokill_pid, signal.SIGTERM) time.sleep(0.2) os.kill(tokill_pid, signal.SIGTERM) - except: + except PermissionError: # 5. os.kill will fail to kill PIDs not owned by this user. Root user can kill anything. return False return True From 42e865a75160d15b3e0a2328db2c587b733ad40e Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:30:09 -0400 Subject: [PATCH 5/9] Need to handle the ProcessLookupError as well --- body/stretch_body/hello_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/body/stretch_body/hello_utils.py b/body/stretch_body/hello_utils.py index 86b796a0..387e2592 100644 --- a/body/stretch_body/hello_utils.py +++ b/body/stretch_body/hello_utils.py @@ -716,4 +716,6 @@ def free_body_filelock(): except PermissionError: # 5. os.kill will fail to kill PIDs not owned by this user. Root user can kill anything. return False + except ProcessLookupError: + pass return True From 5e9fd455b435d4f8f4dee5210a0bd27f9d335bbb Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:33:26 -0400 Subject: [PATCH 6/9] Use robot class in battery check tool --- tools/bin/stretch_robot_battery_check.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/bin/stretch_robot_battery_check.py b/tools/bin/stretch_robot_battery_check.py index 937d2b52..eb489ca2 100755 --- a/tools/bin/stretch_robot_battery_check.py +++ b/tools/bin/stretch_robot_battery_check.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 from __future__ import print_function -import stretch_body.pimu as pimu +import stretch_body.robot as robot from colorama import Fore, Back, Style import argparse +import sys import stretch_body.hello_utils as hu hu.print_stretch_re_use() @@ -19,12 +20,12 @@ def val_in_range(val_name, val,vmin, vmax): # ##################################################### -p=pimu.Pimu() -if not p.startup(): - exit() -p.pull_status() -val_in_range('Voltage',p.status['voltage'], vmin=p.config['low_voltage_alert'], vmax=14.0) -val_in_range('Current',p.status['current'], vmin=0.1, vmax=p.config['high_current_alert']) -val_in_range('CPU Temp',p.status['cpu_temp'], vmin=15, vmax=80) +r=robot.Robot() +if not r.startup(): + sys.exit(1) +r.pimu.pull_status() +val_in_range('Voltage',r.pimu.status['voltage'], vmin=r.pimu.config['low_voltage_alert'], vmax=14.0) +val_in_range('Current',r.pimu.status['current'], vmin=0.1, vmax=r.pimu.config['high_current_alert']) +val_in_range('CPU Temp',r.pimu.status['cpu_temp'], vmin=15, vmax=80) print(Style.RESET_ALL) -p.stop() +r.stop() From eb7f82d9392f732e4a8587d50ddb1bb80fe85c91 Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:34:34 -0400 Subject: [PATCH 7/9] comment out cpu temp --- tools/bin/stretch_robot_battery_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bin/stretch_robot_battery_check.py b/tools/bin/stretch_robot_battery_check.py index eb489ca2..b197e897 100755 --- a/tools/bin/stretch_robot_battery_check.py +++ b/tools/bin/stretch_robot_battery_check.py @@ -26,6 +26,6 @@ def val_in_range(val_name, val,vmin, vmax): r.pimu.pull_status() val_in_range('Voltage',r.pimu.status['voltage'], vmin=r.pimu.config['low_voltage_alert'], vmax=14.0) val_in_range('Current',r.pimu.status['current'], vmin=0.1, vmax=r.pimu.config['high_current_alert']) -val_in_range('CPU Temp',r.pimu.status['cpu_temp'], vmin=15, vmax=80) +# val_in_range('CPU Temp',r.pimu.status['cpu_temp'], vmin=15, vmax=80) print(Style.RESET_ALL) r.stop() From 5c2607a4d5d9a0279d61e4c39ca31ff4012291fe Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:36:17 -0400 Subject: [PATCH 8/9] Exit on keyboard teleop if robot failed to start --- tools/bin/stretch_robot_keyboard_teleop.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bin/stretch_robot_keyboard_teleop.py b/tools/bin/stretch_robot_keyboard_teleop.py index 139e14d7..5f0eeb10 100755 --- a/tools/bin/stretch_robot_keyboard_teleop.py +++ b/tools/bin/stretch_robot_keyboard_teleop.py @@ -4,6 +4,7 @@ import stretch_body.robot as hello_robot from stretch_body.hello_utils import * import argparse +import sys print_stretch_re_use() @@ -12,7 +13,8 @@ robot=hello_robot.Robot() -robot.startup() +if not robot.startup(): + sys.exit(1) small_move_m=.01 large_move_m=0.1 From e5f93938a9462bbaa84ba7282c7ab3f90af0b9c7 Mon Sep 17 00:00:00 2001 From: Binit Shah Date: Mon, 6 May 2024 23:52:35 -0400 Subject: [PATCH 9/9] Root user should also revert permissions --- body/stretch_body/hello_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/body/stretch_body/hello_utils.py b/body/stretch_body/hello_utils.py index 387e2592..21ea989a 100644 --- a/body/stretch_body/hello_utils.py +++ b/body/stretch_body/hello_utils.py @@ -669,17 +669,17 @@ def acquire_body_filelock(): file_lock.acquire(timeout=1) # 2. If we acquire the lock as the file's owner, the lock's permissions will have changed # to limit write privileges. We use chmod to enable all users to write to the file. - if filelock_path.owner() == whoami: + if filelock_path.owner() == whoami or whoami == "root": filelock_path.chmod(0o777) # 3. Write this process's PID to a file so this process can be freed by others. with open(str(pid_file), 'w') as f: f.write(str(os.getpid())) - if pid_file.owner() == whoami: + if pid_file.owner() == whoami or whoami == "root": pid_file.chmod(0o777) except Timeout: # 4. If we failed to acquire the lock as the file's owner, the lock's permissions will have # changed to limit write privileges. We use chmod to enable all users to write to the file. - if filelock_path.owner() == whoami: + if filelock_path.owner() == whoami or whoami == "root": filelock_path.chmod(0o777) return False, file_lock return True, file_lock @@ -697,12 +697,12 @@ def free_body_filelock(): file_lock.release() # 2. If we acquire the lock as the file's owner, the lock's permissions will have changed # to limit write privileges. We use chmod to enable all users to write to the file. - if filelock_path.owner() == whoami: + if filelock_path.owner() == whoami or whoami == "root": filelock_path.chmod(0o777) except Timeout: # 3. If we failed to acquire the lock as the file's owner, the lock's permissions will have # changed to limit write privileges. We use chmod to enable all users to write to the file. - if filelock_path.owner() == whoami: + if filelock_path.owner() == whoami or whoami == "root": filelock_path.chmod(0o777) with open(pid_file, 'r') as f: tokill_pid = int(f.read())