Skip to content

Commit

Permalink
Merge pull request #317 from hello-robot/feature/multiuser_multirobot…
Browse files Browse the repository at this point in the history
…process_protection

Robot process protection for multi-user setup
  • Loading branch information
hello-binit authored May 7, 2024
2 parents a97f03f + e5f9393 commit 0ec5bd3
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 53 deletions.
75 changes: 74 additions & 1 deletion body/stretch_body/hello_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@
import yaml
import math
import os
import pwd
import time
import logging
import numpy as np
import sys
import signal
import pathlib
import numbers
import subprocess
import pyrealsense2 as rs
import cv2
from filelock import FileLock, Timeout


def print_stretch_re_use():
print("For use with S T R E T C H (R) from Hello Robot Inc.")
Expand Down Expand Up @@ -645,4 +650,72 @@ def get_video_device_port(camera_name):
print(f"Found Camera={k} at port={camera_device} ")
return camera_device
print('ERROR: Did not find the specified camera_name = ' + str(camera_name))
return camera_device
return camera_device

# File in which the process that currently holds the robot lock records its PID
# (written by acquire_body_filelock(), read by free_body_filelock()).
BODY_FILE = '/tmp/stretch_pid_dir/stretch_body_robot_pid.txt'
# Lock file (used with filelock.FileLock) that guards exclusive use of the robot.
BODY_FILELOCK = f'{BODY_FILE}.lock'

def acquire_body_filelock():
    """Try to claim exclusive use of the robot for the calling process.

    Attempts to acquire the file lock at ``BODY_FILELOCK`` (waiting at most
    one second). On success, this process's PID is written to ``BODY_FILE``
    so that ``free_body_filelock()`` can later signal it.

    Returns
    -------
    tuple(bool, FileLock)
        ``(True, lock)`` if the lock was acquired (``lock`` is held), or
        ``(False, lock)`` if another process already holds it.

    Raises
    ------
    OSError
        If the PID file cannot be written. The lock is released before the
        error propagates so a failed startup does not keep the robot locked.
    """
    uid = os.getuid()
    pid_file = pathlib.Path(BODY_FILE)
    filelock_path = pathlib.Path(BODY_FILELOCK)

    def may_chmod(path):
        # Compare numeric UIDs rather than user names: name lookups through
        # pwd / Path.owner() raise KeyError for UIDs without a passwd entry
        # (e.g. inside containers). UID 0 (root) may chmod anything.
        return uid == 0 or path.stat().st_uid == uid

    # 1. Make sure '/tmp/stretch_pid_dir' exists. Per the original author's note,
    #    the files are kept in a subdirectory rather than in /tmp directly because
    #    /tmp is a "sticky" directory, which gets in the way of sharing them
    #    between users.
    pid_file.parent.mkdir(parents=True, exist_ok=True)

    file_lock = FileLock(BODY_FILELOCK)
    try:
        file_lock.acquire(timeout=1)
        acquired = True
    except Timeout:
        acquired = False

    # 2. Whether or not the lock was obtained, the acquire attempt may have reset
    #    the lock file's permissions to limit write privileges. If we own the file
    #    (or are root), open it back up so every user can take the lock later.
    if may_chmod(filelock_path):
        filelock_path.chmod(0o777)

    if not acquired:
        return False, file_lock

    # 3. Record this process's PID so the process can be freed by others, and make
    #    the PID file writable by all users for the next lock holder.
    try:
        pid_file.write_text(str(os.getpid()))
        if may_chmod(pid_file):
            pid_file.chmod(0o777)
    except OSError:
        # Do not leave the robot locked by a process that failed to register itself.
        file_lock.release()
        raise
    return True, file_lock

def free_body_filelock():
    """Free the robot lock, terminating the process that holds it if necessary.

    If the lock at ``BODY_FILELOCK`` can be acquired within one second, nothing
    is using the robot and the lock is released again immediately. Otherwise the
    PID stored in ``BODY_FILE`` is sent SIGTERM (up to three times).

    Returns
    -------
    bool
        True if the lock was free, or if the holding process was signalled (or
        had already exited). False if the holder could not be signalled: it
        belongs to another user, or the PID file is missing, unreadable, or
        does not contain a valid positive PID.
    """
    uid = os.getuid()
    pid_file = pathlib.Path(BODY_FILE)
    filelock_path = pathlib.Path(BODY_FILELOCK)

    # 1. If '/tmp/stretch_pid_dir' does not exist, no robot process has created it.
    if not pid_file.parent.is_dir():
        return True

    file_lock = FileLock(BODY_FILELOCK)
    try:
        file_lock.acquire(timeout=1)
        file_lock.release()
        lock_is_held = False
    except Timeout:
        lock_is_held = True

    # 2. Whether or not the lock was obtained, the acquire attempt may have reset
    #    the lock file's permissions to limit write privileges. If we own the file
    #    (or are root), re-enable writing for all users. Numeric UIDs are compared
    #    because name lookups raise KeyError for UIDs without a passwd entry.
    if uid == 0 or filelock_path.stat().st_uid == uid:
        filelock_path.chmod(0o777)

    if not lock_is_held:
        return True

    # 3. Find out which process holds the lock. The PID file is world-writable, so
    #    its contents are validated before being handed to os.kill().
    try:
        tokill_pid = int(pid_file.read_text())
    except (OSError, ValueError):
        # Missing, unreadable, empty or non-numeric PID file: holder is unknown.
        return False
    if tokill_pid <= 0:
        # os.kill() treats 0 and negative values as "signal a whole process
        # group / every process we may signal"; never forward those.
        return False

    # 4. Send SIGTERM a few times because some processes (e.g. ipython) try to stall on exit.
    try:
        for attempt in range(3):
            if attempt:
                time.sleep(0.2)
            os.kill(tokill_pid, signal.SIGTERM)
    except PermissionError:
        # 5. os.kill will fail to kill PIDs not owned by this user. Root user can kill anything.
        return False
    except ProcessLookupError:
        # The process is already gone (possibly thanks to an earlier SIGTERM).
        pass
    return True
25 changes: 5 additions & 20 deletions body/stretch_body/robot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
import signal
import importlib
import asyncio
import os
import sys
from IPython import get_ipython
from filelock import FileLock, Timeout
import traceback

from stretch_body.device import Device
Expand Down Expand Up @@ -206,22 +202,6 @@ class Robot(Device):
"""
def __init__(self):
Device.__init__(self, 'robot')

# TODO: Move filelocking to the startup() method after dxl
# devices move init usb comm out of their __init__ methods.
# https://github.com/hello-robot/stretch_body/issues/217
pid_file = "/tmp/stretch_body_robot_pid.txt"
self._file_lock = FileLock(f"{pid_file}.lock")
try:
self._file_lock.acquire(timeout=1)
with open(pid_file, 'w') as f:
f.write(str(os.getpid()))
except Timeout:
print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"')
if get_ipython():
raise
sys.exit(1)

self.monitor = RobotMonitor(self)
self.trace = RobotTrace(self)
self.collision = RobotCollisionMgmt(self)
Expand Down Expand Up @@ -307,6 +287,11 @@ def startup(self,start_non_dxl_thread=True,start_dxl_thread=True,start_sys_mon_t
bool
true if startup of robot succeeded
"""
did_acquire, self._file_lock = hello_utils.acquire_body_filelock()
if not did_acquire:
print('Another process is already using Stretch. Try running "stretch_free_robot_process.py"')
return False

self.logger.debug('Starting up Robot {0} of batch {1}'.format(self.params['serial_no'], self.params['batch_name']))
success = True
for k in self.devices:
Expand Down
25 changes: 5 additions & 20 deletions tools/bin/stretch_free_robot_process.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
#!/usr/bin/env python3
import os
import signal
import time
from filelock import FileLock, Timeout
from stretch_body.hello_utils import free_body_filelock

pid_file = "/tmp/stretch_body_robot_pid.txt"
file_lock = FileLock(f"{pid_file}.lock")
try:
file_lock.acquire(timeout=1)
file_lock.release()
except Timeout:
with open(pid_file, 'r') as f:
tokill_pid = int(f.read())
# send SIGTERM a few times some processes (e.g. ipython)
# try to stall on exit
os.kill(tokill_pid, signal.SIGTERM)
time.sleep(0.2)
os.kill(tokill_pid, signal.SIGTERM)
time.sleep(0.2)
os.kill(tokill_pid, signal.SIGTERM)
finally:
did_free = free_body_filelock()
if did_free:
print('Done!')
else:
print('Failed because robot is being used by another user. To force kill the process, try running "sudo -E env PATH=$PATH stretch_free_robot_process.py"')
19 changes: 10 additions & 9 deletions tools/bin/stretch_robot_battery_check.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python3
from __future__ import print_function
import stretch_body.pimu as pimu
import stretch_body.robot as robot
from colorama import Fore, Back, Style
import argparse
import sys
import stretch_body.hello_utils as hu
hu.print_stretch_re_use()

Expand All @@ -19,12 +20,12 @@ def val_in_range(val_name, val,vmin, vmax):

# #####################################################

p=pimu.Pimu()
if not p.startup():
exit()
p.pull_status()
val_in_range('Voltage',p.status['voltage'], vmin=p.config['low_voltage_alert'], vmax=14.0)
val_in_range('Current',p.status['current'], vmin=0.1, vmax=p.config['high_current_alert'])
val_in_range('CPU Temp',p.status['cpu_temp'], vmin=15, vmax=80)
r=robot.Robot()
if not r.startup():
sys.exit(1)
r.pimu.pull_status()
val_in_range('Voltage',r.pimu.status['voltage'], vmin=r.pimu.config['low_voltage_alert'], vmax=14.0)
val_in_range('Current',r.pimu.status['current'], vmin=0.1, vmax=r.pimu.config['high_current_alert'])
# val_in_range('CPU Temp',r.pimu.status['cpu_temp'], vmin=15, vmax=80)
print(Style.RESET_ALL)
p.stop()
r.stop()
1 change: 0 additions & 1 deletion tools/bin/stretch_robot_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def get_latest_github_commit(url):

r=robot.Robot()
if not r.startup():
r.logger.error('Failed to startup connection to robot')
sys.exit(1)
if r.pimu.status['runstop_event']:
r.logger.error('Cannot home while run-stopped')
Expand Down
4 changes: 3 additions & 1 deletion tools/bin/stretch_robot_keyboard_teleop.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import stretch_body.robot as hello_robot
from stretch_body.hello_utils import *
import argparse
import sys
print_stretch_re_use()


Expand All @@ -12,7 +13,8 @@


robot=hello_robot.Robot()
robot.startup()
if not robot.startup():
sys.exit(1)

small_move_m=.01
large_move_m=0.1
Expand Down
6 changes: 5 additions & 1 deletion tools/bin/stretch_system_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import sh
import re
import sys
import apt
import git
import yaml
Expand Down Expand Up @@ -68,8 +69,11 @@ def val_in_range(val_name, val,vmin, vmax):
print(Fore.LIGHTBLUE_EX + 'Batch = ' + Fore.CYAN + stretch_batch)
print(Fore.LIGHTBLUE_EX + 'Serial Number = ' + Fore.CYAN + stretch_serial_no)
# create robot instance
print(Style.RESET_ALL)
r=robot.Robot()
r.startup()
if not r.startup():
sys.exit(1)

r.monitor.logger.setLevel('WARN')

# ################### HARDWARE ######################
Expand Down

0 comments on commit 0ec5bd3

Please sign in to comment.