Skip to content

Commit

Permalink
feat: accept UUID in CUDA_VISIBLE_DEVICES round robin assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
JoanFM committed Nov 7, 2022
1 parent 8b81b53 commit b500e09
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
21 changes: 16 additions & 5 deletions jina/orchestrate/deployments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ def _parse_devices(value: str, num_devices: int):
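:return: a list of device ids (ints, or UUID strings when the ids are not numeric), or a slice of the available device range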
"""

all_devices = range(num_devices)
use_uuids = False
if re.match(WRAPPED_SLICE_BASE, value):
value = value[1:-1]

Expand All @@ -744,11 +744,21 @@ def _parse_devices(value: str, num_devices: int):
if len(parts) == 1:
# slice(stop)
parts = [parts[0], str(int(parts[0]) + 1)]
# else: slice(start, stop[, step])
else:
return [int(p) for p in parts]
# try to detect if parts are not numbers
try:
int(parts[0])
except:
use_uuids = True

if not use_uuids:
return [int(p) for p in parts]
else:
return parts
else:
parts = []

all_devices = range(num_devices)
return all_devices[slice(*[int(p) if p else None for p in parts])]

@staticmethod
Expand Down Expand Up @@ -776,10 +786,11 @@ def _roundrobin_cuda_device(device_str: str, replicas: int):

selected_devices = []
if device_str[2:]:
for device_num in Deployment._parse_devices(

for device in Deployment._parse_devices(
device_str[2:], num_devices
):
selected_devices.append(device_num)
selected_devices.append(device)
else:
selected_devices = range(num_devices)
_c = cycle(selected_devices)
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/orchestrate/deployments/test_cuda_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def cuda_total_devices(request):
['RR1:2', 1, {0: 1}, 3],
['RR0,2,3', 3, {0: 0, 1: 2, 2: 3}, 4],
['RR0,2,3', 5, {0: 0, 1: 2, 2: 3, 3: 0, 4: 2}, 4],
['RRUUID1,UUID2,UUID3', 5, {0: 'UUID1', 1: 'UUID2', 2: 'UUID3', 3: 'UUID1', 4: 'UUID2'}, 4],
['RRGPU-0aaaaaaa-74d2-7297-d557-12771b6a79d5,GPU-0bbbbbbb-74d2-7297-d557-12771b6a79d5,GPU-0ccccccc-74d2-7297-d557-12771b6a79d5,GPU-0ddddddd-74d2-7297-d557-12771b6a79d5', 5, {0: 'GPU-0aaaaaaa-74d2-7297-d557-12771b6a79d5', 1: 'GPU-0bbbbbbb-74d2-7297-d557-12771b6a79d5', 2: 'GPU-0ccccccc-74d2-7297-d557-12771b6a79d5', 3: 'GPU-0ddddddd-74d2-7297-d557-12771b6a79d5', 4: 'GPU-0aaaaaaa-74d2-7297-d557-12771b6a79d5'}, 4],
], indirect=['cuda_total_devices']
)
def test_cuda_assignment(device_str, replicas, expected, cuda_total_devices):
Expand Down

0 comments on commit b500e09

Please sign in to comment.