Skip to content

Commit

Permalink
support HBM in utils/numa.py (#3918)
Browse files Browse the repository at this point in the history
Co-authored-by: Olatunji Ruwase <[email protected]>
  • Loading branch information
delock and tjruwase authored Jul 19, 2023
1 parent fc8de76 commit 5dadf68
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 5 deletions.
4 changes: 3 additions & 1 deletion accelerator/cpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ def device_count(self):
# Ignore NUMA nodes that have no cores.
numa_core_lists = get_numa_cores()
numa_count = 0
prev_core_list = []
for core_list in numa_core_lists:
if len(core_list) > 0:
if len(core_list) > 0 and core_list != prev_core_list:
numa_count += 1
prev_core_list = core_list
return numa_count

def synchronize(self, device_index=None):
Expand Down
62 changes: 58 additions & 4 deletions deepspeed/utils/numa.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,65 @@ def get_numactl_cmd(bind_core_list, num_local_procs, local_rank):
# Check whether all cores belong to the same NUMA node; if so, bind the process to that NUMA domain with the -m parameter.
numa_cores = get_numa_cores()
num_numas = len(numa_cores)

numa_mode = "normal"

non_empty_numa_list = []
empty_numa_list = []
previous_numa_cores = []
numa_node_list = []
numa_node_list_list = []
for i in range(num_numas):
if set(core_list_for_rank) <= set(numa_cores[i]):
numactl_cmd.append("-m")
numactl_cmd.append(f"{i}")
break
# look for NUMA nodes with no cores; these are treated as HBM NUMA nodes
if numa_cores[i] == []:
empty_numa_list.append(i)
else:
non_empty_numa_list.append(i)

# check for fake NUMA: a node whose core list is identical to the previous node's
if numa_cores[i] == previous_numa_cores:
if numa_node_list == []:
# first duplication: also add the previous node to the list
numa_node_list.append(i - 1)
numa_node_list.append(i)
else:
if numa_node_list != []:
numa_node_list_list.append(numa_node_list)
numa_node_list = []
previous_numa_cores = numa_cores[i]
if numa_node_list != []:
numa_node_list_list.append(numa_node_list)

if empty_numa_list != [] and len(empty_numa_list) == len(non_empty_numa_list):
numa_mode = "flat_hbm"
numa_dict = dict(zip(non_empty_numa_list, empty_numa_list))
elif numa_node_list_list != []:
numa_mode = "fake"

if numa_mode == "normal":
for i in range(num_numas):
if set(core_list_for_rank) <= set(numa_cores[i]):
numactl_cmd.append("-m")
numactl_cmd.append(f"{i}")
break
elif numa_mode == "flat_hbm":
for i in range(num_numas):
if set(core_list_for_rank) <= set(numa_cores[i]):
numactl_cmd.append("-p")
numactl_cmd.append(f"{numa_dict[i]}")
break
elif numa_mode == "fake":
for i in range(num_numas):
if set(core_list_for_rank) <= set(numa_cores[i]):
for nodes in numa_node_list_list:
if i in nodes:
numactl_cmd.append("-m")
numactl_cmd.append(f"{','.join(map(str, nodes))}")
break
# the following construct breaks out of the outer loop if the inner loop breaks
else:
continue
break

numactl_cmd.append("-C")
last_core = core_list_for_rank[0]
Expand Down

0 comments on commit 5dadf68

Please sign in to comment.