Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SWDEV-294351 Fixed zero bandwith for hsa-api and hip-api traces #59

Open
wants to merge 1 commit into
base: rocm-4.3.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 112 additions & 76 deletions bin/mem_manager.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sqlitedb import SQLiteDB

pinned = ['hipMallocHost', 'hipHostMalloc', 'hipHostAlloc']
ondevice = ['hipMalloc', 'hipMallocPitch', 'hipMallocArray', 'hipMalloc3DArray']
ondevice = ['hipMalloc', 'hipMallocPitch', 'hipMallocArray', 'hipMalloc3DArray', 'hsa_amd_memory_pool_allocate']

mm_table_descr = [
['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'Direction', 'SrcType', 'DstType', 'Size', 'BW', 'Async'],
Expand All @@ -37,6 +37,33 @@ def fatal(msg):

DELIM = ','

class MemManagerRecord:
def __init__(self):
self.start_time = 0
self.end_time = 0
self.pid = 0
self.tid = 0
self.event = ''
self.direction = ''
self.srcptr_type = ''
self.dstptr_type = ''
self.size = 0
self.bandwidth = 0
self.is_async = False

def str(self):
return str(self.start_time) + DELIM \
+ str(self.end_time) + DELIM \
+ str(self.pid) + DELIM \
+ str(self.tid) + DELIM \
+ self.event + DELIM \
+ 'Direction=' + self.direction + DELIM \
+ 'SrcType=' + self.srcptr_type + DELIM \
+ 'DstType=' + self.dstptr_type + DELIM \
+ 'Size=' + str(self.size) + DELIM \
+ 'BW=' + str(self.bandwidth) + DELIM \
+ 'Async=' + str(self.is_async)

# Mem copy manager class
class MemManager:

Expand Down Expand Up @@ -69,94 +96,82 @@ def parse_hsa_handles(self, infile):

# register alloc and memcpy API calls
# ['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index', 'Data'],
def register_api(self, rec_vals):
def register_api(self, rec_vals, copy_index):
res = ''
record_name = rec_vals[4] # 'Name'
if record_name == "hipMemcpyToArray":
return res

record_args = rec_vals[5] # 'args'
malloc_ptrn = re.compile(r'hip.*Malloc|hsa_amd_memory_pool_allocate')
mcopy_ptrn = re.compile(r'hipMemcpy|hsa_amd_memory_async_copy')

if malloc_ptrn.match(record_name):
self.add_allocation(record_name, record_args)
elif mcopy_ptrn.match(record_name):
res = self.add_memcpy(rec_vals)
res = self.add_memcpy(rec_vals, copy_index)

return res


# register memcpy asynchronous activity
# rec_vals: ['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index', 'Data', ...
def register_activity(self, rec_vals):
procid = rec_vals[5] # 'pid'
recordid = rec_vals[8] # 'Index'
tid = rec_vals[6]
async_copy_start_time = rec_vals[0]
async_copy_end_time = rec_vals[1]

return self.register_copy_impl(async_copy_start_time, async_copy_end_time, procid, tid, recordid)

# register memcpy asynchronous copy
# ['BeginNs', 'EndNs', 'Name', 'pid', 'tid', 'Index', ...
def register_copy(self, rec_vals):
data = ''
event = rec_vals[2] # 'Name'
procid = rec_vals[3] # 'pid'
recordid = rec_vals[5] # 'Index'
size_ptrn = re.compile(DELIM + 'Size=(\d+)' + DELIM)

# query syncronous memcopy API record
key = (recordid, procid, 0)
if key in self.memcopies:
data = self.memcopies[key]

# query asyncronous memcopy API record
key = (recordid, procid, 1)
if key in self.memcopies:
if data != '': fatal('register_copy: corrupted record sync/async')

async_copy_start_time = rec_vals[0]
async_copy_end_time = rec_vals[1]

duration = int(async_copy_end_time) - int(async_copy_start_time)
size = 0
m = size_ptrn.search(self.memcopies[key])
if m:
size = m.group(1)
bandwidth = round(float(size) * 1000 / duration, 2)

tid = rec_vals[4]
copy_line_header = str(async_copy_start_time) + DELIM + str(async_copy_end_time) + DELIM + str(procid) + DELIM + str(tid)
copy_line_footer = 'BW=' + str(bandwidth) + DELIM + 'Async=' + str(1)
data = copy_line_header + self.memcopies[key] + copy_line_footer
self.memcopies[key] = data
async_copy_start_time = rec_vals[0]
async_copy_end_time = rec_vals[1]
tid = rec_vals[4]

return data

# register memcpy asynchronous activity
# rec_vals: ['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index', 'Data', ...
def register_activity(self, rec_vals):
data = ''
event = rec_vals[4] # 'Name'
procid = rec_vals[5] # 'pid'
recordid = rec_vals[7] # 'Index'
size_ptrn = re.compile(DELIM + 'Size=(\d+)' + DELIM)
return self.register_copy_impl(async_copy_start_time, async_copy_end_time, procid, tid, recordid)

def register_copy_impl(self, start_time, end_time, procid, tid, recordid):
data = MemManagerRecord()
# query syncronous memcopy API record
key = (recordid, procid, 0)
if key in self.memcopies:
data = self.memcopies[key]
sync_key = (recordid, procid, 0)
async_key = (recordid, procid, 1)

# query asyncronous memcopy API record
key = (recordid, procid, 1)
if key in self.memcopies:
if data != '': fatal('register_activity: corrupted record sync/async')

async_copy_start_time = rec_vals[0]
async_copy_end_time = rec_vals[1]
if sync_key in self.memcopies and async_key in self.memcopies:
fatal('register_copy_impl: corrupted record sync/async')

duration = int(async_copy_end_time) - int(async_copy_start_time)
size = 0
m = size_ptrn.search(self.memcopies[key])
if m:
size = m.group(1)
bandwidth = round(float(size) * 1000 / duration, 2)
if sync_key in self.memcopies:
return self.memcopies[sync_key].str()

tid = rec_vals[6]
copy_line_header = str(async_copy_start_time) + DELIM + str(async_copy_end_time) + DELIM + str(procid) + DELIM + str(tid)
copy_line_footer = 'BW=' + str(bandwidth) + DELIM + 'Async=' + str(1)
data = copy_line_header + self.memcopies[key] + copy_line_footer
self.memcopies[key] = data

return data
# query asyncronous memcopy API record
if not async_key in self.memcopies:
# is this a valid case? mb fatal?
# writeImage is not added through hipMemcpyArray
# fatal("async_key not found")
return ''

new_data = self.memcopies[async_key]
new_data.start_time = start_time
new_data.end_time = end_time

duration = int(end_time) - int(start_time)
# hsa api method "record" can have duration equal to 0
# if duration == 0:
# fatal('register_copy_impl: zero duration')

new_data.bandwidth = round(float(new_data.size) * 1000 / duration, 2) if duration != 0 else 0
# if (new_data.bandwidth == 0):
# fatal("register_copy_impl: zero bandwith")
new_data.tid = tid
new_data.is_async = True

self.memcopies[async_key] = new_data
return new_data.str()

# add allocation to map
def add_allocation(self, event, args):
Expand Down Expand Up @@ -221,8 +236,8 @@ def get_ptr_type(self, ptr):
return addr_type

# add memcpy to map
def add_memcpy(self, recvals):
recordid = recvals[6] #same as corrid
def add_memcpy(self, recvals, copy_index):
recordid = copy_index #recvals[6] #same as corrid
event = recvals[4]
start_time = recvals[0] # sync time stamp
end_time = recvals[1] # sync time stamp
Expand All @@ -239,6 +254,9 @@ def add_memcpy(self, recvals):
# hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
# size_t count, hipMemcpyKind kind);
hip_memcpy_ptrn3 = re.compile(r'\(\s*dst\((.*)\) .* src\((.*)\) count\((\d+)\).*\)')
# hipMemcpyToSymbol(const void* symbolName, const void* src, size_t sizeBytes,
# size_t offset = 0, hipMemcpyKind kind)
hip_memcpy_ptrn4 = re.compile(r'\(\s*symbol\((.*)\) src\((.*)\) sizeBytes\((\d+)\).*\)')
# memcopy with kind argument
hip_memcpy_ptrn_kind = re.compile(r'.* kind\((\d+)\)\s*.*')
#hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src,
Expand All @@ -257,13 +275,14 @@ def add_memcpy(self, recvals):
# aysnc memcopy
async_event_ptrn = re.compile(r'Async|async')
m_basic_hip = hip_memcpy_ptrn.match(args)
m_basic_hsa3 = hip_memcpy_ptrn4.match(args)
m_basic_hsa_prev = hsa_memcpy_ptrn_prev.match(args)
m_basic_hsa = hsa_memcpy_ptrn.match(args)
m_basic_hsa2 = hsa_memcpy_ptrn2.match(args)
is_hip = True if not (m_basic_hsa_prev or m_basic_hsa or m_basic_hsa2) else False
m_2d = hip_memcpy_ptrn2.match(args)
m_array = hip_memcpy_ptrn3.match(args)
is_async = 1 if async_event_ptrn.search(event) else 0
is_async = True if async_event_ptrn.search(event) else False
async_copy_start_time = -1
async_copy_end_time = -1
copy_line = ''
Expand Down Expand Up @@ -299,6 +318,7 @@ def add_memcpy(self, recvals):
srcptr_type = self.get_ptr_type(src_agent_ptr)
size = int(m_basic_hsa_prev.group(5))
condition_matched = True

if m_basic_hsa:
dstptr = m_basic_hsa.group(1)
dst_agent_ptr = m_basic_hsa.group(2)
Expand All @@ -322,6 +342,14 @@ def add_memcpy(self, recvals):
size = x*y*z
condition_matched = True

if m_basic_hsa3:
dstptr = m_basic_hsa3.group(1)
dstptr_type = self.get_ptr_type(dstptr)
srcptr = m_basic_hsa3.group(2)
srcptr_type = self.get_ptr_type(srcptr)
size = int(m_basic_hsa3.group(3))
condition_matched = True

if m_array:
dstptr = m_array.group(1)
dstptr_type = self.get_ptr_type(dstptr)
Expand Down Expand Up @@ -370,15 +398,22 @@ def add_memcpy(self, recvals):
if self.hsa_agent_types[dst_agent_ptr] == 1: direction += 'D'
elif self.hsa_agent_types[dst_agent_ptr] == 0: direction += 'H'

copy_line_header = ''
copy_line_footer = ''
copy_line_header = str(start_time) + DELIM + str(end_time) + DELIM + str(pid) + DELIM + str(tid)
copy_line_footer = "BW=" + str(bandwidth) + DELIM + 'Async=' + str(is_async)
mem_entry = MemManagerRecord()

copy_line = copy_line_header + DELIM + event + DELIM + 'Direction=' + direction + DELIM + 'SrcType=' + srcptr_type + DELIM + 'DstType=' + dstptr_type + DELIM + "Size=" + str(size) + DELIM + copy_line_footer
mem_entry.start_time = start_time
mem_entry.end_time = end_time
mem_entry.pid = pid
mem_entry.tid = tid
mem_entry.event = event
mem_entry.direction = direction
mem_entry.srcptr_type = srcptr_type
mem_entry.dstptr_type = dstptr_type
mem_entry.size = size
mem_entry.bandwidth = bandwidth
mem_entry.is_async = is_async

self.memcopies[(recordid, procid, is_async)] = copy_line
return copy_line;
self.memcopies[(recordid, procid, is_async)] = mem_entry
return mem_entry.str();

def dump_data(self, table_name, file_name):
# To create memcopy info table in DB
Expand All @@ -388,7 +423,7 @@ def dump_data(self, table_name, file_name):
fld_ptrn = re.compile(r'(.*)=(.*)')
for (key, record) in self.memcopies.items():
rec_vals_array = []
for rec in record.split(DELIM):
for rec in record.str().split(DELIM):
fld_ptrnm = fld_ptrn.match(rec)
if fld_ptrnm:
rec_vals_array.append(fld_ptrnm.group(2))
Expand All @@ -398,3 +433,4 @@ def dump_data(self, table_name, file_name):

# To dump the memcopy info table as CSV
self.db.dump_csv(table_name, file_name)

13 changes: 9 additions & 4 deletions bin/tblextr.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
line_index += 1

record = line[:-1]

corr_id = 0
m = ptrn_corr_id.search(record)
if m:
Expand Down Expand Up @@ -478,6 +477,7 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
# asyncronous opeartion API found
op_found = 0
mcopy_found = 0
hsa_mcopy_found = 0

# extract kernel name string
(kernel_str, kernel_found) = get_field(record_args, 'kernel')
Expand All @@ -496,10 +496,10 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
if hsa_mcopy_ptrn.match(record_name):
mcopy_found = 1
op_found = 1
hsa_mcopy_found = 1

stream_id = thread_id
hsa_patch_data[(copy_index, proc_id)] = thread_id
copy_index += 1

if op_found:
ops_patch_data[(corr_id, proc_id)] = (thread_id, stream_id, kernel_str)
Expand All @@ -521,7 +521,12 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
if expl_id: dep_str['id'].append(corr_id)

# memcopy registering
api_data = memory_manager.register_api(rec_vals) if mcopy_data_enabled else ''
if hsa_mcopy_found:
api_data = memory_manager.register_api(rec_vals, copy_index) if mcopy_data_enabled else ''
copy_index += 1
else:
api_data = memory_manager.register_api(rec_vals, corr_id) if mcopy_data_enabled else ''

rec_vals.append(api_data)

# setting section and lane
Expand Down Expand Up @@ -647,7 +652,7 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
name = m.group(1)
corr_id = int(m.group(2))
proc_id = int(m.group(3))

# checking name for memcopy pattern
is_barrier = 0
if ptrn_mcopy.search(name):
Expand Down