diff --git a/openmp/libomptarget/plugins/hero/CMakeLists.txt b/openmp/libomptarget/plugins/hero/CMakeLists.txt index 9309e57c25e196..b425ac48fcc756 100644 --- a/openmp/libomptarget/plugins/hero/CMakeLists.txt +++ b/openmp/libomptarget/plugins/hero/CMakeLists.txt @@ -17,7 +17,7 @@ if(NOT LIBOMPTARGET_HERO_BUILD) return() endif() -if(NOT EXISTS $ENV{HERO_DEV_INC_DIR}/libhero/herodev.h) +if(NOT EXISTS $ENV{HERO_DEV_INC_DIR}/libhero/hero_api.h) libomptarget_say("Not building HERO offloading plugin: HERO device SDK not found (HERO_DEV_INC_DIR to the correct path)") return() endif() diff --git a/openmp/libomptarget/plugins/hero/plugin-hero-dev.h b/openmp/libomptarget/plugins/hero/plugin-hero-dev.h index a0558aa0719399..40f8c92330552b 100644 --- a/openmp/libomptarget/plugins/hero/plugin-hero-dev.h +++ b/openmp/libomptarget/plugins/hero/plugin-hero-dev.h @@ -56,7 +56,7 @@ extern "C" { -#include "libhero/herodev.h" +#include "libhero/hero_api.h" #define HERO_DEV_DEFAULT_CLUSTER_ID (0x1U) #define HERO_DEV_DEFAULT_FREQ (HERO_DEV_DEFAULT_FREQ_MHZ) @@ -126,19 +126,16 @@ static int init_hero_device() { // reserve virtual addresses overlapping with HERO Device's internal physical address // space - hero_dev_reserve_v_addr(hd); ret = hero_dev_mmap(hd); if (ret < 0) { TRACE("ERROR: cannot load device!"); return ret; } - currFreq = hero_dev_clking_set_freq(hd, HERO_DEV_DEFAULT_FREQ); - if (currFreq > 0) - TRACE("HERO Device running @ %d MHz.", currFreq); - // else - // GOMP_PLUGIN_fatal("HERO device init failed!"); - hero_dev_rab_free(hd, 0x0); + //currFreq = hero_dev_clking_set_freq(hd, HERO_DEV_DEFAULT_FREQ); + //if (currFreq > 0) + // TRACE("HERO Device running @ %d MHz.", currFreq); + hero_dev_reset(hd, 0x1); // initialization of HERO Device, static RAB rules (mbox, L2, ...) @@ -156,7 +153,7 @@ static int init_hero_device() { } static int deinit_hero_device() { - hero_dev_munmap(hd); + //hero_dev_munmap(NULL); } extern "C" bool GOMP_OFFLOAD_init_device(int n __attribute__((unused))) { @@ -174,14 +171,7 @@ extern "C" bool GOMP_OFFLOAD_fini_device(int n __attribute__((unused))) { TRACE("Waiting for EOC..."); hero_dev_exe_wait(hd, HERO_DEV_DEFAULT_TIMEOUT); - hero_dev_exe_stop(hd); - - if (GOMP_OFFLOAD_get_caps() & GOMP_OFFLOAD_CAP_SHARED_MEM) - hero_dev_rab_soc_mh_disable(hd); - - hero_dev_rab_free(hd, 0); - hero_dev_free_v_addr(hd); hero_dev_munmap(hd); return 1; diff --git a/openmp/libomptarget/plugins/hero/rtl.cpp b/openmp/libomptarget/plugins/hero/rtl.cpp index cc0e4b812f7034..0dfc6da87af369 100644 --- a/openmp/libomptarget/plugins/hero/rtl.cpp +++ b/openmp/libomptarget/plugins/hero/rtl.cpp @@ -96,7 +96,7 @@ int32_t __tgt_rtl_init_device(int32_t device_id) { int32_t __tgt_rtl_unregister_lib(__tgt_bin_desc *Desc) { DP("__tgt_rtl_unregister_lib\n"); if (initialized) { - deinit_hero_device(); + hero_dev_munmap(hd); initialized = false; } return OFFLOAD_SUCCESS; @@ -325,7 +325,7 @@ bool load_and_execute_image(__tgt_device_image *image) { __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, __tgt_device_image *image) { - hero_add_timestamp("load_binary",__func__,0); + // hero_add_timestamp("load_binary",__func__,0); DP("__tgt_rtl_load_binary(%d, " DPxMOD ")\n", device_id, DPxPTR(image)); DP("Dev %d: load binary from " DPxMOD " image\n", device_id, @@ -343,7 +343,7 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, return NULL; } - hero_add_timestamp("init_omp",__func__,0); + // hero_add_timestamp("init_omp",__func__,0); // init argument buffers host_arg_buf.reserve(ARG_BUF_SIZE); // memory leak @@ -427,7 +427,7 @@ int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr, assert(hst_ptr == tgt_ptr); return OFFLOAD_SUCCESS; } - //char toprint[128]; + char toprint[128]; //snprintf(toprint, 128, "retrieve-%x", hst_ptr); //hero_add_timestamp(toprint,__func__,0); ret = GOMP_OFFLOAD_dev2host(device_id, hst_ptr, tgt_ptr, size) @@ -445,7 +445,7 @@ int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) { if (device_id == HERODEV_SVM) { return OFFLOAD_SUCCESS; } - //char toprint[128]; + char toprint[128]; //snprintf(toprint, 128, "dealloc-%x", tgt_ptr); //hero_add_timestamp(toprint,__func__,0); ret = GOMP_OFFLOAD_free(device_id, tgt_ptr) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL; @@ -460,7 +460,7 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, int32_t arg_num, int32_t team_num, int32_t thread_limit, uint64_t loop_tripcount) { - hero_add_timestamp("send_args",__func__,0); + //hero_add_timestamp("send_args",__func__,0); for (int32_t i = 0; i < arg_num; i++) { if (tgt_offsets[i] != 0) { fprintf(stderr, "Unimplemented: non-zero offset"); @@ -477,7 +477,7 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, size_t size = sizeof(uint64_t) * host_arg_buf.size(); __tgt_rtl_data_submit(device_id, dev_arg_buf, host_buf, size); - hero_add_timestamp("offload_wait",__func__,0); + // hero_add_timestamp("offload_wait",__func__,0); hero_dev_mbox_write(hd, MBOX_DEVICE_START); hero_dev_mbox_write(hd, (uint32_t)tgt_entry_ptr); @@ -501,7 +501,7 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, DP("Done offloading, cycles to execute kernel: %d!\n", (int)ret[1]); hero_device_cycles[hero_num_device_cycles++] = (uint32_t) ret[1]; - hero_add_timestamp("offload_return",__func__,0); + // hero_add_timestamp("offload_return",__func__,0); return OFFLOAD_SUCCESS; } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 703355c70bcccb..f4382ab91bd618 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -22,7 +22,7 @@ #ifdef LIBOMPTARGET_HERO extern "C" { -#include "libhero/herodev.h" +#include "libhero/hero_api.h" } #endif @@ -1286,7 +1286,7 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr, AsyncInfoTy &AsyncInfo) { TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", loc); #ifdef LIBOMPTARGET_HERO - hero_add_timestamp((char*)"all",(char*)__func__,0); + //hero_add_timestamp((char*)"all",(char*)__func__,0); #endif DeviceTy &Device = *PM->Devices[DeviceId]; int Ret = targetDataBegin(loc, Device, ArgNum, ArgBases, Args, ArgSizes, @@ -1417,7 +1417,7 @@ static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { #ifdef LIBOMPTARGET_HERO - hero_add_timestamp((char*)"all",(char*)__func__,0); + //hero_add_timestamp((char*)"all",(char*)__func__,0); #endif //TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", loc); DeviceTy &Device = *PM->Devices[DeviceId];