|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ROCM TRACER (ACTIVITY API) | |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | % SELF | |--------------------------------------------------------------------------------------------------------------------------|--------|--------|-----------|--------|----------|----------|--------| | |0>>> hipSetDevice | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipGetDevice | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipGetDeviceProperties | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipMemGetInfo | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipMalloc | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipMemset | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipDeviceSynchronize | 2 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipLaunchKernel | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipGetLastError | 496 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> |_void benchmark_func, 256, 8u, 0u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 80u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000294 | 0.000294 | 100.0 | | |0>>> hipEventCreate | 990 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000360 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 2u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000357 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 3u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000538 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 4u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000539 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000185 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000275 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 5u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000179 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000279 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 6u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000537 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000277 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000278 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 7u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000539 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000277 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000279 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 8u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000537 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000185 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000278 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 9u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000535 | 0.000178 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000186 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000280 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000277 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 10u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000359 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000277 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000292 | 0.000097 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000185 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 11u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000358 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000279 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000304 | 0.000101 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000277 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 12u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000537 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000186 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000211 | 0.000106 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000278 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 13u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000359 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000278 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000333 | 0.000111 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000278 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 14u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000361 | 0.000181 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000280 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000352 | 0.000117 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000185 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 15u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000537 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000280 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000369 | 0.000123 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000278 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 16u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000536 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000128 | 0.000128 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 17u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000535 | 0.000178 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000280 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000404 | 0.000135 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000280 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 18u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000537 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000283 | 0.000094 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000422 | 0.000141 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000187 | 0.000094 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 20u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000537 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000296 | 0.000099 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000461 | 0.000154 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000187 | 0.000094 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 22u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000535 | 0.000178 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000103 | 0.000103 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000334 | 0.000167 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000281 | 0.000094 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 24u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000359 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000217 | 0.000108 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000536 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000300 | 0.000100 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 28u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000362 | 0.000181 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000363 | 0.000121 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000612 | 0.000204 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000215 | 0.000108 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 32u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000544 | 0.000181 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000396 | 0.000132 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000689 | 0.000230 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000249 | 0.000124 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 40u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000572 | 0.000191 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000315 | 0.000158 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000562 | 0.000281 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 48u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000550 | 0.000183 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000663 | 0.000332 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000489 | 0.000163 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 56u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000457 | 0.000228 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000629 | 0.000210 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.001148 | 0.000383 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000548 | 0.000183 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 64u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000732 | 0.000244 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.000704 | 0.000235 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.001344 | 0.000448 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000220 | 0.000220 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000299 | 0.000299 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000588 | 0.000294 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.001655 | 0.000552 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000262 | 0.000262 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 128u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.001747 | 0.000873 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.002560 | 0.000853 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.005075 | 0.001692 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.004976 | 0.001659 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 512u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.005126 | 0.001709 | 100.0 | | |0>>> hipExtLaunchKernel | 495 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000275 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000182 | 0.000182 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000179 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000180 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 15u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000279 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000103 | 0.000103 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 28u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000180 | 0.000180 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000434 | 0.000145 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000208 | 0.000208 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000331 | 0.000331 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000223 | 0.000223 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 80u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000589 | 0.000294 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000522 | 0.000261 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 96u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func, 256, 8u, 512u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.001669 | 0.001669 | 100.0 | | |0>>> hipEventSynchronize | 495 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> |_void benchmark_func(short, short*) | 1 | 1 | roctracer | sec | 0.000008 | 0.000008 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000183 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 0u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.000538 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 1u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000179 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.000276 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000182 | 0.000091 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 2u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000220 | 0.000220 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 96u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000349 | 0.000349 | 100.0 | | |0>>> hipEventElapsedTime | 495 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipEventDestroy | 990 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> |_void benchmark_func, 256, 8u, 2u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000179 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000185 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000179 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 6u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(float, float*) | 2 | 1 | roctracer | sec | 0.000184 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 7u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 8u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000179 | 0.000179 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000092 | 0.000092 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000106 | 0.000106 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 14u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000178 | 0.000178 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.000187 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.000258 | 0.000129 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000093 | 0.000093 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000103 | 0.000103 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000166 | 0.000166 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000178 | 0.000178 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000108 | 0.000108 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000107 | 0.000107 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 32u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(float, float*) | 1 | 1 | roctracer | sec | 0.000125 | 0.000125 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) | 1 | 1 | roctracer | sec | 0.000158 | 0.000158 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 1 | 1 | roctracer | sec | 0.000280 | 0.000280 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000409 | 0.000204 | 100.0 | | |0>>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000226 | 0.000226 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 64u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 2 | 1 | roctracer | sec | 0.000692 | 0.000346 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.001032 | 0.000344 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.001966 | 0.000655 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.001327 | 0.000442 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 128u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 3 | 1 | roctracer | sec | 0.001369 | 0.000456 | 100.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) | 3 | 1 | roctracer | sec | 0.001338 | 0.000446 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 3 | 1 | roctracer | sec | 0.002589 | 0.000863 | 100.0 | | |0>>> |_void benchmark_func(float, float*) | 3 | 1 | roctracer | sec | 0.002544 | 0.000848 | 100.0 | | |0>>> |_void benchmark_func, 256, 8u, 256u>(HIP_vector_type, HIP_vector_type>> |_void benchmark_func(double, double*) | 1 | 1 | roctracer | sec | 0.000875 | 0.000875 | 100.0 | | |0>>> hipMemcpy | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> |_void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) | 2 | 1 | roctracer | sec | 0.003334 | 0.001667 | 100.0 | | |0>>> |_void benchmark_func(int, int*) | 2 | 1 | roctracer | sec | 0.006699 | 0.003350 | 100.0 | | |0>>> hipFree | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | | |0>>> hipDeviceReset | 1 | 0 | roctracer | sec | 0.000000 | 0.000000 | 0.0 | |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|