Skip to content

Commit

Permalink
add files to reproduce tests
Browse files Browse the repository at this point in the history
  • Loading branch information
amcamd committed Feb 25, 2017
1 parent 746f2b1 commit 50793b2
Show file tree
Hide file tree
Showing 7 changed files with 1,792 additions and 1 deletion.
123 changes: 123 additions & 0 deletions Examples/ROCgemm/lib/dnn_gemms.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*******************************************************************************
* Copyright (C) 2016 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
* ies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
* PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
* CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*******************************************************************************/

/*
 * Table of GEMM problem descriptions.
 *
 * Each row holds six values, in this order:
 *     M, N, K, transA, transB, cnt
 * NOTE(review): presumably M/N/K are the GEMM dimensions, transA/transB are
 * 0/1 transpose flags and cnt is a repeat or batch count -- confirm against
 * the code that consumes this table.
 *
 * Rows that are commented out below are disabled alternatives. To enable one,
 * uncomment it and make sure every row except the last ends with a comma.
 * The row count (num_gemm_params) is derived from the table, so it does not
 * need to be edited when rows are added or removed.
 */
unsigned int gemm_params[][6] = {
// M, N, K, transA, transB, cnt
{ 1000, 128, 4096, 1, 0, 1 },
{ 4096, 128, 4096, 1, 0, 1 },
{ 4096, 128, 9216, 1, 0, 1 }

// Disabled rows, first group.
// {35, 8457, 1760, 0, 0, 1 },
// {2560, 16, 2560, 0, 0, 1 },
// {2560, 32, 2560, 0, 0, 1 },
// {2560, 64, 2560, 0, 0, 1 },
// {2560, 128, 2560, 0, 0, 1 },
// {2560, 5000, 2560, 0, 0, 1 },
// {2560, 5000, 2560, 0, 1, 1 },
// {35, 8457, 1760, 1, 0, 1 },
// {2560, 16, 2560, 1, 0, 1 },
// {2560, 32, 2560, 1, 0, 1 },
// {2560, 64, 2560, 1, 0, 1 },
// {2560, 128, 2560, 1, 0, 1 },
// {2560, 5000, 2560, 1, 0, 1 }

// Disabled rows, second group.
// { 1760, 16, 1760, 0, 0, 1 },
// { 1760, 32, 1760, 0, 0, 1 },
// { 1760, 64, 1760, 0, 0, 1 },
// { 1760, 128, 1760, 0, 0, 1 },
// { 1760, 7000, 1760, 0, 0, 1 },
// { 2048, 16, 2048, 0, 0, 1 },
// { 2048, 32, 2048, 0, 0, 1 },
// { 2048, 64, 2048, 0, 0, 1 },
// { 2048, 128, 2048, 0, 0, 1 },
// { 2048, 7000, 2048, 0, 0, 1 },
// { 2560, 16, 2560, 0, 0, 1 },
// { 2560, 32, 2560, 0, 0, 1 },
// { 2560, 64, 2560, 0, 0, 1 },
// { 2560, 128, 2560, 0, 0, 1 },
// { 2560, 7000, 2560, 0, 0, 1 },
// { 256, 256, 256, 1, 0, 1024 },
// { 4096, 128, 4096, 1, 0, 1 },
// { 1000, 128, 4096, 1, 0, 1 },
// { 4096, 64, 4096, 0, 0, 1 },
// { 4096, 128, 4096, 0, 0, 1 },
// { 4096, 7000, 4096, 0, 0, 1 },
// { 1760, 16, 1760, 1, 0, 1 },
// { 1760, 32, 1760, 1, 0, 1 },
// { 1760, 64, 1760, 1, 0, 1 },
// { 1760, 128, 1760, 1, 0, 1 },
// { 1760, 7000, 1760, 1, 0, 1 },
// { 2048, 16, 2048, 1, 0, 1 },
// { 2048, 32, 2048, 1, 0, 1 },
// { 2048, 64, 2048, 1, 0, 1 },
// { 2048, 128, 2048, 1, 0, 1 },
// { 2048, 7000, 2048, 1, 0, 1 },
// { 2560, 16, 2560, 1, 0, 1 },
// { 2560, 32, 2560, 1, 0, 1 },
// { 2560, 64, 2560, 1, 0, 1 },
// { 2560, 128, 2560, 1, 0, 1 },
// { 2560, 7000, 2560, 1, 0, 1 },
// { 4096, 16, 4096, 1, 0, 1 },
// { 4096, 32, 4096, 1, 0, 1 },
// { 4096, 64, 4096, 1, 0, 1 },
// { 4096, 128, 4096, 1, 0, 1 },
// { 4096, 7000, 4096, 1, 0, 1 },
// { 1760, 7133, 1760, 0, 1, 1 },
// { 2048, 7133, 2048, 0, 1, 1 },
// { 2560, 7133, 2560, 0, 1, 1 },
// { 4096, 7133, 4096, 0, 1, 1 },
// { 5124, 9124, 1760, 0, 0, 1 },
// { 35, 8457, 1760, 0, 0, 1 },
// { 5124, 9124, 2048, 0, 0, 1 },
// { 35, 8457, 2048, 0, 0, 1 },
// { 5124, 9124, 2560, 0, 0, 1 },
// { 35, 8457, 2560, 0, 0, 1 },
// { 5124, 9124, 4096, 0, 0, 1 },
// { 35, 8457, 4096, 0, 0, 1 },
// { 5124, 9124, 1760, 1, 0, 1 },
// { 35, 8457, 1760, 1, 0, 1 },
// { 5124, 9124, 2048, 1, 0, 1 },
// { 35, 8457, 2048, 1, 0, 1 },
// { 5124, 9124, 2560, 1, 0, 1 },
// { 35, 8457, 2560, 1, 0, 1 },
// { 5124, 9124, 4096, 1, 0, 1 },
// { 35, 8457, 4096, 1, 0, 1 },
// { 7680, 16, 2560, 0, 0, 1 },
// { 7680, 32, 2560, 0, 0, 1 },
// { 7680, 64, 2560, 0, 0, 1 },
// { 7680, 128, 2560, 0, 0, 1 },
// { 7680, 16, 2560, 1, 0, 1 },
// { 7680, 32, 2560, 1, 0, 1 },
// { 7680, 64, 2560, 1, 0, 1 },
// { 7680, 128, 2560, 1, 0, 1 },
// { 3072, 16, 1024, 0, 0, 1 },
// { 3072, 32, 1024, 0, 0, 1 },
// { 3072, 64, 1024, 0, 0, 1 },
// { 3072, 128, 1024, 0, 0, 1 },
// { 3072, 16, 1024, 1, 0, 1 },
// { 3072, 32, 1024, 1, 0, 1 },
// { 3072, 64, 1024, 1, 0, 1 },
// { 3072, 128, 1024, 1, 0, 1 },
// { 3072, 7435, 1024, 0, 1, 1 },
// { 7680, 5481, 2560, 0, 1, 1 }
};

/*
 * Number of rows in gemm_params. Computed from the array itself so that it
 * always matches the table; it was previously a hand-maintained literal.
 * Must be defined after gemm_params so the array type is complete here.
 */
const unsigned int num_gemm_params =
    sizeof(gemm_params) / sizeof(gemm_params[0]);
Binary file added Examples/ROCgemm/lib/librocblas-hcc.a
Binary file not shown.
Binary file added Examples/ROCgemm/lib/librocblas-hcc.a.verb
Binary file not shown.
102 changes: 102 additions & 0 deletions Examples/ROCgemm/out.summary
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
out00:min,ave,max,rsd_gflops, number_iterations = 294, 319, 470, 8.0%, 297
out01:min,ave,max,rsd_gflops, number_iterations = 750, 894, 926, 1.7%, 854
out02:min,ave,max,rsd_gflops, number_iterations = 296, 316, 739, 11.2%, 287
out03:min,ave,max,rsd_gflops, number_iterations = 751, 898, 929, 1.7%, 848
out04:min,ave,max,rsd_gflops, number_iterations = 757, 892, 922, 2.0%, 852
out05:min,ave,max,rsd_gflops, number_iterations = 740, 901, 931, 1.6%, 853
out06:min,ave,max,rsd_gflops, number_iterations = 284, 312, 401, 4.4%, 289
out07:min,ave,max,rsd_gflops, number_iterations = 747, 894, 924, 1.9%, 839
out08:min,ave,max,rsd_gflops, number_iterations = 751, 901, 931, 1.5%, 839
out09:min,ave,max,rsd_gflops, number_iterations = 758, 899, 932, 1.6%, 853
out10:min,ave,max,rsd_gflops, number_iterations = 752, 891, 925, 2.1%, 820
out11:min,ave,max,rsd_gflops, number_iterations = 292, 316, 602, 9.9%, 292
out12:min,ave,max,rsd_gflops, number_iterations = 762, 898, 931, 1.7%, 829
out13:min,ave,max,rsd_gflops, number_iterations = 581, 893, 924, 2.0%, 837
out14:min,ave,max,rsd_gflops, number_iterations = 683, 901, 933, 1.5%, 823
out15:min,ave,max,rsd_gflops, number_iterations = 296, 313, 466, 5.7%, 293
out16:min,ave,max,rsd_gflops, number_iterations = 749, 893, 929, 1.8%, 839
out17:min,ave,max,rsd_gflops, number_iterations = 759, 901, 933, 1.6%, 836
out18:min,ave,max,rsd_gflops, number_iterations = 746, 898, 936, 1.8%, 832
out19:min,ave,max,rsd_gflops, number_iterations = 753, 893, 928, 1.7%, 832
out20:min,ave,max,rsd_gflops, number_iterations = 293, 320, 640, 12.3%, 296
out21:min,ave,max,rsd_gflops, number_iterations = 765, 898, 928, 1.6%, 828
out22:min,ave,max,rsd_gflops, number_iterations = 746, 892, 923, 2.0%, 832
out23:min,ave,max,rsd_gflops, number_iterations = 746, 901, 933, 1.5%, 820
out24:min,ave,max,rsd_gflops, number_iterations = 288, 312, 418, 4.7%, 293
out25:min,ave,max,rsd_gflops, number_iterations = 751, 894, 934, 1.7%, 852
out26:min,ave,max,rsd_gflops, number_iterations = 795, 902, 939, 1.4%, 852
out27:min,ave,max,rsd_gflops, number_iterations = 763, 897, 929, 1.9%, 830
out28:min,ave,max,rsd_gflops, number_iterations = 752, 893, 926, 1.6%, 776
out29:min,ave,max,rsd_gflops, number_iterations = 286, 318, 499, 9.7%, 289
out30:min,ave,max,rsd_gflops, number_iterations = 753, 896, 923, 1.7%, 839
out31:min,ave,max,rsd_gflops, number_iterations = 710, 891, 927, 1.9%, 822
out32:min,ave,max,rsd_gflops, number_iterations = 754, 901, 934, 1.6%, 853
out33:min,ave,max,rsd_gflops, number_iterations = 291, 312, 448, 4.6%, 296
out34:min,ave,max,rsd_gflops, number_iterations = 753, 892, 924, 2.0%, 835
out35:min,ave,max,rsd_gflops, number_iterations = 663, 901, 935, 1.7%, 819
out36:min,ave,max,rsd_gflops, number_iterations = 756, 898, 931, 1.8%, 848
out37:min,ave,max,rsd_gflops, number_iterations = 741, 891, 927, 2.2%, 829
out38:min,ave,max,rsd_gflops, number_iterations = 295, 321, 564, 11.5%, 291
out39:min,ave,max,rsd_gflops, number_iterations = 452, 897, 927, 2.3%, 835
out40:min,ave,max,rsd_gflops, number_iterations = 713, 892, 925, 2.1%, 827
out41:min,ave,max,rsd_gflops, number_iterations = 770, 902, 939, 1.4%, 830
out42:min,ave,max,rsd_gflops, number_iterations = 291, 313, 476, 4.6%, 291
out43:min,ave,max,rsd_gflops, number_iterations = 746, 891, 921, 2.1%, 817
out44:min,ave,max,rsd_gflops, number_iterations = 782, 901, 935, 1.5%, 838
out45:min,ave,max,rsd_gflops, number_iterations = 759, 898, 933, 1.8%, 854
out46:min,ave,max,rsd_gflops, number_iterations = 747, 891, 922, 2.3%, 842
out47:min,ave,max,rsd_gflops, number_iterations = 296, 319, 494, 10.0%, 293
out48:min,ave,max,rsd_gflops, number_iterations = 756, 897, 932, 1.6%, 828
out49:min,ave,max,rsd_gflops, number_iterations = 735, 892, 930, 2.1%, 847


out00:max,ave,min,rsd_seconds, number_iterations = 0.003562, 0.003290, 0.002230, 6.4%, 297
out01:max,ave,min,rsd_seconds, number_iterations = 0.001398, 0.001173, 0.001132, 1.8%, 854
out02:max,ave,min,rsd_seconds, number_iterations = 0.003545, 0.003317, 0.001419, 7.1%, 287
out03:max,ave,min,rsd_seconds, number_iterations = 0.001396, 0.001168, 0.001128, 1.9%, 848
out04:max,ave,min,rsd_seconds, number_iterations = 0.001385, 0.001175, 0.001138, 2.2%, 852
out05:max,ave,min,rsd_seconds, number_iterations = 0.001418, 0.001164, 0.001127, 1.8%, 853
out06:max,ave,min,rsd_seconds, number_iterations = 0.003697, 0.003358, 0.002613, 3.9%, 289
out07:max,ave,min,rsd_seconds, number_iterations = 0.001404, 0.001174, 0.001135, 2.1%, 839
out08:max,ave,min,rsd_seconds, number_iterations = 0.001396, 0.001163, 0.001127, 1.6%, 839
out09:max,ave,min,rsd_seconds, number_iterations = 0.001384, 0.001167, 0.001125, 1.8%, 853
out10:max,ave,min,rsd_seconds, number_iterations = 0.001395, 0.001177, 0.001134, 2.3%, 820
out11:max,ave,min,rsd_seconds, number_iterations = 0.003591, 0.003315, 0.001742, 7.1%, 292
out12:max,ave,min,rsd_seconds, number_iterations = 0.001376, 0.001167, 0.001126, 1.8%, 829
out13:max,ave,min,rsd_seconds, number_iterations = 0.001803, 0.001174, 0.001135, 2.5%, 837
out14:max,ave,min,rsd_seconds, number_iterations = 0.001535, 0.001164, 0.001124, 1.7%, 823
out15:max,ave,min,rsd_seconds, number_iterations = 0.003538, 0.003346, 0.002251, 4.7%, 293
out16:max,ave,min,rsd_seconds, number_iterations = 0.001400, 0.001174, 0.001128, 2.0%, 839
out17:max,ave,min,rsd_seconds, number_iterations = 0.001382, 0.001164, 0.001124, 1.7%, 836
out18:max,ave,min,rsd_seconds, number_iterations = 0.001406, 0.001168, 0.001120, 1.9%, 832
out19:max,ave,min,rsd_seconds, number_iterations = 0.001393, 0.001175, 0.001130, 1.9%, 832
out20:max,ave,min,rsd_seconds, number_iterations = 0.003577, 0.003276, 0.001638, 8.4%, 296
out21:max,ave,min,rsd_seconds, number_iterations = 0.001370, 0.001167, 0.001130, 1.7%, 828
out22:max,ave,min,rsd_seconds, number_iterations = 0.001406, 0.001176, 0.001136, 2.2%, 832
out23:max,ave,min,rsd_seconds, number_iterations = 0.001405, 0.001164, 0.001124, 1.6%, 820
out24:max,ave,min,rsd_seconds, number_iterations = 0.003638, 0.003361, 0.002511, 4.0%, 293
out25:max,ave,min,rsd_seconds, number_iterations = 0.001397, 0.001173, 0.001123, 1.9%, 852
out26:max,ave,min,rsd_seconds, number_iterations = 0.001318, 0.001163, 0.001116, 1.5%, 852
out27:max,ave,min,rsd_seconds, number_iterations = 0.001374, 0.001169, 0.001128, 2.0%, 830
out28:max,ave,min,rsd_seconds, number_iterations = 0.001395, 0.001174, 0.001132, 1.7%, 776
out29:max,ave,min,rsd_seconds, number_iterations = 0.003663, 0.003295, 0.002099, 7.4%, 289
out30:max,ave,min,rsd_seconds, number_iterations = 0.001393, 0.001171, 0.001136, 1.8%, 839
out31:max,ave,min,rsd_seconds, number_iterations = 0.001478, 0.001177, 0.001131, 2.2%, 822
out32:max,ave,min,rsd_seconds, number_iterations = 0.001391, 0.001163, 0.001122, 1.7%, 853
out33:max,ave,min,rsd_seconds, number_iterations = 0.003600, 0.003360, 0.002341, 3.8%, 296
out34:max,ave,min,rsd_seconds, number_iterations = 0.001392, 0.001175, 0.001134, 2.2%, 835
out35:max,ave,min,rsd_seconds, number_iterations = 0.001583, 0.001164, 0.001122, 2.0%, 819
out36:max,ave,min,rsd_seconds, number_iterations = 0.001387, 0.001168, 0.001126, 1.9%, 848
out37:max,ave,min,rsd_seconds, number_iterations = 0.001415, 0.001177, 0.001132, 2.4%, 829
out38:max,ave,min,rsd_seconds, number_iterations = 0.003555, 0.003263, 0.001858, 8.4%, 291
out39:max,ave,min,rsd_seconds, number_iterations = 0.002321, 0.001169, 0.001131, 3.7%, 835
out40:max,ave,min,rsd_seconds, number_iterations = 0.001472, 0.001176, 0.001133, 2.4%, 827
out41:max,ave,min,rsd_seconds, number_iterations = 0.001363, 0.001163, 0.001117, 1.5%, 830
out42:max,ave,min,rsd_seconds, number_iterations = 0.003601, 0.003353, 0.002204, 3.7%, 291
out43:max,ave,min,rsd_seconds, number_iterations = 0.001406, 0.001177, 0.001139, 2.4%, 817
out44:max,ave,min,rsd_seconds, number_iterations = 0.001341, 0.001164, 0.001122, 1.5%, 838
out45:max,ave,min,rsd_seconds, number_iterations = 0.001382, 0.001168, 0.001123, 2.0%, 854
out46:max,ave,min,rsd_seconds, number_iterations = 0.001405, 0.001177, 0.001137, 2.5%, 842
out47:max,ave,min,rsd_seconds, number_iterations = 0.003547, 0.003291, 0.002124, 7.5%, 293
out48:max,ave,min,rsd_seconds, number_iterations = 0.001387, 0.001169, 0.001126, 1.7%, 828
out49:max,ave,min,rsd_seconds, number_iterations = 0.001426, 0.001176, 0.001128, 2.4%, 847
Loading

0 comments on commit 50793b2

Please sign in to comment.