diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 4cd04d9ab5..ab8a1a0e66 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -265,7 +265,7 @@ MIOPEN_DECLARE_OBJECT(miopenFusionOpDescriptor); * @brief Creates the miopenTensorDescriptor_t type * * Tensor descriptor is an object that allows the user to specify a layer's size for each - * dimension and dimension strides. Currently only 4-D fully packed tensors are supported. + * dimension and dimension strides. * */ MIOPEN_DECLARE_OBJECT(miopenTensorDescriptor); @@ -1048,7 +1048,7 @@ typedef enum miopenConvolutionFwdAlgoDirect = 1, /*!< Direct convolutions */ miopenConvolutionFwdAlgoFFT = 2, /*!< Fast Fourier Transform indirect convolutions */ miopenConvolutionFwdAlgoWinograd = 3, /*!< Winograd indirect convolutions */ - miopenConvolutionFwdAlgoImplicitGEMM = 5, /*!< Implicit GEMM convolutions, fp32 only */ + miopenConvolutionFwdAlgoImplicitGEMM = 5, /*!< Implicit GEMM convolutions */ } miopenConvFwdAlgorithm_t; /*! @enum miopenConvBwdWeightsAlgorithm_t @@ -1073,7 +1073,7 @@ typedef enum miopenConvolutionBwdDataAlgoWinograd = 3, /*!< Winograd indirect convolutions */ miopenTransposeBwdDataAlgoGEMM = 4, /*!< Deprecated Transpose GEMM variant legacy, ToBe Removed */ - miopenConvolutionBwdDataAlgoImplicitGEMM = 5, /*!< Implicit GEMM convolutions, fp32 only */ + miopenConvolutionBwdDataAlgoImplicitGEMM = 5, /*!< Implicit GEMM convolutions */ } miopenConvBwdDataAlgorithm_t; /*! @enum miopenConvAlgorithm_t @@ -1085,7 +1085,7 @@ typedef enum miopenConvolutionAlgoDirect = 1, /*!< Direct convolutions */ miopenConvolutionAlgoFFT = 2, /*!< Fast Fourier Transform indirect convolutions */ miopenConvolutionAlgoWinograd = 3, /*!< Winograd indirect convolutions */ - miopenConvolutionAlgoImplicitGEMM = 5, /*!< Implicit GEMM convolutions, fp32 only */ + miopenConvolutionAlgoImplicitGEMM = 5, /*!< Implicit GEMM convolutions */ } miopenConvAlgorithm_t; /*! @brief Perf struct for forward, backward filter, or backward data algorithms @@ -3346,7 +3346,7 @@ MIOPEN_EXPORT miopenStatus_t miopenDestroyRNNDescriptor(miopenRNNDescriptor_t rn * @param rnnMode RNN model type (input) * @param biasMode RNN bias included (input) * @param algo RNN algorithm selected (input) - * @param dataType Only fp32 currently supported for RNNs (input) + * @param dataType MIOpen datatype (input) * @return miopenStatus_t */ MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, @@ -3374,7 +3374,7 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDes * @param rnnMode RNN model type (input) * @param biasMode RNN bias included (input) * @param algo RNN algorithm selected (input) - * @param dataType Only fp32 currently supported for RNNs (input) + * @param dataType MIOpen datatype (input) * @return miopenStatus_t */ MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc, @@ -3459,7 +3459,7 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle, * @param rnnDesc Fully populated RNN layer descriptor type (input) * @param xDesc A previously populated tensor descriptor (input) * @param wDesc A previously allocated tensor descriptor (output) - * @param dtype MIOpen data type enum, currently only fp32 is supported (input) + * @param dtype MIOpen data type enum (input) * @return miopenStatus_t */ MIOPEN_EXPORT miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c87f8b372c..5c201b6416 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -42,7 +42,6 @@ option( MIOPEN_TEST_GPU_XNACK_ENABLED "Test as if XNACK mode is enabled" OFF ) option( MIOPEN_TEST_CONV Off) option( MIOPEN_TEST_DEEPBENCH Off) option( MIOPEN_TEST_DRIVER_ITER_MODE Off) -option( MIOPEN_TEST_MIOTENSILE "Test MIOpenTensile path" OFF ) option( MIOPEN_TEST_MLIR "Test for MLIR compilation backend" ${MIOPEN_USE_MLIR} ) set_var_to_condition(MIOPEN_TEST_WITH_MIOPENDRIVER_DEFAULT MIOPEN_BUILD_DRIVER) @@ -183,42 +182,26 @@ option( WORKAROUND_ISSUE_1148 "" ${WORKAROUND_ISSUE_1148_DEFAULT}) set_var_to_condition(WORKAROUND_ISSUE_1334_DEFAULT MIOPEN_TEST_GFX103X AND MIOPEN_TEST_FLOAT) option( WORKAROUND_ISSUE_1334 "" ${WORKAROUND_ISSUE_1334_DEFAULT}) -if(NOT MIOPEN_TEST_MIOTENSILE) - if(MIOPEN_TEST_HALF) - if(MIOPEN_BACKEND_OPENCL) - set(SKIP_TESTS test_gru test_rnn_vanilla test_lstm) - endif() - elseif(MIOPEN_TEST_INT8) - set(SKIP_ALL_EXCEPT_TESTS - test_tensor_vec test_tensor_cast test_tensor_trans test_tensor_copy test_tensor_set - test_tensor_transform test_conv2d test_conv2d_find2) - elseif(MIOPEN_TEST_BFLOAT16) - set(SKIP_ALL_EXCEPT_TESTS - test_conv2d test_tensor_copy test_tensor_set test_tensor_vec test_immed_conv2d - test_check_numerics_test test_conv_extra test_conv_for_implicit_gemm test_miopen_conv - test_deepbench_conv test_conv_igemm_dynamic_xdlops_nhwc_wrw_bf16_gfx90a - test_conv_igemm_dynamic_xdlops_nhwc_fwd_bf16_gfx90a - test_conv_igemm_dynamic_xdlops_nhwc_bwd_bf16_gfx90a) - endif() - if(${CODECOV_TEST}) - list(APPEND SKIP_TESTS test_conv3d test_immed_conv3d test_immed_conv2d test_pooling2d test_pooling2d_asymmetric) - # replaced by smaller tests with suffix _codecov +if(MIOPEN_TEST_HALF) + if(MIOPEN_BACKEND_OPENCL) + set(SKIP_TESTS test_gru test_rnn_vanilla test_lstm) endif() -else() - if(MIOPEN_TEST_HALF) - set(SKIP_ALL_EXCEPT_TESTS test_conv2d test_conv3d test_conv3d_extra test_immed_conv2d - test_immed_conv3d test_gru test_rnn_vanilla test_lstm test_gru_extra test_rnn_extra - test_lstm_extra) - elseif(MIOPEN_TEST_INT8) - set(SKIP_ALL_EXCEPT_TESTS test_conv2d) - elseif(MIOPEN_TEST_BFLOAT16) - set(SKIP_ALL_EXCEPT_TESTS test_conv2d test_immed_conv2d) - else() - set(SKIP_ALL_EXCEPT_TESTS test_conv2d test_conv3d test_conv3d_extra test_immed_conv2d - test_immed_conv3d test_gru test_rnn_vanilla test_lstm test_gru_extra - test_rnn_extra test_lstm_extra) - endif() -endif() +elseif(MIOPEN_TEST_INT8) + set(SKIP_ALL_EXCEPT_TESTS + test_tensor_vec test_tensor_cast test_tensor_trans test_tensor_copy test_tensor_set + test_tensor_transform test_conv2d test_conv2d_find2) +elseif(MIOPEN_TEST_BFLOAT16) + set(SKIP_ALL_EXCEPT_TESTS + test_conv2d test_conv2d_find2 test_tensor_copy test_tensor_set test_tensor_vec test_immed_conv2d + test_check_numerics_test test_conv_extra test_conv_for_implicit_gemm test_miopen_conv + test_deepbench_conv test_conv_igemm_dynamic_xdlops_nhwc_wrw_bf16_gfx90a + test_conv_igemm_dynamic_xdlops_nhwc_fwd_bf16_gfx90a + test_conv_igemm_dynamic_xdlops_nhwc_bwd_bf16_gfx90a) + endif() + if(${CODECOV_TEST}) + list(APPEND SKIP_TESTS test_conv3d test_conv3d_find2 test_immed_conv3d test_immed_conv2d test_pooling2d test_pooling2d_asymmetric) + # replaced by smaller tests with suffix _codecov + endif() if (MIOPEN_NO_GPU) set(SKIP_ALL_EXCEPT_TESTS test_include_inliner test_kernel_build_params @@ -238,7 +221,7 @@ if(MIOPEN_BACKEND_OPENCL AND MIOPEN_TEST_ALL) list(APPEND SKIP_TESTS test_conv3d test_immed_conv3d test_immed_conv2d test_conv3d_find2) endif() if(MIOPEN_TEST_GFX103X) - list(APPEND SKIP_TESTS test_conv3d test_immed_conv3d test_immed_conv2d) + list(APPEND SKIP_TESTS test_conv3d test_immed_conv3d test_immed_conv2d test_conv3d_find2) endif() endif() @@ -368,7 +351,9 @@ file(GLOB TESTS *.cpp) set(LONG_TESTS test_dropout test_conv2d + test_conv2d_find2 test_conv3d + test_conv3d_find2 test_conv_group test_soft_max test_lrn_test @@ -469,11 +454,6 @@ endfunction() # If nothing is specified, the default value is taken. # Default: GFX900_ENABLED, GFX906_ENABLED, GFX908_ENABLED, GFX90A_ENABLED, GFX103X_DISABLED # -# Special internal components: MIOTENSILE -# The option can be enabled or disabled by using '_ENABLED' and '_DISABLED' suffix. -# If nothing is specified, the default value is taken. -# Default: MIOTENSILE_DISABLED -# # Testing mode: # SKIP_UNLESS_ALL - The test should be only run if MIOPEN_TEST_ALL=TRUE. Intended for long tests. # TEST_PERF_DB_RECORD_NOT_FOUND - Test should fail if output contains: "Perf Db: record not found". @@ -489,7 +469,7 @@ function(add_custom_test NAME) set(options BF16_ENABLED BF16_DISABLED HALF_ENABLED HALF_DISABLED INT8_ENABLED INT8_DISABLED FLOAT_ENABLED FLOAT_DISABLED GFX900_ENABLED GFX900_DISABLED GFX906_ENABLED GFX906_DISABLED GFX908_ENABLED GFX908_DISABLED - GFX103X_ENABLED GFX103X_DISABLED GFX90A_ENABLED GFX90A_DISABLED MIOTENSILE_ENABLED MIOTENSILE_DISABLED + GFX103X_ENABLED GFX103X_DISABLED GFX90A_ENABLED GFX90A_DISABLED SKIP_UNLESS_MLIR SKIP_UNLESS_ALL TEST_PERF_DB_RECORD_NOT_FOUND SKIP_XNACK_ON OCL_ENABLED OCL_DISABLED HIP_ENABLED HIP_DISABLED HIP_NOGPU_ENABLED HIP_NOGPU_DISABLED ) @@ -520,12 +500,6 @@ function(add_custom_test NAME) option_support_check(${PARSE_FLOAT_ENABLED} ${PARSE_FLOAT_DISABLED} ${FLOAT_TEST_DEFAULT} is_float_check) bool_and_f(${MIOPEN_TEST_FLOAT} ${is_float_check} is_float_check) - set(is_miotensile_check) - set(MIOTENSILE_TEST_DEFAULT FALSE) - option_support_check(${PARSE_MIOTENSILE_ENABLED} ${PARSE_MIOTENSILE_DISABLED} ${MIOTENSILE_TEST_DEFAULT} is_miotensile_check) - bool_not_f(${MIOPEN_TEST_MIOTENSILE} NOT_MIOPEN_TEST_MIOTENSILE) - bool_or_f(${NOT_MIOPEN_TEST_MIOTENSILE} ${is_miotensile_check} is_miotensile_check) - set(is_mlir_check) bool_not_f(${PARSE_SKIP_UNLESS_MLIR} is_mlir_check) bool_or_f(${is_mlir_check} ${MIOPEN_TEST_MLIR} is_mlir_check) @@ -904,7 +878,7 @@ COMMAND ${IMPLICITGEMM_TESTING_ENV} $ ${IMPLICITGEMM_AR COMMAND ${IMPLICITGEMM_TESTING_ENV} $ ${IMPLICITGEMM_ARGS} --verbose --input 64 32 7 7 --weights 192 32 3 3 --pads_strides_dilations 2 2 2 2 1 1 | grep -v "cannot be executed due to incorrect params" ) -add_custom_test(test_conv_group SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_conv_group SKIP_UNLESS_ALL GFX103X_ENABLED COMMAND $ --verbose --input 16 128 56 56 --weights 256 4 3 3 --pads_strides_dilations 1 1 1 1 1 1 --group-count 32 COMMAND $ --verbose --input 16 256 56 56 --weights 512 8 3 3 --pads_strides_dilations 1 1 2 2 1 1 --group-count 32 COMMAND $ --verbose --input 16 256 28 28 --weights 512 8 3 3 --pads_strides_dilations 1 1 1 1 1 1 --group-count 32 @@ -964,7 +938,7 @@ COMMAND $ --verbose --input 8 3 108 108 --weights 63 1 if(MIOPEN_TEST_DEEPBENCH) - add_custom_test(test_deepbench_rnn MIOTENSILE_ENABLED GFX103X_ENABLED + add_custom_test(test_deepbench_rnn GFX103X_ENABLED COMMAND $ --verbose --batch-size 16 --seq-len 50 --vector-len 1760 --hidden-size 1760 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 50 --vector-len 1760 --hidden-size 1760 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 64 --seq-len 50 --vector-len 1760 --hidden-size 1760 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill @@ -1022,7 +996,7 @@ if(MIOPEN_TEST_DEEPBENCH) endif() -add_custom_test(test_rnn_extra SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_rnn_extra SKIP_UNLESS_ALL GFX103X_ENABLED COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --no-hx COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --no-dhy COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --no-hx --no-dhy @@ -1053,7 +1027,7 @@ COMMAND $ --verbose --batch-size 32 --seq-len 3 -- COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --rnn-mode 1 --no-hx --no-dhy --no-hy --no-dhx ) -add_custom_test(test_gru_extra SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_gru_extra SKIP_UNLESS_ALL GFX103X_ENABLED COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy @@ -1070,7 +1044,7 @@ COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-se COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-hy --no-dhx ) -add_custom_test(test_lstm_extra SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_lstm_extra SKIP_UNLESS_ALL GFX103X_ENABLED COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy @@ -1104,7 +1078,7 @@ COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-s ) -add_custom_test(test_conv_extra SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_conv_extra SKIP_UNLESS_ALL GFX103X_ENABLED # COMMAND $ --verbose --input 1 1 1 1 --weights 1 1 2 2 --pads_strides_dilations 0 0 3 3 1 1 COMMAND $ --verbose --input 4 1 161 700 --weights 4 1 5 20 --pads_strides_dilations 0 0 2 2 1 1 COMMAND $ --verbose --input 4 1 161 700 --weights 4 1 5 20 --pads_strides_dilations 0 0 2 2 1 1 @@ -1170,7 +1144,7 @@ endif() endif() -add_custom_test(test_conv_trans SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_conv_trans SKIP_UNLESS_ALL GFX103X_ENABLED COMMAND $ --verbose --input 8 128 28 28 --weights 128 128 1 1 --pads_strides_dilations 0 0 1 1 1 1 --cmode trans --pmode default COMMAND $ --verbose --input 8 256 28 28 --weights 256 256 1 1 --pads_strides_dilations 0 0 1 1 1 1 --cmode trans --pmode same COMMAND $ --verbose --input 8 32 28 28 --weights 32 32 5 5 --pads_strides_dilations 0 0 2 2 1 1 --cmode trans --pmode default @@ -1190,7 +1164,7 @@ COMMAND $ --verbose --input 100 6 4 4 --weights 6 4 1 1 ) -add_custom_test(test_conv_3d SKIP_UNLESS_ALL MIOTENSILE_ENABLED GFX103X_ENABLED +add_custom_test(test_conv_3d SKIP_UNLESS_ALL GFX103X_ENABLED COMMAND $ --verbose --conv_dim_type conv3d --input 16 32 4 9 9 --weights 64 32 3 3 3 --pads_strides_dilations 0 0 0 2 2 2 1 1 1 --group-count 1 --cmode conv --pmode default COMMAND $ --verbose --conv_dim_type conv3d --input 4 3 4 227 227 --weights 4 3 3 11 11 --pads_strides_dilations 0 0 0 1 1 1 1 1 1 --group-count 1 --cmode conv --pmode default COMMAND $ --verbose --conv_dim_type conv3d --input 16 128 4 56 56 --weights 256 4 3 3 3 --pads_strides_dilations 1 1 1 1 1 1 1 1 1 --group-count 32 --cmode conv --pmode default @@ -1776,7 +1750,7 @@ COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --scales 1 0 -- ) if(MIOPEN_TEST_DEEPBENCH) - add_custom_test(test_deepbench_conv MIOTENSILE_ENABLED GFX103X_ENABLED + add_custom_test(test_deepbench_conv GFX103X_ENABLED COMMAND $ --verbose --input 4 1 161 700 --weights 32 1 5 20 --pads_strides_dilations 0 0 2 2 1 1 COMMAND $ --verbose --input 8 1 161 700 --weights 32 1 5 20 --pads_strides_dilations 0 0 2 2 1 1 COMMAND $ --verbose --input 16 1 161 700 --weights 32 1 5 20 --pads_strides_dilations 0 0 2 2 1 1 @@ -1817,7 +1791,7 @@ if(MIOPEN_TEST_DEEPBENCH) endif() if(MIOPEN_TEST_CONV) - add_custom_test(test_miopen_conv MIOTENSILE_ENABLED GFX103X_ENABLED + add_custom_test(test_miopen_conv GFX103X_ENABLED COMMAND $ --verbose --input 1 3 32 32 --weights 1 3 7 7 --pads_strides_dilations 1 1 1 1 1 1 COMMAND $ --verbose --input 1 3 227 227 --weights 1 3 7 7 --pads_strides_dilations 1 1 1 1 1 1 COMMAND $ --verbose --input 1 64 56 56 --weights 1 64 1 1 --pads_strides_dilations 0 0 2 2 1 1