Segfault in debug build: miopen::ConvolutionDescriptor::FindConvFwdAlgorithm #14

patflick · 2017-07-14T19:58:57Z

In the debug build for MIOpen, I'm experiencing segfaults inside the FindConvFwdAlgorithm function.
Note that this happens only in the debug (CMAKE_BUILD_TYPE=Debug) build of MIOpen. The CMAKE_BUILD_TYPE=Release doesn't have this issue.

I tried to create a minimal example, but its still quite long. Steps to reproduce:

I build the current master version of MIOpen with:

mkdir debug && cd debug
cmake -DCMAKE_BUILD_TYPE=Debug ../
make
sudo make install

The following is the almost-minimal code to reproduce this issue, file: conv_segfault.cpp:

#include <hip/hip_runtime_api.h>
#include <miopen/miopen.h>
#include <stdio.h>
#include <iostream>

#define CHECK_HIP(cmd) \
{\
    hipError_t hip_error  = cmd;\
    if (hip_error != hipSuccess) { \
        fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(hip_error), hip_error,__FILE__, __LINE__); \
        exit(EXIT_FAILURE);\
    }\
}

#define CHECK_MIO(cmd) \
{\
    miopenStatus_t miostat = cmd;\
    if (miostat != miopenStatusSuccess) { \
        fprintf(stderr, " MIOpen error (%d) at %s:%d\n", miostat,__FILE__, __LINE__); \
        exit(EXIT_FAILURE);\
    }\
}

struct Tensor {
    miopenTensorDescriptor_t desc;
    void* data;
    size_t data_size;
    Tensor(int n, int c, int h, int w) {
        CHECK_MIO(miopenCreateTensorDescriptor(&desc));
        CHECK_MIO(miopenSet4dTensorDescriptor(desc, miopenFloat, n, c, h, w));
        data_size = n*c*h*w*sizeof(float);
        CHECK_HIP(hipMalloc(&data, data_size));
    }
};

int main(int argc, char *argv[])
{
    int devcount;
    CHECK_HIP(hipGetDeviceCount(&devcount));
    std::cout << "Number of HIP devices found: " << devcount << std::endl;
    if (devcount <= 0)
        exit(EXIT_FAILURE);

    miopenHandle_t mio_handle;
    CHECK_MIO(miopenCreate(&mio_handle));

    /* create conv desc */
    miopenConvolutionDescriptor_t convdesc;
    CHECK_MIO(miopenCreateConvolutionDescriptor(&convdesc));
    CHECK_MIO(miopenInitConvolutionDescriptor(convdesc, miopenConvolution, 1, 1, 1, 1, 1, 1));

    // create input, output and weights tensors
    Tensor input(128, 3, 32, 32);
    Tensor output(128, 64, 32, 32);
    Tensor weights(64, 3, 3, 3);

    // create workspace
    size_t workspace_size;
    void* workspace;
    CHECK_MIO(miopenConvolutionForwardGetWorkSpaceSize(mio_handle, weights.desc, input.desc, convdesc, output.desc, &workspace_size));
    CHECK_HIP(hipMalloc(&workspace, workspace_size));

    // findalgo: this segfaults
    miopenConvAlgoPerf_t perfs[4];
    int returned_algos;
    CHECK_MIO(miopenFindConvolutionForwardAlgorithm(mio_handle, input.desc, input.data, weights.desc, weights.data, convdesc, output.desc, output.data, 4, &returned_algos, perfs, workspace, workspace_size, false));
    return 0;
}

Compile with:

/opt/rocm/hip/bin/hipcc -g  --amdgpu-target=gfx900 -I/opt/rocm/hip/include -I/opt/rocm/include conv_segfault.cpp -L/opt/rocm/lib -L/opt/rocm/opencl/lib/x86_64 -lMIOpen -o segfault

Running the resulting executable yields:

Number of HIP devices found: 1
Device Name: gfx900
runcl  -DNUM_CH_PER_WG=1 -DNUM_IM_BLKS_X=1 -DNUM_IM_BLKS=4 -DLOCAL_MEM_SIZE=385 -DSTRIDE_GT_1=0 -DTILE_SZ_X=32 -DTILE_SZ_Y=8 -DUSE_IM_OFF_GUARD=1 src/Kernels/MIOpenUtilKernels.cl -k Im2Col -dumpilisa -r 10 if#0: if#0: if#0: iv#0 3072,1,1/256,1,1
key: miopenIm2Col,
Kernel filename: MIOpenUtilKernels.cl
Segmentation fault (core dumped)

GDB stacktrace:

Thread 1 "segfault" received signal SIGSEGV, Segmentation fault.
0x00007fffe8e8fad9 in clSetKernelArg () from /opt/rocm/opencl/lib/x86_64/libamdocl64.so
(gdb) bt
#0  0x00007fffe8e8fad9 in clSetKernelArg () from /opt/rocm/opencl/lib/x86_64/libamdocl64.so
#1  0x00007ffff64d6233 in miopen::OCLSetKernelArg::operator()<std::integral_constant<unsigned long, 1ul>, _cl_mem*> (this=0x7fffffffc800, kernel=0xb628a0, i=..., x=@0x7fffffffc9d8: 0x110131d000) at /home/patrick/miopen/miopen-git/src/include/miopen/oclkernel.hpp:64
#2  0x00007ffff64d4a78 in std::_Bind<miopen::OCLSetKernelArg (_cl_kernel*, std::_Placeholder<1>, std::_Placeholder<2>)>::__call<void, std::integral_constant<unsigned long, 1ul>&&, _cl_mem* const&, 0ul, 1ul, 2ul>(std::tuple<std::integral_constant<unsigned long, 1ul>&&, _cl_mem* const&>&&, std::_Index_tuple<0ul, 1ul, 2ul>) (this=0x7fffffffc800, __args=<unknown type in /opt/rocm/lib/libMIOpen.so.1, CU 0x104f80, DIE 0x13e0aa>) at /usr/include/c++/5/functional:1074
#3  0x00007ffff64d3385 in std::_Bind<miopen::OCLSetKernelArg (_cl_kernel*, std::_Placeholder<1>, std::_Placeholder<2>)>::operator()<std::integral_constant<unsigned long, 1ul>, _cl_mem* const&, void>(std::integral_constant<unsigned long, 1ul>&&, _cl_mem* const&) (this=0x7fffffffc800) at /usr/include/c++/5/functional:1133
#4  0x00007ffff6533e55 in miopen::detail::each_args_i_impl<std::_Bind<miopen::OCLSetKernelArg (_cl_kernel*, std::_Placeholder<1>, std::_Placeholder<2>)>, 0ul, 1ul, 2ul, 3ul, 4ul, 5ul, 6ul, 7ul, 8ul, 9ul, 10ul, 11ul, 12ul, 13ul, int const&, _cl_mem* const&, unsigned long const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, _cl_mem* const&>(std::_Bind<miopen::OCLSetKernelArg (_cl_kernel*, std::_Placeholder<1>, std::_Placeholder<2>)>, miopen::detail::seq<0ul, 1ul, 2ul, 3ul, 4ul, 5ul, 6ul, 7ul, 8ul, 9ul, 10ul, 11ul, 12ul, 13ul>, int const&, _cl_mem* const&, unsigned long const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, _cl_mem* const&) (f=...) at /home/patrick/miopen/miopen-git/src/include/miopen/each_args.hpp:68
#5  0x00007ffff6533b33 in miopen::each_args_i<std::_Bind<miopen::OCLSetKernelArg (_cl_kernel*, std::_Placeholder<1>, std::_Placeholder<2>)>, int const&, _cl_mem* const&, unsigned long const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, _cl_mem* const&>(std::_Bind<miopen::OCLSetKernelArg (_cl_kernel*, std::_Placeholder<1>, std::_Placeholder<2>)>, int const&, _cl_mem* const&, unsigned long const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, int const&, _cl_mem* const&) (f=...) at /home/patrick/miopen/miopen-git/src/include/miopen/each_args.hpp:83
#6  0x00007ffff65337c0 in miopen::OCLKernelInvoke::operator()<int, _cl_mem*, unsigned long, int, int, int, int, int, int, int, int, int, int, _cl_mem*> (this=0x7fffffffccc0) at /home/patrick/miopen/miopen-git/src/include/miopen/oclkernel.hpp:97
#7  0x00007ffff6532e55 in miopen::Im2ColGPU (handle=..., data_size=393216, im=0x110131d000, im_offset=0, c=3, h=32, w=32, wei_h=3, wei_w=3, out_h=32, out_w=32, pad_h=1, pad_w=1, stride_h=1, stride_w=1, col=0x11034a2000) at /home/patrick/miopen/miopen-git/src/ocl/utilocl.cpp:92
#8  0x00007ffff64fd85d in miopen::ConvolutionDescriptor::FindConvFwdAlgorithm (this=0xc719e0, handle=..., xDesc=..., x=0x110131d000, wDesc=..., w=0x110349f000, yDesc=..., y=0x110149e000, requestAlgoCount=4, returnedAlgoCount=0x7fffffffd68c, perfResults=0x7fffffffd690, workSpace=0x11034a2000, workSpaceSize=110592, exhaustiveSearch=false) at /home/patrick/miopen/miopen-git/src/ocl/convolutionocl.cpp:360
#9  0x00007ffff6465c85 in <lambda()>::operator()(void) const (__closure=0x7fffffffd460) at /home/patrick/miopen/miopen-git/src/convolution_api.cpp:166
#10 0x00007ffff646b6fb in miopen::try_<miopenFindConvolutionForwardAlgorithm(miopenHandle_t, miopenTensorDescriptor_t, void const*, miopenTensorDescriptor_t, void const*, miopenConvolutionDescriptor_t, miopenTensorDescriptor_t, void*, int, int*, miopenConvAlgoPerf_t*, void*, size_t, bool)::<lambda()> >(<lambda()>) (f=...) at /home/patrick/miopen/miopen-git/src/include/miopen/errors.hpp:71
#11 0x00007ffff646646f in miopenFindConvolutionForwardAlgorithm (handle=0xb45760, xDesc=0xc106d0, x=0x110131d000, wDesc=0xc6f890, w=0x110349f000, convDesc=0xc719e0, yDesc=0xc108e0, y=0x110149e000, requestAlgoCount=4, returnedAlgoCount=0x7fffffffd68c, perfResults=0x7fffffffd690, workSpace=0x11034a2000, workSpaceSize=110592, exhaustiveSearch=false) at /home/patrick/miopen/miopen-git/src/convolution_api.cpp:167
#12 0x0000000000419776 in main (argc=1, argv=0x7fffffffd878) at conv_segfault.cpp:69

The text was updated successfully, but these errors were encountered:

pfultz2 · 2017-07-19T00:13:56Z

It looks like you are building the opencl backend and then using hip api with this which why this is broken. The cl_mem pointers and the device pointers from hip are not compatible. To build the hip backend, you need to set your compiler to hcc:

mkdir debug && cd debug
CXX=/opt/rocm/bin/hcc cmake -DCMAKE_BUILD_TYPE=Debug ../
make
sudo make install

You may need to also set the CMAKE_PREFIX_PATH as shown in the readme depending on how you installed hcc and hip: -DCMAKE_PREFIX_PATH="/opt/rocm/hcc;/opt/rocm/hip".

patflick · 2017-07-19T15:28:19Z

@pfultz2 Thank you! This is what was wrong.

Is there a way to detect that MIOpen was compiled differently then how it is used?

Looking into miopen/config.h:

#define MIOPEN_BACKEND_OPENCL 0
#define MIOPEN_BACKEND_HCC 0
#define MIOPEN_BACKEND_HIP 1

it looks like there are 3 possible backends at the moment. Are the HCC and HIP backends compatible?

So if I'm writing a hip based MIOpen application, I guess I could add something like the following:

#if MIOPEN_BACKEND_OPENCL
#error "The MIOpen OpenCL backend is not compatible with hip/hcc"
#endif

Is there a generic way you could detect the hip or hcc compiler in the MIOpen header and throw an error like this?

pfultz2 · 2017-07-19T23:48:08Z

Are the HCC and HIP backends compatible?

The HCC backend is planned for the future, but it is not used.

So if I'm writing a hip based MIOpen application, I guess I could add something like the following:

That is one possible solution.

Is there a generic way you could detect the hip or hcc compiler in the MIOpen header and throw an error like this?

I dont think this is robust as we can compile our opencl backend using hcc. We could try to detect if a hip header has been included and then produce an error, but this only works when hip is included before miopen.

If you are using find_package(miopen PATHS /opt/rocm) in cmake using hcc, then you should always get an error about missing headers when the wrong backend is used.

31b403526 Merge pull request #16 from ROCmSoftwarePlatform/develop b62bf8c3f Merge pull request #14 from ROCmSoftwarePlatform/miopen_downstream_init_integration ccc4a1d36 Merge pull request #8 from ROCmSoftwarePlatform/miopen_downstream_init_integration 67ad47e7c refactor 16effa767 refactor a91b68dfc DynamicBuffer, StaticBuffer, amd_buffer_load support customized value for invalid element 2cbabbba5 use int instead of index_t in kernel wrapper 0834bc763 compiler parameter use stream f2ac7832c make innner product compatiable on gfx900 4e57b30a6 rename c03045ce2 rename b2589957f update CK build script 2c48039d0 fix kernel filename d626dccc9 fix enum issue 643ebd4f3 tidy ddd49ec9e fix clang warning suppression 4f566c622 vector/scalar pointer cast use c-style pointer cast instead of reinterpret_cast 172036d72 add c-style pointer cast 76f313193 tidy d18428901 tidy f885c131d tidy 80120f0a0 tidy c3efeb5e2 tidy 56fc0842b tidy 54fba515b tidy e62bae7a4 tidy 24c872894 add tidy 61487e0a0 fix ae98b52ad remove online compilation from CK cb9542131 refactor 73ca97015 Merge commit '437cc595c6e206dfebb118985b5171bbc1e29eab' into composable_kernel_init_integration_v3 3b8664611 Merge pull request #7 from ROCmSoftwarePlatform/master d09ea4f4e Update develop (#5) 3d32ae940 add solver ConvIgemmFwdV6r1DlopsNchwKcyxNkhw; rename static ck source files git-subtree-dir: src/composable_kernel git-subtree-split: 31b403526ec54abf13c4bb58dfb6635b4d2aa619

…duction (#1156) * Squashed 'src/composable_kernel/' content from commit f6edda611 git-subtree-dir: src/composable_kernel git-subtree-split: f6edda6119ebbb237dfa6270797b34f960d7b190 * add solver ConvIgemmFwdV6r1DlopsNchwKcyxNkhw; rename static ck source files * Squashed 'src/composable_kernel/' changes from f6edda611..5781adf5c 5781adf5c Update develop (#5) (#6) 97e6d514f Merge pull request #4 from ROCmSoftwarePlatform/separate_online_compile 7b1ec41e5 refactor 49c33aaea refactor 54b3e73d1 rename git-subtree-dir: src/composable_kernel git-subtree-split: 5781adf5cf4ac753e2e36da7385791775b744bf7 * fix * refactor * remove online compilation from CK * refactor * fix * add ctest * tidy * add tidy * tidy * tidy * tidy * tidy * tidy * tidy * tidy * tidy * tidy * add c-style pointer cast * vector/scalar pointer cast use c-style pointer cast instead of reinterpret_cast * fix clang warning suppression * tidy * suppress cppcheck * fix enum issue * revert chagnes to hip build * fix kernel filename * update CK build script * rename * rename * make innner product compatiable on gfx900 * Update src/include/miopen/solver/ck_utility_common.hpp Co-authored-by: JD <[email protected]> * compiler parameter use stream * use int instead of index_t in kernel wrapper * DynamicBuffer, StaticBuffer, amd_buffer_load support customized value for invalid element * refactor * refactor * change cmakelist * change ck common utility * fix * Squashed 'src/composable_kernel/' changes from 5781adf5c..31b403526 31b403526 Merge pull request #16 from ROCmSoftwarePlatform/develop b62bf8c3f Merge pull request #14 from ROCmSoftwarePlatform/miopen_downstream_init_integration ccc4a1d36 Merge pull request #8 from ROCmSoftwarePlatform/miopen_downstream_init_integration 67ad47e7c refactor 16effa767 refactor a91b68dfc DynamicBuffer, StaticBuffer, amd_buffer_load support customized value for invalid element 2cbabbba5 use int instead of index_t in kernel wrapper 0834bc763 compiler parameter use stream f2ac7832c make innner product compatiable on gfx900 4e57b30a6 rename c03045ce2 rename b2589957f update CK build script 2c48039d0 fix kernel filename d626dccc9 fix enum issue 643ebd4f3 tidy ddd49ec9e fix clang warning suppression 4f566c622 vector/scalar pointer cast use c-style pointer cast instead of reinterpret_cast 172036d72 add c-style pointer cast 76f313193 tidy d18428901 tidy f885c131d tidy 80120f0a0 tidy c3efeb5e2 tidy 56fc0842b tidy 54fba515b tidy e62bae7a4 tidy 24c872894 add tidy 61487e0a0 fix ae98b52ad remove online compilation from CK cb9542131 refactor 73ca97015 Merge commit '437cc595c6e206dfebb118985b5171bbc1e29eab' into composable_kernel_init_integration_v3 3b8664611 Merge pull request #7 from ROCmSoftwarePlatform/master d09ea4f4e Update develop (#5) 3d32ae940 add solver ConvIgemmFwdV6r1DlopsNchwKcyxNkhw; rename static ck source files git-subtree-dir: src/composable_kernel git-subtree-split: 31b403526ec54abf13c4bb58dfb6635b4d2aa619 * Tiny fix in using data type template parameters in blockwise and direct_threadwise kernel * Fix with regard to implementing GetZeroVal() in both kernel and host * Avoid convert to compType from dstDataType before writting the output value * Add half_t support to NumericLimits and make constexpr GetZeroVal() of binary operator * Add CONSTANT decorator for descriptor read buffer * Use get_thread_local_1d_id() for thread local Id * Rename GetZeroVal() to GetReductionZeroVal() in the kernels * Remove constexpr from initialized zeroVal and tiny fix in reduction_operator.hpp * Occasional tiny simplification and update in the kernel files * Update in src/reducetensor.cpp for consistent IDs passing to the kernel * Update to re-order tensor dimensions on the host, split second_call kernel wrapper files and simplify reduce_all kernel wrappers * Update to remove OpenCL tidy checking failures * Small updates in src/reducetensor.cpp * Update for better readability * Remove unused codes and not-needed template parameters in the kernel wrappers Co-authored-by: Chao Liu <[email protected]> Co-authored-by: JD <[email protected]>

dagamayank assigned pfultz2 Jul 15, 2017

daniellowell closed this as completed May 16, 2019

alexandraBara mentioned this issue Sep 11, 2020

Solver generic_search fail: ConvHipImplicitGemmBwdDataV1R1Xdlops and ConvHipImplicitGemmForwardV4R4Xdlops #427

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Segfault in debug build: miopen::ConvolutionDescriptor::FindConvFwdAlgorithm #14

Segfault in debug build: miopen::ConvolutionDescriptor::FindConvFwdAlgorithm #14

patflick commented Jul 14, 2017

pfultz2 commented Jul 19, 2017 •

edited

Loading

patflick commented Jul 19, 2017

pfultz2 commented Jul 19, 2017

Segfault in debug build: miopen::ConvolutionDescriptor::FindConvFwdAlgorithm #14

Segfault in debug build: miopen::ConvolutionDescriptor::FindConvFwdAlgorithm #14

Comments

patflick commented Jul 14, 2017

pfultz2 commented Jul 19, 2017 • edited Loading

patflick commented Jul 19, 2017

pfultz2 commented Jul 19, 2017

pfultz2 commented Jul 19, 2017 •

edited

Loading