From 9f4d46ebc9497adc0b1dcbaaf619b65d291f3042 Mon Sep 17 00:00:00 2001
From: Evgeny Mankov
Date: Sun, 23 Apr 2023 20:22:16 +0200
Subject: [PATCH] [HIPIFY][#830][feature] Function call transformation type
 "move arg from place X to place Y"

[feature]
+ Support for multiple transformations of a single call (including transformations of different types)
+ Support for a batch move of a sequence of args
+ A swap transformation is described as two move transformations; for instance, swapping 1 <-> 2 is expressed as the two moves 1 -> 2 and 2 -> 1; their order does not matter

[IMP]
+ Instead of moving multiple arguments to the same place (including the very end), a batch move should be used
+ For instance:
    {1, {e_move_argument, cw_None, "", 9}},
    {2, {e_move_argument, cw_None, "", 9}},
  should be replaced with:
    {1, {e_move_argument, cw_None, "", 9, 2}},
  meaning that the two args starting from the 1st (the 1st and the 2nd) are moved to the end of the function call, in the same order, in a single replacement step
+ [Reason] The moves 1 -> 9 and 2 -> 9 are order-dependent: argument 1 at place 9 would be overwritten by argument 2 (or vice versa), whereas both need to be moved to place 9
[MIOpen][fix]
+ Apply the move transformation to the following function calls: cudnnSoftmaxForward, cudnnSoftmaxBackward, cudnnConvolutionForward, cudnnConvolutionBackwardData, cudnnRNNBackwardWeights, cudnnGetConvolutionForwardWorkspaceSize, cudnnGetConvolutionBackwardDataWorkspaceSize
+ Update the test cudnn2miopen.cu accordingly
---
 src/CUDA2HIP_Scripting.h                 |  3 +
 src/HipifyAction.cpp                     | 97 ++++++++++++++++++-
 .../synthetic/libraries/cudnn2miopen.cu  | 21 ++--
 3 files changed, 106 insertions(+), 15 deletions(-)

diff --git a/src/CUDA2HIP_Scripting.h b/src/CUDA2HIP_Scripting.h
index 302a99e5..204fbc2a 100644
--- a/src/CUDA2HIP_Scripting.h
+++ b/src/CUDA2HIP_Scripting.h
@@ -31,6 +31,7 @@ namespace hipify {
     e_int64_t,
     e_remove_argument,
     e_add_const_argument,
+    e_move_argument,
   };

   enum CastWarning {
@@ -42,6 +43,8 @@ namespace hipify {
     CastTypes castType;
     CastWarning castWarn;
     std::string constValToAdd = "";
+    unsigned moveTo = 0;
+    unsigned numberToMove = 1;
   };

   typedef std::map<unsigned, CastInfo> ArgCastMap;
diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp
index d16180f1..1af7331b 100644
--- a/src/HipifyAction.cpp
+++ b/src/HipifyAction.cpp
@@ -77,6 +77,11 @@ const std::string sCudnnGetPoolingNdDescriptor = "cudnnGetPoolingNdDescriptor";
 const std::string sCudnnSetLRNDescriptor = "cudnnSetLRNDescriptor";
 const std::string sCudnnGetRNNDescriptor_v6 = "cudnnGetRNNDescriptor_v6";
 const std::string sCudnnSetRNNDescriptor_v6 = "cudnnSetRNNDescriptor_v6";
+const std::string sCudnnSoftmaxForward = "cudnnSoftmaxForward";
+const std::string sCudnnSoftmaxBackward = "cudnnSoftmaxBackward";
+const std::string sCudnnConvolutionForward = "cudnnConvolutionForward";
+const std::string sCudnnConvolutionBackwardData = "cudnnConvolutionBackwardData";
+const std::string sCudnnRNNBackwardWeights = "cudnnRNNBackwardWeights";
 // Matchers' names
 const StringRef sCudaLaunchKernel = "cudaLaunchKernel";
 const StringRef sCudaHostFuncCall = "cudaHostFuncCall";
@@ -94,6 +99,7 @@ std::string getCastType(hipify::CastTypes c) {
     case e_int64_t: return s_int64_t;
     case e_remove_argument: return "";
     case e_add_const_argument: return "";
+    case e_move_argument: return "";
     default: return "";
   }
 }
@@ -207,6 +213,8 @@ std::map<std::string, hipify::FuncArgCasts> FuncArgCasts {
   {sCudnnGetConvolutionForwardWorkspaceSize,
     {
       {
+        {1, {e_move_argument, cw_None, "", 2}},
+        {2, {e_move_argument, cw_None, "", 1}},
         {5, {e_remove_argument, cw_None}}
       },
       true,
@@ -216,6 +224,8 @@ std::map<std::string, hipify::FuncArgCasts> FuncArgCasts {
   {sCudnnGetConvolutionBackwardDataWorkspaceSize,
     {
       {
+        {1, {e_move_argument, cw_None, "", 2}},
+        {2, {e_move_argument, cw_None, "", 1}},
         {5, {e_remove_argument, cw_None}}
       },
       true,
@@ -294,6 +304,54 @@ std::map<std::string, hipify::FuncArgCasts> FuncArgCasts {
       true
     }
   },
+  {sCudnnSoftmaxForward,
+    {
+      {
+        {1, {e_move_argument, cw_None, "", 9, 2}},
+      },
+      true,
+      true
+    }
+  },
+  {sCudnnSoftmaxBackward,
+    {
+      {
+        {1, {e_move_argument, cw_None, "", 11, 2}},
+      },
+      true,
+      true
+    }
+  },
+  {sCudnnConvolutionForward,
+    {
+      {
+        {8, {e_move_argument, cw_None, "", 13, 2}},
+      },
+      true,
+      true
+    }
+  },
+  {sCudnnConvolutionBackwardData,
+    {
+      {
+        {2, {e_move_argument, cw_None, "", 4, 2}},
+        {4, {e_move_argument, cw_None, "", 2, 2}},
+        {8, {e_move_argument, cw_None, "", 13, 2}},
+      },
+      true,
+      true
+    }
+  },
+  {sCudnnRNNBackwardWeights,
+    {
+      {
+        {9, {e_move_argument, cw_None, "", 11, 2}},
+        {11, {e_move_argument, cw_None, "", 9, 2}},
+      },
+      true,
+      true
+    }
+  },
 };

 void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) {
@@ -760,6 +818,38 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result)
       length = SM->getCharacterData(e) - SM->getCharacterData(s);
       break;
     }
+    case e_move_argument:
+    {
+      std::string sArg;
+      clang::SmallString<40> dst_XStr;
+      llvm::raw_svector_ostream dst_OS(dst_XStr);
+      if (c.second.numberToMove > 1) {
+        if ((argNum + c.second.numberToMove - 1) >= call->getNumArgs())
+          continue;
+        sr = call->getArg(argNum + c.second.numberToMove - 1)->getSourceRange();
+        sr.setBegin(call->getArg(argNum)->getBeginLoc());
+      }
+      sArg = readSourceText(*SM, sr).str();
+      if (c.second.moveTo < call->getNumArgs())
+        dst_OS << sArg << ", ";
+      else
+        dst_OS << ", " << sArg;
+      clang::SourceLocation dst_s;
+      if (c.second.moveTo < call->getNumArgs())
+        dst_s = call->getArg(c.second.moveTo)->getBeginLoc();
+      else
+        dst_s = call->getEndLoc();
+      ct::Replacement dst_Rep(*SM, dst_s, 0, dst_OS.str());
+      clang::FullSourceLoc dst_fullSL(dst_s, *SM);
+      insertReplacement(dst_Rep, dst_fullSL);
+      OS << "";
+      if (argNum < call->getNumArgs())
+        e = call->getArg(argNum + c.second.numberToMove)->getBeginLoc();
+      else
+        e = call->getEndLoc();
+      length = SM->getCharacterData(e) - SM->getCharacterData(s);
+      break;
+    }
     case e_add_const_argument:
     {
       if (argNum < call->getNumArgs())
@@ -880,7 +970,12 @@ std::unique_ptr<clang::ASTConsumer> HipifyAction::CreateASTConsumer(clang::Compi
             sCudnnGetPoolingNdDescriptor,
             sCudnnSetLRNDescriptor,
             sCudnnGetRNNDescriptor_v6,
-            sCudnnSetRNNDescriptor_v6
+            sCudnnSetRNNDescriptor_v6,
+            sCudnnSoftmaxForward,
+            sCudnnSoftmaxBackward,
+            sCudnnConvolutionForward,
+            sCudnnConvolutionBackwardData,
+            sCudnnRNNBackwardWeights
           )
         )
       )
diff --git a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu
index 3ffed966..e6eb0270 100644
--- a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu
+++ b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu
@@ -450,28 +450,24 @@ int main() {
   // CHECK: status = miopenFindConvolutionForwardAlgorithm(handle, xD, x, filterDescriptor, W, convolutionDescriptor, yD, y, requestedAlgoCount, &returnedAlgoCount, &ConvolutionFwdAlgoPerf_t, workSpace, workSpaceSizeInBytes, true);
   status = cudnnFindConvolutionForwardAlgorithmEx(handle, xD, x, filterDescriptor, W, convolutionDescriptor, yD, y, requestedAlgoCount, &returnedAlgoCount, &ConvolutionFwdAlgoPerf_t, workSpace, workSpaceSizeInBytes);

-  // TODO: swap 2 and 3 arguments (const miopenTensorDescriptor_t wDesc and const miopenTensorDescriptor_t xDesc)
   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, size_t* sizeInBytes);
   // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, size_t* workSpaceSize);
-  // CHECK: status = miopenConvolutionForwardGetWorkSpaceSize(handle, xD, filterDescriptor, convolutionDescriptor, yD, &workSpaceSizeInBytes);
+  // CHECK: status = miopenConvolutionForwardGetWorkSpaceSize(handle, filterDescriptor, xD, convolutionDescriptor, yD, &workSpaceSizeInBytes);
   status = cudnnGetConvolutionForwardWorkspaceSize(handle, xD, filterDescriptor, convolutionDescriptor, yD, convolutionFwdAlgo, &workSpaceSizeInBytes);

-  // TODO: swap correctly last 5 arguments
   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
   // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenConvolutionForward(miopenHandle_t handle, const void* alpha, const miopenTensorDescriptor_t xDesc, const void* x, const miopenTensorDescriptor_t wDesc, const void* w, const miopenConvolutionDescriptor_t convDesc, miopenConvFwdAlgorithm_t algo, const void* beta, const miopenTensorDescriptor_t yDesc, void* y, void* workSpace, size_t workSpaceSize);
-  // CHECK: status = miopenConvolutionForward(handle, alpha, xD, x, filterDescriptor, W, convolutionDescriptor, convolutionFwdAlgo, workSpace, workSpaceSizeInBytes, beta, yD, y);
+  // CHECK: status = miopenConvolutionForward(handle, alpha, xD, x, filterDescriptor, W, convolutionDescriptor, convolutionFwdAlgo, beta, yD, y, workSpace, workSpaceSizeInBytes);
   status = cudnnConvolutionForward(handle, alpha, xD, x, filterDescriptor, W, convolutionDescriptor, convolutionFwdAlgo, workSpace, workSpaceSizeInBytes, beta, yD, y);

-  // TODO: swap 2 and 3 arguments
   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, size_t* sizeInBytes);
   // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, size_t* workSpaceSize);
-  // CHECK: status = miopenConvolutionBackwardDataGetWorkSpaceSize(handle, filterDescriptor, yD, convolutionDescriptor, xD, &workSpaceSizeInBytes);
+  // CHECK: status = miopenConvolutionBackwardDataGetWorkSpaceSize(handle, yD, filterDescriptor, convolutionDescriptor, xD, &workSpaceSizeInBytes);
   status = cudnnGetConvolutionBackwardDataWorkspaceSize(handle, filterDescriptor, yD, convolutionDescriptor, xD, ConvolutionBwdDataAlgo_t, &workSpaceSizeInBytes);

-  // TODO: swap correctly all args, starting from 3rd
   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(cudnnHandle_t handle, const void* alpha, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionBwdDataAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
   // MIOPEN MIOPEN_EXPORT miopenStatus_t miopenConvolutionBackwardData(miopenHandle_t handle, const void* alpha, const miopenTensorDescriptor_t dyDesc, const void* dy, const miopenTensorDescriptor_t wDesc, const void* w, const miopenConvolutionDescriptor_t convDesc, miopenConvBwdDataAlgorithm_t algo, const void* beta, const miopenTensorDescriptor_t dxDesc, void* dx, void* workSpace, size_t workSpaceSize);
-  // CHECK: status = miopenConvolutionBackwardData(handle, alpha, filterDescriptor, W, yD, dy, convolutionDescriptor, ConvolutionBwdDataAlgo_t, workSpace, workSpaceSizeInBytes, beta, xD, dx);
+  // CHECK: status = miopenConvolutionBackwardData(handle, alpha, yD, dy, filterDescriptor, W, convolutionDescriptor, ConvolutionBwdDataAlgo_t, beta, xD, dx, workSpace, workSpaceSizeInBytes);
   status = cudnnConvolutionBackwardData(handle, alpha, filterDescriptor, W, yD, dy, convolutionDescriptor, ConvolutionBwdDataAlgo_t, workSpace, workSpaceSizeInBytes, beta, xD, dx);

   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* beta, const cudnnTensorDescriptor_t dbDesc, void* db);
@@ -625,16 +621,14 @@ int main() {
   // CHECK: status = miopenActivationBackward(handle, activationDescriptor, alpha, yD, y, dyD, dy, xD, x, beta, dxD, dx);
   status = cudnnActivationBackward(handle, activationDescriptor, alpha, yD, y, dyD, dy, xD, x, beta, dxD, dx);

-  // TODO: Move args: 1 to 7, 2 to 8
   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
   // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenSoftmaxForward_V2(miopenHandle_t handle, const void* alpha, const miopenTensorDescriptor_t xDesc, const void* x, const void* beta, const miopenTensorDescriptor_t yDesc, void* y, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode);
-  // CHECK: status = miopenSoftmaxForward_V2(handle, softmaxAlgorithm, softmaxMode, alpha, xD, x, beta, yD, y);
+  // CHECK: status = miopenSoftmaxForward_V2(handle, alpha, xD, x, beta, yD, y, softmaxAlgorithm, softmaxMode);
   status = cudnnSoftmaxForward(handle, softmaxAlgorithm, softmaxMode, alpha, xD, x, beta, yD, y);

-  // TODO: Move args: 1 to 7, 2 to 8
   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
   // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenSoftmaxBackward_V2(miopenHandle_t handle, const void* alpha, const miopenTensorDescriptor_t yDesc, const void* y, const miopenTensorDescriptor_t dyDesc, const void* dy, const void* beta, const miopenTensorDescriptor_t dxDesc, void* dx, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode);
-  // CHECK: status = miopenSoftmaxBackward_V2(handle, softmaxAlgorithm, softmaxMode, alpha, yD, y, dyD, dy, beta, dxD, dx);
+  // CHECK: status = miopenSoftmaxBackward_V2(handle, alpha, yD, y, dyD, dy, beta, dxD, dx, softmaxAlgorithm, softmaxMode);
   status = cudnnSoftmaxBackward(handle, softmaxAlgorithm, softmaxMode, alpha, yD, y, dyD, dy, beta, dxD, dx);

   // CUDA: cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(cudnnHandle_t handle, const void* alpha1, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* alpha2, const cudnnTensorDescriptor_t zDesc, const void* z, const cudnnTensorDescriptor_t biasDesc, const void* bias, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t yDesc, void* y);
@@ -696,10 +690,9 @@ int main() {
   // CHECK: status = miopenRNNBackwardData(handle, RNNDescriptor, seqLength, &yD, y, &dyD, dy, dhyD, dhy, dcyD, dcy, filterDescriptor, W, hxD, hx, cxD, cx, &dxD, dx, dhxD, dhx, dcxD, dcx, workSpace, workSpaceSizeInBytes, &reserveSpace, reserveSpaceNumBytes);
   status = cudnnRNNBackwardData(handle, RNNDescriptor, seqLength, &yD, y, &dyD, dy, dhyD, dhy, dcyD, dcy, filterDescriptor, W, hxD, hx, cxD, cx, &dxD, dx, dhxD, dhx, dcxD, dcx, workSpace, workSpaceSizeInBytes, &reserveSpace, reserveSpaceNumBytes);

-  // TODO: Move/swap args starting from 9th
   // CUDA: CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t* yDesc, const void* y, const void* workSpace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, const void* reserveSpace, size_t reserveSpaceSizeInBytes);
   // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t* xDesc, const void* x, const miopenTensorDescriptor_t hxDesc, const void* hx, const miopenTensorDescriptor_t* yDesc, const void* y, const miopenTensorDescriptor_t dwDesc, void* dw, void* workSpace, size_t workSpaceNumBytes, const void* reserveSpace, size_t reserveSpaceNumBytes);
-  // CHECK: status = miopenRNNBackwardWeights(handle, RNNDescriptor, seqLength, &xD, x, hxD, hx, &yD, y, workSpace, workSpaceSizeInBytes, filterDescriptor, dw, &reserveSpace, reserveSpaceNumBytes);
+  // CHECK: status = miopenRNNBackwardWeights(handle, RNNDescriptor, seqLength, &xD, x, hxD, hx, &yD, y, filterDescriptor, dw, workSpace, workSpaceSizeInBytes, &reserveSpace, reserveSpaceNumBytes);
   status = cudnnRNNBackwardWeights(handle, RNNDescriptor, seqLength, &xD, x, hxD, hx, &yD, y, workSpace, workSpaceSizeInBytes, filterDescriptor, dw, &reserveSpace, reserveSpaceNumBytes);

   // CUDA: CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, void* workSpace, size_t workSpaceSizeInBytes);
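
Illustration (not part of the patch): a minimal, self-contained sketch of the batch-move semantics described in the commit message, modeled on a plain std::vector<std::string> rather than on clang source replacements. moveToEnd() and its parameter names are illustrative only, not HIPIFY APIs.

#include <iostream>
#include <string>
#include <vector>

// Move `count` consecutive args, starting at zero-based index `from`, to the end
// of the argument list in one step, preserving their relative order (a batch move).
static std::vector<std::string> moveToEnd(std::vector<std::string> args,
                                          std::size_t from, std::size_t count) {
  std::vector<std::string> block(args.begin() + from, args.begin() + from + count);
  args.erase(args.begin() + from, args.begin() + from + count);
  args.insert(args.end(), block.begin(), block.end());
  return args;
}

int main() {
  // cudnnSoftmaxForward argument order (see the cudnn2miopen.cu test above).
  std::vector<std::string> args = {"handle", "softmaxAlgorithm", "softmaxMode",
                                   "alpha", "xD", "x", "beta", "yD", "y"};
  // {1, {e_move_argument, cw_None, "", 9, 2}}: move 2 args starting at index 1
  // past the last argument, yielding the miopenSoftmaxForward_V2 order.
  for (const auto &a : moveToEnd(args, 1, 2)) std::cout << a << ' ';
  std::cout << '\n';  // handle alpha xD x beta yD y softmaxAlgorithm softmaxMode
  return 0;
}

As the commit message notes, expressing the same reordering as two independent single moves to the same destination is order-dependent (one moved argument overwrites the other), which is why the batch form is used.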