From a6007b6b62a2a37c8c55ee01550b30abd4774acd Mon Sep 17 00:00:00 2001 From: cosunae Date: Fri, 22 Mar 2019 15:32:05 +0100 Subject: [PATCH] Fix tmp idx (#185) * fix tmp index * limit the launch bounds with maximum number of blocks per sm --- src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp | 63 +++++++++---- src/dawn/CodeGen/Cuda/CodeGeneratorHelper.h | 27 ++++-- src/dawn/CodeGen/Cuda/CudaCodeGen.cpp | 10 +- src/dawn/CodeGen/Cuda/MSCodeGen.cpp | 91 +++++++------------ src/dawn/CodeGen/Cuda/MSCodeGen.hpp | 2 +- src/dawn/Compiler/Options.inc | 2 + src/dawn/IIR/Cache.cpp | 4 + src/dawn/IIR/Cache.h | 2 + src/dawn/IIR/DoMethod.cpp | 2 +- src/dawn/IIR/Field.cpp | 2 +- src/dawn/IIR/Field.h | 2 +- src/dawn/IIR/MultiStage.cpp | 21 ++++- src/dawn/IIR/MultiStage.h | 12 +++ src/dawn/IIR/Stage.cpp | 2 +- src/dawn/IIR/Stencil.cpp | 15 ++- src/dawn/IIR/Stencil.h | 6 +- 16 files changed, 165 insertions(+), 98 deletions(-) diff --git a/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp b/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp index eeef1905c..5bd65b072 100644 --- a/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp +++ b/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.cpp @@ -34,15 +34,6 @@ std::string CodeGeneratorHelper::indexIteratorName(Array3i dims) { return n_; } -bool CodeGeneratorHelper::useNormalIteratorForTmp(const std::unique_ptr& ms) { - for(const auto& stage : ms->getChildren()) { - if(!stage->getExtents().isHorizontalPointwise()) { - return false; - } - } - return true; -} - std::string CodeGeneratorHelper::buildCudaKernelName( const std::shared_ptr& instantiation, const std::unique_ptr& ms) { @@ -50,15 +41,17 @@ std::string CodeGeneratorHelper::buildCudaKernelName( "_ms" + std::to_string(ms->getID()) + "_kernel"; } -std::vector CodeGeneratorHelper::generateStrideArguments(const IndexRange > &nonTempFields, - const IndexRange > &tempFields, +std::vector CodeGeneratorHelper::generateStrideArguments( + const IndexRange>& nonTempFields, + const IndexRange>& tempFields, const std::shared_ptr& stencilInstantiation, const std::unique_ptr& ms, CodeGeneratorHelper::FunctionArgType funArg) { std::unordered_set processedDims; std::vector strides; for(auto field : nonTempFields) { - const auto fieldName = stencilInstantiation->getFieldNameFromAccessID((*field).second.getAccessID()); + const auto fieldName = + stencilInstantiation->getFieldNameFromAccessID((*field).second.getAccessID()); Array3i dims{-1, -1, -1}; // TODO this is a hack, we need to have dimensions also at ms level for(const auto& fieldInfo : ms->getParent()->getFields()) { @@ -118,6 +111,42 @@ iir::Extents CodeGeneratorHelper::computeTempMaxWriteExtent(iir::Stencil const& return maxExtents; } +bool CodeGeneratorHelper::hasAccessIDMemAccess(const int accessID, + const std::unique_ptr& stencil) { + + for(const auto& ms : stencil->getChildren()) { + if(!ms->hasField(accessID)) + continue; + if(!ms->isCached(accessID)) + return true; + if(ms->getCache(accessID).getCacheType() == iir::Cache::CacheTypeKind::bypass) { + return true; + } + if(ms->getCache(accessID).getCacheIOPolicy() != iir::Cache::CacheIOPolicy::local) { + return true; + } + } + return false; +} + +bool CodeGeneratorHelper::useTemporaries( + const std::unique_ptr& stencil, + const std::shared_ptr& stencilInstantiation) { + + const auto& fields = stencil->getFields(); + const bool containsMemTemporary = + (find_if(fields.begin(), fields.end(), + [&](const std::pair& field) { + const int accessID = field.second.field.getAccessID(); + if(!stencilInstantiation->isTemporaryField(accessID)) + return false; + // we dont need to use temporaries infrastructure for fields that are cached + return hasAccessIDMemAccess(accessID, stencil); + }) != fields.end()); + + return containsMemTemporary && stencil->containsRedundantComputations(); +} + void CodeGeneratorHelper::generateFieldAccessDeref( std::stringstream& ss, const std::unique_ptr& ms, const std::shared_ptr& instantiation, const int accessID, @@ -126,15 +155,15 @@ void CodeGeneratorHelper::generateFieldAccessDeref( bool isTemporary = instantiation->isTemporaryField(accessID); DAWN_ASSERT(fieldIndexMap.count(accessID) || isTemporary); const auto& field = ms->getField(accessID); - bool useTmpIndex_ = (isTemporary && !useNormalIteratorForTmp(ms)); - std::string index = useTmpIndex_ ? "idx_tmp" : "idx" + CodeGeneratorHelper::indexIteratorName( - fieldIndexMap.at(accessID)); + bool useTmpIndex = isTemporary && useTemporaries(ms->getParent(), instantiation); + std::string index = useTmpIndex ? "idx_tmp" : "idx" + CodeGeneratorHelper::indexIteratorName( + fieldIndexMap.at(accessID)); // temporaries have all 3 dimensions Array3i iter = isTemporary ? Array3i{1, 1, 1} : fieldIndexMap.at(accessID); - std::string offsetStr = RangeToString("+", "", "", true)( - CodeGeneratorHelper::ijkfyOffset(offset, useTmpIndex_, iter)); + std::string offsetStr = + RangeToString("+", "", "", true)(CodeGeneratorHelper::ijkfyOffset(offset, useTmpIndex, iter)); const bool readOnly = (field.getIntend() == iir::Field::IntendKind::IK_Input); ss << (readOnly ? "__ldg(&(" : "") << accessName << (offsetStr.empty() ? "[" + index + "]" : ("[" + index + "+" + offsetStr + "]")) diff --git a/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.h b/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.h index f6c335b6c..e5f6be50b 100644 --- a/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.h +++ b/src/dawn/CodeGen/Cuda/CodeGeneratorHelper.h @@ -19,8 +19,8 @@ #include "dawn/IIR/StencilInstantiation.h" #include "dawn/Support/Array.h" #include "dawn/Support/IndexRange.h" -#include #include +#include namespace dawn { namespace codegen { @@ -44,9 +44,9 @@ class CodeGeneratorHelper { static std::array ijkfyOffset(const Array3i& offsets, bool isTemporary, const Array3i iteratorDims); - /// @brief returns true if a normal ijk field iterator should be used for temporaries instead of a - /// custom iterator - static bool useNormalIteratorForTmp(const std::unique_ptr& ms); + /// @brief determines wheter an accessID will perform an access to main memory + static bool hasAccessIDMemAccess(const int accessID, + const std::unique_ptr& stencil); /// @brief return true if the ms can be solved in parallel (in the vertical dimension) static bool solveKLoopInParallel(const std::unique_ptr& ms); @@ -55,15 +55,24 @@ class CodeGeneratorHelper { static std::vector computePartitionOfIntervals(const std::unique_ptr& ms); + /// @brief determines whether for code generation, using temporaries will be required. + /// Even if the stencil contains temporaries, in some cases, like when they are local cached, they + /// are not required for code generation. Also in the case of no redundant computations, + /// temporaries will become normal fields + static bool + useTemporaries(const std::unique_ptr& stencil, + const std::shared_ptr& stencilInstantiation); + /// @brief computes the maximum extent required by all temporaries, which will be used for proper /// allocation static iir::Extents computeTempMaxWriteExtent(iir::Stencil const& stencil); - static std::vector generateStrideArguments( - const IndexRange>& nonTempFields, - const IndexRange>& tempFields, - const std::shared_ptr& stencilInstantiation, - const std::unique_ptr& ms, CodeGeneratorHelper::FunctionArgType funArg); + static std::vector + generateStrideArguments(const IndexRange>& nonTempFields, + const IndexRange>& tempFields, + const std::shared_ptr& stencilInstantiation, + const std::unique_ptr& ms, + CodeGeneratorHelper::FunctionArgType funArg); /// @brief compose the cuda kernel name of a stencil instantiation static std::string diff --git a/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp b/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp index 72153c5d9..04e5bcaca 100644 --- a/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp +++ b/src/dawn/CodeGen/Cuda/CudaCodeGen.cpp @@ -385,8 +385,8 @@ void CudaCodeGen::generateStencilWrapperMembers( stencilWrapperClass.addMember(c_gtc() + "meta_data_t", "m_meta_data"); for(int AccessID : stencilInstantiation->getAllocatedFieldAccessIDs()) - stencilWrapperClass.addMember(c_gtc() + "storage_t", - "m_" + stencilInstantiation->getFieldNameFromAccessID(AccessID)); + stencilWrapperClass.addMember( + c_gtc() + "storage_t", "m_" + stencilInstantiation->getFieldNameFromAccessID(AccessID)); } if(!globalsMap.empty()) { @@ -566,7 +566,7 @@ void CudaCodeGen::generateStencilRunMethod( // in some cases (where there are no horizontal extents) we dont use the special tmp index // iterator, but rather a normal 3d field index iterator. In that case we pass temporaries in // the same manner as normal fields - if(CodeGeneratorHelper::useNormalIteratorForTmp(multiStagePtr)) { + if(!CodeGeneratorHelper::useTemporaries(multiStagePtr->getParent(), stencilInstantiation)) { const auto fieldName = stencilInstantiation->getFieldNameFromAccessID((*field).second.getAccessID()); @@ -574,8 +574,8 @@ void CudaCodeGen::generateStencilRunMethod( ".get_storage_info_ptr()->index(" + fieldName + ".begin<0>(), " + fieldName + ".begin<1>()," + fieldName + ".begin<2>()," + fieldName + ".begin<3>(), 0))"; } else { - args = - args + "," + stencilInstantiation->getFieldNameFromAccessID((*field).second.getAccessID()); + args = args + "," + + stencilInstantiation->getFieldNameFromAccessID((*field).second.getAccessID()); } } diff --git a/src/dawn/CodeGen/Cuda/MSCodeGen.cpp b/src/dawn/CodeGen/Cuda/MSCodeGen.cpp index 163992826..d6a5981bf 100644 --- a/src/dawn/CodeGen/Cuda/MSCodeGen.cpp +++ b/src/dawn/CodeGen/Cuda/MSCodeGen.cpp @@ -30,30 +30,12 @@ MSCodeGen::MSCodeGen(std::stringstream& ss, const std::unique_ptrgetParent(), stencilInstantiation) && + ms->hasMemAccessTemporaries()), + cudaKernelName_(CodeGeneratorHelper::buildCudaKernelName(stencilInstantiation_, ms_)), blockSize_(stencilInstantiation_->getIIR()->getBlockSize()), - solveKLoopInParallel_(CodeGeneratorHelper::solveKLoopInParallel(ms_)) { - - // useTmpIndex_ - const auto& fields = ms_->getFields(); - const bool containsTemporary = - (find_if(fields.begin(), fields.end(), [&](const std::pair& field) { - const int accessID = field.second.getAccessID(); - if(!stencilInstantiation_->isTemporaryField(accessID)) - return false; - // we dont need to initialize tmp indices for fields that are cached - if(!cacheProperties_.accessIsCached(accessID)) - return true; - const auto& cache = ms_->getCache(accessID); - if(cache.getCacheIOPolicy() == iir::Cache::CacheIOPolicy::local) { - return false; - } - return true; - }) != fields.end()); - - useTmpIndex_ = containsTemporary && !CodeGeneratorHelper::useNormalIteratorForTmp(ms_); - - cudaKernelName_ = CodeGeneratorHelper::buildCudaKernelName(stencilInstantiation_, ms_); -} + solveKLoopInParallel_(CodeGeneratorHelper::solveKLoopInParallel(ms_)) {} void MSCodeGen::generateIJCacheDecl(MemberFunction& kernel) const { for(const auto& cacheP : ms_->getCaches()) { @@ -117,7 +99,7 @@ MSCodeGen::computeNextLevelToProcess(const iir::Interval& interval, void MSCodeGen::generateTmpIndexInit(MemberFunction& kernel) const { - if(!useTmpIndex_) + if(!useCodeGenTemporaries_) return; auto maxExtentTmps = CodeGeneratorHelper::computeTempMaxWriteExtent(*(ms_->getParent())); @@ -698,23 +680,14 @@ void MSCodeGen::generateCudaKernelCode() { // of // tmp storages (allocation, iterators, etc) auto tempFieldsNonLocalCached = - makeRange(fields, std::function const&)>([&]( - std::pair const& p) { - const int accessID = p.first; - if(!stencilInstantiation_->isTemporaryField(p.second.getAccessID())) - return false; - if(!cacheProperties_.accessIsCached(accessID)) - return true; - if(ms_->getCache(accessID).getCacheIOPolicy() == iir::Cache::CacheIOPolicy::local) - return false; - - return true; - })); - - const bool containsTemporary = !tempFieldsNonLocalCached.empty(); + makeRange(fields, std::function const&)>( + [&](std::pair const& p) { + const int accessID = p.first; + return ms_->isMemAccessTemporary(accessID); + })); std::string fnDecl = ""; - if(containsTemporary && useTmpIndex_) + if(useCodeGenTemporaries_) fnDecl = "template"; fnDecl = fnDecl + "__global__ void"; @@ -723,9 +696,13 @@ void MSCodeGen::generateCudaKernelCode() { (maxExtents[0].Minus < 0 ? 1 : 0) + (maxExtents[0].Plus > 0 ? 1 : 0)); int nSM = stencilInstantiation_->getOptimizerContext()->getOptions().nsms; + int maxBlocksPerSM = stencilInstantiation_->getOptimizerContext()->getOptions().maxBlocksPerSM; std::string domain_size = stencilInstantiation_->getOptimizerContext()->getOptions().domain_size; if(nSM > 0 && !domain_size.empty()) { + if(maxBlocksPerSM <= 0) { + throw std::runtime_error("--max-blocks-sm must be defined"); + } std::istringstream idomain_size(domain_size); std::string arg; getline(idomain_size, arg, ','); @@ -739,7 +716,7 @@ void MSCodeGen::generateCudaKernelCode() { minBlocksPerSM /= nSM; fnDecl = fnDecl + " __launch_bounds__(" + std::to_string(maxThreadsPerBlock) + "," + - std::to_string(minBlocksPerSM) + ") "; + std::to_string(std::min(maxBlocksPerSM, minBlocksPerSM)) + ") "; } else { fnDecl = fnDecl + " __launch_bounds__(" + std::to_string(maxThreadsPerBlock) + ") "; } @@ -763,19 +740,21 @@ void MSCodeGen::generateCudaKernelCode() { // first we construct non temporary field arguments for(auto field : nonTempFields) { - cudaKernel.addArg("gridtools::clang::float_type * const " + - stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID())); + cudaKernel.addArg( + "gridtools::clang::float_type * const " + + stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID())); } // then the temporary field arguments for(auto field : tempFieldsNonLocalCached) { - if(useTmpIndex_) { - cudaKernel.addArg(c_gt() + "data_view" + - stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID()) + - "_dv"); + if(useCodeGenTemporaries_) { + cudaKernel.addArg( + c_gt() + "data_view" + + stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID()) + "_dv"); } else { - cudaKernel.addArg("gridtools::clang::float_type * const " + - stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID())); + cudaKernel.addArg( + "gridtools::clang::float_type * const " + + stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID())); } } @@ -786,7 +765,7 @@ void MSCodeGen::generateCudaKernelCode() { cudaKernel.addComment("Start kernel"); // extract raw pointers of temporaries from the data views - if(useTmpIndex_) { + if(useCodeGenTemporaries_) { for(auto field : tempFieldsNonLocalCached) { std::string fieldName = stencilInstantiation_->getFieldNameFromAccessID((*field).second.getAccessID()); @@ -917,9 +896,7 @@ void MSCodeGen::generateCudaKernelCode() { generateIJCacheIndexInit(cudaKernel); } - if(containsTemporary) { - generateTmpIndexInit(cudaKernel); - } + generateTmpIndexInit(cudaKernel); // compute the partition of the intervals auto partitionIntervals = CodeGeneratorHelper::computePartitionOfIntervals(ms_); @@ -959,7 +936,7 @@ void MSCodeGen::generateCudaKernelCode() { intervalDiffToString(kmin, "ksize - 1") + ")"); } } - if(useTmpIndex_ && !kmin.null() && !((solveKLoopInParallel_) && firstInterval)) { + if(useCodeGenTemporaries_ && !kmin.null() && !((solveKLoopInParallel_) && firstInterval)) { cudaKernel.addComment("jump tmp iterators to match the beginning of next interval"); cudaKernel.addStatement("idx_tmp += kstride_tmp*(" + intervalDiffToString(kmin, "ksize - 1") + ")"); @@ -986,12 +963,12 @@ void MSCodeGen::generateCudaKernelCode() { } } } - if(useTmpIndex_) { + if(useCodeGenTemporaries_) { cudaKernel.addComment("jump tmp iterators to match the intersection of beginning of next " "interval and the parallel execution block "); cudaKernel.addStatement("idx_tmp += max(" + intervalDiffToString(kmin, "ksize - 1") + - ", kstride_tmp * blockIdx.z * " + std::to_string(blockSize_[2]) + - ")"); + ", blockIdx.z * " + std::to_string(blockSize_[2]) + + ") * kstride_tmp"); } } @@ -1075,7 +1052,7 @@ void MSCodeGen::generateCudaKernelCode() { CodeGeneratorHelper::generateStrideName(2, index.second)); } } - if(useTmpIndex_) { + if(useCodeGenTemporaries_) { cudaKernel.addStatement("idx_tmp " + incStr + " kstride_tmp"); } }); diff --git a/src/dawn/CodeGen/Cuda/MSCodeGen.hpp b/src/dawn/CodeGen/Cuda/MSCodeGen.hpp index 2d2f3473e..3b47bd7b6 100644 --- a/src/dawn/CodeGen/Cuda/MSCodeGen.hpp +++ b/src/dawn/CodeGen/Cuda/MSCodeGen.hpp @@ -49,7 +49,7 @@ class MSCodeGen { const std::unique_ptr& ms_; const std::shared_ptr stencilInstantiation_; const CacheProperties& cacheProperties_; - bool useTmpIndex_; + bool useCodeGenTemporaries_; std::string cudaKernelName_; Array3ui blockSize_; const bool solveKLoopInParallel_; diff --git a/src/dawn/Compiler/Options.inc b/src/dawn/Compiler/Options.inc index b1ded55c8..c6ac03c9b 100644 --- a/src/dawn/Compiler/Options.inc +++ b/src/dawn/Compiler/Options.inc @@ -42,6 +42,8 @@ OPT(std::string, Backend, "gridtools", "backend", "", "\n - cuda = optimized cuda", "", true, false) OPT(int, nsms, 0, "nsms", "", "Number of (CUDA) SMs", "", true, false) +OPT(int, maxBlocksPerSM, 0, "max-blocks-sm", "", + "Maximum number of blocks that can be registered per SM", "", true, false) OPT(std::string, domain_size, "", "domain-size", "", "domain size for compiler optimization", "", true, false) OPT(std::string, block_size, "", "block-size", "", diff --git a/src/dawn/IIR/Cache.cpp b/src/dawn/IIR/Cache.cpp index 6b763e42c..8211f678b 100644 --- a/src/dawn/IIR/Cache.cpp +++ b/src/dawn/IIR/Cache.cpp @@ -67,6 +67,10 @@ Interval Cache::getWindowInterval(Interval::Bound bound) const { return interval_->crop(bound, {window_->m_m, window_->m_p}); } +bool Cache::requiresMemMemoryAccess() const { + return (policy_ != CacheIOPolicy::local) || (type_ == CacheTypeKind::bypass); +} + json::json Cache::jsonDump() const { json::json node; node["accessid"] = AccessID_; diff --git a/src/dawn/IIR/Cache.h b/src/dawn/IIR/Cache.h index 77abe6ee2..7f4988ec3 100644 --- a/src/dawn/IIR/Cache.h +++ b/src/dawn/IIR/Cache.h @@ -89,6 +89,8 @@ class Cache { /// @brief determines if the cache specification requires a window bool requiresWindow() const; + bool requiresMemMemoryAccess() const; + /// @name Comparison operator /// @{ bool operator==(const Cache& other) const { diff --git a/src/dawn/IIR/DoMethod.cpp b/src/dawn/IIR/DoMethod.cpp index 8d4e97e87..258636044 100644 --- a/src/dawn/IIR/DoMethod.cpp +++ b/src/dawn/IIR/DoMethod.cpp @@ -103,7 +103,7 @@ json::json DoMethod::jsonDump(const StencilInstantiation& instantiation) const { json::json fieldsJson; for(const auto& field : derivedInfo_.fields_) { - fieldsJson[instantiation.getNameFromAccessID(field.first)] = field.second.jsonDump(&instantiation); + fieldsJson[instantiation.getNameFromAccessID(field.first)] = field.second.jsonDump(); } node["Fields"] = fieldsJson; diff --git a/src/dawn/IIR/Field.cpp b/src/dawn/IIR/Field.cpp index b57c42f80..89f1a60b3 100644 --- a/src/dawn/IIR/Field.cpp +++ b/src/dawn/IIR/Field.cpp @@ -24,7 +24,7 @@ Interval Field::computeAccessedInterval() const { return accessedInterval; } -json::json Field::jsonDump(const StencilInstantiation* instantiation) const { +json::json Field::jsonDump() const { json::json node; node["accessID"] = accessID_; node["intend"] = intend_; diff --git a/src/dawn/IIR/Field.h b/src/dawn/IIR/Field.h index 70f6433f0..dc892753d 100644 --- a/src/dawn/IIR/Field.h +++ b/src/dawn/IIR/Field.h @@ -91,7 +91,7 @@ class Field { return extentsRB_.getWriteExtents(); } - json::json jsonDump(const StencilInstantiation* instantiation) const; + json::json jsonDump() const; inline Extents const& getExtents() const { return extents_.getExtents(); } inline Extents const& getExtentsRB() const { return extentsRB_.getExtents(); } diff --git a/src/dawn/IIR/MultiStage.cpp b/src/dawn/IIR/MultiStage.cpp index 41dffb82f..128dc949a 100644 --- a/src/dawn/IIR/MultiStage.cpp +++ b/src/dawn/IIR/MultiStage.cpp @@ -415,7 +415,7 @@ json::json MultiStage::jsonDump(const StencilInstantiation& instantiation) const node["Loop"] = loopOrderToString(loopOrder_); json::json fieldsJson; for(const auto& field : derivedInfo_.fields_) { - fieldsJson[instantiation.getNameFromAccessID(field.first)] = field.second.jsonDump(&instantiation); + fieldsJson[instantiation.getNameFromAccessID(field.first)] = field.second.jsonDump(); } node["Fields"] = fieldsJson; @@ -432,6 +432,25 @@ json::json MultiStage::jsonDump(const StencilInstantiation& instantiation) const } return node; } + +bool MultiStage::hasMemAccessTemporaries() const { + for(const auto& field : derivedInfo_.fields_) { + if(isMemAccessTemporary(field.first)) { + return true; + } + } + return false; +} + +bool MultiStage::isMemAccessTemporary(const int accessID) const { + if(!stencilInstantiation_.isTemporaryField(accessID)) + return false; + if(!derivedInfo_.caches_.count(accessID)) + return true; + return (derivedInfo_.caches_.at(accessID).requiresMemMemoryAccess()); +} +bool MultiStage::hasField(const int accessID) const { return derivedInfo_.fields_.count(accessID); } + bool MultiStage::isEmptyOrNullStmt() const { for(const auto& stage : getChildren()) { if(!(stage)->isEmptyOrNullStmt()) { diff --git a/src/dawn/IIR/MultiStage.h b/src/dawn/IIR/MultiStage.h index 7263fea01..a9c0f279f 100644 --- a/src/dawn/IIR/MultiStage.h +++ b/src/dawn/IIR/MultiStage.h @@ -165,8 +165,20 @@ class MultiStage : public IIRNode { /// @brief Compute and return the pairs used for a given interval std::unordered_map computeFieldsAtInterval(const iir::Interval& interval) const; + /// @brief determines whether an accessID corresponds to a temporary that will perform accesses to + /// main memory + bool isMemAccessTemporary(const int accessID) const; + + /// @brief true if there is at least a temporary that requires access to main mem + bool hasMemAccessTemporaries() const; + + /// @brief determines whether the multistage contains the field with an accessID + bool hasField(const int accessID) const; + + /// @brief field getter with an accessID const Field& getField(int accessID) const; + /// @brief computes the collection of fields of the multistage on the fly (returns copy) std::unordered_map computeFieldsOnTheFly() const; /// @brief Get the enclosing interval of all access to temporaries diff --git a/src/dawn/IIR/Stage.cpp b/src/dawn/IIR/Stage.cpp index 330f877ae..2c1ac257d 100644 --- a/src/dawn/IIR/Stage.cpp +++ b/src/dawn/IIR/Stage.cpp @@ -39,7 +39,7 @@ json::json Stage::jsonDump(const StencilInstantiation& instantiation) const { json::json node; json::json fieldsJson; for(const auto& field : derivedInfo_.fields_) { - fieldsJson[instantiation.getNameFromAccessID(field.first)] = field.second.jsonDump(&instantiation); + fieldsJson[instantiation.getNameFromAccessID(field.first)] = field.second.jsonDump(); } node["Fields"] = fieldsJson; std::stringstream ss; diff --git a/src/dawn/IIR/Stencil.cpp b/src/dawn/IIR/Stencil.cpp index b99b52e38..6bd71d073 100644 --- a/src/dawn/IIR/Stencil.cpp +++ b/src/dawn/IIR/Stencil.cpp @@ -100,10 +100,10 @@ bool Stencil::StatementPosition::inSameDoMethod(const Stencil::StatementPosition return StagePos == other.StagePos && DoMethodIndex == other.DoMethodIndex; } -json::json Stencil::FieldInfo::jsonDump(const StencilInstantiation* instantiation) const { +json::json Stencil::FieldInfo::jsonDump() const { json::json node; node["dim"] = format("[%i,%i,%i]", Dimensions[0], Dimensions[1], Dimensions[2]); - node["field"] = field.jsonDump(instantiation); + node["field"] = field.jsonDump(); node["IsTemporary"] = IsTemporary; return node; } @@ -113,7 +113,7 @@ json::json Stencil::jsonDump() const { node["ID"] = std::to_string(StencilID_); json::json fieldsJson; for(const auto& f : derivedInfo_.fields_) { - fieldsJson[f.second.Name] = f.second.jsonDump(&stencilInstantiation_); + fieldsJson[f.second.Name] = f.second.jsonDump(); } node["Fields"] = fieldsJson; @@ -125,6 +125,15 @@ json::json Stencil::jsonDump() const { return node; } +bool Stencil::containsRedundantComputations() const { + for(const auto& stage : iterateIIROver(*this)) { + if(!stage->getExtents().isHorizontalPointwise()) { + return true; + } + } + return false; +} + void Stencil::updateFromChildren() { derivedInfo_.fields_.clear(); std::unordered_map fields; diff --git a/src/dawn/IIR/Stencil.h b/src/dawn/IIR/Stencil.h index 70d84baad..f7ecec513 100644 --- a/src/dawn/IIR/Stencil.h +++ b/src/dawn/IIR/Stencil.h @@ -55,7 +55,7 @@ class Stencil : public IIRNode { Array3i Dimensions; Field field; bool IsTemporary; - json::json jsonDump(const StencilInstantiation* instantiation) const; + json::json jsonDump() const; }; private: @@ -255,6 +255,10 @@ class Stencil : public IIRNode { void setStageDependencyGraph(const std::shared_ptr& stageDAG); /// @} + /// @brief determines whether the stencil contains redundant computations, i.e. if any of the + /// stages has a non null extent + bool containsRedundantComputations() const; + /// @brief Get the axis of the stencil (i.e the interval of all stages) /// /// @param useExtendedInterval Merge the extended intervals