diff --git a/source/adios2/engine/bp5/BP5Writer.cpp b/source/adios2/engine/bp5/BP5Writer.cpp index cb4da83e57..797690be4a 100644 --- a/source/adios2/engine/bp5/BP5Writer.cpp +++ b/source/adios2/engine/bp5/BP5Writer.cpp @@ -33,7 +33,8 @@ BP5Writer::BP5Writer(IO &io, const std::string &name, const Mode mode, helper::Comm comm) : Engine("BP5Writer", io, name, mode, std::move(comm)), m_BP5Serializer(), m_FileDataManager(m_Comm), m_FileMetadataManager(m_Comm), - m_FileMetadataIndexManager(m_Comm), m_FileMetaMetadataManager(m_Comm) + m_FileMetadataIndexManager(m_Comm), m_FileMetaMetadataManager(m_Comm), + m_Profiler(m_Comm) { PERFSTUBS_SCOPED_TIMER("BP5Writer::Open"); m_IO.m_ReadStreaming = false; @@ -67,7 +68,9 @@ size_t BP5Writer::CurrentStep() const { return m_WriterStep; } void BP5Writer::PerformPuts() { PERFSTUBS_SCOPED_TIMER("BP5Writer::PerformPuts"); + m_Profiler.Start("PP"); m_BP5Serializer.PerformPuts(); + m_Profiler.Stop("PP"); return; } @@ -351,7 +354,7 @@ void BP5Writer::MarshalAttributes() void BP5Writer::EndStep() { PERFSTUBS_SCOPED_TIMER("BP5Writer::EndStep"); - + m_Profiler.Start("endstep"); MarshalAttributes(); // true: advances step @@ -361,7 +364,9 @@ void BP5Writer::EndStep() * AttributeEncodeBuffer and the data encode Vector */ /* the first */ + m_Profiler.Start("AWD"); WriteData(TSInfo.DataBuffer); + m_Profiler.Stop("AWD"); m_ThisTimestepDataSize += TSInfo.DataBuffer->Size(); @@ -380,8 +385,11 @@ void BP5Writer::EndStep() TotalSize += n; RecvBuffer->resize(TotalSize); } + + m_Profiler.Start("meta_gather"); m_Comm.GathervArrays(MetaBuffer.data(), LocalSize, RecvCounts.data(), RecvCounts.size(), RecvBuffer->data(), 0); + m_Profiler.Stop("meta_gather"); if (m_Comm.Rank() == 0) { @@ -413,6 +421,7 @@ void BP5Writer::EndStep() WriteMetadataFileIndex(ThisMetaDataPos, ThisMetaDataSize); } delete RecvBuffer; + m_Profiler.Stop("endstep"); } // PRIVATE @@ -616,10 +625,12 @@ void BP5Writer::InitTransports() } } + bool useProfiler = true; + if (m_IAmWritingData) { m_FileDataManager.OpenFiles(m_SubStreamNames, m_OpenMode, - m_IO.m_TransportsParameters, false, + m_IO.m_TransportsParameters, useProfiler, *DataWritingComm); } @@ -637,14 +648,16 @@ void BP5Writer::InitTransports() if (m_Comm.Rank() == 0) { m_FileMetaMetadataManager.OpenFiles(m_MetaMetadataFileNames, m_OpenMode, - m_IO.m_TransportsParameters, false); + m_IO.m_TransportsParameters, + useProfiler); m_FileMetadataManager.OpenFiles(m_MetadataFileNames, m_OpenMode, - m_IO.m_TransportsParameters, false); + m_IO.m_TransportsParameters, + useProfiler); m_FileMetadataIndexManager.OpenFiles( m_MetadataIndexFileNames, m_OpenMode, m_IO.m_TransportsParameters, - false); + useProfiler); if (m_DrainBB) { @@ -675,7 +688,7 @@ void BP5Writer::InitTransports() auto emptyComm = helper::Comm(); transportman::TransportMan tm(emptyComm); tm.OpenFiles(versionNames, Mode::Write, m_IO.m_TransportsParameters, - false); + useProfiler); char b[1] = {'5'}; tm.WriteFiles(b, 1); } @@ -900,6 +913,85 @@ void BP5Writer::DoClose(const int transportIndex) // close metadata index file m_FileMetadataIndexManager.CloseFiles(); } + + FlushProfiler(); +} + +void BP5Writer::FlushProfiler() +{ + auto transportTypes = m_FileDataManager.GetTransportsTypes(); + + // find first File type output, where we can write the profile + int fileTransportIdx = -1; + for (size_t i = 0; i < transportTypes.size(); ++i) + { + if (transportTypes[i].compare(0, 4, "File") == 0) + { + fileTransportIdx = static_cast(i); + } + } + + auto transportProfilers = m_FileDataManager.GetTransportsProfilers(); + + auto transportTypesMD = m_FileMetadataManager.GetTransportsTypes(); + auto transportProfilersMD = m_FileMetadataManager.GetTransportsProfilers(); + + transportTypes.insert(transportTypes.end(), transportTypesMD.begin(), + transportTypesMD.end()); + + transportProfilers.insert(transportProfilers.end(), + transportProfilersMD.begin(), + transportProfilersMD.end()); + + // m_Profiler.WriteOut(transportTypes, transportProfilers); + + const std::string lineJSON( + m_Profiler.GetRankProfilingJSON(transportTypes, transportProfilers) + + ",\n"); + + const std::vector profilingJSON( + m_Profiler.AggregateProfilingJSON(lineJSON)); + + if (m_RankMPI == 0) + { + // std::cout << "write profiling file!" << std::endl; + std::string profileFileName; + if (m_DrainBB) + { + // auto bpTargetNames = m_BP4Serializer.GetBPBaseNames({m_Name}); + std::vector bpTargetNames = {m_Name}; + if (fileTransportIdx > -1) + { + profileFileName = + bpTargetNames[fileTransportIdx] + "/profiling.json"; + } + else + { + profileFileName = bpTargetNames[0] + "_profiling.json"; + } + m_FileDrainer.AddOperationWrite( + profileFileName, profilingJSON.size(), profilingJSON.data()); + } + else + { + transport::FileFStream profilingJSONStream(m_Comm); + // auto bpBaseNames = m_BP4Serializer.GetBPBaseNames({m_BBName}); + std::vector bpBaseNames = {m_Name}; + if (fileTransportIdx > -1) + { + profileFileName = + bpBaseNames[fileTransportIdx] + "/profiling.json"; + } + else + { + profileFileName = bpBaseNames[0] + "_profiling.json"; + } + profilingJSONStream.Open(profileFileName, Mode::Write); + profilingJSONStream.Write(profilingJSON.data(), + profilingJSON.size()); + profilingJSONStream.Close(); + } + } } /*write the content of metadata index file*/ diff --git a/source/adios2/engine/bp5/BP5Writer.h b/source/adios2/engine/bp5/BP5Writer.h index c259f32661..389eb356d0 100644 --- a/source/adios2/engine/bp5/BP5Writer.h +++ b/source/adios2/engine/bp5/BP5Writer.h @@ -176,6 +176,8 @@ class BP5Writer : public BP5Engine, public core::Engine template void PerformPutCommon(Variable &variable); + void FlushProfiler(); + /** manages all communication tasks in aggregation */ aggregator::MPIAggregator *m_Aggregator; // points to one of these below aggregator::MPIShmChain m_AggregatorTwoLevelShm; @@ -185,6 +187,8 @@ class BP5Writer : public BP5Engine, public core::Engine helper::Comm *DataWritingComm; // processes that write the same data file bool m_IAmWritingDataHeader = false; + adios2::profiling::JSONProfiler m_Profiler; + private: // updated during WriteMetaData uint64_t m_MetaDataPos = 0; diff --git a/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp b/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp index d1d9264ba5..6d8aaabac1 100644 --- a/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp +++ b/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp @@ -9,6 +9,7 @@ */ #include "IOChrono.h" +#include "adios2/helper/adiosMemory.h" namespace adios2 { @@ -31,5 +32,136 @@ void IOChrono::Stop(const std::string process) } } +// +// class JSON Profiler +// +JSONProfiler::JSONProfiler(helper::Comm const &comm) : m_Comm(comm) +{ + m_Profiler.m_IsActive = true; // default is true + + AddTimerWatch("buffering"); + // xAddTimerWatch("memcpy"); + AddTimerWatch("endstep"); + AddTimerWatch("PP"); + // AddTimerWatch("meta_merge"); + AddTimerWatch("meta_gather"); + // AddTimerWatch("meta_ds"); + // AddTimerWatch("meta_s"); + // AddTimerWatch("meta_sort_merge"); + + AddTimerWatch("AWD"); + + m_Profiler.m_Bytes.emplace("buffering", 0); + + m_RankMPI = m_Comm.Rank(); +} + +void JSONProfiler::AddTimerWatch(const std::string &name) +{ + const TimeUnit timerUnit = DefaultTimeUnitEnum; + m_Profiler.m_Timers.emplace(name, profiling::Timer(name, timerUnit)); +} + +std::string JSONProfiler::GetRankProfilingJSON( + const std::vector &transportsTypes, + const std::vector &transportsProfilers) noexcept +{ + auto lf_WriterTimer = [](std::string &rankLog, + const profiling::Timer &timer) { + // rankLog += "\"" + timer.m_Process + "_" + timer.GetShortUnits() + + // "\": " + std::to_string(timer.m_ProcessTime) + ", "; + timer.AddToJsonStr(rankLog); + }; + + // prepare string dictionary per rank + std::string rankLog("{ \"rank\": " + std::to_string(m_RankMPI) + ", "); + + auto &profiler = m_Profiler; + + std::string timeDate(profiler.m_Timers.at("buffering").m_LocalTimeDate); + timeDate.pop_back(); + // avoid whitespace + std::replace(timeDate.begin(), timeDate.end(), ' ', '_'); + + rankLog += "\"start\": \"" + timeDate + "\", "; + // rankLog += "\"threads\": " + std::to_string(m_Parameters.Threads) + ", "; + rankLog += + "\"bytes\": " + std::to_string(profiler.m_Bytes.at("buffering")) + ", "; + + for (const auto &timerPair : profiler.m_Timers) + { + const profiling::Timer &timer = timerPair.second; + // rankLog += "\"" + timer.m_Process + "_" + timer.GetShortUnits() + + // "\": " + std::to_string(timer.m_ProcessTime) + ", "; + timer.AddToJsonStr(rankLog); + } + + const size_t transportsSize = transportsTypes.size(); + + for (unsigned int t = 0; t < transportsSize; ++t) + { + rankLog += "\"transport_" + std::to_string(t) + "\": { "; + rankLog += "\"type\": \"" + transportsTypes[t] + "\", "; + + for (const auto &transportTimerPair : transportsProfilers[t]->m_Timers) + { + lf_WriterTimer(rankLog, transportTimerPair.second); + } + // replace last comma with space + rankLog.pop_back(); + rankLog.pop_back(); + rankLog += " "; + + if (t == transportsSize - 1) // last element + { + rankLog += "}"; + } + else + { + rankLog += "},"; + } + } + rankLog += " }"; // end rank entry + + return rankLog; +} + +std::vector +JSONProfiler::AggregateProfilingJSON(const std::string &rankLog) const +{ + // Gather sizes + const size_t rankLogSize = rankLog.size(); + std::vector rankLogsSizes = m_Comm.GatherValues(rankLogSize); + + // Gatherv JSON per rank + std::vector profilingJSON(3); + const std::string header("[\n"); + const std::string footer("\n]\n"); + size_t gatheredSize = 0; + size_t position = 0; + + if (m_RankMPI == 0) // pre-allocate in destination + { + gatheredSize = std::accumulate(rankLogsSizes.begin(), + rankLogsSizes.end(), size_t(0)); + + profilingJSON.resize(gatheredSize + header.size() + footer.size() - 2); + adios2::helper::CopyToBuffer(profilingJSON, position, header.c_str(), + header.size()); + } + + m_Comm.GathervArrays(rankLog.c_str(), rankLog.size(), rankLogsSizes.data(), + rankLogsSizes.size(), &profilingJSON[position]); + + if (m_RankMPI == 0) // add footer to close JSON + { + position += gatheredSize - 2; + helper::CopyToBuffer(profilingJSON, position, footer.c_str(), + footer.size()); + } + + return profilingJSON; +} + } // end namespace profiling } // end namespace adios2 diff --git a/source/adios2/toolkit/profiling/iochrono/IOChrono.h b/source/adios2/toolkit/profiling/iochrono/IOChrono.h index 2e08ab157c..638b903721 100644 --- a/source/adios2/toolkit/profiling/iochrono/IOChrono.h +++ b/source/adios2/toolkit/profiling/iochrono/IOChrono.h @@ -17,6 +17,7 @@ /// \endcond #include "adios2/common/ADIOSConfig.h" +#include "adios2/helper/adiosComm.h" #include "adios2/toolkit/profiling/iochrono/Timer.h" namespace adios2 @@ -58,6 +59,29 @@ class IOChrono void Stop(const std::string process); }; +class JSONProfiler +{ +public: + JSONProfiler(helper::Comm const &comm); + void Gather(); + void AddTimerWatch(const std::string &); + + void Start(const std::string process) { m_Profiler.Start(process); }; + void Stop(const std::string process) { m_Profiler.Stop(process); }; + + std::string + GetRankProfilingJSON(const std::vector &transportsTypes, + const std::vector + &transportsProfilers) noexcept; + + std::vector AggregateProfilingJSON(const std::string &rankLog) const; + +private: + IOChrono m_Profiler; + int m_RankMPI = 0; + helper::Comm const &m_Comm; +}; + } // end namespace profiling } // end namespace adios diff --git a/source/adios2/toolkit/profiling/iochrono/Timer.cpp b/source/adios2/toolkit/profiling/iochrono/Timer.cpp index 0f6122c3e0..c8f01db724 100644 --- a/source/adios2/toolkit/profiling/iochrono/Timer.cpp +++ b/source/adios2/toolkit/profiling/iochrono/Timer.cpp @@ -21,6 +21,9 @@ Timer::Timer(const std::string process, const TimeUnit timeUnit) : m_Process(process), m_TimeUnit(timeUnit), m_LocalTimeDate(helper::LocalTimeDate()) { + std::size_t found = m_Process.find("gather"); + if (found != std::string::npos) + m_Always = true; } void Timer::Resume() noexcept @@ -33,6 +36,8 @@ void Timer::Pause() { m_ElapsedTime = std::chrono::high_resolution_clock::now(); m_ProcessTime += GetElapsedTime(); + + AddDetail(); } std::string Timer::GetShortUnits() const noexcept diff --git a/source/adios2/toolkit/profiling/iochrono/Timer.h b/source/adios2/toolkit/profiling/iochrono/Timer.h index 115289a2ed..7219ad2eb7 100644 --- a/source/adios2/toolkit/profiling/iochrono/Timer.h +++ b/source/adios2/toolkit/profiling/iochrono/Timer.h @@ -19,11 +19,51 @@ #include "adios2/common/ADIOSConfig.h" #include "adios2/common/ADIOSTypes.h" +#include // myTimer + namespace adios2 { namespace profiling { +static std::chrono::time_point + m_ADIOS2ProgStart = std::chrono::high_resolution_clock::now(); + +class myTimer +{ +public: + myTimer(const std::string &tag, int rank) + { + m_Tag = tag; + m_Rank = rank; + m_Start = std::chrono::high_resolution_clock::now(); + } + + ~myTimer() + { + m_End = std::chrono::high_resolution_clock::now(); + + double relative = std::chrono::duration_cast( + m_Start - m_ADIOS2ProgStart) + .count(); + double micros = std::chrono::duration_cast( + m_End - m_Start) + .count(); + + std::cout << "Timer [" << m_Tag << " on rank =" << m_Rank + << "] start:" << relative / 1000.0 + << " msec, took:" << micros / 1000.0 << " msec\n"; + + std::cout << std::endl; + } + +private: + std::chrono::time_point m_Start; + std::chrono::time_point m_End; + std::string m_Tag; + int m_Rank = 0; +}; + class Timer { @@ -31,6 +71,7 @@ class Timer /** process name */ const std::string m_Process; + bool m_Always = false; /** process elapsed time */ int64_t m_ProcessTime = 0; @@ -61,6 +102,51 @@ class Timer /** Returns TimeUnit as a short std::string */ std::string GetShortUnits() const noexcept; + void AddDetail() + { + m_nCalls++; + double relative = std::chrono::duration_cast( + m_InitialTime - m_ADIOS2ProgStart) + .count(); + double micros = std::chrono::duration_cast( + m_ElapsedTime - m_InitialTime) + .count(); + + if ((micros > 10000) || m_Always) + { + if (m_Details.size() > 0) + m_Details += ","; + + std::ostringstream ss; + + ss << "\"" << relative / 1000.0 << "+" << micros / 1000.0 << "\""; + + m_Details += ss.str(); + } + } + + void AddToJsonStr(std::string &rankLog) const + { + if (0 == m_nCalls) + return; + + rankLog += + "\"" + m_Process + "\":{ \"mus\":" + std::to_string(m_ProcessTime); + rankLog += ", \"nCalls\":" + std::to_string(m_nCalls); + + if (500 > m_nCalls) + { + if (m_Details.size() > 2) + { + rankLog += ", \"trace\":[" + m_Details + "]"; + } + } + rankLog += "}, "; + } + + std::string m_Details; + uint64_t m_nCalls = 0; + private: /** Set at Resume */ std::chrono::time_point m_InitialTime;