diff --git a/CMakeLists.txt b/CMakeLists.txt index b83215e744..efb105598d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,14 +101,6 @@ set_property( CACHE FIN_BACKEND PROPERTY STRINGS # HIP is always required find_package(hip REQUIRED PATHS /opt/rocm) find_package(rocblas REQUIRED PATHS /opt/rocm) -# probably not required -target_flags(HIP_COMPILER_FLAGS hip::device) -# Remove cuda arch flags -string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}") - -message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}") - -add_definitions("-DHIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}") # OpenCL 1.2 if( FIN_BACKEND STREQUAL "OpenCL") @@ -208,6 +200,8 @@ elseif(FIN_BACKEND STREQUAL "HIP") set(MIOPEN_TIDY_ERRORS ALL) endif() +if(NOT MIOPEN_ENABLE_FIN) + message("MIOPEN_ENABLE_FIN is NOT SET") include(ClangTidy) enable_clang_tidy( CHECKS @@ -329,6 +323,9 @@ enable_cppcheck( MIOPEN_USE_MIOPENGEMM=1 __linux__=1 ) +else() + message("MIOPEN_ENABLE_FIN is SET") +endif() set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) diff --git a/Dockerfile b/Dockerfile index 723f52e803..6b6626ffe1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,12 @@ RUN dpkg --add-architecture i386 # Add rocm repository RUN apt-get update RUN DEBIAN_FRONTEND=noninteractive apt-get install -y curl apt-utils wget gnupg2 -RUN curl https://raw.githubusercontent.com/RadeonOpenCompute/ROCm-docker/master/add-rocm.sh | bash + +#RUN curl https://raw.githubusercontent.com/RadeonOpenCompute/ROCm-docker/master/add-rocm.sh | bash +ARG ROCMVERSION=4.5 +ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/.apt_$ROCMVERSION/ +RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - +RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO ubuntu main > /etc/apt/sources.list.d/rocm.list" # Install dependencies required to build hcc # Ubuntu csomic contains llvm-7 required to build Tensile diff --git a/src/include/conv_fin.hpp b/src/include/conv_fin.hpp index 7ca262e1da..97bbd8a46a 100644 --- a/src/include/conv_fin.hpp +++ b/src/include/conv_fin.hpp @@ -73,7 +73,8 @@ namespace fin { -using json = nlohmann::json; +const int INVOKE_LIMIT = 2; +using json = nlohmann::json; // TODO: Create a config class to encapsulate config // related code, such as checking direction etc template @@ -83,23 +84,17 @@ class ConvFin : public Fin ConvFin() : Fin() {} ConvFin(json _job) : Fin(), job(_job) { - VerifyDevProps(); - command = _job["config"]; - command["bias"] = 0; - // timing is always enabled - is_fwd = (_job["direction"].get() == 0 || _job["direction"].get() & 1); - is_bwd = (_job["direction"].get() == 0 || _job["direction"].get() & 2); - is_wrw = (_job["direction"].get() == 0 || _job["direction"].get() & 4); - SetConvDescriptor(); - // workspace_dev = nullptr; // TODO: replaced with a tensor class - // the variable name is implementation dependent, checking size instead + if(job.contains("config")) + PrepConvolution(); } + void VerifyDevProps() { std::cerr << "Verifying device properties" << std::endl; std::string arch = job["arch"]; arch = arch.substr(0, arch.find(':')); const size_t num_cu = job["num_cu"]; + std::ignore = num_cu; if(arch == "gfx900") { assert(num_cu == 56 || num_cu == 64); @@ -116,10 +111,28 @@ class ConvFin : public Fin { assert(num_cu == 72 || num_cu == 36); } + else if(arch == "gfx90a") + { + assert(num_cu == 110); + } else throw std::runtime_error("Invalid Arch Name"); } + void PrepConvolution() + { + VerifyDevProps(); + command = job["config"]; + command["bias"] = 0; + // timing is always enabled + is_fwd = (job["direction"].get() == 0 || job["direction"].get() & 1); + is_bwd = (job["direction"].get() == 0 || job["direction"].get() & 2); + is_wrw = (job["direction"].get() == 0 || job["direction"].get() & 4); + SetConvDescriptor(); + // workspace_dev = nullptr; // TODO: replaced with a tensor class + // the variable name is implementation dependent, checking size instead + } + // Getters and setters std::vector GetInputTensorLengths(); std::vector GetWeightTensorLengths(); @@ -142,6 +155,7 @@ class ConvFin : public Fin int CopyFromDevice(); int RunGPU(); int TestApplicability(); + int TestPerfDbValid(); int GetandSetData(); int GetSolverList(); int MIOpenFind(); @@ -244,7 +258,8 @@ int ConvFin::MIOpenFindCompile() auto process_solver = [&]() -> bool { std::cerr << "Processing Solver: " << solver_id.ToString() << std::endl; res_item["solver_id"] = solver_id.ToString(); - if(res_item["solver_id"] == "ConvBiasActivAsm1x1U") + if(solver_id.ToString() == "ConvBiasActivAsm1x1U" || + solver_id.ToString().find("Fused") != std::string::npos) { std::cerr << "Skipping fused solvers" << std::endl; return false; @@ -476,8 +491,10 @@ int ConvFin::MIOpenFindEval() outputTensor.desc, outputTensor.gpuData.buf.get()}, workspace.gpuData.buf.get(), - workspace.desc.GetNumBytes()}; - invoker(h, invoke_ctx); + workspace.desc.GetNumBytes(), + convDesc.attribute.gfx90aFp16alt.GetFwd()}; + for(auto idx = 0; idx < INVOKE_LIMIT; idx++) + invoker(h, invoke_ctx); } else if(conv_dir == miopen::conv::Direction::BackwardData) { @@ -489,8 +506,10 @@ int ConvFin::MIOpenFindEval() inputTensor.desc, inputTensor.gpuData.buf.get()}, workspace.gpuData.buf.get(), - workspace.desc.GetNumBytes()}; - invoker(h, invoke_ctx); + workspace.desc.GetNumBytes(), + convDesc.attribute.gfx90aFp16alt.GetBwd()}; + for(auto idx = 0; idx < INVOKE_LIMIT; idx++) + invoker(h, invoke_ctx); } else if(conv_dir == miopen::conv::Direction::BackwardWeights) { @@ -502,8 +521,10 @@ int ConvFin::MIOpenFindEval() weightTensor.desc, weightTensor.gpuData.buf.get()}, workspace.gpuData.buf.get(), - workspace.desc.GetNumBytes()}; - invoker(h, invoke_ctx); + workspace.desc.GetNumBytes(), + convDesc.attribute.gfx90aFp16alt.GetWrW()}; + for(auto idx = 0; idx < INVOKE_LIMIT; idx++) + invoker(h, invoke_ctx); } else { @@ -520,7 +541,6 @@ int ConvFin::MIOpenFindEval() res_item["reason"] = "Success"; return true; - }; auto res = process_solver(); @@ -659,8 +679,10 @@ int ConvFin::MIOpenFind() outputTensor.desc, outputTensor.gpuData.buf.get()}, workspace.gpuData.buf.get(), - workspace.desc.GetNumBytes()}; - invoker(h, invoke_ctx); + workspace.desc.GetNumBytes(), + convDesc.attribute.gfx90aFp16alt.GetFwd()}; + for(auto idx = 0; idx < INVOKE_LIMIT; idx++) + invoker(h, invoke_ctx); } else if(conv_dir == miopen::conv::Direction::BackwardData) { @@ -672,8 +694,10 @@ int ConvFin::MIOpenFind() inputTensor.desc, inputTensor.gpuData.buf.get()}, workspace.gpuData.buf.get(), - workspace.desc.GetNumBytes()}; - invoker(h, invoke_ctx); + workspace.desc.GetNumBytes(), + convDesc.attribute.gfx90aFp16alt.GetBwd()}; + for(auto idx = 0; idx < INVOKE_LIMIT; idx++) + invoker(h, invoke_ctx); } else if(conv_dir == miopen::conv::Direction::BackwardWeights) { @@ -685,8 +709,10 @@ int ConvFin::MIOpenFind() weightTensor.desc, weightTensor.gpuData.buf.get()}, workspace.gpuData.buf.get(), - workspace.desc.GetNumBytes()}; - invoker(h, invoke_ctx); + workspace.desc.GetNumBytes(), + convDesc.attribute.gfx90aFp16alt.GetWrW()}; + for(auto idx = 0; idx < INVOKE_LIMIT; idx++) + invoker(h, invoke_ctx); } else { @@ -703,7 +729,6 @@ int ConvFin::MIOpenFind() res_item["reason"] = "Success"; return true; - }; auto res = process_solver(); @@ -770,14 +795,147 @@ int ConvFin::TestApplicability() return 0; } +class ParamString +{ + std::string values; + + public: + ParamString() {} + ParamString(std::string in_val) : values(in_val) {} + + void Serialize(std::ostream& stream) const { stream << values; } + bool Deserialize(const std::string& s) + { + values = s; + return true; + } +}; + +template +int ConvFin::TestPerfDbValid() +{ + bool ret = true; + namespace fs = boost::filesystem; + std::cout << miopen::GetSystemDbPath() << std::endl; + + std::vector contents; + std::copy(fs::directory_iterator(miopen::GetSystemDbPath()), + fs::directory_iterator(), + std::back_inserter(contents)); + for(auto const& db_file : contents) + { + std::string pathstr = db_file.native(); + std::string filestr = db_file.filename().native(); + + if(job["arch"].size() > 0 and job["num_cu"].size() > 0) + { + std::string arch = job["arch"]; + int num_cu = job["num_cu"]; + std::stringstream db_name; + db_name << arch; + if(num_cu > 64) + db_name << std::hex << num_cu << ".db"; + else + db_name << "_" << num_cu << ".db"; + + if(filestr.compare(db_name.str()) != 0) + continue; + } + + if(pathstr.compare(pathstr.size() - 3, 3, ".db") != 0) + continue; + + std::cout << pathstr << "/" << filestr << std::endl; + + auto sql = miopen::SQLite{pathstr, true}; + + // pull out records for all configs from perf_db + std::unordered_map> records; + std::map> perfdb_entries; + std::vector> err_list; + auto select_query = "SELECT config, solver, params, id FROM perf_db;"; + auto stmt = miopen::SQLite::Statement{sql, select_query}; + while(true) + { + auto rc = stmt.Step(sql); + if(rc == SQLITE_ROW) + { + const auto config_id = stmt.ColumnText(0); + const auto solver_id = stmt.ColumnText(1); + const auto params = stmt.ColumnText(2); + const auto perf_id = stmt.ColumnText(3); + records[config_id][solver_id].SetValues(solver_id, ParamString(params)); + perfdb_entries[perf_id]["config"] = config_id; + perfdb_entries[perf_id]["solver"] = solver_id; + } + else if(rc == SQLITE_DONE) + break; + else if(rc == SQLITE_ERROR || rc == SQLITE_MISUSE) + MIOPEN_THROW(miopenStatusInternalError, sql.ErrorMessage()); + } + + // iterate through each config + for(auto it = perfdb_entries.begin(); it != perfdb_entries.end(); it++) + { + auto solver_nm = it->second["solver"]; + auto config_id = it->second["config"]; + auto record = records.find(config_id)->second.find(solver_nm)->second; + + auto slv_id = miopen::solver::Id(solver_nm); + if(!slv_id.IsValid()) + { + std::map err; + err["perfdb_id"] = it->first; + err["config"] = config_id; + err["solver"] = solver_nm; + err_list.push_back(err); + ret = false; + continue; + } + + auto solver = slv_id.GetSolver(); + + // check if the params in the record deserialize + if(!solver.TestSysDbRecord(record)) + { + std::map err; + err["perfdb_id"] = it->first; + err["config"] = config_id; + err["solver"] = solver_nm; + err_list.push_back(err); + ret = false; + } + } + std::string listing = filestr + "_errors"; + output[listing] = err_list; + } + + if(ret) + output["clear"] = "true"; + + return ret; +} + template int ConvFin::GetSolverList() { // pair.first = id, pair. second = string id - std::vector> solvers; + std::vector> solvers; for(const auto& id : miopen::solver::GetSolversByPrimitive(miopen::solver::Primitive::Convolution)) - solvers.push_back(std::make_pair(id.Value(), id.ToString())); + { + std::unordered_map solver; + solver["id"] = std::to_string(id.Value()); + solver["name"] = id.ToString(); + solver["tunable"] = "0"; + solver["dynamic"] = "0"; + if(id.GetSolver().IsTunable()) + solver["tunable"] = "1"; + if(id.GetSolver().IsDynamic()) + solver["dynamic"] = "1"; + solvers.push_back(solver); + } + output["all_solvers"] = solvers; return 0; } @@ -836,15 +994,25 @@ int ConvFin::ProcessStep(const std::string& step_name) if(step_name == "copy_buf_from_device") return CopyFromDevice(); if(step_name == "applicability") + { return TestApplicability(); + } + if(step_name == "perf_db_test") + return TestPerfDbValid(); if(step_name == "get_solvers") return GetSolverList(); if(step_name == "miopen_find") + { return MIOpenFind(); + } if(step_name == "miopen_find_compile") + { return MIOpenFindCompile(); + } if(step_name == "miopen_find_eval") + { return MIOpenFindEval(); + } return 0; } @@ -1082,7 +1250,7 @@ int ConvFin::SetConvDescriptor() exit(0); } - miopenPaddingMode_t p_mode = miopenPaddingSame; + miopenPaddingMode_t p_mode = miopenPaddingDefault; if((command["pad_mode"]) == "same") p_mode = miopenPaddingSame; else if((command["pad_mode"]) == "valid") diff --git a/src/include/error.hpp b/src/include/error.hpp index 0279019964..8ba99b1ec4 100644 --- a/src/include/error.hpp +++ b/src/include/error.hpp @@ -19,7 +19,7 @@ struct Exception : std::exception const char* what() const noexcept override { return message.c_str(); } }; -} +} // namespace fin #define FIN_THROW(...) \ do \ { \ diff --git a/src/main.cpp b/src/main.cpp index 0994a33363..5952056f6a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -60,8 +60,6 @@ using json = nlohmann::json; int main(int argc, char* argv[], char* envp[]) { std::vector args(argv, argv + argc); - std::string ifile; - std::string ofile; std::map MapInputs = {}; for(auto& arg : args) @@ -91,7 +89,6 @@ int main(int argc, char* argv[], char* envp[]) } if(args[i] == "-o") { - ofile = args[i + 1]; MapInputs[args[i].back()] = args[i + 1]; } } @@ -101,13 +98,22 @@ int main(int argc, char* argv[], char* envp[]) // The JSON is a list of commands, so we iterate over the list and then // process each map - std::ifstream i(input_filename.string()); + std::ifstream input_file(input_filename.string()); + if(!input_file) + { + + throw std::runtime_error("Error loading json file: " + input_filename.string()); + } // TODO: fix the output writing so that interim results are not lost if one of // the iterations crash - std::ofstream o(output_filename.string()); + std::ofstream output_file(output_filename.string()); + if(!output_file) + { + throw std::runtime_error("Error opening json file: " + output_filename.string()); + } json j; // = json::parse(cmd); - i >> j; - i.close(); + input_file >> j; + input_file.close(); json final_output; // Get the process env std::vector jenv; @@ -120,22 +126,22 @@ int main(int argc, char* argv[], char* envp[]) // process through the jobs for(auto& it : j) { - auto command = it; - fin::Fin* f = nullptr; + auto command = it; + std::unique_ptr f = nullptr; // TODO : Move this to a factory function if(command.contains("config")) { if(command["config"]["cmd"] == "conv") { - f = new fin::ConvFin(command); + f = std::make_unique>(command); } else if(command["config"]["cmd"] == "convfp16") { - f = new fin::ConvFin(command); + f = std::make_unique>(command); } else if(command["config"]["cmd"] == "convbfp16") { - f = new fin::ConvFin(command); + f = std::make_unique>(command); } else { @@ -143,9 +149,13 @@ int main(int argc, char* argv[], char* envp[]) exit(-1); } } + else if(command.contains("pdb_verif") and command["pdb_verif"] == true) + { + f = std::make_unique>(command); + } else { - f = new fin::ConvFin(); + f = std::make_unique>(); } for(auto& step_it : command["steps"]) @@ -159,8 +169,8 @@ int main(int argc, char* argv[], char* envp[]) f->output["input"] = command; final_output.push_back(f->output); } - o << std::setw(4) << final_output << std::endl; - o.flush(); - o.close(); + output_file << std::setw(4) << final_output << std::endl; + output_file.flush(); + output_file.close(); return 0; } diff --git a/src/tests/fin_input_find_compile2.json b/src/tests/fin_input_find_compile2.json new file mode 100644 index 0000000000..abec534afe --- /dev/null +++ b/src/tests/fin_input_find_compile2.json @@ -0,0 +1,3 @@ +[ + { "steps": [ "miopen_find_compile"], "tag" : "resnet50", "label" : "resnet_tuning", "direction" : 4, "arch" : "gfx908:sram-ecc+:xnack-", "num_cu" : 120, "config" : { "in_w" : 28, "sources" : [ "issue_1760" ], "pad_d" : 0, "out_channels" : 128, "dilation_d" : 1, "pad_w" : 1, "conv_stride_h" : 1, "conv_stride_d" : 1, "fusion_mode" : -1, "pad_mode" : "default", "in_h" : 28, "tags" : [ "resnet50" ], "in_d" : 1, "cmd" : "conv", "activMode" : -1, "fil_h" : 3, "group_count" : 1, "dilation_h" : 1, "in_channels" : 128, "pad_h" : 1, "batchsize" : 32, "conv_stride_w" : 1, "conv_mode" : "conv", "recur" : 0, "fil_w" : 3, "spatial_dim" : 2, "fil_d" : 1, "trans_output_pad_d" : 0, "dilation_w" : 1 , "in_layout" : "NHWC", "out_layout": "NHWC", "wei_layout" : "NHWC"} } +] diff --git a/tests/pdb_check_all.json b/tests/pdb_check_all.json new file mode 100644 index 0000000000..629dac2ac8 --- /dev/null +++ b/tests/pdb_check_all.json @@ -0,0 +1,6 @@ +[ +{ +"pdb_verif": true, +"steps": ["perf_db_test"] +} +] diff --git a/tests/pdb_check_gfx906.json b/tests/pdb_check_gfx906.json new file mode 100644 index 0000000000..8e4c34fa9d --- /dev/null +++ b/tests/pdb_check_gfx906.json @@ -0,0 +1,8 @@ +[ +{ +"pdb_verif": true, +"arch": "gfx906", +"num_cu": 60, +"steps": ["perf_db_test"] +} +] diff --git a/tests/pdb_check_gfx908.json b/tests/pdb_check_gfx908.json new file mode 100644 index 0000000000..7693f1c113 --- /dev/null +++ b/tests/pdb_check_gfx908.json @@ -0,0 +1,8 @@ +[ +{ +"pdb_verif": true, +"arch": "gfx908", +"num_cu": 120, +"steps": ["perf_db_test"] +} +] diff --git a/tests/pdb_check_gfx90a.json b/tests/pdb_check_gfx90a.json new file mode 100644 index 0000000000..d7470ac50d --- /dev/null +++ b/tests/pdb_check_gfx90a.json @@ -0,0 +1,8 @@ +[ +{ +"pdb_verif": true, +"arch": "gfx90a", +"num_cu": 110, +"steps": ["perf_db_test"] +} +]