Skip to content

Commit

Permalink
fix: ignore compute_cap if not present (#1866)
Browse files Browse the repository at this point in the history
* fix: ignore compute_cap if not present

* fix: correct gpu info

* fix: remove check for toolkit version

---------

Co-authored-by: vansangpfiev <[email protected]>
  • Loading branch information
vansangpfiev and sangjanai authored Jan 17, 2025
1 parent 0707069 commit 68094d9
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 23 deletions.
11 changes: 0 additions & 11 deletions engine/services/engine_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -404,17 +404,6 @@ cpp::result<bool, std::string> EngineService::DownloadCuda(
auto suitable_toolkit_version =
GetSuitableCudaVersion(engine, hw_inf_.cuda_driver_version);

// Compare the CUDA driver version with the CUDA toolkit version.
// The driver version must be greater than or equal to the toolkit version to ensure compatibility.
if (semantic_version_utils::CompareSemanticVersion(
hw_inf_.cuda_driver_version, suitable_toolkit_version) < 0) {
CTL_ERR("Your Cuda driver version "
<< hw_inf_.cuda_driver_version
<< " is not compatible with cuda toolkit version "
<< suitable_toolkit_version);
return cpp::fail("Cuda driver is not compatible with cuda toolkit");
}

auto url_obj = url_parser::Url{
.protocol = "https",
.host = jan_host,
Expand Down
2 changes: 1 addition & 1 deletion engine/utils/hardware/gpu_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ inline std::vector<GPU> GetGPUInfo() {

for (size_t i = 0; i < nvidia_gpus.size(); i++) {
for (size_t j = 0; j < vulkan_gpus.size(); j++) {
if (nvidia_gpus[i].uuid == vulkan_gpus[j].uuid) {
if (nvidia_gpus[i].uuid.find(vulkan_gpus[j].uuid) != std::string::npos) {
vulkan_gpus[j].version =
nvidia_gpus[0].cuda_driver_version.value_or("unknown");
vulkan_gpus[j].add_info = NvidiaAddInfo{
Expand Down
28 changes: 17 additions & 11 deletions engine/utils/system_info_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,26 +108,32 @@ std::vector<GpuInfo> GetGpuInfoList() {
auto [driver_version, cuda_version] = GetDriverAndCudaVersion();
if (driver_version.empty() || cuda_version.empty())
return gpuInfoList;

bool need_fallback = false;
CommandExecutor cmd(kGpuQueryCommand);
auto output = cmd.execute();
if (output.find("NVIDIA") == std::string::npos) {
need_fallback = true;
output = CommandExecutor(kGpuQueryCommandFb).execute();
}

const std::regex gpu_info_reg(kGpuInfoRegex);
std::string rg = need_fallback ? kGpuInfoRegexFb : kGpuInfoRegex;
const std::regex gpu_info_reg(rg);
std::smatch match;
std::string::const_iterator search_start(output.cbegin());
int rg_count = need_fallback ? 5 : 6;

while (
std::regex_search(search_start, output.cend(), match, gpu_info_reg)) {
GpuInfo gpuInfo = {
match[1].str(), // id
match[2].str(), // vram_total
match[3].str(), // vram_free
match[4].str(), // name
GetGpuArch(match[4].str()), // arch
driver_version, // driver_version
cuda_version, // cuda_driver_version
match[5].str(), // compute_cap
match[6].str() // uuid
match[1].str(), // id
match[2].str(), // vram_total
match[3].str(), // vram_free
match[4].str(), // name
GetGpuArch(match[4].str()), // arch
driver_version, // driver_version
cuda_version, // cuda_driver_version
need_fallback ? "0" : match[5].str(), // compute_cap
match[rg_count].str() // uuid
};
gpuInfoList.push_back(gpuInfo);
search_start = match.suffix().first;
Expand Down
7 changes: 7 additions & 0 deletions engine/utils/system_info_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ constexpr static auto kGpuQueryCommand{
// Regex matching one comma-separated row of GPU query output.
// It has six capture groups: groups 1-3 are digit-only fields, group 4 is a
// field containing no comma, group 5 is a field of digits and dots, and
// group 6 runs up to the next comma or newline.
// GetGpuInfoList() reads them as id, vram_total, vram_free, name,
// compute_cap and uuid, in that order.
constexpr static auto kGpuInfoRegex{
R"((\d+),\s*(\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+),\s*([^\n,]+))"};

// Fallback nvidia-smi query. It requests index, memory.total, memory.free,
// name and uuid for each GPU, formatted as CSV with no header row and no
// units. GetGpuInfoList() runs it when the output of kGpuQueryCommand does
// not contain "NVIDIA".
// NOTE(review): presumably this exists for drivers whose nvidia-smi does not
// support querying compute_cap - confirm.
constexpr static auto kGpuQueryCommandFb{
"nvidia-smi "
"--query-gpu=index,memory.total,memory.free,name,uuid "
"--format=csv,noheader,nounits"};
// Regex matching one comma-separated row of the fallback query output.
// It has five capture groups: groups 1-3 are digit-only fields, group 4 is a
// field containing no comma, and group 5 runs up to the next comma or
// newline.
// GetGpuInfoList() reads them as id, vram_total, vram_free, name and uuid;
// in this fallback path it reports compute_cap as "0" because the row has no
// such field.
constexpr static auto kGpuInfoRegexFb{
R"((\d+),\s*(\d+),\s*(\d+),\s*([^,]+),\s*([^\n,]+))"};

struct SystemInfo {
explicit SystemInfo(std::string os, std::string arch)
: os(std::move(os)), arch(std::move(arch)) {}
Expand Down

0 comments on commit 68094d9

Please sign in to comment.