Skip to content

Commit

Permalink
Implementation for SWDEV-479728:[RDC] - Clock Speed/Power Cap Control
Browse files Browse the repository at this point in the history
Change-Id: I767a71325527aa3c691e9607953ceafebacfb4d5
Signed-off-by: adapryor <[email protected]>
  • Loading branch information
adam360x committed Dec 20, 2024
1 parent 7c91a07 commit df170c8
Show file tree
Hide file tree
Showing 20 changed files with 1,548 additions and 116 deletions.
6 changes: 6 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ set(HEALTH_EXAMPLE_EXE "health")
add_executable(${HEALTH_EXAMPLE_EXE} "${HEALTH_EXAMPLE_SRC_LIST}")
target_link_libraries(${HEALTH_EXAMPLE_EXE} pthread dl rdc_bootstrap)

# Config example: builds the "config" executable from config_example.cc and
# links it against pthread, dl and rdc_bootstrap (same link line as the
# health example target above).
set(CONFIG_EXAMPLE_SRC_LIST "config_example.cc")
# Echo the source list at configure time.
cmake_print_variables(CONFIG_EXAMPLE_SRC_LIST)
set(CONFIG_EXAMPLE_EXE "config")
add_executable(${CONFIG_EXAMPLE_EXE} "${CONFIG_EXAMPLE_SRC_LIST}")
target_link_libraries(${CONFIG_EXAMPLE_EXE} pthread dl rdc_bootstrap)

# Banner printed at configure time once the example targets are declared.
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Finished Cmake Example ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
156 changes: 156 additions & 0 deletions example/config_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#include <unistd.h>

#include <chrono>
#include <iostream>
#include <limits>
#include <thread>

#include "rdc/rdc.h"

int main() {
rdc_gpu_group_t group_id;
rdc_status_t result;
bool standalone = false;
rdc_handle_t rdc_handle;
uint32_t count = 0;
rdc_config_setting_list_t settings_list;
rdc_config_setting_t setting;
uint64_t watts;

char hostIpAddress[] = {"localhost:50051"};
char group_name[] = {"group1"};

// Select the embedded mode and standalone mode dynamically.
std::cout << "Start rdci in: \n";
std::cout << "0 - Embedded mode \n";
std::cout << "1 - Standalone mode \n";
while (!(std::cin >> standalone)) {
std::cout << "Invalid input.\n";
std::cin.clear();
std::cin.ignore();
}
std::cout << std::endl;
std::cout << (standalone ? "Standalone mode selected.\n" : "Embedded mode selected.\n");

// Init the rdc
result = rdc_init(0);

if (result != RDC_ST_OK) {
std::cout << "Error initializing RDC. Return: " << rdc_status_string(result) << std::endl;
goto cleanup;
} else {
std::cout << "RDC Initialized.\n";
}

if (standalone) { // standalone
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr);
if (result != RDC_ST_OK) {
std::cout << "Error connecting to remote rdcd. Return: " << rdc_status_string(result)
<< std::endl;
goto cleanup;
}
} else { // embedded
result = rdc_start_embedded(RDC_OPERATION_MODE_AUTO, &rdc_handle);
if (result != RDC_ST_OK) {
std::cout << "Error starting embedded RDC engine. Return: " << rdc_status_string(result)
<< std::endl;
goto cleanup;
}
}

// Now we can use the same API for both standalone and embedded
// Get the list of devices in the system
uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES];
result = rdc_device_get_all(rdc_handle, gpu_index_list, &count);
if (result != RDC_ST_OK) {
std::cout << "Error to find devices on the system. Return: " << rdc_status_string(result);
goto cleanup;
}
if (count == 0) {
std::cout << "No GPUs find on the sytem ";
goto cleanup;
} else {
std::cout << count << " GPUs found in the system.\n";
}

// Create the group
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY, group_name, &group_id);
if (result != RDC_ST_OK) {
std::cout << "Error creating group. Return: " << rdc_status_string(result);
goto cleanup;
}
std::cout << "Created the GPU group " << group_id << std::endl;

// Add all GPUs to the group
for (uint32_t i = 0; i < count; i++) {
result = rdc_group_gpu_add(rdc_handle, group_id, gpu_index_list[i]); // Add GPU 0
if (result != RDC_ST_OK) {
std::cout << "Error adding group. Return: " << rdc_status_string(result);
goto cleanup;
}
rdc_device_attributes_t attribute;
result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], &attribute);
if (result != RDC_ST_OK) {
std::cout << "Error get GPU attribute. Return: " << rdc_status_string(result);
goto cleanup;
}
std::cout << "Add GPU " << gpu_index_list[i] << ":" << attribute.device_name << " to group "
<< group_id << std::endl;
}

setting.type = RDC_CFG_POWER_LIMIT;
// Our targeted value is 195 Watts, which will be converted into Microwatts inside of
// rdc_config_set
setting.target_value = 195;
result = rdc_config_set(rdc_handle, group_id, setting);
if (result != RDC_ST_OK) {
std::cout << "Error set config RDC_CFG_POWER_LIMIT, Return: " << rdc_status_string(result)
<< std::endl;
goto cleanup;
}

result = rdc_config_get(rdc_handle, group_id, &settings_list);
if (result != RDC_ST_OK) {
std::cout << "Error get config, Return: " << rdc_status_string(result) << std::endl;
goto cleanup;
}

// Prompt user to change amd-smi to other value, and watch rdc config change it back

std::cout << "Config before wait:" << std::endl;

result = rdc_config_get(rdc_handle, group_id, &settings_list);
if (result != RDC_ST_OK) {
std::cout << "Error get config, Return: " << rdc_status_string(result) << std::endl;
goto cleanup;
}

std::cout << "The config will keep the power limit to 195 Watts" << std::endl;
std::cout << "You can change the power limit using amd-smi, the RDC config module should be able "
"to detect it and set it back"
<< std::endl;
std::cout << "Waiting 3 minutes before exit ..." << std::endl;
std::this_thread::sleep_for(std::chrono::minutes(3));

result = rdc_config_clear(rdc_handle, group_id);
if (result != RDC_ST_OK) {
std::cout << "Error clear config, Return: " << rdc_status_string(result) << std::endl;
goto cleanup;
}

//... clean up
cleanup:
std::cout << "Cleaning up.\n";

result = rdc_group_gpu_destroy(rdc_handle, group_id);
if (result != RDC_ST_OK) {
std::cout << "Error delete GPU group. Return: " << rdc_status_string(result);
}
std::cout << "Deleted the GPU group " << group_id << std::endl;

if (standalone)
rdc_disconnect(rdc_handle);
else
rdc_stop_embedded(rdc_handle);
rdc_shutdown();
return result;
}
Loading

0 comments on commit df170c8

Please sign in to comment.