Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get device/vendor ids of SF from its parent PCI #5964

Merged
merged 1 commit into from
Jan 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/ucs/sys/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <ctype.h>
#include <stdio.h>
#include <time.h>
#include <libgen.h>


const char *ucs_memunits_suffixes[] = {"", "K", "M", "G", "T", "P", "E", NULL};
Expand Down Expand Up @@ -199,6 +200,17 @@ ucs_status_t ucs_str_to_memunits(const char *buf, void *dest)
return UCS_OK;
}

/*
 * Strip the last @a num_layers components from @a path by applying dirname()
 * that many times, feeding each result into the next call.
 *
 * A non-positive @a num_layers performs no dirname() call and returns @a path
 * unchanged. Returns NULL as soon as any dirname() call yields NULL.
 *
 * NOTE(review): POSIX allows dirname() to modify the buffer it is given, so
 * callers should pass a writable string - confirm against the target libc.
 */
char *ucs_dirname(char *path, int num_layers)
{
    int remaining;

    for (remaining = num_layers; remaining > 0; --remaining) {
        path = dirname(path);
        if (path == NULL) {
            return NULL;
        }
    }

    return path;
}

void ucs_snprintf_safe(char *buf, size_t size, const char *fmt, ...)
{
va_list ap;
Expand Down
11 changes: 11 additions & 0 deletions src/ucs/sys/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,17 @@ void ucs_expand_path(const char *path, char *fullpath, size_t max);
void ucs_fill_filename_template(const char *tmpl, char *buf, size_t max);


/**
 * Strip the given number of trailing components from a file/directory path.
 *
 * The implementation applies dirname() to @a path once per stripped component.
 * NOTE(review): dirname() may modify its argument in place, so @a path should
 * point to a writable buffer rather than a string literal - confirm for the
 * target libc.
 *
 * @param path        Path to strip trailing components from.
 * @param num_layers  Number of trailing components to strip; when it is zero
 *                    or negative, @a path is returned as-is.
 *
 * @return Pointer to the stripped directory path, or NULL if dirname()
 *         returned NULL.
 */
char *ucs_dirname(char *path, int num_layers);


/**
* Format a string to a buffer of given size, and fill the rest of the buffer
* with '\0'. Also, guarantee that the last char in the buffer is '\0'.
Expand Down
114 changes: 94 additions & 20 deletions src/uct/ib/base/ib_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,24 +482,95 @@ void uct_ib_handle_async_event(uct_ib_device_t *dev, uct_ib_async_event_t *event
ucs_log(level, "IB Async event on %s: %s", uct_ib_device_name(dev), event_info);
}

/*
 * Resolve the device's ibdev_path with realpath() into path_buffer and verify
 * that the resolved path contains an "/infiniband/" component.
 *
 * dev         - device whose ibv_context->device->ibdev_path is resolved.
 * path_buffer - output buffer for the resolved path; the callers visible in
 *               this file pass a char[PATH_MAX] array.
 *
 * Returns UCS_OK on success, or UCS_ERR_IO_ERROR if realpath() fails or the
 * resolved path has no "/infiniband/" substring.
 */
static ucs_status_t uct_ib_device_get_path_buffer(uct_ib_device_t *dev,
char *path_buffer)
{
char *resolved_path;

/* realpath() writes the canonical path into path_buffer; NULL means failure */
resolved_path = realpath(dev->ibv_context->device->ibdev_path, path_buffer);
if (resolved_path == NULL) {
return UCS_ERR_IO_ERROR;
}
yosefe marked this conversation as resolved.
Show resolved Hide resolved

/* Make sure there is "/infiniband/" substring in path_buffer */
if (strstr(path_buffer, "/infiniband/") == NULL) {
return UCS_ERR_IO_ERROR;
}
yosefe marked this conversation as resolved.
Show resolved Hide resolved

return UCS_OK;
}

/*
 * Read the numeric contents of "<path>/vendor" and "<path>/device" via
 * ucs_read_file_number() and store them in *vendor_id and *device_id.
 *
 * The values are read as long and narrowed to uint16_t on assignment.
 *
 * Returns UCS_OK when both reads succeed; otherwise returns the status of the
 * first failing read. If only the "device" read fails, *vendor_id has already
 * been written and *device_id is left untouched.
 */
static ucs_status_t uct_ib_device_get_ids_from_path(const char *path,
                                                    uint16_t *vendor_id,
                                                    uint16_t *device_id)
{
    long id_value;
    ucs_status_t status;

    status = ucs_read_file_number(&id_value, 1, "%s/%s", path, "vendor");
    if (status == UCS_OK) {
        *vendor_id = id_value;

        status = ucs_read_file_number(&id_value, 1, "%s/%s", path, "device");
        if (status == UCS_OK) {
            *device_id = id_value;
        }
    }

    return status;
}

static void uct_ib_device_get_ids(uct_ib_device_t *dev)
{
long vendor_id, device_id;

if ((ucs_read_file_number(&vendor_id, 1, UCT_IB_DEVICE_SYSFS_FMT,
uct_ib_device_name(dev), "vendor") == UCS_OK) &&
(ucs_read_file_number(&device_id, 1, UCT_IB_DEVICE_SYSFS_FMT,
uct_ib_device_name(dev), "device") == UCS_OK)) {
dev->pci_id.vendor = vendor_id;
dev->pci_id.device = device_id;
ucs_debug("%s vendor_id: 0x%x device_id: %d", uct_ib_device_name(dev),
char *ids_path;
char path_buffer[PATH_MAX];
ucs_status_t status;

/* PF: realpath name is of form /sys/devices/.../0000:03:00.0/infiniband/mlx5_0 */
/* SF: realpath name is of form /sys/devices/.../0000:03:00.0/<UUID>/infiniband/mlx5_0 */

status = uct_ib_device_get_path_buffer(dev, path_buffer);
if (status != UCS_OK) {
goto not_found;
}

/* PF: strip 2 layers. */
ids_path = ucs_dirname(path_buffer, 2);
if (ids_path == NULL) {
goto not_found;
}

status = uct_ib_device_get_ids_from_path(ids_path,
&dev->pci_id.vendor,
&dev->pci_id.device);
if (status == UCS_OK) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    if (status != UCS_OK) {
        goto not_found;
    }
    ...

not_found:
    dev->pci_id.vendor = 0;
    ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yosefe Done.

ucs_debug("PF: %s vendor_id: 0x%x device_id: %d", uct_ib_device_name(dev),
dev->pci_id.vendor, dev->pci_id.device);
} else {
dev->pci_id.vendor = 0;
dev->pci_id.device = 0;
ucs_warn("%s: could not read device/vendor id from sysfs, "
"performance may be affected", uct_ib_device_name(dev));
return;
}

/* SF: strip 3 layers (1 more layer than PF). */
ids_path = ucs_dirname(path_buffer, 1);
if (ids_path == NULL) {
goto not_found;
}

status = uct_ib_device_get_ids_from_path(ids_path,
&dev->pci_id.vendor,
&dev->pci_id.device);
if (status == UCS_OK) {
ucs_debug("SF: %s vendor_id: 0x%x device_id: %d", uct_ib_device_name(dev),
dev->pci_id.vendor, dev->pci_id.device);
return;
}

not_found:
dev->pci_id.vendor = 0;
dev->pci_id.device = 0;
ucs_warn("%s: could not read device/vendor id from sysfs, "
"performance may be affected", uct_ib_device_name(dev));
}

ucs_status_t uct_ib_device_query(uct_ib_device_t *dev,
Expand Down Expand Up @@ -1015,7 +1086,7 @@ ucs_status_t uct_ib_modify_qp(struct ibv_qp *qp, enum ibv_qp_state state)

static ucs_sys_device_t uct_ib_device_get_sys_dev(uct_ib_device_t *dev)
{
char path_buffer[PATH_MAX], *resolved_path;
char path_buffer[PATH_MAX];
ucs_sys_device_t sys_dev;
ucs_sys_bus_id_t bus_id;
ucs_status_t status;
Expand All @@ -1025,17 +1096,20 @@ static ucs_sys_device_t uct_ib_device_get_sys_dev(uct_ib_device_t *dev)
/* realpath name is of form /sys/devices/.../0000:05:00.0/infiniband/mlx5_0
* and bus_id is constructed from 0000:05:00.0 */

resolved_path = realpath(dev->ibv_context->device->ibdev_path, path_buffer);
if (resolved_path == NULL) {
status = uct_ib_device_get_path_buffer(dev, path_buffer);
if (status != UCS_OK) {
return UCS_SYS_DEVICE_ID_UNKNOWN;
}

/* Make sure there is "/infiniband/" substring in path_buffer*/
if (strstr(path_buffer, "/infiniband/") == NULL) {
pcie_bus = ucs_dirname(path_buffer, 2);
if (pcie_bus == NULL) {
return UCS_SYS_DEVICE_ID_UNKNOWN;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

space line after

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yosefe Fixed.

pcie_bus = basename(pcie_bus);
if (pcie_bus == NULL) {
return UCS_SYS_DEVICE_ID_UNKNOWN;
}

pcie_bus = basename(dirname(dirname(path_buffer)));
num_fields = sscanf(pcie_bus, "%hx:%hhx:%hhx.%hhx", &bus_id.domain,
&bus_id.bus, &bus_id.slot, &bus_id.function);
if (num_fields != 4) {
Expand Down
10 changes: 10 additions & 0 deletions test/gtest/ucs/test_sys.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ class test_sys : public ucs::test {
return ucs_get_mem_prot((uintptr_t)address, (uintptr_t)address + size);
}

/* Strip num_layers trailing components from path with ucs_dirname() and
 * expect the result to equal the expected string. */
void test_dirname(char *path, int num_layers, const char *expected) {
    char *stripped = ucs_dirname(path, num_layers);

    EXPECT_EQ(std::string(expected), stripped);
}

void test_memunits(size_t size, const char *expected) {
char buf[256];

Expand Down Expand Up @@ -139,6 +144,11 @@ UCS_TEST_F(test_sys, module) {
EXPECT_EQ(1, test_module_loaded);
}

/* Stripping the last 3 components of a 4-component sysfs-style path should
 * leave only the first component, "/sys". The path is a local char array
 * (not a string literal) because ucs_dirname() takes a non-const char *. */
UCS_TEST_F(test_sys, dirname) {
char path[] = "/sys/devices/pci0000:00/0000:00:00.0";
test_dirname(path, 3, "/sys");
}

UCS_TEST_F(test_sys, memunits_to_str) {
test_memunits(256, "256");
test_memunits(1256, "1256");
Expand Down