Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DAOS-13292 control: Use cart API to detect fabric #13989

Merged
merged 12 commits into from
Apr 29, 2024
Merged
3 changes: 1 addition & 2 deletions src/control/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def is_firmware_mgmt_build(benv):

def get_build_tags(benv):
"Get custom go build tags."
tags = ["ucx", "spdk"]
tags = ["spdk"]
if is_firmware_mgmt_build(benv):
tags.append("firmware")
if not is_release_build(benv):
Expand Down Expand Up @@ -120,7 +120,6 @@ def scons():

denv.Tool('go_builder')

denv.require('ofi', 'ucx')
# Sets CGO_LDFLAGS for rpath options
denv.d_add_rpaths("..", True, True)
denv.AppendENVPath("CGO_CFLAGS", denv.subst("$_CPPINCFLAGS"), sep=" ")
Expand Down
8 changes: 0 additions & 8 deletions src/control/cmd/daos_agent/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,6 @@ func (cmd *startCmd) Execute(_ []string) error {
}
cmd.Debugf("created dRPC server: %s", time.Since(createDrpcStart))

hwprovInitStart := time.Now()
hwprovFini, err := hwprov.Init(cmd.Logger)
if err != nil {
return err
}
defer hwprovFini()
cmd.Debugf("initialized hardware providers: %s", time.Since(hwprovInitStart))

cacheStart := time.Now()
cache := NewInfoCache(ctx, cmd.Logger, cmd.ctlInvoker, cmd.cfg)
if cmd.attachInfoCacheDisabled() {
Expand Down
58 changes: 58 additions & 0 deletions src/control/lib/hardware/cart/bindings.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//
// (C) Copyright 2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//

package cart

/*
#cgo LDFLAGS: -lcart

#include <cart/types.h>
#include <cart/api.h>
*/
import "C"

import (
"unsafe"

"github.com/pkg/errors"

"github.com/daos-stack/daos/src/control/lib/daos"
"github.com/daos-stack/daos/src/control/logging"
)

// getProtocolInfo queries CART through crt_protocol_info_get and converts the
// returned linked list into a slice of crtFabricDevice.
//
// When provider is empty a NULL string is handed to CART (logged as a request
// for all protocol info); otherwise the provider name is passed as a C string.
// A non-success status from CART is returned as a wrapped daos.Status error.
func getProtocolInfo(log logging.Logger, provider string) ([]*crtFabricDevice, error) {
	var cInfo *C.struct_crt_protocol_info
	var cProtoStr *C.char
	if provider != "" {
		log.Debugf("getting fabric protocol info from CART for %q", provider)
		cProtoStr = C.CString(provider)
		// C.CString allocates C memory, so it must be released explicitly.
		defer C.free(unsafe.Pointer(cProtoStr))
	} else {
		log.Debug("getting all fabric protocol info from CART")
	}

	if err := daos.Status(C.crt_protocol_info_get(cProtoStr, &cInfo)); err != daos.Success {
		// Label the error with the function that was actually invoked here.
		return nil, errors.Wrap(err, "crt_protocol_info_get")
	}
	// The list is owned by CART; every entry is copied into Go memory below
	// before this deferred free runs.
	defer C.crt_protocol_info_free(cInfo)

	infoList := make([]*crtFabricDevice, 0)

	// Walk the singly linked list via its next pointers.
	for cur := cInfo; cur != nil; cur = cur.next {
		infoList = append(infoList, cToCrtProtocolInfo(cur))
	}

	log.Debugf("CART protocol info discovered:\n%+v", infoList)
	return infoList, nil
}

// cToCrtProtocolInfo copies the class, protocol and device name strings of a
// single C crt_protocol_info entry into a newly allocated crtFabricDevice.
func cToCrtProtocolInfo(cInfo *C.struct_crt_protocol_info) *crtFabricDevice {
	dev := new(crtFabricDevice)
	dev.Class = C.GoString(cInfo.class_name)
	dev.Protocol = C.GoString(cInfo.protocol_name)
	dev.Device = C.GoString(cInfo.device_name)
	return dev
}
145 changes: 145 additions & 0 deletions src/control/lib/hardware/cart/cart.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//
// (C) Copyright 2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//

package cart

import (
"context"
"fmt"

"github.com/pkg/errors"

"github.com/daos-stack/daos/src/control/lib/hardware"
"github.com/daos-stack/daos/src/control/logging"
)

// Fabric class name strings. classUCX is the value IsUCX compares a device's
// Class field against.
const (
	classNA        = "na"
	classUCX       = "ucx"
	classLibFabric = "ofi"
)

// crtFabricDevice is a single fabric device discovered by CART.
type crtFabricDevice struct {
	// Class is the fabric class name (compared against classUCX by IsUCX).
	Class string
	// Protocol is the protocol name within the class.
	Protocol string
	// Device is the device name as reported by CART.
	Device string
}

// IsUCX reports whether the device's class is the UCX class.
func (cfd *crtFabricDevice) IsUCX() bool {
	switch cfd.Class {
	case classUCX:
		return true
	default:
		return false
	}
}

// OSName returns the OS level network device name for this device.
// For a UCX device the name is derived from Device by getOSNameFromUCXDevice;
// for any other class Device is returned unchanged.
func (cfd *crtFabricDevice) OSName() string {
	if !cfd.IsUCX() {
		return cfd.Device
	}
	return getOSNameFromUCXDevice(cfd.Device)
}

// ProviderName returns the DAOS fabric provider name for this device's
// protocol, formed as "<class>+<protocol>".
func (cfd *crtFabricDevice) ProviderName() string {
	class, proto := cfd.Class, cfd.Protocol
	return fmt.Sprintf("%s+%s", class, proto)
}

// getProtocolFn is the signature of the function a Provider uses to fetch
// fabric devices; the package-level getProtocolInfo has this signature.
type getProtocolFn func(log logging.Logger, provider string) ([]*crtFabricDevice, error)

// Provider provides access to the CART API.
type Provider struct {
	// log is handed to the protocol info function on each query.
	log logging.Logger
	// getProtocolInfo is the function used to query devices. NewProvider
	// leaves it nil, in which case getProtocolInfo is used.
	getProtocolInfo getProtocolFn
}

// NewProvider creates a new CART Provider that logs through log.
func NewProvider(log logging.Logger) *Provider {
	prov := new(Provider)
	prov.log = log
	return prov
}

// GetFabricInterfaces fetches information about the system fabric interfaces via CART.
//
// The query runs in its own goroutine so that this call can return ctx.Err()
// as soon as ctx is done, without waiting for the query to finish. The
// provider string is passed through to the query unchanged. A nil receiver
// yields an error rather than a panic.
func (p *Provider) GetFabricInterfaces(ctx context.Context, provider string) (*hardware.FabricInterfaceSet, error) {
	if p == nil {
		return nil, errors.New("nil CART Provider")
	}

	// Buffered with capacity 1: the worker sends exactly one result, so its
	// send never blocks. With an unbuffered channel the worker would block
	// forever (leaking the goroutine) whenever ctx is cancelled first and
	// nobody is left to receive.
	ch := make(chan *fabricResult, 1)
	go p.getFabricInterfaces(provider, ch)
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case result := <-ch:
		return result.fiSet, result.err
	}
}

// fabricResult carries the outcome of a fabric interface query over a channel:
// either the discovered interface set or the error that prevented discovery.
type fabricResult struct {
	// fiSet is the discovered interface set; left nil when err is set.
	fiSet *hardware.FabricInterfaceSet
	// err is the failure encountered while fetching devices, if any.
	err error
}

// providerPriorities maps a provider name to the priority assigned to it.
type providerPriorities map[string]int

// getPriority returns the priority recorded for provName. A name that has not
// been seen before is assigned the current number of entries as its priority
// (so priorities follow first-seen order starting at 0) and is stored in the map.
func (p providerPriorities) getPriority(provName string) int {
	if known, found := p[provName]; found {
		return known
	}

	next := len(p)
	p[provName] = next
	return next
}

// getFabricInterfaces fetches the fabric devices for provider, converts them
// into a FabricInterfaceSet and sends exactly one fabricResult on ch: either
// the populated set or a wrapped error. It is run as a goroutine by
// GetFabricInterfaces.
func (p *Provider) getFabricInterfaces(provider string, ch chan *fabricResult) {
	// Resolve the default into a local instead of writing it back to the
	// Provider: this method runs on a worker goroutine, so mutating p here
	// would race with any other in-flight query on the same Provider.
	getInfo := p.getProtocolInfo
	if getInfo == nil {
		getInfo = getProtocolInfo
	}

	devices, err := getInfo(p.log, provider)
	if err != nil {
		// Only mention the provider in the message when one was requested.
		provMsg := ""
		if provider != "" {
			provMsg = fmt.Sprintf(" for provider %q", provider)
		}
		ch <- &fabricResult{
			err: errors.Wrapf(err, "fetching fabric interfaces%s", provMsg),
		}
		return
	}

	fis := hardware.NewFabricInterfaceSet()
	// Priorities are assigned per query, in the order provider names are
	// first encountered in the device list.
	priorities := make(providerPriorities)
	for _, dev := range devices {
		fis.Update(crtFabricDeviceToFabricInterface(dev, priorities))
	}

	ch <- &fabricResult{
		fiSet: fis,
	}
}

// crtFabricDeviceToFabricInterface builds a hardware.FabricInterface from a
// CART device: Name is the CART device name, OSName comes from dev.OSName(),
// and Providers from getProviderSet using the shared priorities map.
func crtFabricDeviceToFabricInterface(dev *crtFabricDevice, priorities providerPriorities) *hardware.FabricInterface {
	fi := new(hardware.FabricInterface)
	fi.Name = dev.Device
	fi.OSName = dev.OSName()
	fi.Providers = getProviderSet(dev, priorities)
	return fi
}

// getProviderSet returns a set of one or more DAOS providers associated with
// the protocol info. Non-UCX devices yield a single provider named by
// dev.ProviderName() with a priority taken from priorities; UCX devices are
// delegated to getProviderSetFromUCXTransport.
func getProviderSet(dev *crtFabricDevice, priorities providerPriorities) *hardware.FabricProviderSet {
	if !dev.IsUCX() {
		provName := dev.ProviderName()
		prov := &hardware.FabricProvider{
			Name:     provName,
			Priority: priorities.getPriority(provName),
		}
		return hardware.NewFabricProviderSet(prov)
	}

	// UCX determines its own priorities within the provider set
	return getProviderSetFromUCXTransport(dev.Protocol)
}
Loading
Loading