diff --git a/src/control/cmd/daos_agent/fabric.go b/src/control/cmd/daos_agent/fabric.go index fbdf6faabac..84657061c3b 100644 --- a/src/control/cmd/daos_agent/fabric.go +++ b/src/control/cmd/daos_agent/fabric.go @@ -362,7 +362,7 @@ func (n *NUMAFabric) Find(name string) ([]*FabricInterface, error) { } // FindDevice looks up a fabric device with a given name, domain, and provider. -// NB: The domain and provider are optional. All other parameters are required. If there is more +// NB: The name is required. All other parameters are optional. If there is more // than one match, all of them are returned. func (n *NUMAFabric) FindDevice(params *FabricIfaceParams) ([]*FabricInterface, error) { if params == nil { @@ -406,7 +406,7 @@ func filterDomain(domain string, fiList []*FabricInterface) []*FabricInterface { func filterProvider(provider string, fiList []*FabricInterface) []*FabricInterface { result := make([]*FabricInterface, 0, len(fiList)) for _, fi := range fiList { - if fi.HasProvider(provider) { + if fi.HasProvider(provider) || fi.NetDevClass == FabricDevClassManual { result = append(result, fi) } } diff --git a/src/control/cmd/daos_agent/fabric_test.go b/src/control/cmd/daos_agent/fabric_test.go index 06f26e9896d..c70b68ac8bf 100644 --- a/src/control/cmd/daos_agent/fabric_test.go +++ b/src/control/cmd/daos_agent/fabric_test.go @@ -1100,6 +1100,36 @@ func TestAgent_NUMAFabric_FindDevice(t *testing.T) { }, }, }, + "success with manual interfaces": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: FabricDevClassManual, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: FabricDevClassManual, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Domain: "t2", + Provider: "p2", + }, + expResult: []*FabricInterface{ + { + Name: "t2", + Domain: "t2", + NetDevClass: FabricDevClassManual, + }, + }, + }, "success with no domain": { nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go index 512a2a4e512..317ed72f755 100644 --- a/src/control/cmd/daos_agent/infocache.go +++ b/src/control/cmd/daos_agent/infocache.go @@ -561,6 +561,11 @@ func (c *InfoCache) waitFabricReady(ctx context.Context, netDevClass hardware.Ne } } + if len(needIfaces) == 0 { + c.log.Debugf("no interfaces with device class %s to wait for", netDevClass) + return nil + } + return hardware.WaitFabricReady(ctx, c.log, hardware.WaitFabricReadyParams{ StateProvider: c.devStateGetter, FabricIfaces: needIfaces, diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go index a6c4688002b..1f658055115 100644 --- a/src/control/cmd/daos_agent/infocache_test.go +++ b/src/control/cmd/daos_agent/infocache_test.go @@ -1581,6 +1581,7 @@ func TestAgent_InfoCache_waitFabricReady(t *testing.T) { netDevClass: hardware.Infiniband, expChecked: []string{"t0", "t1"}, }, + "nothing to wait for": {}, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index 8945022ae29..25cbd5d820e 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -7,6 +7,7 @@ package main import ( + "fmt" "net" "strings" @@ -240,42 +241,28 @@ func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, sys string) (*mgmt } func (mod *mgmtModule) selectAttachInfo(ctx context.Context, srvResp *mgmtpb.GetAttachInfoResp, iface, domain string) (*mgmtpb.GetAttachInfoResp, error) { - reqProviders := mod.getIfaceProviders(ctx, iface, domain) - + resp := srvResp if mod.providerIdx > 0 { + mod.log.Debugf("using secondary provider idx %d", mod.providerIdx) + + var err error // Secondary provider indices begin at 1 - resp, err := mod.selectSecondaryAttachInfo(srvResp, mod.providerIdx) + resp, err = mod.selectSecondaryAttachInfo(srvResp, mod.providerIdx) if err != nil { return nil, err } - - if len(reqProviders) != 0 && !reqProviders.Has(resp.ClientNetHint.Provider) { - mod.log.Errorf("requested fabric interface %q (domain: %q) does not report support for configured provider %q (idx %d)", - iface, domain, resp.ClientNetHint.Provider, mod.providerIdx) - } - - return resp, nil } - if len(reqProviders) == 0 || reqProviders.Has(srvResp.ClientNetHint.Provider) { - return srvResp, nil - } - - mod.log.Debugf("primary provider is not supported by requested interface %q domain %q (supports: %s)", iface, domain, strings.Join(reqProviders.ToSlice(), ", ")) - - // We can try to be smart about choosing a provider if the client requested a specific interface - for _, hint := range srvResp.SecondaryClientNetHints { - if reqProviders.Has(hint.Provider) { - mod.log.Tracef("found secondary provider supported by requested interface: %q (idx %d)", hint.Provider, hint.ProviderIdx) - return mod.selectSecondaryAttachInfo(srvResp, uint(hint.ProviderIdx)) - } + reqProviders := mod.getIfaceProviders(ctx, iface, domain, hardware.NetDevClass(resp.ClientNetHint.NetDevClass)) + if len(reqProviders) == 0 || reqProviders.Has(resp.ClientNetHint.Provider) { + return resp, nil } - mod.log.Errorf("no supported provider for requested interface %q domain %q, using primary by default", iface, domain) - return srvResp, nil + return nil, fmt.Errorf("provider %s is not supported by requested interface %q domain %q (supports: %s)", + resp.ClientNetHint.Provider, iface, domain, strings.Join(reqProviders.ToSlice(), ", ")) } -func (mod *mgmtModule) getIfaceProviders(ctx context.Context, iface, domain string) common.StringSet { +func (mod *mgmtModule) getIfaceProviders(ctx context.Context, iface, domain string, ndc hardware.NetDevClass) common.StringSet { providers := common.NewStringSet() if iface == "" { return providers @@ -288,9 +275,10 @@ func (mod *mgmtModule) getIfaceProviders(ctx context.Context, iface, domain stri if fis, err := mod.getFabricInterface(ctx, &FabricIfaceParams{ Interface: iface, Domain: domain, + DevClass: ndc, }); err != nil { mod.log.Errorf("requested fabric interface %q (domain %q) may not function as desired: %s", iface, domain, err) - } else { + } else if fis.NetDevClass != FabricDevClassManual { providers.Add(fis.Providers()...) } @@ -362,7 +350,7 @@ func (mod *mgmtModule) populateNUMAFabricMap(ctx context.Context, resp *mgmtpb.G if exists { pbFIs.Ifaces = make([]*mgmtpb.FabricInterface, 0, len(fis)) for _, fi := range fis { - if fi.HasProvider(resp.ClientNetHint.Provider) { + if fi.HasProvider(resp.ClientNetHint.Provider) || fi.NetDevClass == FabricDevClassManual { pbFIs.Ifaces = append(pbFIs.Ifaces, &mgmtpb.FabricInterface{ NumaNode: uint32(numaNode), Interface: fi.Name, diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go index 7e57e518d5f..1dccde639dc 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc_test.go +++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go @@ -109,6 +109,7 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { mockFabricScan fabricScanFn mockGetNetIfaces func() ([]net.Interface, error) numaGetter *mockNUMAProvider + fabricCfg []*NUMAFabricConfig reqBytes []byte expResp *mgmtpb.GetAttachInfoResp expErr error @@ -266,6 +267,34 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, }), }, + "req interface with cfg ifaces": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{ + Sys: testSys, + Interface: "test0", + }), + fabricCfg: []*NUMAFabricConfig{ + { + NUMANode: 0, + Interfaces: []*FabricInterfaceConfig{ + { + Interface: "test0", + Domain: "test0", + }, + }, + }, + }, + expResp: respWith(testResp, "test0", "test0", []*mgmtpb.FabricInterfaces{ + { + Ifaces: []*mgmtpb.FabricInterface{ + { + Interface: "test0", + Domain: "test0", + Provider: "ofi+tcp", // automatically set to the same as server requested + }, + }, + }, + }), + }, "incompatible error": { reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{}), mockGetAttachInfo: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { @@ -320,14 +349,26 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { } } + ic := newTestInfoCache(t, log, testInfoCacheParams{ + mockGetAttachInfo: tc.mockGetAttachInfo, + mockScanFabric: tc.mockFabricScan, + mockNetIfaces: tc.mockGetNetIfaces, + mockNetDevClassGetter: &hardware.MockNetDevClassProvider{ + GetNetDevClassReturn: []hardware.MockGetNetDevClassResult{ + { + NDC: hardware.Ether, + }, + }, + }, + }) + if tc.fabricCfg != nil { + nf := NUMAFabricFromConfig(log, tc.fabricCfg) + ic.EnableStaticFabricCache(test.Context(t), nf) + } mod := &mgmtModule{ - log: log, - sys: testSys, - cache: newTestInfoCache(t, log, testInfoCacheParams{ - mockGetAttachInfo: tc.mockGetAttachInfo, - mockScanFabric: tc.mockFabricScan, - mockNetIfaces: tc.mockGetNetIfaces, - }), + log: log, + sys: testSys, + cache: ic, numaGetter: tc.numaGetter, }