diff --git a/docs/ovs_offload.md b/docs/ovs_offload.md index eac34febda2..aadc6628f0d 100644 --- a/docs/ovs_offload.md +++ b/docs/ovs_offload.md @@ -226,3 +226,53 @@ The following manufacturers are known to work: - [Mellanox Bluefield-2](https://www.mellanox.com/products/bluefield2-overview) Deployment guide can be found [here](https://docs.google.com/document/d/1hRke0cOCY84Ef8OU283iPg_PHiJ6O17aUkb9Vv-fWPQ/edit?usp=sharing). + +## vDPA + +vDPA (Virtio DataPath Acceleration) is a technology that enables the acceleration of virtIO devices while +allowing the implementations of such devices (e.g: NIC vendors) to use their own control plane. + +vDPA can be combined with the SR-IOV OVS Hardware offloading setup to expose the workload to an +open standard interface such as virtio-net. + +### Additional Prerequisites: +* Linux Kernel >= 5.12 +* iproute >= 5.14 + +### Supported Hardware: +- Mellanox ConnectX-6DX NIC + +### Additional configuration +In addition to all the steps listed above, load the vdpa, virtio-vdpa and mlx5-vdpa kernel modules: + + $ modprobe vdpa + $ modprobe virtio-vdpa + $ modprobe mlx5-vdpa + +The `vdpa` tool (part of the iproute package) is used to create a vdpa device on top +of an existing VF: + + $ vdpa mgmtdev show + pci/0000:65:00.2: + supported_classes net + $ vdpa dev add name vdpa2 mgmtdev pci/0000:65:00.2 + $ vdpa dev list + vdpa2: type network mgmtdev pci/0000:65:00.2 vendor_id 5555 max_vqs 16 max_vq_size 256 + +After a device has been created, the SR-IOV Device Plugin configuration has to be modified for it +to select and expose the vdpa device: + +```json +{ + "resourceList": [ + { + "resourceName": "cx6_sriov_vpda_virtio", + "selectors": { + "vendors": ["15b3"], + "devices": ["101e"], + "vdpaType": "virtio" + } + } + ] +} +``` diff --git a/go-controller/go.mod b/go-controller/go.mod index 33fc34c842f..003860db4e1 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -15,6 +15,7 @@ require ( github.com/google/uuid v1.2.0 
github.com/gorilla/mux v1.8.0 github.com/j-keck/arping v1.0.2 + github.com/k8snetworkplumbingwg/govdpa v0.1.4 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.3.0 github.com/miekg/dns v1.1.31 github.com/mitchellh/copystructure v1.2.0 diff --git a/go-controller/go.sum b/go-controller/go.sum index ce922021cd6..4e7d44a7f9e 100644 --- a/go-controller/go.sum +++ b/go-controller/go.sum @@ -514,6 +514,8 @@ github.com/juju/utils v0.0.0-20180808125547-9dfc6dbfb02b/go.mod h1:6/KLg8Wz/y2KV github.com/juju/version v0.0.0-20161031051906-1f41e27e54f2/go.mod h1:kE8gK5X0CImdr7qpSKl3xB2PmpySSmfj7zVbkZFs81U= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/k8snetworkplumbingwg/govdpa v0.1.4 h1:e6mM7JFZkLVJeMQw3px96EigHAhnb4VUlqhNub/2Psk= +github.com/k8snetworkplumbingwg/govdpa v0.1.4/go.mod h1:UQR1xu7A+nnRK1dkLEi12OnNL0OiBPpIKOYDuaQQkck= github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.3.0 h1:MjRRgZyTGo90G+UrwlDQjU+uG4Z7By65qvQxGoILT/8= github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.3.0/go.mod h1:nqCI7aelBJU61wiBeeZWJ6oi4bJy5nrjkM6lWIMA4j0= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index 4bd74efa594..f51283adfe5 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -11,6 +11,7 @@ import ( utilnet "k8s.io/utils/net" current "github.com/containernetworking/cni/pkg/types/100" + "github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -95,6 +96,13 @@ func (pr *PodRequest) checkOrUpdatePodUID(podUID string) error { func (pr *PodRequest) 
getVFNetdevName() (string, error) { // Get the vf device Name + + // If a vDPA device exists, it takes precedence over the vendor device, steering-wise + vdpaDevice, err := util.GetVdpaOps().GetVdpaDeviceByPci(pr.CNIConf.DeviceID) + if err == nil && vdpaDevice.Driver() == kvdpa.VirtioVdpaDriver { + return vdpaDevice.VirtioNet().NetDev(), nil + } + vfNetdevices, err := util.GetSriovnetOps().GetNetDevicesFromPci(pr.CNIConf.DeviceID) if err != nil { return "", err diff --git a/go-controller/pkg/util/mocks/VdpaDevice.go b/go-controller/pkg/util/mocks/VdpaDevice.go new file mode 100644 index 00000000000..65f9a97bf08 --- /dev/null +++ b/go-controller/pkg/util/mocks/VdpaDevice.go @@ -0,0 +1,66 @@ +// Code generated by mockery v2.2.1. DO NOT EDIT. + +package mocks + +import mock "github.com/stretchr/testify/mock" + +// VdpaDevice is an autogenerated mock type for the VdpaDevice type +type VdpaDevice struct { + mock.Mock +} + +// GetDriver provides a mock function with given fields: +func (_m *VdpaDevice) GetDriver() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + +// GetNetDev provides a mock function with given fields: +func (_m *VdpaDevice) GetNetDev() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + +// GetParent provides a mock function with given fields: +func (_m *VdpaDevice) GetParent() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + +// GetPath provides a mock function with given fields: +func (_m *VdpaDevice) GetPath() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} diff --git 
a/go-controller/pkg/util/mocks/VdpaOps.go b/go-controller/pkg/util/mocks/VdpaOps.go new file mode 100644 index 00000000000..028c6bcce5b --- /dev/null +++ b/go-controller/pkg/util/mocks/VdpaOps.go @@ -0,0 +1,36 @@ +// Code generated by mockery v2.2.1. DO NOT EDIT. + +package mocks + +import ( + kvdpa "github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa" + mock "github.com/stretchr/testify/mock" +) + +// VdpaOps is an autogenerated mock type for the VdpaOps type +type VdpaOps struct { + mock.Mock +} + +// GetVdpaDeviceByPci provides a mock function with given fields: pciAddress +func (_m *VdpaOps) GetVdpaDeviceByPci(pciAddress string) (kvdpa.VdpaDevice, error) { + ret := _m.Called(pciAddress) + + var r0 kvdpa.VdpaDevice + if rf, ok := ret.Get(0).(func(string) kvdpa.VdpaDevice); ok { + r0 = rf(pciAddress) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(kvdpa.VdpaDevice) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(string) error); ok { + r1 = rf(pciAddress) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} diff --git a/go-controller/pkg/util/vdpa_linux.go b/go-controller/pkg/util/vdpa_linux.go new file mode 100644 index 00000000000..38108d325f6 --- /dev/null +++ b/go-controller/pkg/util/vdpa_linux.go @@ -0,0 +1,37 @@ +package util + +import ( + "github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa" +) + +type VdpaDevice interface { + kvdpa.VdpaDevice +} + +type VdpaOps interface { + GetVdpaDeviceByPci(pciAddress string) (kvdpa.VdpaDevice, error) +} + +type defaultVdpaOps struct { +} + +var vdpaOps VdpaOps = &defaultVdpaOps{} + +// SetVdpaOpsInst method should be used by unit tests to inject a mock VdpaOps instance +func SetVdpaOpsInst(mockInst VdpaOps) { + vdpaOps = mockInst +} + +// GetVdpaOps will be invoked by functions in other packages that would need access to the govdpa library methods. 
+func GetVdpaOps() VdpaOps { + return vdpaOps +} + +func (v *defaultVdpaOps) GetVdpaDeviceByPci(pciAddress string) (kvdpa.VdpaDevice, error) { + // the PCI prefix is required by the govdpa library + vdpaDevices, err := kvdpa.GetVdpaDevicesByPciAddress("pci/" + pciAddress) + if len(vdpaDevices) > 0 { + return vdpaDevices[0], nil + } + return nil, err +} diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/LICENSE b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/device.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/device.go new file mode 100644 index 00000000000..120b4957048 --- /dev/null +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/device.go @@ -0,0 +1,338 @@ +package kvdpa + +import ( + "errors" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// Exported constants +const ( + VhostVdpaDriver = "vhost_vdpa" + VirtioVdpaDriver = "virtio_vdpa" +) + +// Private constants +const ( + vdpaBusDevDir = "/sys/bus/vdpa/devices" + vdpaVhostDevDir = "/dev" + rootDevDir = "/sys/devices" +) + +// VdpaDevice contains information about a Vdpa Device +type VdpaDevice interface { + Driver() string + Name() string + MgmtDev() MgmtDev + VirtioNet() VirtioNet + VhostVdpa() VhostVdpa + ParentDevicePath() (string, error) +} + +// vdpaDev implements VdpaDevice interface +type vdpaDev struct { + name string + driver string + mgmtDev *mgmtDev + virtioNet VirtioNet + vhostVdpa VhostVdpa +} + +// Driver resturns de device's driver name +func (vd *vdpaDev) Driver() string { + return vd.driver +} + +// Driver resturns de device's name +func (vd *vdpaDev) Name() string { + return vd.name +} + +// MgmtDev returns the device's management device +func (vd *vdpaDev) 
MgmtDev() MgmtDev { + return vd.mgmtDev +} + +// VhostVdpa returns the VhostVdpa device information associated +// or nil if the device is not bound to the vhost_vdpa driver +func (vd *vdpaDev) VhostVdpa() VhostVdpa { + return vd.vhostVdpa +} + +// Virtionet returns the VirtioNet device information associated +// or nil if the device is not bound to the virtio_vdpa driver +func (vd *vdpaDev) VirtioNet() VirtioNet { + return vd.virtioNet +} + +// getBusInfo populates the vdpa bus information +// the vdpa device must have at least the name prepopulated +func (vd *vdpaDev) getBusInfo() error { + driverLink, err := os.Readlink(filepath.Join(vdpaBusDevDir, vd.name, "driver")) + if err != nil { + // No error if driver is not present. The device is simply not bound to any. + return nil + } + + vd.driver = filepath.Base(driverLink) + + switch vd.driver { + case VhostVdpaDriver: + vd.vhostVdpa, err = vd.getVhostVdpaDev() + if err != nil { + return err + } + case VirtioVdpaDriver: + vd.virtioNet, err = vd.getVirtioVdpaDev() + if err != nil { + return err + } + } + + return nil +} + +// parseAttributes populates the vdpa device information from netlink attributes +func (vd *vdpaDev) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + mgmtDev := &mgmtDev{} + for _, a := range attrs { + switch a.Attr.Type { + case VdpaAttrDevName: + vd.name = string(a.Value[:len(a.Value)-1]) + case VdpaAttrMgmtDevBusName: + mgmtDev.busName = string(a.Value[:len(a.Value)-1]) + case VdpaAttrMgmtDevDevName: + mgmtDev.devName = string(a.Value[:len(a.Value)-1]) + } + } + vd.mgmtDev = mgmtDev + return nil +} + +/* Finds the vhost vdpa device of a vdpa device and returns it's path */ +func (vd *vdpaDev) getVhostVdpaDev() (VhostVdpa, error) { + // vhost vdpa devices live in the vdpa device's path + path := filepath.Join(vdpaBusDevDir, vd.name) + return GetVhostVdpaDevInPath(path) +} + +/* ParentDevice returns the path of the parent device (e.g: PCI) of the device */ +func (vd *vdpaDev) 
ParentDevicePath() (string, error) { + vdpaDevicePath := filepath.Join(vdpaBusDevDir, vd.name) + + /* For pci devices we have: + /sys/bud/vdpa/devices/vdpaX -> + ../../../devices/pci0000:00/.../0000:05:00:1/vdpaX + + Resolving the symlinks should give us the parent PCI device. + */ + devicePath, err := filepath.EvalSymlinks(vdpaDevicePath) + if err != nil { + return "", err + } + + /* If the parent device is the root device /sys/devices, there is + no parent (e.g: vdpasim). + */ + parent := filepath.Dir(devicePath) + if parent == rootDevDir { + return devicePath, nil + } + + return parent, nil +} + +/* + Finds the virtio vdpa device of a vdpa device and returns its path + +Currently, PCI-based devices have the following sysfs structure: +/sys/bus/vdpa/devices/ + + vdpa1 -> ../../../devices/pci0000:00/0000:00:03.2/0000:05:00.2/vdpa1 + +In order to find the virtio device we look for virtio* devices inside the parent device: + + sys/devices/pci0000:00/0000:00:03.2/0000:05:00.2/virtio{N} + +We also check the virtio device exists in the virtio bus: +/sys/bus/virtio/devices + + virtio{N} -> ../../../devices/pci0000:00/0000:00:03.2/0000:05:00.2/virtio{N} +*/ +func (vd *vdpaDev) getVirtioVdpaDev() (VirtioNet, error) { + parentPath, err := vd.ParentDevicePath() + if err != nil { + return nil, err + } + return GetVirtioNetInPath(parentPath) +} + +/*GetVdpaDevice returns the vdpa device information by a vdpa device name */ +func GetVdpaDevice(name string) (VdpaDevice, error) { + nameAttr, err := GetNetlinkOps().NewAttribute(VdpaAttrDevName, name) + if err != nil { + return nil, err + } + + msgs, err := GetNetlinkOps(). + RunVdpaNetlinkCmd(VdpaCmdDevGet, 0, []*nl.RtAttr{nameAttr}) + if err != nil { + return nil, err + } + + vdpaDevs, err := parseDevLinkVdpaDevList(msgs) + if err != nil { + return nil, err + } + return vdpaDevs[0], nil +} + +/* +GetVdpaDevicesByMgmtDev returns the VdpaDevice objects whose MgmtDev +has the given bus and device names. 
+*/ +func GetVdpaDevicesByMgmtDev(busName, devName string) ([]VdpaDevice, error) { + result := []VdpaDevice{} + devices, err := ListVdpaDevices() + if err != nil { + return nil, err + } + for _, device := range devices { + if device.MgmtDev() != nil && + device.MgmtDev().BusName() == busName && + device.MgmtDev().DevName() == devName { + result = append(result, device) + } + } + if len(result) == 0 { + return nil, syscall.ENODEV + } + return result, nil +} + +/*ListVdpaDevices returns a list of all available vdpa devices */ +func ListVdpaDevices() ([]VdpaDevice, error) { + msgs, err := GetNetlinkOps().RunVdpaNetlinkCmd(VdpaCmdDevGet, syscall.NLM_F_DUMP, nil) + if err != nil { + return nil, err + } + + vdpaDevs, err := parseDevLinkVdpaDevList(msgs) + if err != nil { + return nil, err + } + return vdpaDevs, nil +} + +func extractBusNameAndMgmtDeviceName(fullMgmtDeviceName string) (busName string, mgmtDeviceName string, err error) { + numSlashes := strings.Count(fullMgmtDeviceName, "/") + if numSlashes > 1 { + return "", "", errors.New("expected mgmtDeviceName to be either in the format / or ") + } else if numSlashes == 0 { + return "", fullMgmtDeviceName, nil + } else { + values := strings.Split(fullMgmtDeviceName, "/") + return values[0], values[1], nil + } +} + +/* +GetVdpaDevicesByPciAddress returns the VdpaDevice objects for the given pciAddress + + The pciAddress must have one of the following formats: + - MgmtBusName/MgmtDevName + - MgmtDevName +*/ +func GetVdpaDevicesByPciAddress(pciAddress string) ([]VdpaDevice, error) { + busName, mgmtDeviceName, err := extractBusNameAndMgmtDeviceName(pciAddress) + if err != nil { + return nil, unix.EINVAL + } + + return GetVdpaDevicesByMgmtDev(busName, mgmtDeviceName) +} + +/*AddVdpaDevice adds a new vdpa device to the given management device */ +func AddVdpaDevice(mgmtDeviceName string, vdpaDeviceName string) error { + if mgmtDeviceName == "" || vdpaDeviceName == "" { + return unix.EINVAL + } + + busName, mgmtDeviceName, 
err := extractBusNameAndMgmtDeviceName(mgmtDeviceName) + if err != nil { + return unix.EINVAL + } + + var attributes []*nl.RtAttr + var busNameAttr *nl.RtAttr + if busName != "" { + busNameAttr, err = GetNetlinkOps().NewAttribute(VdpaAttrMgmtDevBusName, busName) + if err != nil { + return err + } + attributes = append(attributes, busNameAttr) + } + + mgmtAttr, err := GetNetlinkOps().NewAttribute(VdpaAttrMgmtDevDevName, mgmtDeviceName) + if err != nil { + return err + } + attributes = append(attributes, mgmtAttr) + + nameAttr, err := GetNetlinkOps().NewAttribute(VdpaAttrDevName, vdpaDeviceName) + if err != nil { + return err + } + attributes = append(attributes, nameAttr) + + _, err = GetNetlinkOps().RunVdpaNetlinkCmd(VdpaCmdDevNew, unix.NLM_F_ACK|unix.NLM_F_REQUEST, attributes) + if err != nil { + return err + } + + return nil +} + +/*DeleteVdpaDevice deletes a vdpa device */ +func DeleteVdpaDevice(vdpaDeviceName string) error { + if vdpaDeviceName == "" { + return unix.EINVAL + } + + nameAttr, err := GetNetlinkOps().NewAttribute(VdpaAttrDevName, vdpaDeviceName) + if err != nil { + return err + } + + _, err = GetNetlinkOps().RunVdpaNetlinkCmd(VdpaCmdDevDel, unix.NLM_F_ACK|unix.NLM_F_REQUEST, []*nl.RtAttr{nameAttr}) + if err != nil { + return err + } + + return nil +} + +func parseDevLinkVdpaDevList(msgs [][]byte) ([]VdpaDevice, error) { + devices := make([]VdpaDevice, 0, len(msgs)) + + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + dev := &vdpaDev{} + if err = dev.parseAttributes(attrs); err != nil { + return nil, err + } + if err = dev.getBusInfo(); err != nil { + return nil, err + } + devices = append(devices, dev) + } + return devices, nil +} diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/mgmtdev.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/mgmtdev.go new file mode 100644 index 00000000000..dd9e9b75133 --- /dev/null +++ 
b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/mgmtdev.go @@ -0,0 +1,111 @@ +package kvdpa + +import ( + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// MgmtDev represents a Vdpa Management Device +type MgmtDev interface { + BusName() string // Optional + DevName() string // + Name() string // The MgmtDevName is BusName/DevName +} + +type mgmtDev struct { + busName string + devName string +} + +// BusName returns the MgmtDev's bus name +func (m *mgmtDev) BusName() string { + return m.busName +} + +// BusName returns the MgmtDev's device name +func (m *mgmtDev) DevName() string { + return m.devName +} + +// BusName returns the MgmtDev's name: [BusName/]DeviceName +func (m *mgmtDev) Name() string { + if m.busName != "" { + return strings.Join([]string{m.busName, m.devName}, "/") + } + return m.devName +} + +// parseAttributes parses the netlink attributes and populates the fields accordingly +func (m *mgmtDev) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case VdpaAttrMgmtDevBusName: + m.busName = string(a.Value[:len(a.Value)-1]) + case VdpaAttrMgmtDevDevName: + m.devName = string(a.Value[:len(a.Value)-1]) + } + } + return nil +} + +// ListVdpaMgmtDevices returns the list of all available MgmtDevs +func ListVdpaMgmtDevices() ([]MgmtDev, error) { + msgs, err := GetNetlinkOps().RunVdpaNetlinkCmd(VdpaCmdMgmtDevGet, syscall.NLM_F_DUMP, nil) + if err != nil { + return nil, err + } + + mgtmDevs, err := parseDevLinkVdpaMgmtDevList(msgs) + if err != nil { + return nil, err + } + return mgtmDevs, nil +} + +// GetVdpaMgmtDevices returns a MgmtDev based on a busName and deviceName +func GetVdpaMgmtDevices(busName, devName string) (MgmtDev, error) { + data := []*nl.RtAttr{} + if busName != "" { + bus, err := GetNetlinkOps().NewAttribute(VdpaAttrMgmtDevBusName, busName) + if err != nil { + return nil, err + } + data = append(data, bus) + } + + dev, err := 
GetNetlinkOps().NewAttribute(VdpaAttrMgmtDevDevName, devName) + if err != nil { + return nil, err + } + data = append(data, dev) + + msgs, err := GetNetlinkOps().RunVdpaNetlinkCmd(VdpaCmdMgmtDevGet, 0, data) + if err != nil { + return nil, err + } + + mgtmDevs, err := parseDevLinkVdpaMgmtDevList(msgs) + if err != nil { + return nil, err + } + return mgtmDevs[0], nil +} + +func parseDevLinkVdpaMgmtDevList(msgs [][]byte) ([]MgmtDev, error) { + devices := make([]MgmtDev, 0, len(msgs)) + + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + dev := &mgmtDev{} + if err = dev.parseAttributes(attrs); err != nil { + return nil, err + } + devices = append(devices, dev) + } + return devices, nil +} diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/netlink.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/netlink.go new file mode 100644 index 00000000000..b5af17f6a64 --- /dev/null +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/netlink.go @@ -0,0 +1,182 @@ +package kvdpa + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink" + "github.com/vishvananda/netlink/nl" +) + +/* Vdpa Netlink Name */ +const ( + VdpaGenlName = "vdpa" +) + +/* VDPA Netlink Commands */ +const ( + VdpaCmdUnspec uint8 = iota + VdpaCmdMgmtDevNew + VdpaCmdMgmtDevGet /* can dump */ + VdpaCmdDevNew + VdpaCmdDevDel + VdpaCmdDevGet /* can dump */ + VdpaCmdDevConfigGet /* can dump */ +) + +/* VDPA Netlink Attributes */ +const ( + VdpaAttrUnspec = iota + + /* bus name (optional) + dev name together make the parent device handle */ + VdpaAttrMgmtDevBusName /* string */ + VdpaAttrMgmtDevDevName /* string */ + VdpaAttrMgmtDevSupportedClasses /* u64 */ + + VdpaAttrDevName /* string */ + VdpaAttrDevID /* u32 */ + VdpaAttrDevVendorID /* u32 */ + VdpaAttrDevMaxVqs /* u32 */ + VdpaAttrDevMaxVqSize /* u16 */ + VdpaAttrDevMinVqSize /* u16 */ + + 
VdpaAttrDevNetCfgMacAddr /* binary */ + VdpaAttrDevNetStatus /* u8 */ + VdpaAttrDevNetCfgMaxVqp /* u16 */ + VdpaAttrGetNetCfgMTU /* u16 */ + + /* new attributes must be added above here */ + VdpaAttrMax +) + +var ( + commonNetlinkFlags = syscall.NLM_F_REQUEST | syscall.NLM_F_ACK +) + +// NetlinkOps defines the Netlink Operations +type NetlinkOps interface { + RunVdpaNetlinkCmd(command uint8, flags int, data []*nl.RtAttr) ([][]byte, error) + NewAttribute(attrType int, data interface{}) (*nl.RtAttr, error) +} + +type defaultNetlinkOps struct { +} + +var netlinkOps NetlinkOps = &defaultNetlinkOps{} + +// SetNetlinkOps method would be used by unit tests +func SetNetlinkOps(mockInst NetlinkOps) { + netlinkOps = mockInst +} + +// GetNetlinkOps will be invoked by functions in other packages that would need access to the sriovnet library methods. +func GetNetlinkOps() NetlinkOps { + return netlinkOps +} + +// RunVdpaNerlinkCmd runs a vdpa netlink command and returns the response +func (defaultNetlinkOps) RunVdpaNetlinkCmd(command uint8, flags int, data []*nl.RtAttr) ([][]byte, error) { + f, err := netlink.GenlFamilyGet(VdpaGenlName) + if err != nil { + return nil, err + } + + msg := &nl.Genlmsg{ + Command: command, + Version: nl.GENL_CTRL_VERSION, + } + req := nl.NewNetlinkRequest(int(f.ID), commonNetlinkFlags|flags) + + req.AddData(msg) + for _, d := range data { + req.AddData(d) + } + + msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + return msgs, nil +} + +// NewAttribute returns a new netlink attribute based on the provided data +func (defaultNetlinkOps) NewAttribute(attrType int, data interface{}) (*nl.RtAttr, error) { + switch attrType { + case VdpaAttrMgmtDevBusName, VdpaAttrMgmtDevDevName, VdpaAttrDevName: + strData, ok := data.(string) + if !ok { + return nil, fmt.Errorf("attribute type %d requires string data", attrType) + } + bytes := make([]byte, len(strData)+1) + copy(bytes, strData) + return nl.NewRtAttr(attrType, 
bytes), nil + /* TODO + case: + VdpaAttrMgmtDevBusName string + VdpaAttrMgmtDevDevName string + VdpaAttrMgmtDevSupportedClasses u64 + + VdpaAttrDevName string + VdpaAttrDevID u32 + VdpaAttrDevVendorID u32 + VdpaAttrDevMaxVqs u32 + VdpaAttrDevMaxVqSize u16 + VdpaAttrDevMinVqSize u16 + + VdpaAttrDevNetCfgMacAddr binary + VdpaAttrDevNetStatus u8 + VdpaAttrDevNetCfgMaxVqp u16 + VdpaAttrGetNetCfgMTU u16 + */ + default: + return nil, fmt.Errorf("invalid attribute type %d", attrType) + } + +} + +func newMockSingleMessage(command uint8, attrs []*nl.RtAttr) []byte { + b := make([]byte, 0) + dataBytes := make([][]byte, len(attrs)+1) + + msg := &nl.Genlmsg{ + Command: command, + Version: nl.GENL_CTRL_VERSION, + } + dataBytes[0] = msg.Serialize() + + for i, attr := range attrs { + dataBytes[i+1] = attr.Serialize() + } + next := 0 + for _, data := range dataBytes { + for _, dataByte := range data { + b = append(b, dataByte) + next = next + 1 + } + } + return b + /* + nlm := &nl.NetlinkRequest{ + NlMsghdr: unix.NlMsghdr{ + Len: uint32(unix.SizeofNlMsghdr), + Type: 0xa, + Flags: 0, + Seq: 1, + }, + } + for _, a := range attrs { + nlm.AddData(a) + } + return nlm.Serialize() + */ +} + +// Used for unit tests +func newMockNetLinkResponse(command uint8, data [][]*nl.RtAttr) [][]byte { + msgs := make([][]byte, len(data)) + for i, msgData := range data { + msgDataBytes := newMockSingleMessage(command, msgData) + msgs[i] = msgDataBytes + } + return msgs +} diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/vhost.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/vhost.go new file mode 100644 index 00000000000..5067dcf9e14 --- /dev/null +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/vhost.go @@ -0,0 +1,62 @@ +package kvdpa + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// VhostVdpa is the vhost-vdpa device information +type VhostVdpa interface { + Name() string + Path() string +} + +// 
vhostVdpa implements VhostVdpa interface +type vhostVdpa struct { + name string + path string +} + +// Name returns the vhost device's name +func (v *vhostVdpa) Name() string { + return v.name +} + +// Name returns the vhost device's path +func (v *vhostVdpa) Path() string { + return v.path +} + +// GetVhostVdpaDevInPath returns the VhostVdpa found in the provided parent device's path +func GetVhostVdpaDevInPath(parentPath string) (VhostVdpa, error) { + fd, err := os.Open(parentPath) + if err != nil { + return nil, err + } + defer fd.Close() + + fileInfos, err := fd.Readdir(-1) + if err != nil { + return nil, err + } + for _, file := range fileInfos { + if strings.Contains(file.Name(), "vhost-vdpa") && + file.IsDir() { + devicePath := filepath.Join(vdpaVhostDevDir, file.Name()) + info, err := os.Stat(devicePath) + if err != nil { + return nil, err + } + if info.Mode()&os.ModeDevice == 0 { + return nil, fmt.Errorf("vhost device %s is not a valid device", devicePath) + } + return &vhostVdpa{ + name: file.Name(), + path: devicePath, + }, nil + } + } + return nil, fmt.Errorf("no VhostVdpa device foiund in path %s", parentPath) +} diff --git a/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/virtio.go b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/virtio.go new file mode 100644 index 00000000000..1d99d5518d9 --- /dev/null +++ b/go-controller/vendor/github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa/virtio.go @@ -0,0 +1,68 @@ +package kvdpa + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +const ( + virtioDevDir = "/sys/bus/virtio/devices" +) + +// VirtioNet is the virtio-net device information +type VirtioNet interface { + Name() string + NetDev() string +} + +// virtioNet implements VirtioNet interface +type virtioNet struct { + name string + netDev string +} + +// Name returns the virtio device's name (as appears in the virtio bus) +func (v *virtioNet) Name() string { + return v.name +} + +// NetDev returns the 
virtio-net netdev name +func (v *virtioNet) NetDev() string { + return v.netDev +} + +// GetVirtioNetInPath returns the VirtioNet found in the provided parent device's path +func GetVirtioNetInPath(parentPath string) (VirtioNet, error) { + fd, err := os.Open(parentPath) + if err != nil { + return nil, err + } + defer fd.Close() + + fileInfos, err := fd.Readdir(-1) + if err != nil { + return nil, err + } + for _, file := range fileInfos { + if strings.Contains(file.Name(), "virtio") && + file.IsDir() { + virtioDevPath := filepath.Join(virtioDevDir, file.Name()) + if _, err := os.Stat(virtioDevPath); os.IsNotExist(err) { + return nil, fmt.Errorf("virtio device %s does not exist", virtioDevPath) + } + var netdev string + // Read the "net" directory in the virtio device path + netDeviceFiles, err := os.ReadDir(filepath.Join(virtioDevPath, "net")) + if err == nil && len(netDeviceFiles) == 1 { + netdev = strings.TrimSpace(netDeviceFiles[0].Name()) + } + return &virtioNet{ + name: file.Name(), + netDev: netdev, + }, nil + } + } + return nil, fmt.Errorf("no VirtioNet device found in path %s", parentPath) +} diff --git a/go-controller/vendor/modules.txt b/go-controller/vendor/modules.txt index d8784f55e15..06f07575fb6 100644 --- a/go-controller/vendor/modules.txt +++ b/go-controller/vendor/modules.txt @@ -177,6 +177,9 @@ github.com/json-iterator/go github.com/juju/errors # github.com/juju/testing v0.0.0-20200706033705-4c23f9c453cd ## explicit; go 1.14 +# github.com/k8snetworkplumbingwg/govdpa v0.1.4 +## explicit; go 1.17 +github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa # github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.3.0 ## explicit; go 1.17 github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io