From 3a6773fd225a69ddcc0abdf59742c9cfcbede91e Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 10 Oct 2018 21:03:48 +0300 Subject: [PATCH] Added sriov interface type support The type extracts PCI IDs from the SRIOV-VF-PCI-ADDR environment variable set by the SR-IOV CNI plugin, and configures libvirt hostdev devices that correspond to the extracted IDs. The change adds host mounts under /sys and /dev to allow containerized libvirt / qemu to plug PCI devices using the vfio kernel interface. Note that at this moment, SR-IOV enabled VMIs run their virt-launcher pods privileged to allow qemu to open /dev/vfio/NN devices. We don't know the name of the device in advance; it is only known once we create and start the pod, at which point the SR-IOV DP allocates a PCI ID to the pod that can be mapped to its IOMMU group number and hence to its /dev/vfio/NN device. In the future, the SR-IOV DP will register the /dev/vfio/NN device with the device cgroup, at which point we will be able to drop the privileged mode. (Additional capabilities like SYS_RESOURCE and SYS_RAWIO are still needed.) 
This work is tracked in: https://github.com/intel/sriov-network-device-plugin/pull/26 --- api/openapi-spec/swagger.json | 4 + pkg/api/v1/deepcopy_generated.go | 25 +++ pkg/api/v1/openapi_generated.go | 26 ++- pkg/api/v1/schema.go | 5 + pkg/api/v1/schema_swagger_generated.go | 4 + pkg/virt-controller/services/template.go | 67 ++++++- pkg/virt-controller/services/template_test.go | 42 +++++ pkg/virt-launcher/virtwrap/api/converter.go | 177 ++++++++++++------ .../virtwrap/api/converter_test.go | 34 ++++ .../virtwrap/api/deepcopy_generated.go | 69 ++++++- pkg/virt-launcher/virtwrap/api/schema.go | 24 ++- .../virtwrap/network/podinterface.go | 5 + .../virtwrap/network/podinterface_test.go | 18 ++ tools/vms-generator/utils/utils.go | 16 ++ tools/vms-generator/vms-generator.go | 1 + 15 files changed, 451 insertions(+), 66 deletions(-) diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index e291a419a737..42c03f134f3d 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -4540,10 +4540,14 @@ }, "slirp": { "$ref": "#/definitions/v1.InterfaceSlirp" + }, + "sriov": { + "$ref": "#/definitions/v1.InterfaceSRIOV" } } }, "v1.InterfaceBridge": {}, + "v1.InterfaceSRIOV": {}, "v1.InterfaceSlirp": {}, "v1.KVMTimer": { "properties": { diff --git a/pkg/api/v1/deepcopy_generated.go b/pkg/api/v1/deepcopy_generated.go index dc99f04037fe..45870c612c02 100644 --- a/pkg/api/v1/deepcopy_generated.go +++ b/pkg/api/v1/deepcopy_generated.go @@ -963,6 +963,15 @@ func (in *InterfaceBindingMethod) DeepCopyInto(out *InterfaceBindingMethod) { **out = **in } } + if in.SRIOV != nil { + in, out := &in.SRIOV, &out.SRIOV + if *in == nil { + *out = nil + } else { + *out = new(InterfaceSRIOV) + **out = **in + } + } return } @@ -992,6 +1001,22 @@ func (in *InterfaceBridge) DeepCopy() *InterfaceBridge { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InterfaceSRIOV) DeepCopyInto(out *InterfaceSRIOV) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InterfaceSRIOV. +func (in *InterfaceSRIOV) DeepCopy() *InterfaceSRIOV { + if in == nil { + return nil + } + out := new(InterfaceSRIOV) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InterfaceSlirp) DeepCopyInto(out *InterfaceSlirp) { *out = *in diff --git a/pkg/api/v1/openapi_generated.go b/pkg/api/v1/openapi_generated.go index 8b5011e89333..e03f24050c35 100644 --- a/pkg/api/v1/openapi_generated.go +++ b/pkg/api/v1/openapi_generated.go @@ -61,6 +61,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "kubevirt.io/kubevirt/pkg/api/v1.Interface": schema_kubevirt_pkg_api_v1_Interface(ref), "kubevirt.io/kubevirt/pkg/api/v1.InterfaceBindingMethod": schema_kubevirt_pkg_api_v1_InterfaceBindingMethod(ref), "kubevirt.io/kubevirt/pkg/api/v1.InterfaceBridge": schema_kubevirt_pkg_api_v1_InterfaceBridge(ref), + "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSRIOV": schema_kubevirt_pkg_api_v1_InterfaceSRIOV(ref), "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp": schema_kubevirt_pkg_api_v1_InterfaceSlirp(ref), "kubevirt.io/kubevirt/pkg/api/v1.KVMTimer": schema_kubevirt_pkg_api_v1_KVMTimer(ref), "kubevirt.io/kubevirt/pkg/api/v1.LunTarget": schema_kubevirt_pkg_api_v1_LunTarget(ref), @@ -1082,6 +1083,11 @@ func schema_kubevirt_pkg_api_v1_Interface(ref common.ReferenceCallback) common.O Ref: ref("kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp"), }, }, + "sriov": { + SchemaProps: spec.SchemaProps{ + Ref: ref("kubevirt.io/kubevirt/pkg/api/v1.InterfaceSRIOV"), + }, + }, "ports": { SchemaProps: spec.SchemaProps{ Description: "List of ports to be forwarded to the virtual machine.", @@ -1121,7 +1127,7 @@ func schema_kubevirt_pkg_api_v1_Interface(ref 
common.ReferenceCallback) common.O }, }, Dependencies: []string{ - "kubevirt.io/kubevirt/pkg/api/v1.InterfaceBridge", "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp", "kubevirt.io/kubevirt/pkg/api/v1.Port"}, + "kubevirt.io/kubevirt/pkg/api/v1.InterfaceBridge", "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSRIOV", "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp", "kubevirt.io/kubevirt/pkg/api/v1.Port"}, } } @@ -1141,11 +1147,16 @@ func schema_kubevirt_pkg_api_v1_InterfaceBindingMethod(ref common.ReferenceCallb Ref: ref("kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp"), }, }, + "sriov": { + SchemaProps: spec.SchemaProps{ + Ref: ref("kubevirt.io/kubevirt/pkg/api/v1.InterfaceSRIOV"), + }, + }, }, }, }, Dependencies: []string{ - "kubevirt.io/kubevirt/pkg/api/v1.InterfaceBridge", "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp"}, + "kubevirt.io/kubevirt/pkg/api/v1.InterfaceBridge", "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSRIOV", "kubevirt.io/kubevirt/pkg/api/v1.InterfaceSlirp"}, } } @@ -1160,6 +1171,17 @@ func schema_kubevirt_pkg_api_v1_InterfaceBridge(ref common.ReferenceCallback) co } } +func schema_kubevirt_pkg_api_v1_InterfaceSRIOV(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Properties: map[string]spec.Schema{}, + }, + }, + Dependencies: []string{}, + } +} + func schema_kubevirt_pkg_api_v1_InterfaceSlirp(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/pkg/api/v1/schema.go b/pkg/api/v1/schema.go index 40a5103f8ede..350b5670e33b 100644 --- a/pkg/api/v1/schema.go +++ b/pkg/api/v1/schema.go @@ -803,6 +803,7 @@ type Interface struct { type InterfaceBindingMethod struct { Bridge *InterfaceBridge `json:"bridge,omitempty"` Slirp *InterfaceSlirp `json:"slirp,omitempty"` + SRIOV *InterfaceSRIOV `json:"sriov,omitempty"` } // --- @@ -813,6 +814,10 @@ type InterfaceBridge struct{} // 
+k8s:openapi-gen=true type InterfaceSlirp struct{} +// --- +// +k8s:openapi-gen=true +type InterfaceSRIOV struct{} + // Port repesents a port to expose from the virtual machine. // Default protocol TCP. // The port field is mandatory diff --git a/pkg/api/v1/schema_swagger_generated.go b/pkg/api/v1/schema_swagger_generated.go index bc38b19df56e..3c68ca923513 100644 --- a/pkg/api/v1/schema_swagger_generated.go +++ b/pkg/api/v1/schema_swagger_generated.go @@ -378,6 +378,10 @@ func (InterfaceSlirp) SwaggerDoc() map[string]string { return map[string]string{} } +func (InterfaceSRIOV) SwaggerDoc() map[string]string { + return map[string]string{} +} + func (Port) SwaggerDoc() map[string]string { return map[string]string{ "": "Port repesents a port to expose from the virtual machine.\nDefault protocol TCP.\nThe port field is mandatory", diff --git a/pkg/virt-controller/services/template.go b/pkg/virt-controller/services/template.go index 8977abbea4bb..15a19cc6cbf8 100644 --- a/pkg/virt-controller/services/template.go +++ b/pkg/virt-controller/services/template.go @@ -110,6 +110,15 @@ func GetImagePullPolicy(store cache.Store) (policy k8sv1.PullPolicy, err error) return } +func isSRIOVVmi(vmi *v1.VirtualMachineInstance) bool { + for _, iface := range vmi.Spec.Domain.Devices.Interfaces { + if iface.SRIOV != nil { + return true + } + } + return false +} + func (t *templateService) RenderLaunchManifest(vmi *v1.VirtualMachineInstance) (*k8sv1.Pod, error) { precond.MustNotBeNil(vmi) domain := precond.MustNotBeEmpty(vmi.GetObjectMeta().GetName()) @@ -125,6 +134,7 @@ func (t *templateService) RenderLaunchManifest(vmi *v1.VirtualMachineInstance) ( var volumes []k8sv1.Volume var volumeDevices []k8sv1.VolumeDevice var userId int64 = 0 + // Privileged mode is disabled by default. 
var privileged bool = false var volumeMounts []k8sv1.VolumeMount var imagePullSecrets []k8sv1.LocalObjectReference @@ -149,6 +159,60 @@ func (t *templateService) RenderLaunchManifest(vmi *v1.VirtualMachineInstance) ( MountPath: "/var/run/libvirt", }) + if isSRIOVVmi(vmi) { + // libvirt needs this volume to unbind the device from kernel + // driver, and register it with vfio userspace driver + volumeMounts = append(volumeMounts, k8sv1.VolumeMount{ + Name: "pci-bus", + MountPath: "/sys/bus/pci/", + }) + volumes = append(volumes, k8sv1.Volume{ + Name: "pci-bus", + VolumeSource: k8sv1.VolumeSource{ + HostPath: &k8sv1.HostPathVolumeSource{ + Path: "/sys/bus/pci/", + }, + }, + }) + + // libvirt needs this volume to determine iommu group assigned + // to the device + volumeMounts = append(volumeMounts, k8sv1.VolumeMount{ + Name: "pci-devices", + MountPath: "/sys/devices/", + }) + volumes = append(volumes, k8sv1.Volume{ + Name: "pci-devices", + VolumeSource: k8sv1.VolumeSource{ + HostPath: &k8sv1.HostPathVolumeSource{ + Path: "/sys/devices/", + }, + }, + }) + + // libvirt uses vfio-pci to pass host devices through + volumeMounts = append(volumeMounts, k8sv1.VolumeMount{ + Name: "dev-vfio", + MountPath: "/dev/vfio/", + }) + volumes = append(volumes, k8sv1.Volume{ + Name: "dev-vfio", + VolumeSource: k8sv1.VolumeSource{ + HostPath: &k8sv1.HostPathVolumeSource{ + Path: "/dev/vfio/", + }, + }, + }) + + // todo: revisit when SR-IOV DP registers /dev/vfio/NN with pod + // device cgroup: + // https://github.com/intel/sriov-network-device-plugin/pull/26 + // + // Run virt-launcher compute container privileged to allow qemu + // to open /dev/vfio/NN for PCI passthrough + privileged = true + } + serviceAccountName := "" for _, volume := range vmi.Spec.Volumes { @@ -443,8 +507,7 @@ func (t *templateService) RenderLaunchManifest(vmi *v1.VirtualMachineInstance) ( Image: t.launcherImage, ImagePullPolicy: imagePullPolicy, SecurityContext: &k8sv1.SecurityContext{ - RunAsUser: &userId, - // 
Privileged mode is disabled. + RunAsUser: &userId, Privileged: &privileged, Capabilities: &k8sv1.Capabilities{ Add: capabilities, diff --git a/pkg/virt-controller/services/template_test.go b/pkg/virt-controller/services/template_test.go index 4f5100563c0e..5c994faa235f 100644 --- a/pkg/virt-controller/services/template_test.go +++ b/pkg/virt-controller/services/template_test.go @@ -741,6 +741,48 @@ var _ = Describe("Template", func() { }) }) + Context("with sriov interface", func() { + It("should run privileged", func() { + sriovInterface := v1.InterfaceSRIOV{} + domain := v1.DomainSpec{} + domain.Devices.Interfaces = []v1.Interface{{Name: "testnet", InterfaceBindingMethod: v1.InterfaceBindingMethod{SRIOV: &sriovInterface}}} + vmi := v1.VirtualMachineInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testvmi", Namespace: "default", UID: "1234", + }, + Spec: v1.VirtualMachineInstanceSpec{Domain: domain}, + } + + pod, err := svc.RenderLaunchManifest(&vmi) + Expect(err).ToNot(HaveOccurred()) + + Expect(len(pod.Spec.Containers)).To(Equal(1)) + Expect(*pod.Spec.Containers[0].SecurityContext.Privileged).To(Equal(true)) + }) + It("should mount pci related host directories", func() { + sriovInterface := v1.InterfaceSRIOV{} + domain := v1.DomainSpec{} + domain.Devices.Interfaces = []v1.Interface{{Name: "testnet", InterfaceBindingMethod: v1.InterfaceBindingMethod{SRIOV: &sriovInterface}}} + vmi := v1.VirtualMachineInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testvmi", Namespace: "default", UID: "1234", + }, + Spec: v1.VirtualMachineInstanceSpec{Domain: domain}, + } + + pod, err := svc.RenderLaunchManifest(&vmi) + Expect(err).ToNot(HaveOccurred()) + + Expect(len(pod.Spec.Containers)).To(Equal(1)) + // Skip first three mounts that are generic for all launcher pods + Expect(pod.Spec.Containers[0].VolumeMounts[3].MountPath).To(Equal("/sys/bus/pci/")) + Expect(pod.Spec.Containers[0].VolumeMounts[4].MountPath).To(Equal("/sys/devices/")) + 
Expect(pod.Spec.Containers[0].VolumeMounts[5].MountPath).To(Equal("/dev/vfio/")) + Expect(pod.Spec.Volumes[0].HostPath.Path).To(Equal("/sys/bus/pci/")) + Expect(pod.Spec.Volumes[1].HostPath.Path).To(Equal("/sys/devices/")) + Expect(pod.Spec.Volumes[2].HostPath.Path).To(Equal("/dev/vfio/")) + }) + }) Context("with slirp interface", func() { It("Should have empty port list in the pod manifest", func() { slirpInterface := v1.InterfaceSlirp{} diff --git a/pkg/virt-launcher/virtwrap/api/converter.go b/pkg/virt-launcher/virtwrap/api/converter.go index 1ed2e0647cd9..77cbd3493fed 100644 --- a/pkg/virt-launcher/virtwrap/api/converter.go +++ b/pkg/virt-launcher/virtwrap/api/converter.go @@ -491,6 +491,35 @@ func Convert_v1_FeatureHyperv_To_api_FeatureHyperv(source *v1.FeatureHyperv, hyp return nil } +// This function parses the SRIOV-VF-PCI-ADDR variable that is set by SR-IOV +// device plugin listing PCI IDs for devices allocated to the pod. The format +// is as follows: +// +// "": for no allocated devices +// "0000:81:11.1,": for a single device +// "0000:81:11.1,0000:81:11.2[,...]": for multiple devices +func getSRIOVPciAddresses() []string { + pciAddrString, isSet := os.LookupEnv("SRIOV-VF-PCI-ADDR") + if isSet { + addrs := strings.Split(pciAddrString, ",") + naddrs := len(addrs) + if naddrs > 0 { + if addrs[naddrs-1] == "" { + addrs = addrs[:naddrs-1] + } + } + return addrs + } + return []string{} +} + +func popSRIOVPciAddress(addrs []string) (string, []string, error) { + if len(addrs) > 0 { + return addrs[0], addrs[1:], nil + } + return "", addrs, fmt.Errorf("no more SR-IOV PCI addresses to allocate") +} + func Convert_v1_VirtualMachine_To_api_Domain(vmi *v1.VirtualMachineInstance, domain *Domain, c *ConverterContext) (err error) { precond.MustNotBeNil(vmi) precond.MustNotBeNil(domain) @@ -841,83 +870,117 @@ func Convert_v1_VirtualMachine_To_api_Domain(vmi *v1.VirtualMachineInstance, dom networks[network.Name] = network.DeepCopy() } + sriovPciAddresses := 
getSRIOVPciAddresses() + for _, iface := range vmi.Spec.Domain.Devices.Interfaces { net, isExist := networks[iface.Name] if !isExist { return fmt.Errorf("failed to find network %s", iface.Name) } - ifaceType := getInterfaceType(&iface) - domainIface := Interface{ - Model: &Model{ - Type: ifaceType, - }, - Alias: &Alias{ - Name: iface.Name, - }, - } - - // if UseEmulation unset and at least one NIC model is virtio, - // /dev/vhost-net must be present as we should have asked for it. - if ifaceType == "virtio" && virtioNetProhibited { - return fmt.Errorf("In-kernel virtio-net device emulation '/dev/vhost-net' not present") - } else if ifaceType == "virtio" && virtioNetMQRequested { - domainIface.Driver = &InterfaceDriver{Name: "vhost", Queues: numQueues} - } - - // Add a pciAddress if specifed - if iface.PciAddress != "" { - addr, err := decoratePciAddressField(iface.PciAddress) + if iface.SRIOV != nil { + var pciAddr string + pciAddr, sriovPciAddresses, err = popSRIOVPciAddress(sriovPciAddresses) if err != nil { - return fmt.Errorf("failed to configure interface %s: %v", iface.Name, err) + return err } - domainIface.Address = addr - } - if iface.Bridge != nil { - // TODO:(ihar) consider abstracting interface type conversion / - // detection into drivers - domainIface.Type = "bridge" - if value, ok := cniNetworks[iface.Name]; ok { - prefix := "" - // no error check, we assume that CNI type was set correctly - if net.Multus != nil { - prefix = "net" - } else if net.Genie != nil { - prefix = "eth" - } - domainIface.Source = InterfaceSource{ - Bridge: fmt.Sprintf("k6t-%s%d", prefix, value), - } - } else { - domainIface.Source = InterfaceSource{ - Bridge: DefaultBridgeName, - } + dbsfFields, err := util.ParsePciAddress(pciAddr) + if err != nil { + return err } + hostDev := HostDevice{ + Source: HostDeviceSource{ + Address: &Address{ + Type: "pci", + Domain: "0x" + dbsfFields[0], + Bus: "0x" + dbsfFields[1], + Slot: "0x" + dbsfFields[2], + Function: "0x" + dbsfFields[3], + 
}, + }, + Type: "pci", + Managed: "yes", + } if iface.BootOrder != nil { - domainIface.BootOrder = &BootOrder{Order: *iface.BootOrder} + hostDev.BootOrder = &BootOrder{Order: *iface.BootOrder} + } + log.Log.Infof("SR-IOV PCI device allocated: %s", pciAddr) + domain.Spec.Devices.HostDevices = append(domain.Spec.Devices.HostDevices, hostDev) + } else { + ifaceType := getInterfaceType(&iface) + domainIface := Interface{ + Model: &Model{ + Type: ifaceType, + }, + Alias: &Alias{ + Name: iface.Name, + }, } - } else if iface.Slirp != nil { - domainIface.Type = "user" - // Create network interface - if domain.Spec.QEMUCmd == nil { - domain.Spec.QEMUCmd = &Commandline{} + // if UseEmulation unset and at least one NIC model is virtio, + // /dev/vhost-net must be present as we should have asked for it. + if ifaceType == "virtio" && virtioNetProhibited { + return fmt.Errorf("In-kernel virtio-net device emulation '/dev/vhost-net' not present") + } else if ifaceType == "virtio" && virtioNetMQRequested { + domainIface.Driver = &InterfaceDriver{Name: "vhost", Queues: numQueues} } - if domain.Spec.QEMUCmd.QEMUArg == nil { - domain.Spec.QEMUCmd.QEMUArg = make([]Arg, 0) + // Add a pciAddress if specifed + if iface.PciAddress != "" { + addr, err := decoratePciAddressField(iface.PciAddress) + if err != nil { + return fmt.Errorf("failed to configure interface %s: %v", iface.Name, err) + } + domainIface.Address = addr } - // TODO: (seba) Need to change this if multiple interface can be connected to the same network - // append the ports from all the interfaces connected to the same network - err := createSlirpNetwork(iface, *net, domain) - if err != nil { - return err + if iface.Bridge != nil { + // TODO:(ihar) consider abstracting interface type conversion / + // detection into drivers + domainIface.Type = "bridge" + if value, ok := cniNetworks[iface.Name]; ok { + prefix := "" + // no error check, we assume that CNI type was set correctly + if net.Multus != nil { + prefix = "net" + } 
else if net.Genie != nil { + prefix = "eth" + } + domainIface.Source = InterfaceSource{ + Bridge: fmt.Sprintf("k6t-%s%d", prefix, value), + } + } else { + domainIface.Source = InterfaceSource{ + Bridge: DefaultBridgeName, + } + } + + if iface.BootOrder != nil { + domainIface.BootOrder = &BootOrder{Order: *iface.BootOrder} + } + } else if iface.Slirp != nil { + domainIface.Type = "user" + + // Create network interface + if domain.Spec.QEMUCmd == nil { + domain.Spec.QEMUCmd = &Commandline{} + } + + if domain.Spec.QEMUCmd.QEMUArg == nil { + domain.Spec.QEMUCmd.QEMUArg = make([]Arg, 0) + } + + // TODO: (seba) Need to change this if multiple interface can be connected to the same network + // append the ports from all the interfaces connected to the same network + err := createSlirpNetwork(iface, *net, domain) + if err != nil { + return err + } } + domain.Spec.Devices.Interfaces = append(domain.Spec.Devices.Interfaces, domainIface) } - domain.Spec.Devices.Interfaces = append(domain.Spec.Devices.Interfaces, domainIface) } return nil diff --git a/pkg/virt-launcher/virtwrap/api/converter_test.go b/pkg/virt-launcher/virtwrap/api/converter_test.go index 1bcaf719342c..d03e17954c88 100644 --- a/pkg/virt-launcher/virtwrap/api/converter_test.go +++ b/pkg/virt-launcher/virtwrap/api/converter_test.go @@ -1330,6 +1330,40 @@ var _ = Describe("Converter", func() { }) }) +var _ = Describe("getSRIOVPciAddresses", func() { + It("returns empty slice", func() { + Expect(len(getSRIOVPciAddresses())).To(Equal(0)) + }) + It("gracefully handles trailing comma", func() { + os.Setenv("SRIOV-VF-PCI-ADDR", "0000:81:11.1,") + addrs := getSRIOVPciAddresses() + Expect(len(addrs)).To(Equal(1)) + Expect(addrs[0]).To(Equal("0000:81:11.1")) + }) + It("returns multiple PCI addresses", func() { + os.Setenv("SRIOV-VF-PCI-ADDR", "0000:81:11.1,0001:02:00.0") + addrs := getSRIOVPciAddresses() + Expect(len(addrs)).To(Equal(2)) + Expect(addrs[0]).To(Equal("0000:81:11.1")) + 
Expect(addrs[1]).To(Equal("0001:02:00.0")) + }) +}) + +var _ = Describe("popSRIOVPciAddress", func() { + It("fails on empty slice", func() { + _, _, err := popSRIOVPciAddress([]string{}) + Expect(err).To(HaveOccurred()) + }) + It("pops the next address from a non-empty slice", func() { + addrs := []string{"0000:81:11.1", "0001:02:00.0"} + addr, rest, err := popSRIOVPciAddress(addrs) + Expect(err).ToNot(HaveOccurred()) + Expect(addr).To(Equal("0000:81:11.1")) + Expect(len(rest)).To(Equal(1)) + Expect(rest[0]).To(Equal("0001:02:00.0")) + }) +}) + func diskToDiskXML(disk *v1.Disk) string { devicePerBus := make(map[string]int) libvirtDisk := &Disk{} diff --git a/pkg/virt-launcher/virtwrap/api/deepcopy_generated.go b/pkg/virt-launcher/virtwrap/api/deepcopy_generated.go index c38a62864f80..036c1acdac3f 100644 --- a/pkg/virt-launcher/virtwrap/api/deepcopy_generated.go +++ b/pkg/virt-launcher/virtwrap/api/deepcopy_generated.go @@ -635,6 +635,13 @@ func (in *Devices) DeepCopyInto(out *Devices) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.HostDevices != nil { + in, out := &in.HostDevices, &out.HostDevices + *out = make([]HostDevice, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Controllers != nil { in, out := &in.Controllers, &out.Controllers *out = make([]Controller, len(*in)) @@ -1436,6 +1443,57 @@ func (in *GraphicsListen) DeepCopy() *GraphicsListen { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostDevice) DeepCopyInto(out *HostDevice) { + *out = *in + in.Source.DeepCopyInto(&out.Source) + if in.BootOrder != nil { + in, out := &in.BootOrder, &out.BootOrder + if *in == nil { + *out = nil + } else { + *out = new(BootOrder) + **out = **in + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostDevice. 
+func (in *HostDevice) DeepCopy() *HostDevice { + if in == nil { + return nil + } + out := new(HostDevice) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostDeviceSource) DeepCopyInto(out *HostDeviceSource) { + *out = *in + if in.Address != nil { + in, out := &in.Address, &out.Address + if *in == nil { + *out = nil + } else { + *out = new(Address) + **out = **in + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostDeviceSource. +func (in *HostDeviceSource) DeepCopy() *HostDeviceSource { + if in == nil { + return nil + } + out := new(HostDeviceSource) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HugePage) DeepCopyInto(out *HugePage) { *out = *in @@ -1501,7 +1559,7 @@ func (in *Interface) DeepCopyInto(out *Interface) { **out = **in } } - out.Source = in.Source + in.Source.DeepCopyInto(&out.Source) if in.Target != nil { in, out := &in.Target, &out.Target if *in == nil { @@ -1624,6 +1682,15 @@ func (in *InterfaceDriver) DeepCopy() *InterfaceDriver { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *InterfaceSource) DeepCopyInto(out *InterfaceSource) { *out = *in + if in.Address != nil { + in, out := &in.Address, &out.Address + if *in == nil { + *out = nil + } else { + *out = new(Address) + **out = **in + } + } return } diff --git a/pkg/virt-launcher/virtwrap/api/schema.go b/pkg/virt-launcher/virtwrap/api/schema.go index dedc8baa3462..910f7f7f6fdb 100644 --- a/pkg/virt-launcher/virtwrap/api/schema.go +++ b/pkg/virt-launcher/virtwrap/api/schema.go @@ -268,6 +268,7 @@ type Devices struct { Emulator string `xml:"emulator,omitempty"` Interfaces []Interface `xml:"interface"` Channels []Channel `xml:"channel"` + HostDevices []HostDevice `xml:"hostdev,omitempty"` Controllers []Controller `xml:"controller,omitempty"` Video []Video `xml:"video"` Graphics []Graphics `xml:"graphics"` @@ -279,6 +280,20 @@ type Devices struct { Rng *Rng `xml:"rng,omitempty"` } +// BEGIN HostDevice ----------------------------- +type HostDevice struct { + Source HostDeviceSource `xml:"source"` + Type string `xml:"type,attr"` + BootOrder *BootOrder `xml:"boot,omitempty"` + Managed string `xml:"managed,attr"` +} + +type HostDeviceSource struct { + Address *Address `xml:"address,omitempty"` +} + +// END HostDevice ----------------------------- + // BEGIN Controller ----------------------------- // Controller represens libvirt controller element https://libvirt.org/formatdomain.html#elementsControllers @@ -455,10 +470,11 @@ type FilterRef struct { } type InterfaceSource struct { - Network string `xml:"network,attr,omitempty"` - Device string `xml:"dev,attr,omitempty"` - Bridge string `xml:"bridge,attr,omitempty"` - Mode string `xml:"mode,attr,omitempty"` + Network string `xml:"network,attr,omitempty"` + Device string `xml:"dev,attr,omitempty"` + Bridge string `xml:"bridge,attr,omitempty"` + Mode string `xml:"mode,attr,omitempty"` + Address *Address `xml:"address,omitempty"` } type Model struct { diff --git a/pkg/virt-launcher/virtwrap/network/podinterface.go 
b/pkg/virt-launcher/virtwrap/network/podinterface.go index 8a9bce1757b4..5cc8f8ffd751 100644 --- a/pkg/virt-launcher/virtwrap/network/podinterface.go +++ b/pkg/virt-launcher/virtwrap/network/podinterface.go @@ -61,6 +61,11 @@ func (l *PodInterface) Plug(iface *v1.Interface, network *v1.Network, domain *ap precond.MustNotBeNil(domain) initHandler() + // There is nothing to plug for SR-IOV devices + if iface.SRIOV != nil { + return nil + } + driver, err := getBinding(iface, domain, podInterfaceName) if err != nil { return err diff --git a/pkg/virt-launcher/virtwrap/network/podinterface_test.go b/pkg/virt-launcher/virtwrap/network/podinterface_test.go index fd422bb35b48..2c7f7656bbdd 100644 --- a/pkg/virt-launcher/virtwrap/network/podinterface_test.go +++ b/pkg/virt-launcher/virtwrap/network/podinterface_test.go @@ -289,6 +289,24 @@ var _ = Describe("Pod Network", func() { }) }) }) + Context("SRIOV Plug", func() { + It("Does not crash", func() { + // Plug doesn't do anything for sriov so it's enough to pass an empty domain + domain := &api.Domain{} + // Same for network + net := &v1.Network{} + + iface := &v1.Interface{ + Name: "sriov", + InterfaceBindingMethod: v1.InterfaceBindingMethod{ + SRIOV: &v1.InterfaceSRIOV{}, + }, + } + podiface := PodInterface{} + err := podiface.Plug(iface, net, domain, "fakeiface") + Expect(err).ToNot(HaveOccurred()) + }) + }) Context("Slirp Plug", func() { It("Should create an interface in the qemu command line and remove it from the interfaces", func() { domain := NewDomainWithSlirpInterface() diff --git a/tools/vms-generator/utils/utils.go b/tools/vms-generator/utils/utils.go index 501935f98c5d..330649643f5e 100644 --- a/tools/vms-generator/utils/utils.go +++ b/tools/vms-generator/utils/utils.go @@ -46,6 +46,7 @@ const ( VmiBlockPVC = "vmi-block-pvc" VmiWindows = "vmi-windows" VmiSlirp = "vmi-slirp" + VmiSRIOV = "vmi-sriov" VmiWithHookSidecar = "vmi-with-sidecar-hook" VmiMultusPtp = "vmi-multus-ptp" VmiMultusMultipleNet = 
"vmi-multus-multiple-net" @@ -329,6 +330,21 @@ func GetVMISlirp() *v1.VirtualMachineInstance { return vm } +func GetVMISRIOV() *v1.VirtualMachineInstance { + vm := getBaseVMI(VmiSRIOV) + vm.Spec.Domain.Resources.Requests[k8sv1.ResourceMemory] = resource.MustParse("1024M") + vm.Spec.Domain.Resources.Requests["intel.com/sriov"] = resource.MustParse("1") + vm.Spec.Domain.Resources.Limits = k8sv1.ResourceList{"intel.com/sriov": resource.MustParse("1")} + vm.Spec.Networks = []v1.Network{*v1.DefaultPodNetwork(), {Name: "sriov-net", NetworkSource: v1.NetworkSource{Multus: &v1.CniNetwork{NetworkName: "sriov-net"}}}} + addRegistryDisk(&vm.Spec, fmt.Sprintf("%s/%s:%s", DockerPrefix, imageFedora, DockerTag), busVirtio) + addNoCloudDiskWitUserData(&vm.Spec, "#!/bin/bash\necho \"fedora\" |passwd fedora --stdin\ndhclient eth1\n") + + vm.Spec.Domain.Devices.Interfaces = []v1.Interface{{Name: "default", InterfaceBindingMethod: v1.InterfaceBindingMethod{Bridge: &v1.InterfaceBridge{}}}, + {Name: "sriov-net", InterfaceBindingMethod: v1.InterfaceBindingMethod{SRIOV: &v1.InterfaceSRIOV{}}}} + + return vm +} + func GetVMIMultusPtp() *v1.VirtualMachineInstance { vm := getBaseVMI(VmiMultusPtp) vm.Spec.Domain.Resources.Requests[k8sv1.ResourceMemory] = resource.MustParse("1024M") diff --git a/tools/vms-generator/vms-generator.go b/tools/vms-generator/vms-generator.go index 0e1039909b36..90b2e0dd37de 100644 --- a/tools/vms-generator/vms-generator.go +++ b/tools/vms-generator/vms-generator.go @@ -61,6 +61,7 @@ func main() { utils.VmiBlockPVC: utils.GetVMIBlockPvc(), utils.VmiWindows: utils.GetVMIWindows(), utils.VmiSlirp: utils.GetVMISlirp(), + utils.VmiSRIOV: utils.GetVMISRIOV(), utils.VmiWithHookSidecar: utils.GetVMIWithHookSidecar(), utils.VmiMultusPtp: utils.GetVMIMultusPtp(), utils.VmiMultusMultipleNet: utils.GetVMIMultusMultipleNet(),