diff --git a/cmd/sriovdp/manager.go b/cmd/sriovdp/manager.go index f230ad78b..f1d1a7a0f 100644 --- a/cmd/sriovdp/manager.go +++ b/cmd/sriovdp/manager.go @@ -154,7 +154,8 @@ func (rm *resourceManager) validConfigs() bool { } } - // Validate deviceType + // [To-Do]: Validate deviceType + resourceName[conf.ResourceName] = conf.ResourceName } diff --git a/deployments/kernel-net-demo/crdnetwork.yaml b/deployments/kernel-net-demo/crdnetwork.yaml index b5d5b9409..1dac3ae7a 100644 --- a/deployments/kernel-net-demo/crdnetwork.yaml +++ b/deployments/kernel-net-demo/crdnetwork.yaml @@ -18,4 +18,4 @@ spec: spec: properties: config: - type: string \ No newline at end of file + type: string diff --git a/deployments/kernel-net-demo/pod-tc1.yaml b/deployments/kernel-net-demo/pod-tc1.yaml index ae3a225af..36dd59b4e 100644 --- a/deployments/kernel-net-demo/pod-tc1.yaml +++ b/deployments/kernel-net-demo/pod-tc1.yaml @@ -15,11 +15,7 @@ spec: args: [ "while true; do sleep 300000; done;" ] resources: requests: - # cpu: "1" - #memory: "512Mi" intel.com/sriov_net_A: '2' limits: - #cpu: "1" - #memory: "512Mi" intel.com/sriov_net_A: '2' restartPolicy: "Never" diff --git a/deployments/kernel-net-demo/pod-tc2.yaml b/deployments/kernel-net-demo/pod-tc2.yaml index 44a55785a..61f28e75c 100644 --- a/deployments/kernel-net-demo/pod-tc2.yaml +++ b/deployments/kernel-net-demo/pod-tc2.yaml @@ -15,10 +15,6 @@ spec: args: [ "while true; do sleep 300000; done;" ] resources: requests: - cpu: "1" - memory: "512Mi" intel.com/sriov_net_B: '1' limits: - cpu: "1" - memory: "512Mi" intel.com/sriov_net_B: '1' diff --git a/deployments/kernel-net-demo/pod-tc3.yaml b/deployments/kernel-net-demo/pod-tc3.yaml index 64701cd5d..01c8d7418 100644 --- a/deployments/kernel-net-demo/pod-tc3.yaml +++ b/deployments/kernel-net-demo/pod-tc3.yaml @@ -15,10 +15,6 @@ spec: args: [ "while true; do sleep 300000; done;" ] resources: requests: - cpu: "1" - memory: "512Mi" intel.com/sriov: '1' limits: - cpu: "1" - memory: "512Mi" 
intel.com/sriov: '1' diff --git a/deployments/kernel-net-demo/pod-tc4.yaml b/deployments/kernel-net-demo/pod-tc4.yaml index f7ec10ff8..904371089 100644 --- a/deployments/kernel-net-demo/pod-tc4.yaml +++ b/deployments/kernel-net-demo/pod-tc4.yaml @@ -15,10 +15,6 @@ spec: args: [ "while true; do sleep 300000; done;" ] resources: requests: - cpu: "1" - memory: "512Mi" intel.com/sriov_net_A: '2' limits: - cpu: "1" - memory: "512Mi" intel.com/sriov_net_A: '2' diff --git a/images/sriovdp-daemonset.yaml b/images/sriovdp-daemonset.yaml index fa2c42ec9..ea606df58 100644 --- a/images/sriovdp-daemonset.yaml +++ b/images/sriovdp-daemonset.yaml @@ -52,6 +52,8 @@ spec: - name: config mountPath: /etc/pcidp/config.json readOnly: true + - name: log + mountPath: /var/log volumes: - name: devicesock hostPath: diff --git a/pkg/resources/pool.go b/pkg/resources/pool_stub.go similarity index 96% rename from pkg/resources/pool.go rename to pkg/resources/pool_stub.go index 95b31f0db..44d54e2f7 100644 --- a/pkg/resources/pool.go +++ b/pkg/resources/pool_stub.go @@ -23,12 +23,6 @@ import ( pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1" ) -const ( - sysBusPci = "/sys/bus/pci/devices" - totalVfFile = "sriov_totalvfs" - configuredVfFile = "sriov_numvfs" -) - type resourcePool struct { config *types.ResourceConfig devices map[string]*pluginapi.Device diff --git a/pkg/resources/server.go b/pkg/resources/server.go index fd8c58da0..40451dce7 100644 --- a/pkg/resources/server.go +++ b/pkg/resources/server.go @@ -29,7 +29,7 @@ import ( ) type resourceServer struct { - resourcPool types.ResourcePool + resourcePool types.ResourcePool endPoint string // Socket file resouceNamePrefix string grpcServer *grpc.Server @@ -42,7 +42,7 @@ type resourceServer struct { func newResourceServer(prefix, suffix string, rp types.ResourcePool) types.ResourceServer { sockName := fmt.Sprintf("%s.%s", rp.GetResourceName(), suffix) return &resourceServer{ - resourcPool: rp, + resourcePool: rp, 
endPoint: sockName, resouceNamePrefix: prefix, grpcServer: grpc.NewServer(), @@ -60,7 +60,7 @@ func (rs *resourceServer) register() error { return net.DialTimeout("unix", addr, timeout) })) if err != nil { - glog.Errorf("%s device plugin unable connect to Kubelet : %v", rs.resourcPool.GetResourceName(), err) + glog.Errorf("%s device plugin unable connect to Kubelet : %v", rs.resourcePool.GetResourceName(), err) return err } defer conn.Close() @@ -69,14 +69,14 @@ func (rs *resourceServer) register() error { request := &pluginapi.RegisterRequest{ Version: pluginapi.Version, Endpoint: rs.endPoint, - ResourceName: fmt.Sprintf("%s/%s", rs.resouceNamePrefix, rs.resourcPool.GetResourceName()), + ResourceName: fmt.Sprintf("%s/%s", rs.resouceNamePrefix, rs.resourcePool.GetResourceName()), } if _, err = client.Register(context.Background(), request); err != nil { - glog.Errorf("%s device plugin unable to register with Kubelet : %v", rs.resourcPool.GetResourceName(), err) + glog.Errorf("%s device plugin unable to register with Kubelet : %v", rs.resourcePool.GetResourceName(), err) return err } - glog.Infof("%s device plugin registered with Kubelet", rs.resourcPool.GetResourceName()) + glog.Infof("%s device plugin registered with Kubelet", rs.resourcePool.GetResourceName()) return nil } @@ -85,9 +85,9 @@ func (rs *resourceServer) Allocate(ctx context.Context, rqt *pluginapi.AllocateR resp := new(pluginapi.AllocateResponse) for _, container := range rqt.ContainerRequests { containerResp := new(pluginapi.ContainerAllocateResponse) - containerResp.Devices = rs.resourcPool.GetDeviceSpecs(rs.resourcPool.GetDeviceFiles(), container.DevicesIDs) - containerResp.Envs = rs.resourcPool.GetEnvs(rs.resourcPool.GetResourceName(), container.DevicesIDs) - containerResp.Mounts = rs.resourcPool.GetMounts() + containerResp.Devices = rs.resourcePool.GetDeviceSpecs(rs.resourcePool.GetDeviceFiles(), container.DevicesIDs) + containerResp.Envs = 
rs.resourcePool.GetEnvs(rs.resourcePool.GetResourceName(), container.DevicesIDs) + containerResp.Mounts = rs.resourcePool.GetMounts() resp.ContainerResponses = append(resp.ContainerResponses, containerResp) } glog.Infof("AllocateResponse send: %+v", resp) @@ -96,12 +96,12 @@ func (rs *resourceServer) Allocate(ctx context.Context, rqt *pluginapi.AllocateR func (rs *resourceServer) ListAndWatch(emtpy *pluginapi.Empty, stream pluginapi.DevicePlugin_ListAndWatchServer) error { - methodID := fmt.Sprintf("ListAndWatch(%s)", rs.resourcPool.GetResourceName()) // for logging purpose + methodID := fmt.Sprintf("ListAndWatch(%s)", rs.resourcePool.GetResourceName()) // for logging purpose glog.Infof("%s invoked", methodID) // Send initial list of devices devs := make([]*pluginapi.Device, 0) resp := new(pluginapi.ListAndWatchResponse) - for _, dev := range rs.resourcPool.GetDevices() { + for _, dev := range rs.resourcePool.GetDevices() { devs = append(devs, dev) } resp.Devices = devs @@ -123,7 +123,7 @@ func (rs *resourceServer) ListAndWatch(emtpy *pluginapi.Empty, stream pluginapi. 
case <-rs.updateSignal: // Device health changed; so send new device list glog.Infof("%s: device health changed!\n", methodID) - for _, dev := range rs.resourcPool.GetDevices() { + for _, dev := range rs.resourcePool.GetDevices() { devs = append(devs, dev) } resp.Devices = devs @@ -149,9 +149,9 @@ func (rs *resourceServer) GetDevicePluginOptions(ctx context.Context, empty *plu } func (rs *resourceServer) Init() error { - resourceName := rs.resourcPool.GetResourceName() + resourceName := rs.resourcePool.GetResourceName() glog.Infof("initializing %s device pool", resourceName) - if err := rs.resourcPool.DiscoverDevices(); err != nil { + if err := rs.resourcePool.DiscoverDevices(); err != nil { return err } return nil @@ -159,7 +159,7 @@ func (rs *resourceServer) Init() error { // gRPC server related func (rs *resourceServer) Start() error { - resourceName := rs.resourcPool.GetResourceName() + resourceName := rs.resourcePool.GetResourceName() _ = rs.cleanUp() // try tp clean up and continue glog.Infof("starting %s device plugin endpoint at: %s\n", resourceName, rs.endPoint) sockPath := filepath.Join(types.SockDir, rs.endPoint) @@ -202,7 +202,7 @@ func (rs *resourceServer) Start() error { } func (rs *resourceServer) restart() error { - resourceName := rs.resourcPool.GetResourceName() + resourceName := rs.resourcePool.GetResourceName() glog.Infof("restarting %s device plugin server...", resourceName) if rs.grpcServer == nil { return fmt.Errorf("grpc server instance not found for %s", resourceName) @@ -217,7 +217,7 @@ func (rs *resourceServer) restart() error { } func (rs *resourceServer) Stop() error { - resourceName := rs.resourcPool.GetResourceName() + resourceName := rs.resourcePool.GetResourceName() glog.Infof("stopping %s device plugin server...", resourceName) if rs.grpcServer == nil { return nil @@ -267,11 +267,11 @@ func (rs *resourceServer) cleanUp() error { } func (rs *resourceServer) triggerUpdate() { - rp := rs.resourcPool + rp := rs.resourcePool if 
rs.checkIntervals > 0 { go func() { for { - changed := rp.Probe(rs.resourcPool.GetConfig(), rp.GetDevices()) + changed := rp.Probe(rs.resourcePool.GetConfig(), rp.GetDevices()) if changed { rs.updateSignal <- true } diff --git a/pkg/resources/uioPool.go b/pkg/resources/uioPool.go index 7963ce57f..a0d58de8c 100644 --- a/pkg/resources/uioPool.go +++ b/pkg/resources/uioPool.go @@ -15,12 +15,8 @@ package resources import ( - "io/ioutil" - "os" - "path/filepath" - - "github.com/golang/glog" "github.com/intel/sriov-network-device-plugin/pkg/types" + "github.com/intel/sriov-network-device-plugin/pkg/utils" pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1" ) @@ -44,26 +40,7 @@ func newUioResourcePool(rc *types.ResourceConfig) types.ResourcePool { // Overrides GetDeviceFile() method func (rp *uioResourcePool) GetDeviceFile(dev string) (devFile string, err error) { - - vfDir := filepath.Join(sysBusPci, dev, "uio") - - _, err = os.Lstat(vfDir) - if err != nil { - glog.Errorf("Error. Could not get directory information for device: %s Err: %v", vfDir, err) - return "", err - } - - files, err := ioutil.ReadDir(vfDir) - - if err != nil { - return - } - - // uio directory should only contain one directory e.g uio1 - // assuption is there's a corresponding device file in /dev e.g. 
/dev/uio1 - devFile = filepath.Join("/dev", files[0].Name()) - - return + return utils.GetUIODeviceFile(dev) } func (rp *uioResourcePool) GetEnvs(resourceName string, deviceIDs []string) map[string]string { diff --git a/pkg/resources/vfioPool.go b/pkg/resources/vfioPool.go index 0a76de4ad..cc04bef0f 100644 --- a/pkg/resources/vfioPool.go +++ b/pkg/resources/vfioPool.go @@ -15,12 +15,9 @@ package resources import ( - "fmt" - "os" - "path/filepath" - "github.com/golang/glog" "github.com/intel/sriov-network-device-plugin/pkg/types" + "github.com/intel/sriov-network-device-plugin/pkg/utils" pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1" ) @@ -45,40 +42,7 @@ func newVfioResourcePool(rc *types.ResourceConfig) types.ResourcePool { // Overrides GetDeviceFile() method func (rp *vfioResourcePool) GetDeviceFile(dev string) (devFile string, err error) { - // Get iommu group for this device - devPath := filepath.Join(sysBusPci, dev) - _, err = os.Lstat(devPath) - if err != nil { - err = fmt.Errorf("Error. 
Could not get directory information for device: %s, Err: %v", dev, err) - return - } - - iommuDir := filepath.Join(devPath, "iommu_group") - if err != nil { - err = fmt.Errorf("error reading iommuDir %v", err) - return - } - - dirInfo, err := os.Lstat(iommuDir) - if err != nil { - err = fmt.Errorf("unable to find iommu_group %v", err) - return - } - - if dirInfo.Mode()&os.ModeSymlink == 0 { - err = fmt.Errorf("invalid symlink to iommu_group %v", err) - return - } - - linkName, err := filepath.EvalSymlinks(iommuDir) - if err != nil { - err = fmt.Errorf("error reading symlink to iommu_group %v", err) - return - } - - devFile = filepath.Join("/dev/vfio", filepath.Base(linkName)) - - return + return utils.GetVFIODeviceFile(dev) } func (rp *vfioResourcePool) GetEnvs(resourceName string, deviceIDs []string) map[string]string { diff --git a/pkg/types/types.go b/pkg/types/types.go index 82a1eee99..6300a91e6 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -57,7 +57,7 @@ type ResourceFactory interface { GetResourcePool(*ResourceConfig) ResourcePool } -//ResourcePool represents a generic resource entity +// ResourcePool represents a generic resource entity type ResourcePool interface { // extended API for internal use InitDevice() error @@ -70,7 +70,7 @@ type ResourcePool interface { IBaseResource } -//ResourcePool represents a generic resource entity +// IBaseResource represents a specific resource pool type IBaseResource interface { GetDeviceFile(dev string) (devFile string, err error) GetDeviceSpecs(deviceFiles map[string]string, deviceIDs []string) []*pluginapi.DeviceSpec diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index acf49bb79..82dd3ca8c 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -173,3 +173,64 @@ func ValidResourceName(name string) bool { var validString = regexp.MustCompile(`^[a-zA-Z0-9_]+$`) return validString.MatchString(name) } + +// GetVFIODeviceFile returns a vfio device files for vfio-pci bound PCI device's PCI address 
+func GetVFIODeviceFile(dev string) (devFile string, err error) {
+	// dev names the device's directory under sysBusPci. On success devFile is
+	// /dev/vfio/<base name of the iommu_group link target>; on failure devFile
+	// is empty and err says which step failed.
+
+	// The device directory must exist before its iommu_group can be resolved.
+	devPath := filepath.Join(sysBusPci, dev)
+	if _, err = os.Lstat(devPath); err != nil {
+		return "", fmt.Errorf("GetVFIODeviceFile(): Could not get directory information for device: %s, Err: %v", dev, err)
+	}
+
+	// filepath.Join cannot fail, so the path needs no error check of its own;
+	// Lstat (not Stat) is used so the link itself is inspected, not its target.
+	iommuDir := filepath.Join(devPath, "iommu_group")
+	dirInfo, err := os.Lstat(iommuDir)
+	if err != nil {
+		return "", fmt.Errorf("GetVFIODeviceFile(): unable to find iommu_group %v", err)
+	}
+
+	if dirInfo.Mode()&os.ModeSymlink == 0 {
+		// err is nil on this path, so report the offending path rather than
+		// formatting a nil error into the message.
+		return "", fmt.Errorf("GetVFIODeviceFile(): invalid symlink to iommu_group %s", iommuDir)
+	}
+
+	linkName, err := filepath.EvalSymlinks(iommuDir)
+	if err != nil {
+		return "", fmt.Errorf("GetVFIODeviceFile(): error reading symlink to iommu_group %v", err)
+	}
+
+	return filepath.Join("/dev/vfio", filepath.Base(linkName)), nil
+}
+
+// GetUIODeviceFile returns the uio device file for the PCI device identified by dev.
+// It reads the device's "uio" directory under sysBusPci and maps the first entry
+// found there to a file of the same name in /dev. devFile is empty whenever err
+// is non-nil.
+func GetUIODeviceFile(dev string) (devFile string, err error) {
+	vfDir := filepath.Join(sysBusPci, dev, "uio")
+
+	if _, err = os.Lstat(vfDir); err != nil {
+		return "", fmt.Errorf("GetUIODeviceFile(): could not get directory information for device: %s Err: %v", vfDir, err)
+	}
+
+	files, err := ioutil.ReadDir(vfDir)
+	if err != nil {
+		return "", err
+	}
+
+	// An empty uio directory would make files[0] below panic with an index out
+	// of range, so turn that case into an ordinary error.
+	if len(files) == 0 {
+		return "", fmt.Errorf("GetUIODeviceFile(): no uio entry found in %s", vfDir)
+	}
+
+	// uio directory should only contain one directory e.g uio1
+	// assumption is there's a corresponding device file in /dev e.g. /dev/uio1
+	devFile = filepath.Join("/dev", files[0].Name())
+
+	return devFile, nil
+}