Skip to content

Commit

Permalink
CDI implementation
Browse files Browse the repository at this point in the history
This commit implements Container Device Interface [1] support.

[1] https://github.com/container-orchestrated-devices/container-device-interface
  • Loading branch information
e0ne committed Jul 13, 2023
1 parent b3aea76 commit 47b70c3
Show file tree
Hide file tree
Showing 101 changed files with 10,962 additions and 288 deletions.
44 changes: 8 additions & 36 deletions cmd/sriovdp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,54 +23,26 @@ import (
"github.com/golang/glog"
)

const (
defaultConfig = "/etc/pcidp/config.json"
)

// Parse Command line flags
func flagInit(cp *cliParams) {
flag.StringVar(&cp.configFile, "config-file", defaultConfig,
"JSON device pool config file location")
flag.StringVar(&cp.resourcePrefix, "resource-prefix", "intel.com",
"resource name prefix used for K8s extended resource")
}

func main() {
cp := &cliParams{}
flagInit(cp)
FlagInit(cp)
flag.Parse()
rm := newResourceManager(cp)
rm := NewResourceManager(cp)

glog.Infof("resource manager reading configs")
if err := rm.readConfig(); err != nil {
glog.Errorf("error getting resources from file %v", err)
return
}

if len(rm.configList) < 1 {
glog.Errorf("no resource configuration; exiting")
return // No config found
}

// Validate configs
if !rm.validConfigs() {
glog.Fatalf("Exiting.. one or more invalid configuration(s) given")
return
}
glog.Infof("Discovering host devices")
if err := rm.discoverHostDevices(); err != nil {
glog.Errorf("error discovering host devices%v", err)
err := rm.Init()
if err != nil {
glog.Fatalf("error initialization resources manager %v", err)
return
}

glog.Infof("Initializing resource servers")
if err := rm.initServers(); err != nil {
if err := rm.InitServers(); err != nil {
glog.Errorf("error initializing resource servers %v", err)
return
}

glog.Infof("Starting all servers...")
if err := rm.startAllServers(); err != nil {
if err := rm.StartAllServers(); err != nil {
glog.Errorf("error starting resource servers %v\n", err)
return
}
Expand All @@ -83,7 +55,7 @@ func main() {
// Catch termination signals
sig := <-sigCh
glog.Infof("Received signal \"%v\", shutting down.", sig)
if err := rm.stopAllServers(); err != nil {
if err := rm.StopAllServers(); err != nil {
glog.Errorf("stopping servers produced error: %s", err.Error())
}
}
175 changes: 155 additions & 20 deletions cmd/sriovdp/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,59 +16,109 @@ package main

import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"path/filepath"

"github.com/golang/glog"
"github.com/jaypipes/ghw"

"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/cdi"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/factory"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
)

const (
socketSuffix = "sock"
defaultConfig = "/etc/pcidp/config.json"
socketSuffix = "sock"
)

// cliParams holds the CLI parameters of the SR-IOV Network Device Plugin.
// Its fields are bound to command line flags by FlagInit.
type cliParams struct {
// configFile is the JSON device pool config file location ("config-file" flag).
configFile string
// resourcePrefix is the resource name prefix used for K8s extended
// resources ("resource-prefix" flag).
resourcePrefix string
// useCdi enables exposing devices in containers through the Container
// Device Interface ("use-cdi" flag).
useCdi bool
// hostConfigDir is the Device Plugin config directory on the host
// ("host-config-dir" flag).
hostConfigDir string
}

type resourceManager struct {
// ResourceManager manages resources for SR-IOV Network Device Plugin binaries
type ResourceManager struct {
cliParams
pluginWatchMode bool
rFactory types.ResourceFactory
configList []*types.ResourceConfig
ConfigList []*types.ResourceConfig
resourceServers []types.ResourceServer
deviceProviders map[types.DeviceType]types.DeviceProvider
}

func newResourceManager(cp *cliParams) *resourceManager {
// FlagInit registers the plugin's command line flags on the default flag set
// and binds each of them to the matching field of cp. It only declares the
// flags (which also stores their default values in cp); parsing the actual
// command line is left to the caller via flag.Parse.
func FlagInit(cp *cliParams) {
	fs := flag.CommandLine

	// String-valued options.
	fs.StringVar(&cp.configFile, "config-file", defaultConfig, "JSON device pool config file location")
	fs.StringVar(&cp.resourcePrefix, "resource-prefix", "intel.com", "resource name prefix used for K8s extended resource")
	fs.StringVar(&cp.hostConfigDir, "host-config-dir", "/host/etc/pcidp/", "Device Plugin config directory on a host")

	// Boolean switches.
	fs.BoolVar(&cp.useCdi, "use-cdi", false, "Use Container Device Interface to expose devices in containers")
}

// NewResourceManager creates a new ResourceManager instance
func NewResourceManager(cp *cliParams) *ResourceManager {
pluginWatchMode := utils.DetectPluginWatchMode(types.SockDir)
if pluginWatchMode {
glog.Infof("Using Kubelet Plugin Registry Mode")
} else {
glog.Infof("Using Deprecated Device Plugin Registry Path")
}

rf := factory.NewResourceFactory(cp.resourcePrefix, socketSuffix, pluginWatchMode)
rf := factory.NewResourceFactory(cp.resourcePrefix, socketSuffix, pluginWatchMode, cp.useCdi)
dp := make(map[types.DeviceType]types.DeviceProvider)
for k := range types.SupportedDevices {
dp[k] = rf.GetDeviceProvider(k)
}

return &resourceManager{
return &ResourceManager{
cliParams: *cp,
pluginWatchMode: pluginWatchMode,
rFactory: rf,
deviceProviders: dp,
}
}

// readConfig reads and validate configurations from Config file
func (rm *resourceManager) readConfig() error {
// Init prepares the data shared by the Device Plugin and the CDI service: it
// reads the configuration file, checks that at least one resource
// configuration was loaded, validates the configurations and discovers the
// host devices.
//
// Every failure is logged and reported to the caller as an error; Init never
// terminates the process itself, so the caller decides how to react.
func (rm *ResourceManager) Init() error {
	glog.Infof("resource manager reading configs")
	if err := rm.ReadConfig(); err != nil {
		glog.Errorf("error getting resources from file %v", err)
		return err
	}

	if len(rm.ConfigList) < 1 {
		glog.Errorf("no resource configuration")
		return fmt.Errorf("no resource configuration")
	}

	// Validate configs
	if !rm.ValidConfigs() {
		// Errorf rather than Fatalf: Fatalf exits the process, which made the
		// return below unreachable and took the decision away from the caller.
		glog.Errorf("one or more invalid configuration(s) given")
		return fmt.Errorf("one or more invalid configuration(s) given")
	}

	glog.Infof("Discovering host devices")
	if err := rm.DiscoverHostDevices(); err != nil {
		glog.Errorf("error discovering host devices %v", err)
		return err
	}

	return nil
}

// ReadConfig reads and validates configurations from the config file
func (rm *ResourceManager) ReadConfig() error {
resources := &types.ResourceConfList{}
rawBytes, err := os.ReadFile(rm.configFile)

Expand All @@ -90,7 +140,7 @@ func (rm *resourceManager) readConfig() error {
return fmt.Errorf("unsupported deviceType: \"%s\"", conf.DeviceType)
}
if conf.SelectorObjs, err = rm.rFactory.GetDeviceFilter(conf); err == nil {
rm.configList = append(rm.configList, &resources.ResourceList[i])
rm.ConfigList = append(rm.ConfigList, &resources.ResourceList[i])
} else {
glog.Warningf("unable to get SelectorObj from selectors list:'%s' for deviceType: %s error: %s",
*conf.Selectors, conf.DeviceType, err)
Expand All @@ -100,13 +150,85 @@ func (rm *resourceManager) readConfig() error {
return nil
}

func (rm *resourceManager) initServers() error {
// StoreConfig copies the SR-IOV Device Plugin config file (rm.configFile)
// into the host config directory (rm.hostConfigDir), keeping the file's base
// name. The destination is created with os.Create, so an existing file of the
// same name is truncated.
//
// It returns the first error hit while opening the source, creating the
// destination, copying the content or closing the destination.
func (rm *ResourceManager) StoreConfig() error {
	source, err := os.Open(rm.configFile)
	if err != nil {
		glog.Errorf("storeConfig(): error opening config file: %v", err)
		return err
	}
	// The source is only read, so a Close error carries no useful information.
	defer source.Close()

	destPath := filepath.Join(rm.hostConfigDir, filepath.Base(rm.configFile))
	destination, err := os.Create(destPath)
	if err != nil {
		glog.Errorf("storeConfig(): error creating config file on host: %v", err)
		return err
	}

	if _, err = io.Copy(destination, source); err != nil {
		// Best-effort close; the copy error is the one worth reporting.
		_ = destination.Close()
		glog.Errorf("storeConfig(): error during copy config file to host: %v", err)
		return err
	}

	// Close explicitly instead of deferring: a failure here can mean the data
	// never reached the disk and must not be silently dropped.
	if err = destination.Close(); err != nil {
		glog.Errorf("storeConfig(): error closing config file on host: %v", err)
		return err
	}
	return nil
}

// CreateCdiSpec creates the CDI definitions for the container runtime.
//
// For every resource configuration in rm.ConfigList it looks up the device
// provider for the configured device type, collects the devices matched by
// each selector (passing them through excludeAllocatedDevices together with a
// map shared across all configurations), builds a resource pool from them and
// hands the pool to cdi.CreateCDISpec. Configurations that end up with no
// devices are skipped.
//
// It returns an error when a device provider is missing, or when creating the
// resource pool or the CDI spec fails.
func (rm *ResourceManager) CreateCdiSpec() error {
	glog.Infof("number of config: %d\n", len(rm.ConfigList))
	deviceAllocated := make(map[string]bool)
	for _, rc := range rm.ConfigList {
		// Create new ResourcePool
		glog.Infof("Creating new ResourcePool: %s", rc.ResourceName)
		glog.Infof("DeviceType: %+v", rc.DeviceType)
		dp, ok := rm.deviceProviders[rc.DeviceType]
		if !ok {
			glog.Infof("Unable to get device provider from deviceType: %s", rc.DeviceType)
			return fmt.Errorf("error getting device provider")
		}

		filteredDevices := make([]types.HostDevice, 0)

		for index := range rc.SelectorObjs {
			devices := dp.GetDevices(rc, index)
			partialFilteredDevices, err := dp.GetFilteredDevices(devices, rc, index)
			if err != nil {
				// Only logged: processing continues with whatever device
				// list was returned for this selector.
				glog.Errorf("createCdiSpec(): error getting filtered devices for config %+v: %q", rc, err)
			}
			partialFilteredDevices = rm.excludeAllocatedDevices(partialFilteredDevices, deviceAllocated)
			glog.Infof("createCdiSpec(): selector index %d will register %d devices", index, len(partialFilteredDevices))
			filteredDevices = append(filteredDevices, partialFilteredDevices...)
		}

		if len(filteredDevices) < 1 {
			glog.Infof("no devices in device pool, skipping creating CDI spec for %s", rc.ResourceName)
			continue
		}

		rPool, err := rm.rFactory.GetResourcePool(rc, filteredDevices)
		if err != nil {
			glog.Errorf("createCdiSpec(): error creating ResourcePool with config %+v: %q", rc, err)
			return err
		}

		err = cdi.CreateCDISpec(rm.resourcePrefix, filteredDevices, rPool)
		if err != nil {
			glog.Errorf("createCdiSpec(): error creating CDI spec: %v", err)
			return err
		}
	}
	return nil
}

// InitServers initializes device providers and resource pools
func (rm *ResourceManager) InitServers() error {
rf := rm.rFactory
glog.Infof("number of config: %d\n", len(rm.configList))
glog.Infof("number of config: %d\n", len(rm.ConfigList))
deviceAllocated := make(map[string]bool)
for _, rc := range rm.configList {
for _, rc := range rm.ConfigList {
// Create new ResourcePool
glog.Infof("")
glog.Infof("Creating new ResourcePool: %s", rc.ResourceName)
glog.Infof("DeviceType: %+v", rc.DeviceType)
dp, ok := rm.deviceProviders[rc.DeviceType]
Expand All @@ -131,11 +253,21 @@ func (rm *resourceManager) initServers() error {
glog.Infof("no devices in device pool, skipping creating resource server for %s", rc.ResourceName)
continue
}

rPool, err := rm.rFactory.GetResourcePool(rc, filteredDevices)
if err != nil {
glog.Errorf("initServers(): error creating ResourcePool with config %+v: %q", rc, err)
return err
}

if rm.useCdi {
err = cdi.CreateCDISpec(rm.resourcePrefix, filteredDevices, rPool)
if err != nil {
glog.Errorf("initServers(): error creating CDI spec: %v", err)
return err
}
}

// Create ResourceServer with this ResourcePool
s, err := rf.GetResourceServer(rPool)
if err != nil {
Expand All @@ -148,7 +280,7 @@ func (rm *resourceManager) initServers() error {
return nil
}

func (rm *resourceManager) excludeAllocatedDevices(filteredDevices []types.HostDevice, deviceAllocated map[string]bool) []types.HostDevice {
func (rm *ResourceManager) excludeAllocatedDevices(filteredDevices []types.HostDevice, deviceAllocated map[string]bool) []types.HostDevice {
filteredDevicesTemp := []types.HostDevice{}
for _, dev := range filteredDevices {
if !deviceAllocated[dev.GetDeviceID()] {
Expand All @@ -161,7 +293,8 @@ func (rm *resourceManager) excludeAllocatedDevices(filteredDevices []types.HostD
return filteredDevicesTemp
}

func (rm *resourceManager) startAllServers() error {
// StartAllServers starts all Device Plugin resource servers
func (rm *ResourceManager) StartAllServers() error {
for _, rs := range rm.resourceServers {
if err := rs.Start(); err != nil {
return err
Expand All @@ -175,7 +308,8 @@ func (rm *resourceManager) startAllServers() error {
return nil
}

func (rm *resourceManager) stopAllServers() error {
// StopAllServers stops all Device Plugin resource servers
func (rm *ResourceManager) StopAllServers() error {
for _, rs := range rm.resourceServers {
if err := rs.Stop(); err != nil {
return err
Expand All @@ -184,11 +318,11 @@ func (rm *resourceManager) stopAllServers() error {
return nil
}

// Validate configurations
func (rm *resourceManager) validConfigs() bool {
// ValidConfigs validates configurations
func (rm *ResourceManager) ValidConfigs() bool {
resourceNames := make(map[string]string) // resource names placeholder

for _, conf := range rm.configList {
for _, conf := range rm.ConfigList {
// check if name contains acceptable characters
if !utils.ValidResourceName(conf.ResourceName) {
glog.Errorf("resource name \"%s\" contains invalid characters", conf.ResourceName)
Expand Down Expand Up @@ -229,7 +363,8 @@ func (rm *resourceManager) validConfigs() bool {
return true
}

func (rm *resourceManager) discoverHostDevices() error {
// DiscoverHostDevices finds SR-IOV devices on host
func (rm *ResourceManager) DiscoverHostDevices() error {
pci, err := ghw.PCI()
if err != nil {
return fmt.Errorf("discoverDevices(): error getting PCI info: %v", err)
Expand Down
Loading

0 comments on commit 47b70c3

Please sign in to comment.