Skip to content

ON-15256: Report topology info from device plugin #119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions deviceplugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ COPY LICENSE /app/LICENSE

RUN CGO_ENABLED=0 make device-plugin-build worker-build

FROM registry.access.redhat.com/ubi8/ubi-minimal:8.9
RUN microdnf install -y lshw-B.02.19.2 && microdnf clean all
FROM registry.access.redhat.com/ubi8/ubi-micro:8.9
COPY --from=builder /app/bin/onload-device-plugin /app/bin/onload-worker /usr/bin/
COPY --from=builder /app/LICENSE /licenses/LICENSE
USER 1001
Expand Down
35 changes: 32 additions & 3 deletions pkg/deviceplugin/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"reflect"
"slices"
"sync"

"github.com/golang/glog"
Expand Down Expand Up @@ -40,12 +41,17 @@ var DefaultConfig = NicManagerConfig{
NeedNic: true,
}

type nic struct {
name string
numa int64
}

// NicManager holds all the state required by the device plugin
type NicManager struct {
// interfaces is used to check the presence of any sfc nics on the node.
// Currently it is just used as a check for existence and no additional
// logic takes place.
interfaces []string
interfaces []nic
deviceFiles []*pluginapi.DeviceSpec
mounts []*pluginapi.Mount
devices []*pluginapi.Device
Expand All @@ -56,7 +62,11 @@ type NicManager struct {
}

func (manager *NicManager) GetInterfaces() []string {
return manager.interfaces
interfaces := []string{}
for _, i := range manager.interfaces {
interfaces = append(interfaces, i.name)
}
return interfaces
}

func (manager *NicManager) GetDeviceFiles() []*pluginapi.DeviceSpec {
Expand Down Expand Up @@ -92,13 +102,32 @@ func NewNicManager(

// Initialises the set of devices to advertise to kubernetes
func (manager *NicManager) initDevices() {

// Gets a list of all numa nodes of which there is an associated sfc nic,
// this isn't particularly helpful when you have nics on different numa
// nodes which are intended for different purposes, but this is basically a
// pathological case due to how we only advertise an "onload" device rather
// than a "real" one.
numaNodes := []*pluginapi.NUMANode{}
for _, nic := range manager.interfaces {
if nic.numa != -1 {
if !slices.ContainsFunc(numaNodes, func(n *pluginapi.NUMANode) bool {
return n.ID == nic.numa
}) {
numaNodes = append(numaNodes, &pluginapi.NUMANode{ID: nic.numa})
}
}
}

manager.devices = []*pluginapi.Device{}
fmt.Println(manager.interfaces)
for i := 0; i < manager.config.MaxPodsPerNode; i++ {
name := fmt.Sprintf("sfc-%v", i)
device := &pluginapi.Device{
ID: name,
Health: pluginapi.Healthy,
Topology: &pluginapi.TopologyInfo{
Nodes: numaNodes,
},
}
manager.devices = append(manager.devices, device)
}
Expand Down
56 changes: 56 additions & 0 deletions pkg/deviceplugin/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,59 @@ var _ = Describe("Testing command line options", func() {
Expect(err).ShouldNot(Succeed())
})
})

var _ = Describe("Testing topology information", func() {
var manager *NicManager

BeforeEach(func() {
manager = &NicManager{
config: NicManagerConfig{
MaxPodsPerNode: 1,
},
}
})

It("shouldn't provide numa information when none are specified", func() {
manager.interfaces = []nic{
{name: "A", numa: -1},
{name: "B", numa: -1},
{name: "C", numa: -1},
}
manager.initDevices()
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(0))
})

It("should describe numa information when a single node is present", func() {
manager.interfaces = []nic{
{name: "A", numa: 1},
{name: "B", numa: -1},
{name: "C", numa: -1},
}
manager.initDevices()
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(1))
Expect(manager.devices[0].Topology.GetNodes()[0].ID).To(Equal(int64(1)))
})

It("should describe numa information when multiple nodes are present", func() {
manager.interfaces = []nic{
{name: "A", numa: 1},
{name: "B", numa: 2},
{name: "C", numa: -1},
}
manager.initDevices()
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(2))
Expect(manager.devices[0].Topology.GetNodes()[0].ID).To(Equal(int64(1)))
Expect(manager.devices[0].Topology.GetNodes()[1].ID).To(Equal(int64(2)))
})

It("shouldn't provide duplicate numa information", func() {
manager.interfaces = []nic{
{name: "A", numa: 1},
{name: "B", numa: 1},
{name: "C", numa: -1},
}
manager.initDevices()
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(1))
Expect(manager.devices[0].Topology.GetNodes()[0].ID).To(Equal(int64(1)))
})
})
120 changes: 74 additions & 46 deletions pkg/deviceplugin/nic.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,69 +3,97 @@
package deviceplugin

import (
"errors"
"os"
"os/exec"
"regexp"
"path"
"strconv"
"strings"

"github.com/golang/glog"
)

// Takes the output from lshw and returns the device name for each solarflare
// device.
func parseOutput(output string) []string {
// "lshw -short -class network" sample output:
// H/W path Device Class Description
// =========================================================
// /0/100/1b/0 enp2s0f0 network XtremeScale SFC9250 10/25/40/50/100G Ethernet Controller
// /0/100/1b/0.1 enp2s0f1 network XtremeScale SFC9250 10/25/40/50/100G Ethernet Controller
// /0/100/1c.1/0 eno1 network NetXtreme BCM5720 Gigabit Ethernet PCIe
// /0/100/1c.1/0.1 eno2 network NetXtreme BCM5720 Gigabit Ethernet PCIe
const (
sysClassNetPath = "/sys/class/net/"
solarflareVendor = "0x1924"
)

func isSFCNic(devicePath string) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be worth using isOnloadableNic or isSupportedNic?

Just in case we are asked to support another vendor sometime in the future.

deviceDir, err := os.Stat(path.Join(devicePath, "device"))
if errors.Is(err, os.ErrNotExist) {
// Not a physical device, so won't have the "vendor" file
return false
} else if err != nil {
glog.Errorf("Failed to stat %s (%v)", devicePath, err)
return false
}
if !deviceDir.IsDir() {
return false
}

lines := strings.Split(output, "\n")
data, err := os.ReadFile(path.Join(devicePath, "device", "vendor"))
if errors.Is(err, os.ErrNotExist) {
// File doesn't exist but that is fine
return false
} else if err != nil {
glog.Errorf("Error reading %s (%v)",
path.Join(devicePath, "device", "vendor"), err)
return false
}

vendor := strings.TrimSuffix(string(data), "\n")
return vendor == solarflareVendor
}

var interfaces []string
// Get numa node from sysfs files.
// -1 means no specific numa node / unknown
func getNumaNode(devicePath string) int64 {
data, err := os.ReadFile(path.Join(devicePath, "device", "numa_node"))
if errors.Is(err, os.ErrNotExist) {
// File doesn't exist but that is fine, return -1
return -1
} else if err != nil {
glog.Errorf("Error reading %s (%v)",
path.Join(devicePath, "device", "vendor"), err)
return -1
}
numaString := strings.TrimSuffix(string(data), "\n")
node, err := strconv.ParseInt(numaString, 10, 64)
if err != nil {
glog.Errorf("Error parse int from string %s (%v)",
numaString, err)
return -1
}
return node
}

// Assume that we are running as root, if not then we would have to skip
// an additional line at the start of the output
skip_lines := 2
end_lines := 1
if os.Geteuid() != 0 {
skip_lines = 3
end_lines = 2
func readSysFiles() ([]nic, error) {
infos, err := os.ReadDir(sysClassNetPath)
if err != nil {
glog.Errorf("Error reading %s (%v)", sysClassNetPath, err)
return []nic{}, err
}

for _, line := range lines[skip_lines : len(lines)-end_lines] {
// This regex makes the assumption that all interface names only
// contain either lowercase letters or numbers. If that is not true,
// then this should be updated to reflect that.
r := regexp.MustCompile("([a-z0-9]+) *network *.*SFC")
out := r.FindStringSubmatch(line)
if out != nil {
// It is safe to access out[1] here since the return value of
// FindStringSubmatch is an array where the first value is the
// whole string and any subsequent values are the submatches.
// In this case since there is a submatch that should match the
// device name if FindStringSubmatch returns non-nil then there
// will be at least 2 elements in the return array.
interfaces = append(interfaces, out[1])
interfaces := []nic{}
for _, info := range infos {
devicePath := path.Join(sysClassNetPath, info.Name())
if !isSFCNic(devicePath) {
continue
}
nic := nic{
name: info.Name(),
numa: getNumaNode(devicePath),
}
interfaces = append(interfaces, nic)
}
return interfaces
return interfaces, nil
}

// Returns a list of the Solarflare interfaces present on the node
func queryNics() ([]string, error) {
// Depending on what information we are looking for in the output I think it
// is quite tempting to retrieve the information in a json format, then
// parse this using golang's built-in features.
bytes, err := exec.Command("lshw", "-short", "-class", "network").CombinedOutput()
output := string(bytes)
func queryNics() ([]nic, error) {
interfaces, err := readSysFiles()
if err != nil {
glog.Error(output)
glog.Errorf("error while listing sfc devices : %v", err)
return nil, err
glog.Errorf("Failed to list interfaces (%v)", err)
return []nic{}, err
}
interfaces := parseOutput(output)
return interfaces, nil
}