Skip to content

Commit d26fb55

Browse files
ON-15256: Report topology info from device plugin
The device plugin now tries to read NUMA information from sysfs while discovering the NICs present in the system. It then attaches every NUMA node that has an associated NIC to the topology information of each advertised device. Because the device plugin only advertises a pseudo-device, it is unclear how to proceed when multiple NICs are present on different NUMA nodes.
1 parent c2d1bc3 commit d26fb55

File tree

3 files changed

+125
-9
lines changed

3 files changed

+125
-9
lines changed

pkg/deviceplugin/manager.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"reflect"
9+
"slices"
910
"sync"
1011

1112
"github.com/golang/glog"
@@ -40,12 +41,17 @@ var DefaultConfig = NicManagerConfig{
4041
NeedNic: true,
4142
}
4243

44+
// nic describes one network interface found on the node.
type nic struct {
	// name is the interface name.
	name string
	// numa is the NUMA node the interface is attached to; -1 means no
	// specific node or unknown.
	numa int64
}
48+
4349
// NicManager holds all the state required by the device plugin
4450
type NicManager struct {
4551
// interfaces is used to check the presence of any sfc nics on the node.
4652
// Currently it is just used as a check for existence and no additional
4753
// logic takes place.
48-
interfaces []string
54+
interfaces []nic
4955
deviceFiles []*pluginapi.DeviceSpec
5056
mounts []*pluginapi.Mount
5157
devices []*pluginapi.Device
@@ -56,7 +62,11 @@ type NicManager struct {
5662
}
5763

5864
func (manager *NicManager) GetInterfaces() []string {
59-
return manager.interfaces
65+
interfaces := []string{}
66+
for _, i := range manager.interfaces {
67+
interfaces = append(interfaces, i.name)
68+
}
69+
return interfaces
6070
}
6171

6272
func (manager *NicManager) GetDeviceFiles() []*pluginapi.DeviceSpec {
@@ -92,12 +102,32 @@ func NewNicManager(
92102

93103
// Initialises the set of devices to advertise to kubernetes
94104
func (manager *NicManager) initDevices() {
105+
106+
// Gets a list of all numa nodes of which there is an associated sfc nic,
107+
// this isn't particularly helpful when you have nics on different numa
108+
// nodes which are intended for different purposes, but this is basically a
109+
// pathological case due to how we only advertise an "onload" device rather
110+
// than a "real" one.
111+
numaNodes := []*pluginapi.NUMANode{}
112+
for _, nic := range manager.interfaces {
113+
if nic.numa != -1 {
114+
if !slices.ContainsFunc(numaNodes, func(n *pluginapi.NUMANode) bool {
115+
return n.ID == nic.numa
116+
}) {
117+
numaNodes = append(numaNodes, &pluginapi.NUMANode{ID: nic.numa})
118+
}
119+
}
120+
}
121+
95122
manager.devices = []*pluginapi.Device{}
96123
for i := 0; i < manager.config.MaxPodsPerNode; i++ {
97124
name := fmt.Sprintf("sfc-%v", i)
98125
device := &pluginapi.Device{
99126
ID: name,
100127
Health: pluginapi.Healthy,
128+
Topology: &pluginapi.TopologyInfo{
129+
Nodes: numaNodes,
130+
},
101131
}
102132
manager.devices = append(manager.devices, device)
103133
}

pkg/deviceplugin/manager_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,59 @@ var _ = Describe("Testing command line options", func() {
8888
Expect(err).ShouldNot(Succeed())
8989
})
9090
})
91+
92+
var _ = Describe("Testing topology information", func() {
93+
var manager *NicManager
94+
95+
BeforeEach(func() {
96+
manager = &NicManager{
97+
config: NicManagerConfig{
98+
MaxPodsPerNode: 1,
99+
},
100+
}
101+
})
102+
103+
It("shouldn't provide numa information when none are specified", func() {
104+
manager.interfaces = []nic{
105+
{name: "A", numa: -1},
106+
{name: "B", numa: -1},
107+
{name: "C", numa: -1},
108+
}
109+
manager.initDevices()
110+
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(0))
111+
})
112+
113+
It("should describe numa information when a single node is present", func() {
114+
manager.interfaces = []nic{
115+
{name: "A", numa: 1},
116+
{name: "B", numa: -1},
117+
{name: "C", numa: -1},
118+
}
119+
manager.initDevices()
120+
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(1))
121+
Expect(manager.devices[0].Topology.GetNodes()[0].ID).To(Equal(int64(1)))
122+
})
123+
124+
It("should describe numa information when multiple nodes are present", func() {
125+
manager.interfaces = []nic{
126+
{name: "A", numa: 1},
127+
{name: "B", numa: 2},
128+
{name: "C", numa: -1},
129+
}
130+
manager.initDevices()
131+
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(2))
132+
Expect(manager.devices[0].Topology.GetNodes()[0].ID).To(Equal(int64(1)))
133+
Expect(manager.devices[0].Topology.GetNodes()[1].ID).To(Equal(int64(2)))
134+
})
135+
136+
It("shouldn't provide duplicate numa information", func() {
137+
manager.interfaces = []nic{
138+
{name: "A", numa: 1},
139+
{name: "B", numa: 1},
140+
{name: "C", numa: -1},
141+
}
142+
manager.initDevices()
143+
Expect(len(manager.devices[0].Topology.GetNodes())).To(Equal(1))
144+
Expect(manager.devices[0].Topology.GetNodes()[0].ID).To(Equal(int64(1)))
145+
})
146+
})

pkg/deviceplugin/nic.go

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"os"
88
"path"
9+
"strconv"
910
"strings"
1011

1112
"github.com/golang/glog"
@@ -43,27 +44,56 @@ func isSFCNic(devicePath string) bool {
4344
return vendor == solarflareVendor
4445
}
4546

46-
func readSysFiles() ([]string, error) {
47+
// Get numa node from sysfs files.
48+
// -1 means no specific numa node / unknown
49+
func getNumaNode(devicePath string) int64 {
50+
data, err := os.ReadFile(path.Join(devicePath, "device", "numa_node"))
51+
if errors.Is(err, os.ErrNotExist) {
52+
// File doesn't exist but that is fine, return -1
53+
return -1
54+
} else if err != nil {
55+
glog.Errorf("Error reading %s (%v)",
56+
path.Join(devicePath, "device", "vendor"), err)
57+
return -1
58+
}
59+
numaString := strings.TrimSuffix(string(data), "\n")
60+
node, err := strconv.ParseInt(numaString, 10, 64)
61+
if err != nil {
62+
glog.Errorf("Error parse int from string %s (%v)",
63+
numaString, err)
64+
return -1
65+
}
66+
return node
67+
}
68+
69+
func readSysFiles() ([]nic, error) {
4770
infos, err := os.ReadDir(sysClassNetPath)
4871
if err != nil {
4972
glog.Errorf("Error reading %s (%v)", sysClassNetPath, err)
50-
return []string{}, err
73+
return []nic{}, err
5174
}
52-
interfaces := []string{}
75+
76+
interfaces := []nic{}
5377
for _, info := range infos {
54-
if isSFCNic(path.Join(sysClassNetPath, info.Name())) {
55-
interfaces = append(interfaces, info.Name())
78+
devicePath := path.Join(sysClassNetPath, info.Name())
79+
if !isSFCNic(devicePath) {
80+
continue
81+
}
82+
nic := nic{
83+
name: info.Name(),
84+
numa: getNumaNode(devicePath),
5685
}
86+
interfaces = append(interfaces, nic)
5787
}
5888
return interfaces, nil
5989
}
6090

6191
// Returns a list of the Solarflare interfaces present on the node
62-
func queryNics() ([]string, error) {
92+
func queryNics() ([]nic, error) {
6393
interfaces, err := readSysFiles()
6494
if err != nil {
6595
glog.Errorf("Failed to list interfaces (%v)", err)
66-
return []string{}, err
96+
return []nic{}, err
6797
}
6898
return interfaces, nil
6999
}

0 commit comments

Comments
 (0)