Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions pkg/collector/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ const (
netSubsystem = "net_cls,net_prio"
)

const (
systemdSlicesName = "machine.slice"
nonSystemdSlicesName = "machine"
)

// Regular expressions of cgroup paths for different resource managers.
// ^.*/(?:(.*?)_)?slurm(?:_(.*?)/)?(?:.*?)/job_([0-9]+)(?:.*$)
// ^.*/slurm(?:_(.*?))?/(?:.*?)/job_([0-9]+)(?:.*$)
Expand All @@ -98,9 +103,12 @@ var (

For v2 possibilities are /machine.slice/machine-qemu\x2d2\x2dinstance\x2d00000001.scope
/machine.slice/machine-qemu\x2d2\x2dinstance\x2d00000001.scope/libvirt

Non systemd: machine/qemu-1-instance1.libvirt-qemu
*/
var (
libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)-qemu-(?:[0-9]+)-(?P<id>instance-[0-9a-f]+)(?:.*$)")
libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)-qemu-(?:[0-9]+)-(?P<id>instance-[0-9a-f]+)(?:.*$)")
libvirtCgroupNoSystemdPathRegex = regexp.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?P<id>instance-[0-9a-f]+)(?:.*$)")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a regex regexp.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?P<id>instance-[0-9a-f]+)(?:.*$)") must work for both cases. So, I think there is no need to define two?!

)

// Ref: https://linuxera.org/cpu-memory-management-kubernetes-cgroupsv2/
Expand Down Expand Up @@ -137,8 +145,29 @@ var (
"collector.cgroups.force-version",
"Set cgroups version manually. Used only for testing.",
).Hidden().Enum("v1", "v2")

noSystemdMode = CEEMSExporterApp.Flag(
"collector.cgroups.no-systemd-mode",
"Set if running on a non-systemd host",
).Default("false").Bool()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using a CLI flag (which users have to configure), we should be able to detect the cgroup layout, right?! This is what we are doing for k8s to detect if we are using cgroupfs or systemd drivers. We can use a similar logic here as well. We can add a field to cgroupManager something like nonSystemdLayout bool and set it to true when machine is found. Something like:

// Discover all cgroup layout
for _, slice := range []string{"machine", "machine.slice"} {
	if _, err := os.Stat(filepath.Join(*cgroupfsPath, activeSubsystem, slice)); err == nil {
		manager.slices = append(manager.slices, slice)
                if slice == "machine" {
                   manager.nonSystemdLayout = true
                }
                break // This should be fine as there will atmost one of machine or machine.slice exist at any given time
	}
}

Eventually we can use the manager.nonSysytemdLayout when parsing number of vCPUs. What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that sounds sensible

)

func resolveSlices(nonSystemdMode bool) string {
if nonSystemdMode {
return nonSystemdSlicesName
} else {
return systemdSlicesName
}
}

func resolveLibvirtRegex(nonSystemdMode bool) *regexp.Regexp {
if nonSystemdMode {
return libvirtCgroupNoSystemdPathRegex
} else {
return libvirtCgroupPathRegex
}
}

// resolveSubsystem returns the resolved cgroups v1 subsystem.
func resolveSubsystem(subsystem string) string {
switch subsystem {
Expand Down Expand Up @@ -311,7 +340,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error)
fs: fs,
mode: cgroups.Unified,
root: *cgroupfsPath,
slices: []string{"machine.slice"},
slices: []string{resolveSlices(*noSystemdMode)},
}
} else {
var mode cgroups.CGMode
Expand All @@ -330,7 +359,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error)
mode: mode,
root: *cgroupfsPath,
activeController: activeSubsystem,
slices: []string{"machine.slice"},
slices: []string{resolveSlices(*noSystemdMode)},
}
}

Expand All @@ -339,10 +368,10 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error)
manager.name = rmNames[name]

// Add path regex
manager.idRegex = libvirtCgroupPathRegex
manager.idRegex = resolveLibvirtRegex(*noSystemdMode)

// Identify child cgroup
// In cgroups v1, all the child cgroups like emulator, vcpu* are flat whereas
// In cgroups v1 or on a non-systemd host, all the child cgroups like emulator, vcpu* are flat whereas
// in v2 they are all inside libvirt child
manager.isChild = func(p string) bool {
return strings.Contains(p, "/libvirt") || strings.Contains(p, "/emulator") || strings.Contains(p, "/vcpu")
Expand Down Expand Up @@ -1078,10 +1107,9 @@ func (c *cgroupCollector) cpusFromChildren(path string) (int, error) {
// In cgroup v1, they are flat whereas in cgroup v2 they are inside libvirt folder
var vcpuPath string

switch c.cgroupManager.mode { //nolint:exhaustive
case cgroups.Unified:
if c.cgroupManager.mode == cgroups.Unified && !(*noSystemdMode) {
vcpuPath = fmt.Sprintf("%s%s/libvirt/vcpu*", c.cgroupManager.root, path)
default:
} else {
vcpuPath = fmt.Sprintf("%s%s/vcpu*", c.cgroupManager.root, path)
}

Expand Down