From ee029a34880f67614ef1c4ea6f1b27d00a5ec400 Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Thu, 9 Oct 2025 16:31:14 +0200 Subject: [PATCH 1/2] profilingmetrics: add syscall metric Signed-off-by: Florian Lehner --- connector/profilingmetricsconnector/frame.go | 109 ++++++++++++++++++- 1 file changed, 103 insertions(+), 6 deletions(-) diff --git a/connector/profilingmetricsconnector/frame.go b/connector/profilingmetricsconnector/frame.go index 2254506e8..e98a192de 100644 --- a/connector/profilingmetricsconnector/frame.go +++ b/connector/profilingmetricsconnector/frame.go @@ -38,7 +38,10 @@ type frameInfo struct { filename string } -const nativeLibraryAttrName = "shlib_name" +const ( + nativeLibraryAttrName = "shlib_name" + syscallAttrName = "syscall_name" +) var ( metricUser = metric{name: "samples.user.count", desc: "Number of samples executing userspace code (self)"} @@ -70,10 +73,14 @@ var ( frameTypeBeam: metricBeam, } + // match shared libraries rx = regexp.MustCompile(`(?:.*/)?(.+)\.so`) + + // match syscalls + syscallRx = regexp.MustCompile(`^__(?:x64|arm64)_sys_(\w+)`) ) -func fetchFrameInfo(dictionary pprofile.ProfilesDictionary, +func fetchLeafFrameInfo(dictionary pprofile.ProfilesDictionary, locationIndices pcommon.Int32Slice, sampleLocationIndex int, ) (frameInfo, error) { @@ -153,12 +160,12 @@ func classifyFrame(dictionary pprofile.ProfilesDictionary, counts map[metric]int64, nativeCounts map[string]int64, ) error { - fi, err := fetchFrameInfo(dictionary, locationIndices, 0) + leaf, err := fetchLeafFrameInfo(dictionary, locationIndices, 0) if err != nil { return err } - leafFrameType := fi.typ + leafFrameType := leaf.typ // We don't need a separate metric for total number of samples, as this can always be // derived from summing the metricKernel and metricUser counts. metric := allowedFrameTypes[leafFrameType] @@ -177,7 +184,7 @@ func classifyFrame(dictionary pprofile.ProfilesDictionary, } // Extract native library name and increment associated count - if sm := rx.FindStringSubmatch(fi.filename); sm != nil { + if sm := rx.FindStringSubmatch(leaf.filename); sm != nil { nativeCounts[sm[1]]++ } else { counts[metric]++ @@ -186,6 +193,74 @@ func classifyFrame(dictionary pprofile.ProfilesDictionary, return nil } +// identifySyscall walks the frames and extracts the syscall information. +func identifySyscall(dictionary pprofile.ProfilesDictionary, + locationIndices pcommon.Int32Slice, + syscallCounts map[string]int64, +) error { + // TODO: Scale syscallCounts by number of events in each Sample. Currently, + // this logic assumes 1 event per Sample (thus the increments by 1 below), + // which isn't necessarily the case. + attrTable := dictionary.AttributeTable() + locationTable := dictionary.LocationTable() + strTable := dictionary.StringTable() + funcTable := dictionary.FunctionTable() + + attrTblLen := attrTable.Len() + locTblLen := locationTable.Len() + strTblLen := strTable.Len() + funcTblLen := funcTable.Len() + + for _, li := range locationIndices.All() { + if li >= int32(locTblLen) { + // log error + continue + } + loc := locationTable.At(int(li)) + for _, attrIdx := range loc.AttributeIndices().All() { + if attrIdx >= int32(attrTblLen) { + // log error + continue + } + attr := attrTable.At(int(attrIdx)) + if int(attr.KeyStrindex()) >= strTblLen { + // log error + continue + } + + if strTable.At(int(attr.KeyStrindex())) == string(semconv.ProfileFrameTypeKey) { + frameType := attr.Value().Str() + if frameType == frameTypeKernel { + for _, ln := range loc.Line().All() { + if ln.FunctionIndex() >= int32(funcTblLen) { + // log error + continue + } + fn := funcTable.At(int(ln.FunctionIndex())) + if fn.NameStrindex() >= int32(strTblLen) { + // log error + continue + } + fnName := strTable.At(int(fn.NameStrindex())) + + // Avoid string allocations by using indices to string location. + indices := syscallRx.FindStringSubmatchIndex(fnName) + if len(indices) == 4 { + syscall := fnName[indices[2]:indices[3]] + syscallCounts[syscall]++ + return nil + } + } + + } + } + + } + + } + return nil +} + func (c *profilesToMetricsConnector) addFrameMetrics(dictionary pprofile.ProfilesDictionary, profile pprofile.Profile, scopeMetrics pmetric.ScopeMetrics, ) { @@ -193,6 +268,7 @@ func (c *profilesToMetricsConnector) addFrameMetrics(dictionary pprofile.Profile counts := make(map[metric]int64) nativeCounts := make(map[string]int64) + syscallCounts := make(map[string]int64) // Process all samples and extract metric counts for _, sample := range profile.Sample().All() { @@ -201,7 +277,12 @@ func (c *profilesToMetricsConnector) addFrameMetrics(dictionary pprofile.Profile sample, counts, nativeCounts); err != nil { // Should not happen with well-formed profile data // TODO: Add error metric or log error - continue + } + + if err := identifySyscall(dictionary, stack.LocationIndices(), + syscallCounts); err != nil { + // Should not happen with well-formed profile data + // TODO: Add error metric or log error } } @@ -236,4 +317,20 @@ func (c *profilesToMetricsConnector) addFrameMetrics(dictionary pprofile.Profile dp.SetIntValue(count) dp.Attributes().PutStr(nativeLibraryAttrName, libraryName) } + + for sysCall, count := range syscallCounts { + m := scopeMetrics.Metrics().AppendEmpty() + m.SetName(c.config.MetricsPrefix + metricNative.name) + m.SetDescription(metricNative.desc) + m.SetUnit("1") + + sum := m.SetEmptySum() + sum.SetIsMonotonic(true) + sum.SetAggregationTemporality(pmetric.AggregationTemporalityDelta) + + dp := sum.DataPoints().AppendEmpty() + dp.SetTimestamp(profile.Time()) + dp.SetIntValue(count) + dp.Attributes().PutStr(syscallAttrName, sysCall) + } } From 5f2af5300bda68e2964ffef2861d0be4a338659d Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Fri, 10 Oct 2025 10:02:02 +0200 Subject: [PATCH 2/2] fixup Signed-off-by: Florian Lehner --- connector/profilingmetricsconnector/frame.go | 71 ++++++++++--------- .../profilingmetricsconnector/frame_test.go | 46 ++++++++---- 2 files changed, 70 insertions(+), 47 deletions(-) diff --git a/connector/profilingmetricsconnector/frame.go b/connector/profilingmetricsconnector/frame.go index e98a192de..adb9f94b9 100644 --- a/connector/profilingmetricsconnector/frame.go +++ b/connector/profilingmetricsconnector/frame.go @@ -19,6 +19,7 @@ package profilingmetricsconnector // import "github.com/elastic/opentelemetry-co import ( "fmt" + "log/slog" "regexp" "go.opentelemetry.io/collector/pdata/pcommon" @@ -44,19 +45,20 @@ const ( ) var ( - metricUser = metric{name: "samples.user.count", desc: "Number of samples executing userspace code (self)"} - metricKernel = metric{name: "samples.kernel.count", desc: "Number of samples executing kernel code (self)"} - metricNative = metric{name: "samples.native.count", desc: "Number of samples executing native code (self)"} - metricJVM = metric{name: "samples.jvm.count", desc: "Number of samples executing HotSpot code (self)"} - metricPython = metric{name: "samples.cpython.count", desc: "Number of samples executing Python code (self)"} - metricGo = metric{name: "samples.go.count", desc: "Number of samples executing Go code (self)"} - metricV8JS = metric{name: "samples.v8js.count", desc: "Number of samples executing V8 JS code (self)"} - metricPHP = metric{name: "samples.php.count", desc: "Number of samples executing PHP code (self)"} - metricPerl = metric{name: "samples.perl.count", desc: "Number of samples executing Perl code (self)"} - metricRuby = metric{name: "samples.ruby.count", desc: "Number of samples executing Ruby code (self)"} - metricDotnet = metric{name: "samples.dotnet.count", desc: "Number of samples executing Dotnet code (self)"} - metricRust = metric{name: "samples.rust.count", desc: "Number of samples executing Rust code (self)"} - metricBeam = metric{name: "samples.beam.count", desc: "Number of samples executing Beam code (self)"} + metricUser = metric{name: "samples.user.count", desc: "Number of samples executing userspace code (self)"} + metricKernel = metric{name: "samples.kernel.count", desc: "Number of samples executing kernel code (self)"} + metricSyscall = metric{name: "samples.syscall.count", desc: "Number of samples executing syscall code (self)"} + metricNative = metric{name: "samples.native.count", desc: "Number of samples executing native code (self)"} + metricJVM = metric{name: "samples.jvm.count", desc: "Number of samples executing HotSpot code (self)"} + metricPython = metric{name: "samples.cpython.count", desc: "Number of samples executing Python code (self)"} + metricGo = metric{name: "samples.go.count", desc: "Number of samples executing Go code (self)"} + metricV8JS = metric{name: "samples.v8js.count", desc: "Number of samples executing V8 JS code (self)"} + metricPHP = metric{name: "samples.php.count", desc: "Number of samples executing PHP code (self)"} + metricPerl = metric{name: "samples.perl.count", desc: "Number of samples executing Perl code (self)"} + metricRuby = metric{name: "samples.ruby.count", desc: "Number of samples executing Ruby code (self)"} + metricDotnet = metric{name: "samples.dotnet.count", desc: "Number of samples executing Dotnet code (self)"} + metricRust = metric{name: "samples.rust.count", desc: "Number of samples executing Rust code (self)"} + metricBeam = metric{name: "samples.beam.count", desc: "Number of samples executing Beam code (self)"} allowedFrameTypes = map[string]metric{ frameTypeNative: metricNative, @@ -77,7 +79,7 @@ var ( rx = regexp.MustCompile(`(?:.*/)?(.+)\.so`) // match syscalls - syscallRx = regexp.MustCompile(`^__(?:x64|arm64)_sys_(\w+)`) + syscallRx = regexp.MustCompile(`^(?:__x64_sys|__arm64_sys|ksys)_(\w+)`) ) func fetchLeafFrameInfo(dictionary pprofile.ProfilesDictionary, @@ -155,10 +157,8 @@ func fetchLeafFrameInfo(dictionary pprofile.ProfilesDictionary, // classifyFrame classifies sample into one or more categories based on frame type. // This takes place by incrementing the associated metric count. func classifyFrame(dictionary pprofile.ProfilesDictionary, - locationIndices pcommon.Int32Slice, - sample pprofile.Sample, - counts map[metric]int64, - nativeCounts map[string]int64, + locationIndices pcommon.Int32Slice, sample pprofile.Sample, + counts map[metric]int64, nativeCounts map[string]int64, ) error { leaf, err := fetchLeafFrameInfo(dictionary, locationIndices, 0) if err != nil { @@ -196,11 +196,8 @@ func classifyFrame(dictionary pprofile.ProfilesDictionary, // identifySyscall walks the frames and extracts the syscall information. func identifySyscall(dictionary pprofile.ProfilesDictionary, locationIndices pcommon.Int32Slice, - syscallCounts map[string]int64, + syscallCounts map[string]int64, multiplier int64, ) error { - // TODO: Scale syscallCounts by number of events in each Sample. Currently, - // this logic assumes 1 event per Sample (thus the increments by 1 below), - // which isn't necessarily the case. attrTable := dictionary.AttributeTable() locationTable := dictionary.LocationTable() strTable := dictionary.StringTable() @@ -213,18 +210,21 @@ func identifySyscall(dictionary pprofile.ProfilesDictionary, for _, li := range locationIndices.All() { if li >= int32(locTblLen) { - // log error + slog.Error("identifySyscall", slog.Any("li", li), + slog.Any("locTblLen", locTblLen)) continue } loc := locationTable.At(int(li)) for _, attrIdx := range loc.AttributeIndices().All() { if attrIdx >= int32(attrTblLen) { - // log error + slog.Error("identifySyscall", slog.Any("attrIdx", attrIdx), + slog.Any("attrTblLen", attrTblLen)) continue } attr := attrTable.At(int(attrIdx)) if int(attr.KeyStrindex()) >= strTblLen { - // log error + slog.Error("identifySyscall", slog.Any("attr.KeyStrindex()", attr.KeyStrindex()), + slog.Any("strTblLen", strTblLen)) continue } @@ -233,12 +233,14 @@ func identifySyscall(dictionary pprofile.ProfilesDictionary, if frameType == frameTypeKernel { for _, ln := range loc.Line().All() { if ln.FunctionIndex() >= int32(funcTblLen) { - // log error + slog.Error("identifySyscall", slog.Any("ln.FunctionIndex()", ln.FunctionIndex()), + slog.Any("funcTblLen", funcTblLen)) continue } fn := funcTable.At(int(ln.FunctionIndex())) if fn.NameStrindex() >= int32(strTblLen) { - // log error + slog.Error("identifySyscall", slog.Any("fn.NameStrindex()", fn.NameStrindex()), + slog.Any("strTblLen", strTblLen)) continue } fnName := strTable.At(int(fn.NameStrindex())) @@ -247,16 +249,14 @@ func identifySyscall(dictionary pprofile.ProfilesDictionary, indices := syscallRx.FindStringSubmatchIndex(fnName) if len(indices) == 4 { syscall := fnName[indices[2]:indices[3]] - syscallCounts[syscall]++ + syscallCounts[syscall] += multiplier return nil } } } } - } - } return nil } @@ -272,17 +272,18 @@ func (c *profilesToMetricsConnector) addFrameMetrics(dictionary pprofile.Profile // Process all samples and extract metric counts for _, sample := range profile.Sample().All() { + multiplier := int64(sample.TimestampsUnixNano().Len()) stack := stackTable.At(int(sample.StackIndex())) if err := classifyFrame(dictionary, stack.LocationIndices(), sample, counts, nativeCounts); err != nil { // Should not happen with well-formed profile data - // TODO: Add error metric or log error + slog.Error("classifyFrame", slog.Any("error", err)) } if err := identifySyscall(dictionary, stack.LocationIndices(), - syscallCounts); err != nil { + syscallCounts, multiplier); err != nil { // Should not happen with well-formed profile data - // TODO: Add error metric or log error + slog.Error("identifySyscall", slog.Any("error", err)) } } @@ -320,8 +321,8 @@ func (c *profilesToMetricsConnector) addFrameMetrics(dictionary pprofile.Profile for sysCall, count := range syscallCounts { m := scopeMetrics.Metrics().AppendEmpty() - m.SetName(c.config.MetricsPrefix + metricNative.name) - m.SetDescription(metricNative.desc) + m.SetName(c.config.MetricsPrefix + metricSyscall.name) + m.SetDescription(metricSyscall.desc) m.SetUnit("1") sum := m.SetEmptySum() diff --git a/connector/profilingmetricsconnector/frame_test.go b/connector/profilingmetricsconnector/frame_test.go index f47710e45..c40ef2ff4 100644 --- a/connector/profilingmetricsconnector/frame_test.go +++ b/connector/profilingmetricsconnector/frame_test.go @@ -73,17 +73,24 @@ func (m *metricsConsumerStub) ConsumeMetrics(ctx context.Context, md pmetric.Met assert.Equal(m.t, pmetric.AggregationTemporalityDelta, sum.AggregationTemporality()) assert.Equal(m.t, 1, sum.DataPoints().Len()) - dp := sum.DataPoints().At(0) - // For native metrics, this is convenient way to test library name extraction - if strings.HasSuffix(name, metricNative.name) { - if shlibName, exists := dp.Attributes().Get(nativeLibraryAttrName); exists { - name = fmt.Sprintf("%v/%v", name, shlibName.AsString()) + for _, dp := range sum.DataPoints().All() { + switch { + case strings.HasSuffix(name, metricNative.name): + // For native metrics, this is convenient way to test library name extraction + if shlibName, exists := dp.Attributes().Get(nativeLibraryAttrName); exists { + name = fmt.Sprintf("%v/%v", name, shlibName.AsString()) + } + case strings.HasSuffix(name, metricSyscall.name): + // For syscall metrics, this is convenient way to test syscall name extraction + if syscallName, exists := dp.Attributes().Get(syscallAttrName); exists { + name = fmt.Sprintf("%v/%v", name, syscallName.AsString()) + } + default: + // Non-native metrics should not have attributes attached + assert.Equal(m.t, 0, dp.Attributes().Len()) } - } else { - // Non-native metrics should not have attributes attached - assert.Equal(m.t, 0, dp.Attributes().Len()) + m.counts[name] += dp.IntValue() } - m.counts[name] += dp.IntValue() } } } @@ -98,6 +105,7 @@ func newProfiles() (pprofile.Profiles, pprofile.KeyValueAndUnitSlice, pprofile.LocationSlice, pprofile.StackSlice, + pprofile.FunctionSlice, ) { profiles := pprofile.NewProfiles() dict := profiles.Dictionary() @@ -107,6 +115,7 @@ func newProfiles() (pprofile.Profiles, locTable := dict.LocationTable() mappingTable := dict.MappingTable() stackTable := dict.StackTable() + funcTable := dict.FunctionTable() strTable.Append("") strTable.Append("samples") @@ -116,8 +125,9 @@ func newProfiles() (pprofile.Profiles, mappingTable.AppendEmpty() attrTable.AppendEmpty() stackTable.AppendEmpty() + funcTable.AppendEmpty() - return profiles, dict, strTable, attrTable, locTable, stackTable + return profiles, dict, strTable, attrTable, locTable, stackTable, funcTable } // newProfile initializes and appends a Profile to a Profiles instance. @@ -146,11 +156,12 @@ func TestConsumeProfiles_FrameMetrics(t *testing.T) { } // Create a Profile and higher-level envelopes - profiles, _, strTable, attrTable, locTable, stackTable := newProfiles() + profiles, _, strTable, attrTable, locTable, stackTable, _ := newProfiles() prof := newProfile(profiles) // Create a profiles object with a sample that has a location with a frame type attribute. sample := prof.Sample().AppendEmpty() + sample.TimestampsUnixNano().Append(1, 2, 3, 4) // Add an attribute for frame type attr := attrTable.AppendEmpty() @@ -191,7 +202,7 @@ func TestConsumeProfiles_FrameMetricsMultiple(t *testing.T) { } // Create a Profile and higher-level envelopes - profiles, dict, strTable, attrTable, locTable, stackTable := newProfiles() + profiles, dict, strTable, attrTable, locTable, stackTable, funcTable := newProfiles() prof := newProfile(profiles) mappingTable := dict.MappingTable() @@ -218,6 +229,7 @@ func TestConsumeProfiles_FrameMetricsMultiple(t *testing.T) { locPy.AttributeIndices().Append(2) locKernel := locTable.AppendEmpty() locKernel.AttributeIndices().Append(3) + locKernel.Line().AppendEmpty().SetFunctionIndex(1) locNative := locTable.AppendEmpty() locNative.AttributeIndices().Append(4) @@ -249,20 +261,29 @@ func TestConsumeProfiles_FrameMetricsMultiple(t *testing.T) { // Eight samples sampleKernel := prof.Sample().AppendEmpty() sampleKernel.SetStackIndex(3) + sampleKernel.TimestampsUnixNano().Append(1, 2, 3) sampleNative := prof.Sample().AppendEmpty() sampleNative.SetStackIndex(4) sampleNative = prof.Sample().AppendEmpty() sampleNative.SetStackIndex(5) + sampleNative.TimestampsUnixNano().Append(2) sampleGo := prof.Sample().AppendEmpty() sampleGo.SetStackIndex(1) + sampleGo.TimestampsUnixNano().Append(3) samplePy := prof.Sample().AppendEmpty() samplePy.SetStackIndex(2) samplePy = prof.Sample().AppendEmpty() samplePy.SetStackIndex(2) samplePy = prof.Sample().AppendEmpty() samplePy.SetStackIndex(2) + samplePy.TimestampsUnixNano().Append(4) sampleGo = prof.Sample().AppendEmpty() sampleGo.SetStackIndex(1) + sampleGo.TimestampsUnixNano().Append(5) + + syscallFunc := funcTable.AppendEmpty() + syscallFunc.SetNameStrindex(int32(strTable.Len())) + strTable.Append("__x64_sys_bpf") err := conn.ConsumeProfiles(context.Background(), profiles) assert.NoError(t, err) @@ -274,6 +295,7 @@ func TestConsumeProfiles_FrameMetricsMultiple(t *testing.T) { "frametest.samples.kernel.count": 1, "frametest.samples.native.count": 1, "frametest.samples.native.count/libc": 1, + "frametest.samples.syscall.count/bpf": 3, }, m.counts) }