From 5da7e2ea1a408c075072a9cb190a209377964cc0 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Thu, 18 Dec 2025 22:09:20 -0500 Subject: [PATCH 1/2] rough draft --- include/ddprof_cli.hpp | 4 + include/ddprof_context.hpp | 11 ++ include/ddprof_context_lib.hpp | 5 + include/ddprof_worker.hpp | 4 +- include/perf_watcher.hpp | 12 ++- include/pevent.hpp | 3 + src/ddprof_cli.cc | 27 +++++ src/ddprof_context_lib.cc | 109 ++++++++++++++++++++ src/ddprof_worker.cc | 178 ++++++++++++++++++++++++++++++++- src/perf_mainloop.cc | 10 +- src/pevent_lib.cc | 35 ++++++- test/CMakeLists.txt | 8 ++ 12 files changed, 396 insertions(+), 10 deletions(-) diff --git a/include/ddprof_cli.hpp b/include/ddprof_cli.hpp index 8818e8368..78103462e 100644 --- a/include/ddprof_cli.hpp +++ b/include/ddprof_cli.hpp @@ -78,6 +78,10 @@ struct DDProfCLI { bool continue_exec{false}; bool timeline{true}; + // SDT probe options + std::string sdt_mode{"auto"}; // auto, only, off + std::string target_binary; // Path for SDT probe discovery + // args std::vector command_line; diff --git a/include/ddprof_context.hpp b/include/ddprof_context.hpp index f59533c25..da02c90f1 100644 --- a/include/ddprof_context.hpp +++ b/include/ddprof_context.hpp @@ -12,7 +12,9 @@ #include "exporter_input.hpp" #include "perf_clock.hpp" #include "perf_watcher.hpp" +#include "sdt_allocation_correlator.hpp" #include "unique_fd.hpp" +#include "uprobe_attacher.hpp" #include #include @@ -44,6 +46,10 @@ struct DDProfContext { std::string tags; std::chrono::milliseconds initial_loaded_libs_check_delay{0}; std::chrono::milliseconds loaded_libs_check_interval{0}; + + // SDT probe options + std::string sdt_mode{"auto"}; + std::string target_binary; } params; ddprof::UniqueFd socket_fd; @@ -52,5 +58,10 @@ struct DDProfContext { std::vector watchers; ExporterInput exp_input; DDProfWorkerContext worker_ctx; + + // SDT probe support + std::vector sdt_attachments; // Attached uprobes for SDT + SDTAllocationCorrelator sdt_correlator; // 
Entry/exit correlation + bool sdt_probes_active{false}; // True if SDT probes are being used }; } // namespace ddprof diff --git a/include/ddprof_context_lib.hpp b/include/ddprof_context_lib.hpp index 696d83cb7..cb83524e7 100644 --- a/include/ddprof_context_lib.hpp +++ b/include/ddprof_context_lib.hpp @@ -18,4 +18,9 @@ DDRes context_set(const DDProfCLI &ddprof_cli, DDProfContext &ctx); int context_allocation_profiling_watcher_idx(const DDProfContext &ctx); +int context_sdt_allocation_profiling_watcher_idx(const DDProfContext &ctx); + +DDRes context_setup_sdt_probes(const DDProfCLI &ddprof_cli, + DDProfContext &ctx); + } // namespace ddprof diff --git a/include/ddprof_worker.hpp b/include/ddprof_worker.hpp index 4c38250ee..f081ed3e1 100644 --- a/include/ddprof_worker.hpp +++ b/include/ddprof_worker.hpp @@ -11,6 +11,7 @@ #include "ddres.hpp" #include "persistent_worker_state.hpp" #include "pevent.hpp" +#include "sdt_probe.hpp" namespace ddprof { struct DDProfContext; @@ -24,7 +25,8 @@ DDRes ddprof_worker_cycle(DDProfContext &ctx, std::chrono::steady_clock::time_point now, bool synchronous_export); DDRes ddprof_worker_process_event(const perf_event_header *hdr, int watcher_pos, - DDProfContext &ctx); + DDProfContext &ctx, + SDTProbeType sdt_probe_type = SDTProbeType::kUnknown); // Only init unwinding elements DDRes worker_library_init(DDProfContext &ctx, diff --git a/include/perf_watcher.hpp b/include/perf_watcher.hpp index 71c11f557..e1374b9fa 100644 --- a/include/perf_watcher.hpp +++ b/include/perf_watcher.hpp @@ -98,10 +98,14 @@ enum DDPROF_SAMPLE_TYPES : uint8_t { #undef X_ENUM // Define our own event type on top of perf event types -enum DDProfTypeId : uint8_t { kDDPROF_TYPE_CUSTOM = PERF_TYPE_MAX + 100 }; +enum DDProfTypeId : uint8_t { + kDDPROF_TYPE_CUSTOM = PERF_TYPE_MAX + 100, + kDDPROF_TYPE_SDT_UPROBE = PERF_TYPE_MAX + 101, +}; enum DDProfCustomCountId : uint8_t { kDDPROF_COUNT_ALLOCATIONS = 0, + kDDPROF_COUNT_ALLOCATIONS_SDT = 1, }; // Kernel events are 
necessary to get a full accounting of CPU @@ -122,6 +126,9 @@ enum DDProfCustomCountId : uint8_t { #define SKIP_FRAMES {.nb_frames_to_skip = NB_FRAMES_TO_SKIP} +// SDT probes don't add frames to the stack (uprobe fires at probe location) +#define SDT_NO_SKIP {.nb_frames_to_skip = 0} + // Whereas tracepoints are dynamically configured and can be checked at runtime, // we lack the ability to inspect events of type other than TYPE_TRACEPOINT. // Accordingly, we maintain a list of events, even though the type of these @@ -150,7 +157,8 @@ enum DDProfCustomCountId : uint8_t { X(sALGN, "Align. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \ X(sEMU, "Emu. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \ X(sDUM, "Dummy", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY, 1, DDPROF_PWT_NOCOUNT, {}) \ - X(sALLOC, "Allocations", kDDPROF_TYPE_CUSTOM, kDDPROF_COUNT_ALLOCATIONS, 524288, DDPROF_PWT_ALLOC_SPACE, SKIP_FRAMES) + X(sALLOC, "Allocations", kDDPROF_TYPE_CUSTOM, kDDPROF_COUNT_ALLOCATIONS, 524288, DDPROF_PWT_ALLOC_SPACE, SKIP_FRAMES) \ + X(sALLOC_SDT, "SDT Allocations", kDDPROF_TYPE_SDT_UPROBE, kDDPROF_COUNT_ALLOCATIONS_SDT, 524288, DDPROF_PWT_ALLOC_SPACE, SDT_NO_SKIP) // clang-format on diff --git a/include/pevent.hpp b/include/pevent.hpp index d01832b24..f79fd8f44 100644 --- a/include/pevent.hpp +++ b/include/pevent.hpp @@ -7,6 +7,7 @@ #include "ddprof_defs.hpp" #include "perf_ringbuffer.hpp" +#include "sdt_probe.hpp" #include @@ -29,6 +30,8 @@ struct PEvent { std::vector sub_fds; // perf FDs of other events outputting to the same ring buffer // (eg. 
perf events for other process threads in PID mode) + SDTProbeType sdt_probe_type{ + SDTProbeType::kUnknown}; // Type of SDT probe (for uprobe events) }; struct PEventHdr { diff --git a/src/ddprof_cli.cc b/src/ddprof_cli.cc index 0dc47e518..4cc1e9c63 100644 --- a/src/ddprof_cli.cc +++ b/src/ddprof_cli.cc @@ -429,6 +429,29 @@ int DDProfCLI::parse(int argc, const char *argv[]) { ->default_val(k_default_max_profiled_pids) ->envname("DD_PROFILING_MAXIMUM_PIDS") ->group("")); + + // SDT probe options + extended_options.push_back( + app.add_option("--sdt-probes,--sdt_probes", sdt_mode, + "SDT probe mode for memory profiling:\n" + " auto: Use SDT probes if found, fallback to hooks\n" + " only: Only use SDT probes, fail if not found\n" + " off: Never use SDT probes (hook-based only)") + ->default_val("auto") + ->check(CLI::IsMember({"auto", "only", "off"})) + ->envname("DD_PROFILING_SDT_PROBES") + ->group("")); + + extended_options.push_back( + app.add_option("--target-binary,--target_binary", target_binary, + "Path to target binary for SDT probe discovery.\n" + "Required for SDT probe mode with statically linked " + "binaries.\n" + "If not specified, uses the first element of the command " + "line.") + ->envname("DD_PROFILING_TARGET_BINARY") + ->group("")); + // Parse CLI11_PARSE(app, argc, argv); @@ -595,6 +618,10 @@ void DDProfCLI::print() const { disable_symbolization ? "true" : "false"); PRINT_NFO(" - reorder_events: %s", reorder_events ? 
"true" : "false"); PRINT_NFO(" - maximum_pids: %d", maximum_pids); + PRINT_NFO(" - sdt_mode: %s", sdt_mode.c_str()); + if (!target_binary.empty()) { + PRINT_NFO(" - target_binary: %s", target_binary.c_str()); + } } CommandLineWrapper DDProfCLI::get_user_command_line() const { diff --git a/src/ddprof_context_lib.cc b/src/ddprof_context_lib.cc index b1f0644d4..cc047509c 100644 --- a/src/ddprof_context_lib.cc +++ b/src/ddprof_context_lib.cc @@ -15,6 +15,8 @@ #include "logger_setup.hpp" #include "presets.hpp" #include "prng.hpp" +#include "sdt_probe.hpp" +#include "uprobe_attacher.hpp" #include #include @@ -100,6 +102,10 @@ void copy_cli_values(const DDProfCLI &ddprof_cli, DDProfContext &ctx) { ctx.params.loaded_libs_check_interval = ddprof_cli.loaded_libs_check_interval; ctx.params.socket_path = ddprof_cli.socket_path; ctx.params.pipefd_to_library = UniqueFd{ddprof_cli.pipefd_to_library}; + + // SDT probe options + ctx.params.sdt_mode = ddprof_cli.sdt_mode; + ctx.params.target_binary = ddprof_cli.target_binary; } DDRes context_add_watchers(const DDProfCLI &ddprof_cli, DDProfContext &ctx) { @@ -152,6 +158,9 @@ DDRes context_set(const DDProfCLI &ddprof_cli, DDProfContext &ctx) { DDRES_CHECK_FWD(context_add_watchers(ddprof_cli, ctx)); + // Setup SDT probes if applicable + DDRES_CHECK_FWD(context_setup_sdt_probes(ddprof_cli, ctx)); + if (ctx.params.socket_path.empty()) { ctx.params.socket_path = generate_socket_path(); } @@ -181,4 +190,104 @@ int context_allocation_profiling_watcher_idx(const DDProfContext &ctx) { return -1; } +int context_sdt_allocation_profiling_watcher_idx(const DDProfContext &ctx) { + const std::span watchers{ctx.watchers}; + auto it = + std::find_if(watchers.begin(), watchers.end(), [](const auto &watcher) { + return watcher.type == kDDPROF_TYPE_SDT_UPROBE && + watcher.config == kDDPROF_COUNT_ALLOCATIONS_SDT; + }); + + if (it != watchers.end()) { + return it - watchers.begin(); + } + return -1; +} + +DDRes context_setup_sdt_probes(const DDProfCLI 
&ddprof_cli, + DDProfContext &ctx) { + // Check if SDT mode is off + if (ddprof_cli.sdt_mode == "off") { + LG_DBG("SDT probe mode is off, skipping SDT setup"); + return {}; + } + + // Check if we have an SDT allocation watcher + int sdt_watcher_idx = context_sdt_allocation_profiling_watcher_idx(ctx); + if (sdt_watcher_idx < 0) { + // No SDT watcher, nothing to do + LG_DBG("No SDT allocation watcher found, skipping SDT setup"); + return {}; + } + + // Determine target binary path + std::string target_binary = ddprof_cli.target_binary; + if (target_binary.empty() && !ddprof_cli.command_line.empty()) { + target_binary = ddprof_cli.command_line[0]; + LG_DBG("Using command line first element as target binary: %s", + target_binary.c_str()); + } + + if (target_binary.empty()) { + if (ddprof_cli.sdt_mode == "only") { + DDRES_RETURN_ERROR_LOG( + DD_WHAT_INPUT_PROCESS, + "SDT probe mode is 'only' but no target binary specified"); + } + LG_DBG("No target binary specified, SDT probes will not be used"); + return {}; + } + + // Try to discover SDT probes + LG_NTC("Attempting to discover SDT probes in %s", target_binary.c_str()); + auto probes = parse_sdt_probes(target_binary.c_str()); + + if (!probes) { + if (ddprof_cli.sdt_mode == "only") { + DDRES_RETURN_ERROR_LOG(DD_WHAT_INPUT_PROCESS, + "SDT probe mode is 'only' but no SDT probes found " + "in %s", + target_binary.c_str()); + } + LG_NTC("No SDT probes found in %s, will use hook-based allocation tracking", + target_binary.c_str()); + return {}; + } + + // Check if we have the required allocation probes + if (!probes->has_allocation_probes()) { + if (ddprof_cli.sdt_mode == "only") { + DDRES_RETURN_ERROR_LOG( + DD_WHAT_INPUT_PROCESS, + "SDT probe mode is 'only' but required allocation probes not found " + "in %s (need %.*s:entry/exit and %.*s:entry)", + target_binary.c_str(), static_cast(kMallocProvider.size()), + kMallocProvider.data(), static_cast(kFreeProvider.size()), + kFreeProvider.data()); + } + LG_NTC("Required 
allocation SDT probes not found in %s, will use "
+           "hook-based tracking",
+           target_binary.c_str());
+    return {};
+  }
+
+  LG_NTC("Found %zu SDT probes in %s", probes->probes.size(),
+         target_binary.c_str());
+
+  // Stack sample size will come from this watcher once attachment is wired up
+  [[maybe_unused]] const PerfWatcher &watcher = ctx.watchers[sdt_watcher_idx];
+
+  // Note: We can't attach uprobes yet because we don't have the PID.
+  // The uprobes will be attached later when we know the target PID.
+  // TODO: `probes` is dropped on return; nothing is stored for that step yet.
+
+  // Only the availability flag is recorded here; the attach step (in
+  // ddprof_setup or similar) must discover the probes again.
+  ctx.sdt_probes_active = true;
+
+  LG_NTC("SDT probes discovered successfully, will attach uprobes at runtime");
+
+  return {};
+}
+
 } // namespace ddprof
diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc
index aa1394370..5029d92d6 100644
--- a/src/ddprof_worker.cc
+++ b/src/ddprof_worker.cc
@@ -6,15 +6,19 @@
 #include "ddprof_worker.hpp"
 
 #include "ddprof_context.hpp"
+#include "ddprof_context_lib.hpp"
 #include "ddprof_perf_event.hpp"
 #include "ddprof_stats.hpp"
 #include "dso_hdr.hpp"
 #include "exporter/ddprof_exporter.hpp"
 #include "logger.hpp"
 #include "perf.hpp"
+#include "perf_archmap.hpp"
 #include "pevent_lib.hpp"
 #include "pprof/ddprof_pprof.hpp"
 #include "procutils.hpp"
+#include "sdt_allocation_correlator.hpp"
+#include "sdt_probe.hpp"
 #include "symbolizer.hpp"
 #include "tags.hpp"
 #include "tsc_clock.hpp"
@@ -330,6 +334,169 @@ DDRes clear_unvisited_pids(DDProfContext &ctx) {
   return {};
 }
 
+/************************* SDT uprobe helpers *********************************/
+
+/// Extract a register value from a perf sample's register array
+/// The perf register mask determines which registers are present in the array
+/// @param sample The perf event sample
+/// @param perf_reg_num The perf register number (e.g., PAM_X86_RDI)
+/// @return The register value, or 0 if the register is not available 
+uint64_t extract_register_value(const perf_event_sample *sample,
+                                unsigned int perf_reg_num) {
+  if (!sample || !sample->regs || sample->abi == 0) {
+    return 0;
+  }
+
+  // Reject numbers >= 64 (the shift would be UB) and registers not in the mask
+  if (perf_reg_num >= 64 || !(k_perf_register_mask & (1ULL << perf_reg_num))) {
+    return 0;
+  }
+
+  // Count how many bits are set before this register in the mask
+  // This gives us the index into the regs array
+  const uint64_t mask_below = (1ULL << perf_reg_num) - 1;
+  unsigned int index = __builtin_popcountll(k_perf_register_mask & mask_below);
+
+  return sample->regs[index];
+}
+
+/// First SDT probe argument: the size for malloc entry, the pointer for free
+/// NOTE(review): reads the first-parameter register (RDI / X0) and ignores the
+/// probe's own argument spec -- confirm the probes keep the value there
+uint64_t get_sdt_arg1(const perf_event_sample *sample) {
+  return extract_register_value(sample, param_to_perf_regno(1));
+}
+
+/// Get the return value from a function (for malloc exit)
+uint64_t get_sdt_return_value(const perf_event_sample *sample) {
+  // Return value is in RAX (x86-64) or X0 (aarch64)
+#ifdef __x86_64__
+  return extract_register_value(sample, PAM_X86_RAX);
+#elif defined(__aarch64__)
+  return extract_register_value(sample, PAM_ARM_X0);
+#else
+#error Architecture not supported
+#endif
+}
+
+/// Unwind `sample`; `output` is only written when the result is not fatal
+DDRes unwind_sdt_sample(DDProfContext &ctx, perf_event_sample *sample,
+                        int /*watcher_pos*/, UnwindOutput &output) {
+  UnwindState *const us = ctx.worker_ctx.us;
+
+  ddprof_stats_add(STATS_SAMPLE_COUNT, 1, nullptr);
+  ddprof_stats_add(STATS_UNWIND_AVG_STACK_SIZE, sample->size_stack, nullptr);
+
+  // copy the sample context into the unwind structure
+  unwind_init_sample(us, sample->regs, sample->pid, sample->size_stack,
+                     sample->data_stack);
+
+  us->output.pid = sample->pid;
+  us->output.tid = sample->tid;
+
+  const DDRes res = unwindstate_unwind(us);
+  if (!IsDDResFatal(res)) {
+    output = us->output;
+  }
+  return res;
+}
+
+/// Process an SDT uprobe sample for memory allocation 
tracking +DDRes ddprof_pr_sdt_sample(DDProfContext &ctx, perf_event_sample *sample, + int watcher_pos, SDTProbeType probe_type) { + if (!sample) { + return ddres_warn(DD_WHAT_PERFSAMP); + } + + auto ticks0 = TscClock::cycles_now(); + PerfWatcher *watcher = &ctx.watchers[watcher_pos]; + struct UnwindState *us = ctx.worker_ctx.us; + SDTAllocationCorrelator &correlator = ctx.sdt_correlator; + + switch (probe_type) { + case SDTProbeType::kMallocEntry: { + // Extract allocation size from first argument + uint64_t size = get_sdt_arg1(sample); + LG_DBG("SDT malloc entry: pid=%d tid=%d size=%lu", sample->pid, sample->tid, + size); + + // Unwind and capture stack trace at entry + UnwindOutput stack; + DDRes res = unwind_sdt_sample(ctx, sample, watcher_pos, stack); + if (!IsDDResFatal(res)) { + correlator.on_malloc_entry(sample->pid, sample->tid, size, sample->time, + std::move(stack)); + } + break; + } + + case SDTProbeType::kMallocExit: { + // Extract returned pointer + uintptr_t ptr = get_sdt_return_value(sample); + LG_DBG("SDT malloc exit: pid=%d tid=%d ptr=%p", sample->pid, sample->tid, + reinterpret_cast(ptr)); + + // Try to correlate with entry + auto result = + correlator.on_malloc_exit(sample->pid, sample->tid, ptr, sample->time); + if (result && ptr != 0) { + // Successfully correlated - aggregate the allocation + int const i_export = ctx.worker_ctx.i_current_pprof; + DDProfPProf *pprof = ctx.worker_ctx.pprof[i_export]; + + // Register live allocation for tracking + if (Any(EventAggregationMode::kLiveSum & watcher->aggregation_mode)) { + ctx.worker_ctx.live_allocation.register_allocation( + result->stack, ptr, result->size, watcher_pos, sample->pid); + } + + // Aggregate the sample + if (Any(EventAggregationMode::kSum & watcher->aggregation_mode)) { + uint64_t timestamp = 0; + if (ctx.params.timeline && result->timestamp != 0) { + timestamp = result->timestamp + ctx.worker_ctx.perfclock_offset; + } + const DDProfValuePack pack{static_cast(result->size), 1, + 
timestamp}; + + DDRES_CHECK_FWD(pprof_aggregate( + &result->stack, us->symbol_hdr, pack, watcher, + us->dso_hdr.get_file_info_vector(), ctx.params.show_samples, + kSumPos, ctx.worker_ctx.symbolizer, pprof)); + } + } + break; + } + + case SDTProbeType::kFreeEntry: { + // Extract pointer being freed + uintptr_t ptr = get_sdt_arg1(sample); + LG_DBG("SDT free entry: pid=%d tid=%d ptr=%p", sample->pid, sample->tid, + reinterpret_cast(ptr)); + + if (ptr != 0) { + // Register deallocation for live allocation tracking + ctx.worker_ctx.live_allocation.register_deallocation(ptr, watcher_pos, + sample->pid); + correlator.on_free_entry(sample->pid, sample->tid, ptr, sample->time); + } + break; + } + + case SDTProbeType::kFreeExit: + // We don't need to do anything for free exit + break; + + default: + LG_WRN("Unknown SDT probe type: %d", static_cast(probe_type)); + break; + } + + ddprof_stats_add(STATS_AGGREGATION_AVG_TIME, + TscClock::cycles_now() - ticks0, nullptr); + return {}; +} + /************************* perf_event_open() helpers **************************/ void ddprof_pr_mmap(DDProfContext &ctx, const perf_event_mmap2 *map, int watcher_pos, PerfClock::time_point timestamp) { @@ -762,7 +929,8 @@ struct perf_event_hdr_wpid : perf_event_header { }; DDRes ddprof_worker_process_event(const perf_event_header *hdr, int watcher_pos, - DDProfContext &ctx) { + DDProfContext &ctx, + SDTProbeType sdt_probe_type) { // global try catch to avoid leaking exceptions to main loop try { ddprof_stats_add(STATS_EVENT_COUNT, 1, nullptr); @@ -783,7 +951,13 @@ DDRes ddprof_worker_process_event(const perf_event_header *hdr, int watcher_pos, uint64_t const mask = watcher->sample_type; perf_event_sample *sample = hdr2samp(hdr, mask); if (sample) { - DDRES_CHECK_FWD(ddprof_pr_sample(ctx, sample, watcher_pos)); + // Check if this is an SDT uprobe sample + if (sdt_probe_type != SDTProbeType::kUnknown) { + DDRES_CHECK_FWD( + ddprof_pr_sdt_sample(ctx, sample, watcher_pos, sdt_probe_type)); + } else 
{ + DDRES_CHECK_FWD(ddprof_pr_sample(ctx, sample, watcher_pos)); + } } } break; diff --git a/src/perf_mainloop.cc b/src/perf_mainloop.cc index d0c7548e7..658e3cc57 100644 --- a/src/perf_mainloop.cc +++ b/src/perf_mainloop.cc @@ -275,8 +275,8 @@ DDRes worker_process_ring_buffers_ordered(std::span pes, return {}; } auto &pevent = pes[evt.buffer_idx]; - auto res = - ddprof_worker_process_event(evt.event, pevent.watcher_pos, ctx); + auto res = ddprof_worker_process_event(evt.event, pevent.watcher_pos, ctx, + pevent.sdt_probe_type); if (!IsDDResOK(res)) { return res; } @@ -333,7 +333,8 @@ worker_process_ring_buffers(std::span pes, DDProfContext &ctx, events = true; const auto *hdr = reinterpret_cast(buffer.data()); - DDRes res = ddprof_worker_process_event(hdr, pevent.watcher_pos, ctx); + DDRes res = ddprof_worker_process_event(hdr, pevent.watcher_pos, ctx, + pevent.sdt_probe_type); // Check for processing error if (IsDDResNotOK(res)) { @@ -350,7 +351,8 @@ worker_process_ring_buffers(std::span pes, DDProfContext &ctx, events = true; const auto *hdr = reinterpret_cast(buffer.data()); - DDRes res = ddprof_worker_process_event(hdr, pevent.watcher_pos, ctx); + DDRes res = ddprof_worker_process_event(hdr, pevent.watcher_pos, ctx, + pevent.sdt_probe_type); // Check for processing error if (IsDDResNotOK(res)) { diff --git a/src/pevent_lib.cc b/src/pevent_lib.cc index 49f249f80..33b919b2f 100644 --- a/src/pevent_lib.cc +++ b/src/pevent_lib.cc @@ -11,9 +11,11 @@ #include "lib/allocation_event.hpp" #include "perf.hpp" #include "ringbuffer_utils.hpp" +#include "sdt_probe.hpp" #include "sys_utils.hpp" #include "syscalls.hpp" #include "tracepoint_config.hpp" +#include "uprobe_attacher.hpp" #include "user_override.hpp" #include @@ -169,8 +171,39 @@ DDRes pevent_open(DDProfContext &ctx, std::span pids, int num_cpu, if (watcher->type < kDDPROF_TYPE_CUSTOM) { DDRES_CHECK_FWD(pevent_open_all_cpus(watcher, watcher_idx, pids, num_cpu, ctx.perf_clock_source, pevent_hdr)); + } else if 
(watcher->type == kDDPROF_TYPE_SDT_UPROBE) { + // SDT uprobe-based allocation profiling + // This is handled separately - the uprobe fds are stored in the context + // and we create PEvent entries for them here + if (ctx.sdt_attachments.empty()) { + LG_WRN("SDT uprobe watcher requested but no uprobes attached, " + "falling back to MPSC ring buffer"); + // Fall back to custom MPSC ring buffer (hook-based) + size_t pevent_idx = 0; + DDRES_CHECK_FWD(pevent_create(pevent_hdr, watcher_idx, &pevent_idx)); + int const order = pevent_compute_min_mmap_order( + k_mpsc_buffer_size_shift, watcher->options.stack_sample_size, + k_min_number_samples_per_ring_buffer); + DDRES_CHECK_FWD(ring_buffer_create( + order, RingBufferType::kMPSCRingBuffer, true, + &pevent_hdr->pes[pevent_idx])); + // Mark as custom event for the fallback case + pevent_hdr->pes[pevent_idx].custom_event = true; + } else { + // Create a PEvent entry for each attached uprobe + for (const auto &att : ctx.sdt_attachments) { + size_t pevent_idx = 0; + DDRES_CHECK_FWD(pevent_create(pevent_hdr, watcher_idx, &pevent_idx)); + pevent_set_info(att.fd, -1, pevent_hdr->pes[pevent_idx], + watcher->options.stack_sample_size); + // Store the probe type in the PEvent for later identification + pevent_hdr->pes[pevent_idx].sdt_probe_type = att.probe_type; + } + LG_NTC("Created %zu PEvent entries for SDT uprobes", + ctx.sdt_attachments.size()); + } } else { - // custom event, eg.allocation profiling + // custom event, eg.allocation profiling (hook-based) size_t pevent_idx = 0; DDRES_CHECK_FWD(pevent_create(pevent_hdr, watcher_idx, &pevent_idx)); int const order = pevent_compute_min_mmap_order( diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 01d4e6257..85e6593b8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -369,6 +369,11 @@ add_unit_test(tracepoint_config-ut tracepoint_config-ut.cc ../src/tracepoint_con add_unit_test(live_allocation-ut live_allocation-ut.cc ../src/live_allocation.cc) 
+add_unit_test(sdt_allocation_correlator-ut sdt_allocation_correlator-ut.cc + ../src/sdt_allocation_correlator.cc) + +add_unit_test(sdt_probe-ut sdt_probe-ut.cc ../src/sdt_probe.cc LIBRARIES ${ELFUTILS_LIBRARIES}) + add_unit_test(ddprof_process-ut ddprof_process-ut.cc ${PROCESS_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) add_unit_test(glibc_fixes-ut glibc_fixes-ut.cc ../src/lib/glibc_fixes.c LIBRARIES pthread) @@ -522,3 +527,6 @@ add_test( WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) add_exe(deep_stacks deep_stacks.cc) + +# SDT test binary - requires systemtap-sdt-dev +add_exe(sdt_test_binary sdt_test_binary.cc LIBRARIES Threads::Threads) From 88e09cecccb4e3715cf8b95ed13990c106f99626 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Fri, 19 Dec 2025 06:52:01 -0500 Subject: [PATCH 2/2] rough wip --- app/base-env/Dockerfile | 16 +- include/sdt_allocation_correlator.hpp | 127 ++++++ include/sdt_probe.hpp | 102 +++++ include/uprobe_attacher.hpp | 101 +++++ src/sdt_allocation_correlator.cc | 158 +++++++ src/sdt_probe.cc | 597 ++++++++++++++++++++++++++ src/uprobe_attacher.cc | 384 +++++++++++++++++ test/sdt_allocation_correlator-ut.cc | 211 +++++++++ test/sdt_probe-ut.cc | 244 +++++++++++ test/sdt_test_binary.cc | 165 +++++++ 10 files changed, 2102 insertions(+), 3 deletions(-) create mode 100644 include/sdt_allocation_correlator.hpp create mode 100644 include/sdt_probe.hpp create mode 100644 include/uprobe_attacher.hpp create mode 100644 src/sdt_allocation_correlator.cc create mode 100644 src/sdt_probe.cc create mode 100644 src/uprobe_attacher.cc create mode 100644 test/sdt_allocation_correlator-ut.cc create mode 100644 test/sdt_probe-ut.cc create mode 100644 test/sdt_test_binary.cc diff --git a/app/base-env/Dockerfile b/app/base-env/Dockerfile index c40c781be..a138b3221 100644 --- a/app/base-env/Dockerfile +++ b/app/base-env/Dockerfile @@ -70,6 +70,7 @@ RUN apt-get update \ software-properties-common \ ssh-client \ subversion \ + systemtap-sdt-dev \ unzip \ wget \ zlib1g-dev @@ 
-145,14 +146,23 @@ RUN VERSION="1.17.0" \ && rm -rf "googletest-${VERSION}" "${TAR_NAME}" # More recent Cppcheck (ubuntu defaults to a 1.8 version) -RUN VERSION="2.18.0" \ +# cppcheck 2.18.0 requires Python 3.7+, so use older version for Ubuntu 18 +# Older cppcheck needs CMAKE_POLICY_VERSION_MINIMUM for CMake 4.x compatibility +RUN if [ "${UBUNTU_VERSION}" -ge 20 ]; then \ + VERSION="2.18.0"; \ + SHA256="dc74e300ac59f2ef9f9c05c21d48ae4c8dd1ce17f08914dd30c738ff482e748f"; \ + CMAKE_COMPAT_FLAG=""; \ + else \ + VERSION="2.13.0"; \ + SHA256="8229afe1dddc3ed893248b8a723b428dc221ea014fbc76e6289840857c03d450"; \ + CMAKE_COMPAT_FLAG="-DCMAKE_POLICY_VERSION_MINIMUM=3.5"; \ + fi \ && TAR_NAME="${VERSION}.tar.gz" \ && curl -fsSLO "https://github.com/danmar/cppcheck/archive/refs/tags/${TAR_NAME}" \ - && SHA256="dc74e300ac59f2ef9f9c05c21d48ae4c8dd1ce17f08914dd30c738ff482e748f" \ && (printf "${SHA256} ${TAR_NAME}" | sha256sum --check --strict --status) \ && tar xf "${TAR_NAME}" \ && pushd "cppcheck-${VERSION}" \ - && cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=Release \ + && cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=Release ${CMAKE_COMPAT_FLAG} \ && cmake --build build -t install \ && popd \ && rm -rf "cppcheck-${VERSION}" "${TAR_NAME}" diff --git a/include/sdt_allocation_correlator.hpp b/include/sdt_allocation_correlator.hpp new file mode 100644 index 000000000..c41f394ca --- /dev/null +++ b/include/sdt_allocation_correlator.hpp @@ -0,0 +1,127 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. 
+ +#pragma once + +#include "unwind_output.hpp" + +#include +#include +#include +#include + +namespace ddprof { + +/// Represents a pending malloc that hasn't been matched with its exit yet +struct PendingAllocation { + pid_t pid; + pid_t tid; + uint64_t size; + uint64_t entry_timestamp; + UnwindOutput stack; // Stack trace captured at malloc entry +}; + +/// Result of a successful malloc correlation +struct CorrelatedAllocation { + uint64_t size; // Allocation size from entry + uintptr_t ptr; // Returned pointer from exit + UnwindOutput stack; // Stack trace from entry + uint64_t timestamp; // Exit timestamp +}; + +/// Correlates malloc entry (size) with exit (pointer) events +/// +/// When malloc is called: +/// 1. Entry probe fires with size argument -> on_malloc_entry() stores it +/// 2. Exit probe fires with returned pointer -> on_malloc_exit() correlates +/// +/// This class maintains per-thread pending allocations and matches them. +class SDTAllocationCorrelator { +public: + SDTAllocationCorrelator(); + ~SDTAllocationCorrelator(); + + // Non-copyable, movable + SDTAllocationCorrelator(const SDTAllocationCorrelator &) = delete; + SDTAllocationCorrelator &operator=(const SDTAllocationCorrelator &) = delete; + SDTAllocationCorrelator(SDTAllocationCorrelator &&) = default; + SDTAllocationCorrelator &operator=(SDTAllocationCorrelator &&) = default; + + /// Record a malloc entry event + /// @param pid Process ID + /// @param tid Thread ID + /// @param size Allocation size + /// @param timestamp Event timestamp + /// @param stack Stack trace at entry + void on_malloc_entry(pid_t pid, pid_t tid, uint64_t size, uint64_t timestamp, + UnwindOutput stack); + + /// Process a malloc exit event and try to correlate with entry + /// @param pid Process ID + /// @param tid Thread ID + /// @param ptr Returned pointer + /// @param timestamp Event timestamp + /// @return Correlated allocation if successful, nullopt if no matching entry + std::optional on_malloc_exit(pid_t pid, 
pid_t tid, + uintptr_t ptr, + uint64_t timestamp); + + /// Record a free entry event (for deallocation tracking) + /// This is a pass-through - just returns the info needed to track deallocation + /// @param pid Process ID + /// @param tid Thread ID + /// @param ptr Pointer being freed + /// @param timestamp Event timestamp + void on_free_entry(pid_t pid, pid_t tid, uintptr_t ptr, uint64_t timestamp); + + /// Clean up stale pending entries that are older than max_age_ns + /// Call this periodically to prevent memory leaks from lost events + /// @param current_time Current timestamp + /// @param max_age_ns Maximum age in nanoseconds + /// @return Number of stale entries cleaned up + size_t cleanup_stale(uint64_t current_time, uint64_t max_age_ns); + + /// Get the number of pending (unmatched) allocations + size_t pending_count() const { return _pending.size(); } + + /// Get statistics + uint64_t total_entries() const { return _total_entries; } + uint64_t total_exits() const { return _total_exits; } + uint64_t successful_correlations() const { return _successful_correlations; } + uint64_t missed_entries() const { return _missed_entries; } + uint64_t missed_exits() const { return _missed_exits; } + uint64_t stale_cleanups() const { return _stale_cleanups; } + + /// Reset statistics + void reset_stats(); + + /// Default maximum correlation age (1 second) + static constexpr uint64_t kDefaultMaxCorrelationAge = 1'000'000'000ULL; + +private: + /// Hash function for (pid, tid) pair + struct TidHash { + size_t operator()(const std::pair &p) const { + // Combine pid and tid into a single hash + return std::hash{}((static_cast(p.first) << 32) | + static_cast(p.second)); + } + }; + + // Key: (pid, tid), Value: pending allocation + // Each thread can have at most one pending malloc at a time + std::unordered_map, PendingAllocation, TidHash> + _pending; + + // Statistics + uint64_t _total_entries{0}; + uint64_t _total_exits{0}; + uint64_t _successful_correlations{0}; + uint64_t 
_missed_entries{0}; // Exit without matching entry + uint64_t _missed_exits{0}; // Entry overwritten before exit + uint64_t _stale_cleanups{0}; // Entries cleaned up due to age +}; + +} // namespace ddprof diff --git a/include/sdt_probe.hpp b/include/sdt_probe.hpp new file mode 100644 index 000000000..7dd877ec1 --- /dev/null +++ b/include/sdt_probe.hpp @@ -0,0 +1,102 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#pragma once + +#include +#include +#include +#include +#include + +namespace ddprof { + +/// Location type for SDT probe arguments +enum class SDTArgLocation : uint8_t { + kRegister, // Value in a CPU register + kMemory, // Value at memory location (base register + offset) + kConstant, // Constant/immediate value +}; + +/// Represents a single argument to an SDT probe +/// Arguments are encoded as assembly expressions like "8@%rdi" or "-4@8(%rbp)" +struct SDTArgument { + int8_t size; // Size in bytes (negative = signed) + SDTArgLocation location; + uint8_t base_reg; // Base register number (for register or memory) + uint8_t index_reg; // Index register (for scaled memory addressing) + uint8_t scale; // Scale factor for index register + int64_t offset; // Memory offset or constant value + std::string raw_spec; // Original argument specification +}; + +/// Represents a discovered SDT probe from .note.stapsdt section +struct SDTProbe { + std::string provider; // e.g., "ddprof_malloc" + std::string name; // e.g., "entry" + uint64_t address; // Probe location (virtual address in ELF) + uint64_t base; // Base address for prelink adjustment + uint64_t semaphore; // Semaphore address (0 if not used) + std::vector arguments; + + /// Get full probe name as "provider:name" + std::string full_name() const { return provider + ":" + name; } +}; + +/// Probe types 
for memory allocation tracking
+enum class SDTProbeType : uint8_t {
+  kUnknown,
+  kMallocEntry,
+  kMallocExit,
+  kFreeEntry,
+  kFreeExit,
+};
+
+/// Collection of SDT probes discovered from a binary
+struct SDTProbeSet {
+  std::string binary_path;
+  std::vector<SDTProbe> probes;
+
+  /// Find all probes matching a provider and name (pointers into `probes`)
+  std::vector<const SDTProbe *> find_probes(std::string_view provider,
+                                            std::string_view name) const;
+
+  /// Find a single probe by provider and name (first match, or nullptr)
+  const SDTProbe *find_probe(std::string_view provider,
+                             std::string_view name) const;
+
+  /// True if malloc entry/exit and free entry probes are all present
+  bool has_allocation_probes() const;
+
+  /// Map a probe's provider/name to its SDTProbeType (kUnknown if no match)
+  static SDTProbeType get_probe_type(const SDTProbe &probe);
+};
+
+/// Parse SDT probes from an ELF binary file
+/// Returns nullopt if no probes found or file cannot be read
+std::optional<SDTProbeSet> parse_sdt_probes(const char *filepath);
+
+/// Parse a single SDT argument specification
+/// Format: [+-]?size@location where location can be:
+///   - %reg (register)
+///   - $constant or bare constant (immediate value)
+///   - offset(%reg) (memory reference)
+///   - offset(%base,%index,scale) (scaled indexed; only %base is decoded)
+std::optional<SDTArgument> parse_sdt_argument(std::string_view arg_spec);
+
+/// Convert x86-64 register name to perf register number
+/// Returns -1 if register name is not recognized
+int x86_reg_name_to_perf_reg(std::string_view reg_name);
+
+/// Provider name for malloc probes
+inline constexpr std::string_view kMallocProvider = "ddprof_malloc";
+/// Provider name for free probes
+inline constexpr std::string_view kFreeProvider = "ddprof_free";
+/// Entry probe name
+inline constexpr std::string_view kEntryProbe = "entry";
+/// Exit probe name
+inline constexpr std::string_view kExitProbe = "exit";
+
+} // namespace ddprof
diff --git a/include/uprobe_attacher.hpp b/include/uprobe_attacher.hpp
new file mode 100644
index 000000000..26a74ada4
--- /dev/null
+++ b/include/uprobe_attacher.hpp
@@
-0,0 +1,101 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0. This product includes software
+// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
+// Datadog, Inc.
+
+#pragma once
+
+#include "ddres_def.hpp"
+#include "sdt_probe.hpp"
+
+#include <optional>
+#include <string>
+#include <sys/types.h>
+#include <vector>
+
+namespace ddprof {
+
+/// Configuration for a single uprobe attachment
+struct UprobeConfig {
+  std::string binary_path;       // Path to target binary
+  uint64_t offset{0};            // Offset within binary (from start of file)
+  bool is_return_probe{false};   // true for uretprobe
+  pid_t pid{-1};                 // Target PID (-1 for all processes)
+  uint32_t stack_sample_size{0}; // Size of user stack to capture
+};
+
+/// Represents an attached uprobe
+struct UprobeAttachment {
+  int fd{-1};                    // perf_event fd (-1 when not attached)
+  UprobeConfig config;           // Configuration used
+  const SDTProbe *probe{nullptr}; // Non-owning SDT probe (may be null)
+  SDTProbeType probe_type{SDTProbeType::kUnknown}; // Type for event processing
+};
+
+/// Attaches uprobes to SDT probe locations using perf_event_open
+class UprobeAttacher {
+public:
+  UprobeAttacher();
+  ~UprobeAttacher();
+
+  // Non-copyable
+  UprobeAttacher(const UprobeAttacher &) = delete;
+  UprobeAttacher &operator=(const UprobeAttacher &) = delete;
+
+  // Movable
+  UprobeAttacher(UprobeAttacher &&other) noexcept;
+  UprobeAttacher &operator=(UprobeAttacher &&other) noexcept;
+
+  /// Get the uprobe PMU type from sysfs
+  /// Returns nullopt if uprobes are not supported
+  std::optional<uint32_t> get_uprobe_type();
+
+  /// Attach a single uprobe
+  /// @param config Uprobe configuration
+  /// @param out Output attachment info
+  /// @return DDRes indicating success or failure
+  DDRes attach(const UprobeConfig &config, UprobeAttachment *out);
+
+  /// Attach all allocation-related SDT probes from a binary
+  /// @param probes SDT probe set discovered from the binary
+  /// @param pid Target process ID
+  /// @param stack_sample_size Size of user stack to capture
+  /// @param out Vector to store attachment info
+  /// @return DDRes indicating success or failure
+  DDRes attach_allocation_probes(const SDTProbeSet &probes, pid_t pid,
+                                 uint32_t stack_sample_size,
+                                 std::vector<UprobeAttachment> *out);
+
+  /// Get all current attachments
+  const std::vector<UprobeAttachment> &attachments() const {
+    return _attachments;
+  }
+
+  /// Detach and close all attached uprobes
+  void detach_all();
+
+  /// Enable all attached uprobes
+  DDRes enable_all();
+
+  /// Disable all attached uprobes
+  DDRes disable_all();
+
+private:
+  std::optional<uint32_t> _uprobe_type;
+  std::vector<UprobeAttachment> _attachments;
+};
+
+/// Read the uprobe PMU type from sysfs
+/// Returns nullopt if not available
+std::optional<uint32_t> read_uprobe_pmu_type();
+
+/// Convert a virtual address from an ELF file to a file offset
+/// This is needed because uprobes use file offsets, not virtual addresses
+/// @param binary_path Path to the binary
+/// @param vaddr Virtual address from SDT probe
+/// @param offset Output file offset
+/// @return DDRes indicating success or failure
+DDRes vaddr_to_file_offset(const char *binary_path, uint64_t vaddr,
+                           uint64_t *offset);
+
+} // namespace ddprof
diff --git a/src/sdt_allocation_correlator.cc b/src/sdt_allocation_correlator.cc
new file mode 100644
index 000000000..cbe13b4af
--- /dev/null
+++ b/src/sdt_allocation_correlator.cc
@@ -0,0 +1,158 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0. This product includes software
+// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
+// Datadog, Inc.
+ +#include "sdt_allocation_correlator.hpp" + +#include "logger.hpp" + +namespace ddprof { + +SDTAllocationCorrelator::SDTAllocationCorrelator() = default; +SDTAllocationCorrelator::~SDTAllocationCorrelator() = default; + +void SDTAllocationCorrelator::on_malloc_entry(pid_t pid, pid_t tid, + uint64_t size, uint64_t timestamp, + UnwindOutput stack) { + ++_total_entries; + + auto key = std::make_pair(pid, tid); + + // Check if there's already a pending entry for this thread + auto it = _pending.find(key); + if (it != _pending.end()) { + // This means we missed the exit for the previous malloc + // This can happen if: + // - The malloc returned an error (NULL) + // - We lost the exit event due to ring buffer overflow + // - Nested allocation in signal handler (rare) + ++_missed_exits; + LG_DBG("Overwriting pending malloc entry for pid=%d tid=%d (missed exit?)", + pid, tid); + } + + // Store the new pending allocation + _pending[key] = PendingAllocation{ + .pid = pid, + .tid = tid, + .size = size, + .entry_timestamp = timestamp, + .stack = std::move(stack), + }; + + LG_DBG("Recorded malloc entry: pid=%d tid=%d size=%lu timestamp=%lu", pid, + tid, size, timestamp); +} + +std::optional +SDTAllocationCorrelator::on_malloc_exit(pid_t pid, pid_t tid, uintptr_t ptr, + uint64_t timestamp) { + ++_total_exits; + + auto key = std::make_pair(pid, tid); + auto it = _pending.find(key); + + if (it == _pending.end()) { + // No matching entry found + // This can happen if: + // - We lost the entry event due to ring buffer overflow + // - The profiler started between entry and exit + // - pid/tid mismatch due to thread migration (shouldn't happen) + ++_missed_entries; + LG_DBG("malloc exit without entry for pid=%d tid=%d ptr=%p", pid, tid, + reinterpret_cast(ptr)); + return std::nullopt; + } + + PendingAllocation &pending = it->second; + + // Sanity checks + if (timestamp < pending.entry_timestamp) { + // Exit before entry? 
Clock skew or event reordering issue + LG_DBG("malloc exit before entry for pid=%d tid=%d (clock skew?)", pid, + tid); + _pending.erase(it); + ++_missed_entries; + return std::nullopt; + } + + // Check for stale entries (entry/exit too far apart) + uint64_t duration = timestamp - pending.entry_timestamp; + if (duration > kDefaultMaxCorrelationAge) { + LG_DBG("Stale malloc entry/exit for pid=%d tid=%d (duration=%lu ns)", pid, + tid, duration); + _pending.erase(it); + ++_stale_cleanups; + return std::nullopt; + } + + // Ignore NULL returns (failed allocations) + if (ptr == 0) { + LG_DBG("malloc returned NULL for pid=%d tid=%d size=%lu", pid, tid, + pending.size); + _pending.erase(it); + return std::nullopt; + } + + // Success! Create correlated allocation + ++_successful_correlations; + + CorrelatedAllocation result{ + .size = pending.size, + .ptr = ptr, + .stack = std::move(pending.stack), + .timestamp = timestamp, + }; + + _pending.erase(it); + + LG_DBG("Correlated malloc: pid=%d tid=%d size=%lu ptr=%p", pid, tid, + result.size, reinterpret_cast(result.ptr)); + + return result; +} + +void SDTAllocationCorrelator::on_free_entry(pid_t pid, pid_t tid, uintptr_t ptr, + uint64_t timestamp) { + // For free, we don't need to correlate with an exit + // Just log for debugging + (void)pid; + (void)tid; + (void)ptr; + (void)timestamp; + LG_DBG("free entry: pid=%d tid=%d ptr=%p", pid, tid, + reinterpret_cast(ptr)); +} + +size_t SDTAllocationCorrelator::cleanup_stale(uint64_t current_time, + uint64_t max_age_ns) { + size_t cleaned = 0; + + for (auto it = _pending.begin(); it != _pending.end();) { + if (current_time > it->second.entry_timestamp && + current_time - it->second.entry_timestamp > max_age_ns) { + LG_DBG("Cleaning stale pending malloc: pid=%d tid=%d age=%lu ns", + it->second.pid, it->second.tid, + current_time - it->second.entry_timestamp); + it = _pending.erase(it); + ++cleaned; + ++_stale_cleanups; + } else { + ++it; + } + } + + return cleaned; +} + +void 
SDTAllocationCorrelator::reset_stats() { + _total_entries = 0; + _total_exits = 0; + _successful_correlations = 0; + _missed_entries = 0; + _missed_exits = 0; + _stale_cleanups = 0; +} + +} // namespace ddprof diff --git a/src/sdt_probe.cc b/src/sdt_probe.cc new file mode 100644 index 000000000..fb85016ce --- /dev/null +++ b/src/sdt_probe.cc @@ -0,0 +1,597 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#include "sdt_probe.hpp" + +#include "defer.hpp" +#include "logger.hpp" +#include "unique_fd.hpp" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std::literals; + +namespace ddprof { + +namespace { + +// Note type for SystemTap SDT probes +constexpr Elf64_Word kNtStapsdt = 3; + +// Note name for SDT probes (null-terminated, padded to 4-byte alignment) +constexpr std::string_view kStapsdtNoteName = "stapsdt\0"sv; + +// Section name for SDT notes +constexpr const char *kStapsdtSectionName = ".note.stapsdt"; + +/// x86-64 register name to PERF_REG_X86_* mapping +/// Based on linux/arch/x86/include/uapi/asm/perf_regs.h +struct RegMapping { + const char *name; + int perf_reg; +}; + +// PERF_REG_X86_* values from perf_regs.h +enum { + PERF_REG_X86_AX = 0, + PERF_REG_X86_BX = 1, + PERF_REG_X86_CX = 2, + PERF_REG_X86_DX = 3, + PERF_REG_X86_SI = 4, + PERF_REG_X86_DI = 5, + PERF_REG_X86_BP = 6, + PERF_REG_X86_SP = 7, + PERF_REG_X86_IP = 8, + PERF_REG_X86_FLAGS = 9, + PERF_REG_X86_CS = 10, + PERF_REG_X86_SS = 11, + PERF_REG_X86_DS = 12, + PERF_REG_X86_ES = 13, + PERF_REG_X86_FS = 14, + PERF_REG_X86_GS = 15, + PERF_REG_X86_R8 = 16, + PERF_REG_X86_R9 = 17, + PERF_REG_X86_R10 = 18, + PERF_REG_X86_R11 = 19, + PERF_REG_X86_R12 = 20, + PERF_REG_X86_R13 = 21, + PERF_REG_X86_R14 = 22, + PERF_REG_X86_R15 = 23, +}; + +// Register mappings 
for x86-64 +// Includes both 64-bit and 32-bit register names +static const RegMapping kX86RegMappings[] = { + // 64-bit registers + {"rax", PERF_REG_X86_AX}, + {"rbx", PERF_REG_X86_BX}, + {"rcx", PERF_REG_X86_CX}, + {"rdx", PERF_REG_X86_DX}, + {"rsi", PERF_REG_X86_SI}, + {"rdi", PERF_REG_X86_DI}, + {"rbp", PERF_REG_X86_BP}, + {"rsp", PERF_REG_X86_SP}, + {"rip", PERF_REG_X86_IP}, + {"r8", PERF_REG_X86_R8}, + {"r9", PERF_REG_X86_R9}, + {"r10", PERF_REG_X86_R10}, + {"r11", PERF_REG_X86_R11}, + {"r12", PERF_REG_X86_R12}, + {"r13", PERF_REG_X86_R13}, + {"r14", PERF_REG_X86_R14}, + {"r15", PERF_REG_X86_R15}, + // 32-bit registers (lower 32 bits of 64-bit regs) + {"eax", PERF_REG_X86_AX}, + {"ebx", PERF_REG_X86_BX}, + {"ecx", PERF_REG_X86_CX}, + {"edx", PERF_REG_X86_DX}, + {"esi", PERF_REG_X86_SI}, + {"edi", PERF_REG_X86_DI}, + {"ebp", PERF_REG_X86_BP}, + {"esp", PERF_REG_X86_SP}, + {"r8d", PERF_REG_X86_R8}, + {"r9d", PERF_REG_X86_R9}, + {"r10d", PERF_REG_X86_R10}, + {"r11d", PERF_REG_X86_R11}, + {"r12d", PERF_REG_X86_R12}, + {"r13d", PERF_REG_X86_R13}, + {"r14d", PERF_REG_X86_R14}, + {"r15d", PERF_REG_X86_R15}, + // 16-bit registers + {"ax", PERF_REG_X86_AX}, + {"bx", PERF_REG_X86_BX}, + {"cx", PERF_REG_X86_CX}, + {"dx", PERF_REG_X86_DX}, + {"si", PERF_REG_X86_SI}, + {"di", PERF_REG_X86_DI}, + {"bp", PERF_REG_X86_BP}, + {"sp", PERF_REG_X86_SP}, + // 8-bit registers (low bytes) + {"al", PERF_REG_X86_AX}, + {"bl", PERF_REG_X86_BX}, + {"cl", PERF_REG_X86_CX}, + {"dl", PERF_REG_X86_DX}, + {"sil", PERF_REG_X86_SI}, + {"dil", PERF_REG_X86_DI}, + {"bpl", PERF_REG_X86_BP}, + {"spl", PERF_REG_X86_SP}, + {"r8b", PERF_REG_X86_R8}, + {"r9b", PERF_REG_X86_R9}, + {"r10b", PERF_REG_X86_R10}, + {"r11b", PERF_REG_X86_R11}, + {"r12b", PERF_REG_X86_R12}, + {"r13b", PERF_REG_X86_R13}, + {"r14b", PERF_REG_X86_R14}, + {"r15b", PERF_REG_X86_R15}, +}; + +/// Find note section by name +Elf_Scn *find_note_section(Elf *elf, const char *section_name) { + size_t stridx; + if (elf_getshdrstrndx(elf, 
&stridx) != 0) {
+    return nullptr;
+  }
+
+  Elf_Scn *section = nullptr;
+  GElf_Shdr section_header;
+  while ((section = elf_nextscn(elf, section)) != nullptr) {
+    if (!gelf_getshdr(section, &section_header) ||
+        section_header.sh_type != SHT_NOTE) {
+      continue;
+    }
+
+    const char *name = elf_strptr(elf, stridx, section_header.sh_name);
+    if (name && !strcmp(name, section_name)) {
+      return section;
+    }
+  }
+
+  return nullptr;
+}
+
+/// Parse SDT note descriptor to extract probe information
+/// Returns false if the descriptor is too small or a string is unterminated
+bool parse_sdt_note_desc(const std::byte *desc, size_t desc_size,
+                         bool is_64bit, SDTProbe &probe) {
+  const size_t addr_size = is_64bit ? 8 : 4;
+  const size_t min_size = 3 * addr_size; // pc, base, semaphore
+
+  if (desc_size < min_size) {
+    LG_DBG("SDT note descriptor too small: %zu < %zu", desc_size, min_size);
+    return false;
+  }
+
+  // Read fixed fields (3 addresses, each addr_size bytes wide)
+  size_t pos = 0;
+
+  if (is_64bit) {
+    uint64_t val;
+    memcpy(&val, desc + pos, sizeof(val));
+    probe.address = val;
+    pos += sizeof(val);
+
+    memcpy(&val, desc + pos, sizeof(val));
+    probe.base = val;
+    pos += sizeof(val);
+
+    memcpy(&val, desc + pos, sizeof(val));
+    probe.semaphore = val;
+    pos += sizeof(val);
+  } else {
+    uint32_t val;
+    memcpy(&val, desc + pos, sizeof(val));
+    probe.address = val;
+    pos += sizeof(val);
+
+    memcpy(&val, desc + pos, sizeof(val));
+    probe.base = val;
+    pos += sizeof(val);
+
+    memcpy(&val, desc + pos, sizeof(val));
+    probe.semaphore = val;
+    pos += sizeof(val);
+  }
+
+  // Read null-terminated strings: provider, name, arguments
+  const char *str_ptr = reinterpret_cast<const char *>(desc + pos);
+  const char *end_ptr = reinterpret_cast<const char *>(desc + desc_size);
+
+  // Provider (strnlen is bounded by the bytes left in the descriptor)
+  size_t len = strnlen(str_ptr, end_ptr - str_ptr);
+  if (str_ptr + len >= end_ptr) {
+    LG_DBG("SDT note: provider string not terminated");
+    return false;
+  }
+  probe.provider = std::string(str_ptr, len);
+  str_ptr += len + 1;
+
+  // Name
+  len = strnlen(str_ptr, end_ptr - str_ptr);
+  if (str_ptr +
len >= end_ptr) { + LG_DBG("SDT note: probe name string not terminated"); + return false; + } + probe.name = std::string(str_ptr, len); + str_ptr += len + 1; + + // Arguments (may be empty) + len = strnlen(str_ptr, end_ptr - str_ptr); + std::string args_str(str_ptr, len); + + // Parse space-separated arguments + if (!args_str.empty()) { + size_t arg_start = 0; + while (arg_start < args_str.size()) { + // Skip leading spaces + while (arg_start < args_str.size() && args_str[arg_start] == ' ') { + ++arg_start; + } + if (arg_start >= args_str.size()) { + break; + } + + // Find end of argument + size_t arg_end = args_str.find(' ', arg_start); + if (arg_end == std::string::npos) { + arg_end = args_str.size(); + } + + std::string_view arg_spec(args_str.data() + arg_start, + arg_end - arg_start); + if (auto arg = parse_sdt_argument(arg_spec)) { + probe.arguments.push_back(std::move(*arg)); + } else { + LG_DBG("Failed to parse SDT argument: %.*s", + static_cast(arg_spec.size()), arg_spec.data()); + } + + arg_start = arg_end + 1; + } + } + + return true; +} + +/// Parse SDT probes from an ELF handle +std::optional parse_sdt_probes_from_elf(Elf *elf, + const char *filepath) { + // Determine if 64-bit ELF + GElf_Ehdr ehdr; + if (!gelf_getehdr(elf, &ehdr)) { + LG_DBG("Failed to get ELF header for %s", filepath); + return std::nullopt; + } + bool is_64bit = (ehdr.e_ident[EI_CLASS] == ELFCLASS64); + + // Find .note.stapsdt section + Elf_Scn *note_scn = find_note_section(elf, kStapsdtSectionName); + if (!note_scn) { + LG_DBG("No %s section found in %s", kStapsdtSectionName, filepath); + return std::nullopt; + } + + Elf_Data *data = elf_getdata(note_scn, nullptr); + if (!data || data->d_size == 0) { + LG_DBG("Empty %s section in %s", kStapsdtSectionName, filepath); + return std::nullopt; + } + + SDTProbeSet result; + result.binary_path = filepath; + + // Iterate over notes in the section + size_t pos = 0; + GElf_Nhdr note_header; + size_t name_pos; + size_t desc_pos; + + while 
((pos = gelf_getnote(data, pos, &note_header, &name_pos, &desc_pos)) >
+         0) {
+    // Check note type and name
+    if (note_header.n_type != kNtStapsdt) {
+      continue;
+    }
+
+    // Verify note name is "stapsdt"
+    const char *note_name =
+        reinterpret_cast<const char *>(data->d_buf) + name_pos;
+    if (note_header.n_namesz != kStapsdtNoteName.size() ||
+        memcmp(note_name, kStapsdtNoteName.data(), kStapsdtNoteName.size()) !=
+            0) {
+      continue;
+    }
+
+    // Parse the descriptor
+    const auto *desc =
+        reinterpret_cast<const std::byte *>(data->d_buf) + desc_pos;
+
+    SDTProbe probe;
+    if (parse_sdt_note_desc(desc, note_header.n_descsz, is_64bit, probe)) {
+      LG_DBG("Found SDT probe: %s:%s at 0x%lx", probe.provider.c_str(),
+             probe.name.c_str(), probe.address);
+      result.probes.push_back(std::move(probe));
+    }
+  }
+
+  if (result.probes.empty()) {
+    return std::nullopt;
+  }
+
+  return result;
+}
+
+} // anonymous namespace
+
+int x86_reg_name_to_perf_reg(std::string_view reg_name) {
+  for (const auto &mapping : kX86RegMappings) {
+    if (reg_name == mapping.name) {
+      return mapping.perf_reg;
+    }
+  }
+  return -1;
+}
+
+std::optional<SDTArgument> parse_sdt_argument(std::string_view arg_spec) {
+  // Format: [-]?size@location (a negative size marks a signed value)
+  // Examples: "8@%rdi", "-4@%esi", "8@-8(%rbp)", "-4@$42"
+
+  if (arg_spec.empty()) {
+    return std::nullopt;
+  }
+
+  SDTArgument arg;
+  arg.raw_spec = std::string(arg_spec);
+  arg.base_reg = 0;
+  arg.index_reg = 0;
+  arg.scale = 0;
+  arg.offset = 0;
+
+  // Find @ separator
+  auto at_pos = arg_spec.find('@');
+  if (at_pos == std::string_view::npos || at_pos == 0) {
+    LG_DBG("Invalid SDT argument format (no @): %.*s",
+           static_cast<int>(arg_spec.size()), arg_spec.data());
+    return std::nullopt;
+  }
+
+  // Parse size (before @); int8_t makes from_chars reject out-of-range sizes
+  auto size_str = arg_spec.substr(0, at_pos);
+  int8_t size_val = 0;
+  auto [ptr, ec] = std::from_chars(size_str.data(),
+                                   size_str.data() + size_str.size(), size_val);
+  if (ec != std::errc{} || ptr != size_str.data() + size_str.size()) {
+    LG_DBG("Invalid SDT argument size: %.*s", static_cast<int>(size_str.size()),
+           size_str.data());
+    return std::nullopt;
+  }
+  arg.size = static_cast<int8_t>(size_val);
+
+  // Parse location (after @)
+  auto loc_str = arg_spec.substr(at_pos + 1);
+  if (loc_str.empty()) {
+    LG_DBG("Empty SDT argument location");
+    return std::nullopt;
+  }
+
+  if (loc_str[0] == '%') {
+    // Register: %rdi, %rax, etc.
+    arg.location = SDTArgLocation::kRegister;
+    auto reg_name = loc_str.substr(1);
+    int reg_num = x86_reg_name_to_perf_reg(reg_name);
+    if (reg_num < 0) {
+      LG_DBG("Unknown register in SDT argument: %.*s",
+             static_cast<int>(reg_name.size()), reg_name.data());
+      return std::nullopt;
+    }
+    arg.base_reg = static_cast<uint8_t>(reg_num);
+  } else if (loc_str[0] == '$') {
+    // Constant: $42 (the whole token must be numeric)
+    arg.location = SDTArgLocation::kConstant;
+    auto const_str = loc_str.substr(1);
+    int64_t const_val = 0;
+    auto [p, e] = std::from_chars(
+        const_str.data(), const_str.data() + const_str.size(), const_val);
+    if (e != std::errc{} || p != const_str.data() + const_str.size()) {
+      LG_DBG("Invalid constant in SDT argument: %.*s",
+             static_cast<int>(const_str.size()), const_str.data());
+      return std::nullopt;
+    }
+    arg.offset = const_val;
+  } else {
+    // Memory reference: offset(%reg) or offset(%base,%index,scale)
+    arg.location = SDTArgLocation::kMemory;
+
+    // Find the opening parenthesis
+    auto paren_pos = loc_str.find('(');
+    if (paren_pos == std::string_view::npos) {
+      // No parenthesis - could be a bare constant or symbol
+      // Try to parse as constant
+      int64_t val = 0;
+      auto [p, e] =
+          std::from_chars(loc_str.data(), loc_str.data() + loc_str.size(), val);
+      if (e == std::errc{} && p == loc_str.data() + loc_str.size()) {
+        arg.location = SDTArgLocation::kConstant;
+        arg.offset = val;
+        return arg;
+      }
+      LG_DBG("Unsupported SDT argument location format: %.*s",
+             static_cast<int>(loc_str.size()), loc_str.data());
+      return std::nullopt;
+    }
+
+    // Parse offset before parenthesis (must be consumed entirely)
+    if (paren_pos > 0) {
+      auto offset_str = loc_str.substr(0, paren_pos);
+      int64_t offset_val = 0;
+      auto [p, e] = std::from_chars(
+          offset_str.data(), offset_str.data() + offset_str.size(), offset_val);
+      if (e != std::errc{} || p != offset_str.data() + offset_str.size()) {
+        LG_DBG("Invalid offset in SDT memory argument: %.*s",
+               static_cast<int>(offset_str.size()), offset_str.data());
+        return std::nullopt;
+      }
+      arg.offset = offset_val;
+    }
+
+    // Find closing parenthesis
+    auto close_paren = loc_str.find(')', paren_pos);
+    if (close_paren == std::string_view::npos) {
+      LG_DBG("Missing closing parenthesis in SDT argument: %.*s",
+             static_cast<int>(loc_str.size()), loc_str.data());
+      return std::nullopt;
+    }
+
+    // Parse register(s) inside parentheses
+    auto regs_str = loc_str.substr(paren_pos + 1, close_paren - paren_pos - 1);
+
+    // Check for comma (indicates base,index,scale format)
+    auto comma_pos = regs_str.find(',');
+    if (comma_pos == std::string_view::npos) {
+      // Simple format: (%reg)
+      if (regs_str.empty() || regs_str[0] != '%') {
+        LG_DBG("Invalid register format in SDT argument: %.*s",
+               static_cast<int>(regs_str.size()), regs_str.data());
+        return std::nullopt;
+      }
+      auto reg_name = regs_str.substr(1);
+      int reg_num = x86_reg_name_to_perf_reg(reg_name);
+      if (reg_num < 0) {
+        LG_DBG("Unknown register in SDT memory argument: %.*s",
+               static_cast<int>(reg_name.size()), reg_name.data());
+        return std::nullopt;
+      }
+      arg.base_reg = static_cast<uint8_t>(reg_num);
+    } else {
+      // Complex format: (%base,%index,scale) or (%base,%index)
+      // Only the base register is decoded; index_reg and scale stay 0, so
+      // the resulting address is incomplete for such arguments
+      LG_DBG("Scaled indexed addressing not fully supported: %.*s",
+             static_cast<int>(loc_str.size()), loc_str.data());
+
+      auto base_str = regs_str.substr(0, comma_pos);
+      if (base_str.empty() || base_str[0] != '%') {
+        return std::nullopt;
+      }
+      auto reg_name = base_str.substr(1);
+      int reg_num = x86_reg_name_to_perf_reg(reg_name);
+      if (reg_num < 0) {
+        return std::nullopt;
+      }
+      arg.base_reg = static_cast<uint8_t>(reg_num);
+    }
+  }
+
+  return arg;
+}
+
+std::optional<SDTProbeSet> parse_sdt_probes(const char *filepath) {
+  if (!filepath ||
filepath[0] == '\0') { + LG_DBG("Empty filepath for SDT probe parsing"); + return std::nullopt; + } + + UniqueFd fd{::open(filepath, O_RDONLY)}; + if (!fd) { + LG_DBG("Failed to open %s for SDT probe parsing: %s", filepath, + strerror(errno)); + return std::nullopt; + } + + // Initialize libelf if needed + if (elf_version(EV_CURRENT) == EV_NONE) { + LG_ERR("ELF library initialization failed: %s", elf_errmsg(-1)); + return std::nullopt; + } + + Elf *elf = elf_begin(fd.get(), ELF_C_READ_MMAP, nullptr); + if (!elf) { + LG_DBG("Failed to open ELF file %s: %s", filepath, elf_errmsg(-1)); + return std::nullopt; + } + defer { elf_end(elf); }; + + // Check that it's actually an ELF file + if (elf_kind(elf) != ELF_K_ELF) { + LG_DBG("%s is not an ELF file", filepath); + return std::nullopt; + } + + return parse_sdt_probes_from_elf(elf, filepath); +} + +std::vector +SDTProbeSet::find_probes(std::string_view provider, + std::string_view name) const { + std::vector result; + for (const auto &probe : probes) { + if (probe.provider == provider && probe.name == name) { + result.push_back(&probe); + } + } + return result; +} + +const SDTProbe *SDTProbeSet::find_probe(std::string_view provider, + std::string_view name) const { + for (const auto &probe : probes) { + if (probe.provider == provider && probe.name == name) { + return &probe; + } + } + return nullptr; +} + +bool SDTProbeSet::has_allocation_probes() const { + // Check for required probes: malloc entry/exit and free entry + // free exit is optional + bool has_malloc_entry = find_probe(kMallocProvider, kEntryProbe) != nullptr; + bool has_malloc_exit = find_probe(kMallocProvider, kExitProbe) != nullptr; + bool has_free_entry = find_probe(kFreeProvider, kEntryProbe) != nullptr; + + if (!has_malloc_entry) { + LG_DBG("Missing SDT probe: %.*s:%.*s", + static_cast(kMallocProvider.size()), kMallocProvider.data(), + static_cast(kEntryProbe.size()), kEntryProbe.data()); + } + if (!has_malloc_exit) { + LG_DBG("Missing SDT probe: 
%.*s:%.*s", + static_cast(kMallocProvider.size()), kMallocProvider.data(), + static_cast(kExitProbe.size()), kExitProbe.data()); + } + if (!has_free_entry) { + LG_DBG("Missing SDT probe: %.*s:%.*s", + static_cast(kFreeProvider.size()), kFreeProvider.data(), + static_cast(kEntryProbe.size()), kEntryProbe.data()); + } + + return has_malloc_entry && has_malloc_exit && has_free_entry; +} + +SDTProbeType SDTProbeSet::get_probe_type(const SDTProbe &probe) { + if (probe.provider == kMallocProvider) { + if (probe.name == kEntryProbe) { + return SDTProbeType::kMallocEntry; + } + if (probe.name == kExitProbe) { + return SDTProbeType::kMallocExit; + } + } else if (probe.provider == kFreeProvider) { + if (probe.name == kEntryProbe) { + return SDTProbeType::kFreeEntry; + } + if (probe.name == kExitProbe) { + return SDTProbeType::kFreeExit; + } + } + return SDTProbeType::kUnknown; +} + +} // namespace ddprof diff --git a/src/uprobe_attacher.cc b/src/uprobe_attacher.cc new file mode 100644 index 000000000..77293176a --- /dev/null +++ b/src/uprobe_attacher.cc @@ -0,0 +1,384 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. 
+ +#include "uprobe_attacher.hpp" + +#include "ddres.hpp" +#include "defer.hpp" +#include "logger.hpp" +#include "perf.hpp" +#include "perf_archmap.hpp" +#include "unique_fd.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ddprof { + +namespace { + +// Path to uprobe PMU type in sysfs +constexpr const char *kUprobePmuTypePath = + "/sys/bus/event_source/devices/uprobe/type"; + +// Bit to set in config for return probes +constexpr uint64_t kRetprobeBit = 1ULL << 0; + +// Path to uprobe retprobe bit offset +constexpr const char *kUprobeRetprobeBitPath = + "/sys/bus/event_source/devices/uprobe/format/retprobe"; + +/// Read an integer value from a sysfs file +std::optional read_sysfs_uint(const char *path) { + std::ifstream f(path); + if (!f) { + return std::nullopt; + } + uint32_t value; + if (!(f >> value)) { + return std::nullopt; + } + return value; +} + +/// Parse retprobe bit position from format file +/// Format is "config:N" where N is the bit position +std::optional read_retprobe_bit() { + std::ifstream f(kUprobeRetprobeBitPath); + if (!f) { + // Default to bit 0 if file doesn't exist + return 0; + } + std::string line; + if (!std::getline(f, line)) { + return 0; + } + // Parse "config:N" + auto colon_pos = line.find(':'); + if (colon_pos == std::string::npos) { + return 0; + } + try { + return std::stoi(line.substr(colon_pos + 1)); + } catch (...) 
{
+    return 0;
+  }
+}
+
+} // anonymous namespace
+
+std::optional<uint32_t> read_uprobe_pmu_type() {
+  return read_sysfs_uint(kUprobePmuTypePath);
+}
+
+DDRes vaddr_to_file_offset(const char *binary_path, uint64_t vaddr,
+                           uint64_t *offset) {
+  UniqueFd fd{::open(binary_path, O_RDONLY)};
+  if (!fd) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN, "Failed to open %s: %s",
+                           binary_path, strerror(errno));
+  }
+
+  if (elf_version(EV_CURRENT) == EV_NONE) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN, "ELF library init failed: %s",
+                           elf_errmsg(-1));
+  }
+
+  Elf *elf = elf_begin(fd.get(), ELF_C_READ_MMAP, nullptr);
+  if (!elf) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN, "Failed to open ELF %s: %s",
+                           binary_path, elf_errmsg(-1));
+  }
+  defer { elf_end(elf); };
+
+  GElf_Ehdr ehdr;
+  if (!gelf_getehdr(elf, &ehdr)) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN, "Failed to get ELF header: %s",
+                           elf_errmsg(-1));
+  }
+
+  // Find the PT_LOAD segment containing this virtual address
+  size_t phnum = 0;
+  if (elf_getphdrnum(elf, &phnum) != 0) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN,
+                           "Failed to get program header count: %s",
+                           elf_errmsg(-1));
+  }
+
+  for (size_t i = 0; i < phnum; ++i) {
+    GElf_Phdr phdr;
+    if (!gelf_getphdr(elf, i, &phdr)) {
+      continue;
+    }
+
+    if (phdr.p_type != PT_LOAD) {
+      continue;
+    }
+
+    // Only the file-backed part of a segment (p_filesz) maps to a file offset
+    if (vaddr >= phdr.p_vaddr && vaddr - phdr.p_vaddr < phdr.p_filesz) {
+      // Convert to file offset
+      *offset = phdr.p_offset + (vaddr - phdr.p_vaddr);
+      LG_DBG("Converted vaddr 0x%lx to file offset 0x%lx in %s", vaddr, *offset,
+             binary_path);
+      return {};
+    }
+  }
+
+  DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN,
+                         "No segment found for vaddr 0x%lx in %s", vaddr,
+                         binary_path);
+}
+
+UprobeAttacher::UprobeAttacher() = default;
+
+UprobeAttacher::~UprobeAttacher() { detach_all(); }
+
+UprobeAttacher::UprobeAttacher(UprobeAttacher &&other) noexcept
+    : _uprobe_type(other._uprobe_type),
+      _attachments(std::move(other._attachments)) {
+
other._attachments.clear(); +} + +UprobeAttacher &UprobeAttacher::operator=(UprobeAttacher &&other) noexcept { + if (this != &other) { + detach_all(); + _uprobe_type = other._uprobe_type; + _attachments = std::move(other._attachments); + other._attachments.clear(); + } + return *this; +} + +std::optional UprobeAttacher::get_uprobe_type() { + if (!_uprobe_type) { + _uprobe_type = read_uprobe_pmu_type(); + if (_uprobe_type) { + LG_DBG("Uprobe PMU type: %u", *_uprobe_type); + } else { + LG_DBG("Uprobe PMU type not available"); + } + } + return _uprobe_type; +} + +DDRes UprobeAttacher::attach(const UprobeConfig &config, + UprobeAttachment *out) { + auto uprobe_type = get_uprobe_type(); + if (!uprobe_type) { + DDRES_RETURN_ERROR_LOG( + DD_WHAT_PERFOPEN, + "Uprobe PMU type not available - kernel may not support uprobes"); + } + + // Get retprobe bit position + static std::optional retprobe_bit = read_retprobe_bit(); + if (!retprobe_bit) { + retprobe_bit = 0; + } + + struct perf_event_attr attr = {}; + attr.size = sizeof(attr); + attr.type = *uprobe_type; + + // For uprobe PMU: + // - config encodes whether this is a retprobe + // - config1 is the path to the binary (pointer) + // - config2 is the offset within the binary + + if (config.is_return_probe) { + attr.config = 1ULL << *retprobe_bit; + } + + // The binary path needs to remain valid for the lifetime of the perf event + // Store path as config1 (kernel will copy it) + attr.config1 = reinterpret_cast(config.binary_path.c_str()); + attr.config2 = config.offset; + + // Sample configuration + attr.sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | + PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | + PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER; + + attr.sample_regs_user = k_perf_register_mask; + attr.sample_stack_user = config.stack_sample_size; + + // Sample every hit (no sampling) + attr.sample_period = 1; + + // Other flags + attr.disabled = 1; // Start disabled, enable later + attr.exclude_kernel = 
1;
+  attr.exclude_hv = 1;
+  attr.mmap = 1;      // Get mmap events for symbol resolution
+  attr.mmap2 = 1;     // Get mmap2 events with extended info
+  attr.comm = 1;      // Get comm events for thread names
+  attr.task = 1;      // Get fork/exit events
+  attr.watermark = 1; // Use watermark for wakeups
+  attr.wakeup_watermark =
+      config.stack_sample_size * 4; // Wake up when buffer has this many bytes
+
+  int fd = perf_event_open(&attr, config.pid, -1, -1, PERF_FLAG_FD_CLOEXEC);
+  if (fd < 0) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN,
+                           "Failed to attach uprobe at %s+0x%lx (pid=%d, "
+                           "retprobe=%d): %s",
+                           config.binary_path.c_str(), config.offset,
+                           config.pid, config.is_return_probe, strerror(errno));
+  }
+
+  LG_NFO("Attached uprobe at %s+0x%lx (fd=%d, pid=%d, retprobe=%d)",
+         config.binary_path.c_str(), config.offset, fd, config.pid,
+         config.is_return_probe);
+
+  out->fd = fd;
+  out->config = config;
+  out->probe = nullptr;
+  out->probe_type = SDTProbeType::kUnknown;
+
+  _attachments.push_back(*out);
+
+  return {};
+}
+
+// Attaches uprobes on the allocation SDT probes of probes.binary_path for
+// process `pid`. malloc entry, malloc exit and free entry are required; free
+// exit is attached when it can be.
+//
+// probes:            probe set parsed from the target binary.
+// pid:               process passed to perf_event_open() for every probe.
+// stack_sample_size: user stack bytes captured per sample.
+// out:               every successful attachment is appended here; the same
+//                    attachment is also tracked internally, so detach_all(),
+//                    enable_all() and disable_all() cover it.
+//
+// Fails with DD_WHAT_PERFOPEN when the probe set lacks a required probe, or
+// when a required probe cannot be attached. In the latter case detach_all()
+// closes every fd owned by this attacher and *out is emptied, so the caller is
+// never handed a closed descriptor.
+DDRes UprobeAttacher::attach_allocation_probes(
+    const SDTProbeSet &probes, pid_t pid, uint32_t stack_sample_size,
+    std::vector<UprobeAttachment> *out) {
+
+  if (!probes.has_allocation_probes()) {
+    DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN,
+                           "Binary does not have all required allocation SDT "
+                           "probes (need %.*s:entry/exit and %.*s:entry)",
+                           static_cast<int>(kMallocProvider.size()),
+                           kMallocProvider.data(),
+                           static_cast<int>(kFreeProvider.size()),
+                           kFreeProvider.data());
+  }
+
+  // Define required probes and their configurations
+  struct ProbeSpec {
+    std::string_view provider;
+    std::string_view name;
+    bool is_return_probe;
+    SDTProbeType type;
+  };
+
+  static const ProbeSpec required_probes[] = {
+      {kMallocProvider, kEntryProbe, false, SDTProbeType::kMallocEntry},
+      {kMallocProvider, kExitProbe, false,
+       SDTProbeType::kMallocExit}, // exit probe, but not a retprobe
+      {kFreeProvider, kEntryProbe, false, SDTProbeType::kFreeEntry},
+  };
+
+  // Optional probes
+  static const ProbeSpec optional_probes[] = {
+      {kFreeProvider, kExitProbe, false, SDTProbeType::kFreeExit},
+  };
+
+  // Attaches a single probe. Returns a warning when the probe is absent from
+  // the set and forwards the error when address translation or attach() fails.
+  auto attach_probe = [&](const ProbeSpec &spec) -> DDRes {
+    // NOTE(review): find_probe() yields a single probe. A binary that emits
+    // the same provider:name from several call sites gets only one of them
+    // instrumented here - consider iterating find_probes() instead.
+    const SDTProbe *probe = probes.find_probe(spec.provider, spec.name);
+    if (!probe) {
+      LG_DBG("SDT probe %.*s:%.*s not found",
+             static_cast<int>(spec.provider.size()), spec.provider.data(),
+             static_cast<int>(spec.name.size()), spec.name.data());
+      return ddres_warn(DD_WHAT_PERFOPEN);
+    }
+
+    // Convert virtual address to file offset
+    uint64_t file_offset = 0;
+    DDRES_CHECK_FWD(vaddr_to_file_offset(probes.binary_path.c_str(),
+                                         probe->address, &file_offset));
+
+    UprobeConfig config;
+    config.binary_path = probes.binary_path;
+    config.offset = file_offset;
+    config.is_return_probe = spec.is_return_probe;
+    config.pid = pid;
+    config.stack_sample_size = stack_sample_size;
+
+    UprobeAttachment attachment;
+    DDRES_CHECK_FWD(attach(config, &attachment));
+
+    attachment.probe = probe;
+    attachment.probe_type = spec.type;
+
+    // attach() tracked its own copy before the probe info was known; bring
+    // that copy (the last element) in line with the one handed to the caller.
+    if (!_attachments.empty()) {
+      _attachments.back().probe = probe;
+      _attachments.back().probe_type = spec.type;
+    }
+
+    out->push_back(attachment);
+    return {};
+  };
+
+  // Attach required probes
+  for (const auto &spec : required_probes) {
+    DDRes res = attach_probe(spec);
+    if (IsDDResFatal(res)) {
+      // detach_all() closes every fd this attacher owns. The entries already
+      // appended to *out are plain copies holding those same fd numbers, so
+      // drop them as well instead of returning closed descriptors.
+      detach_all();
+      out->clear();
+      return res;
+    }
+  }
+
+  // Attach optional probes (ignore failures)
+  for (const auto &spec : optional_probes) {
+    DDRes res = attach_probe(spec);
+    if (IsDDResNotOK(res)) {
+      LG_DBG("Optional SDT probe %.*s:%.*s not attached",
+             static_cast<int>(spec.provider.size()), spec.provider.data(),
+             static_cast<int>(spec.name.size()), spec.name.data());
+    }
+  }
+
+  LG_NTC("Attached %zu SDT probes for allocation tracking", out->size());
+  return {};
+}
+
+// Closes the fd of every tracked attachment and forgets them all. Copies of
+// these attachments held elsewhere keep the old fd numbers and must not be
+// used afterwards.
+void UprobeAttacher::detach_all() {
+  for (auto &att : _attachments) {
+    if (att.fd >= 0) {
+      ::close(att.fd);
+      att.fd = -1;
+    }
+  }
+  _attachments.clear();
+}
+
+// Enables every tracked perf event. Stops at the first ioctl failure and
+// returns DD_WHAT_PERFOPEN; events enabled before the failure stay enabled.
+DDRes UprobeAttacher::enable_all() {
+  for (const auto &att : _attachments) {
+    if (att.fd >= 0) {
+      if (ioctl(att.fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+        DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN,
+                               "Failed to enable uprobe fd=%d: %s", att.fd,
+                               strerror(errno));
+      }
+    }
+  }
+  return {};
+}
+
+// Disables every tracked perf event. Stops at the first ioctl failure and
+// returns DD_WHAT_PERFOPEN; events after the failing one are left untouched.
+DDRes UprobeAttacher::disable_all() {
+  for (const auto &att : _attachments) {
+    if (att.fd >= 0) {
+      if (ioctl(att.fd, PERF_EVENT_IOC_DISABLE, 0) < 0) {
+        DDRES_RETURN_ERROR_LOG(DD_WHAT_PERFOPEN,
+                               "Failed to disable uprobe fd=%d: %s", att.fd,
+                               strerror(errno));
+      }
+    }
+  }
+  return {};
+}
+
+} // namespace ddprof
diff --git a/test/sdt_allocation_correlator-ut.cc b/test/sdt_allocation_correlator-ut.cc
new file mode 100644
index 000000000..cd145b0a7
--- /dev/null
+++ b/test/sdt_allocation_correlator-ut.cc
@@ -0,0 +1,211 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0. This product includes software
+// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
+// Datadog, Inc.
+
+#include "sdt_allocation_correlator.hpp"
+#include "loghandle.hpp"
+
+#include <gtest/gtest.h>
+
+namespace ddprof {
+
+TEST(SDTAllocationCorrelatorTest, BasicCorrelation) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  UnwindOutput stack;
+  stack.pid = 123;
+  stack.tid = 456;
+  stack.locs.push_back({0x1234, 0x5678, 0x9abc});
+
+  pid_t pid = 123;
+  pid_t tid = 456;
+  uint64_t size = 1024;
+  uint64_t entry_time = 1000;
+  uint64_t exit_time = 1001;
+  uintptr_t ptr = 0xdeadbeef;
+
+  // Record malloc entry: one pending entry, counted once
+  correlator.on_malloc_entry(pid, tid, size, entry_time, stack);
+  EXPECT_EQ(correlator.pending_count(), 1);
+  EXPECT_EQ(correlator.total_entries(), 1);
+
+  // Record malloc exit: size and stack come from the entry, ptr and timestamp
+  auto result = correlator.on_malloc_exit(pid, tid, ptr, exit_time);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result->size, size);
+  EXPECT_EQ(result->ptr, ptr);
+  EXPECT_EQ(result->timestamp, exit_time);
+  EXPECT_EQ(result->stack.pid, stack.pid);
+  EXPECT_EQ(result->stack.tid, stack.tid);
+
+  EXPECT_EQ(correlator.pending_count(), 0);
+  EXPECT_EQ(correlator.total_exits(), 1);
+  EXPECT_EQ(correlator.successful_correlations(), 1);
+  EXPECT_EQ(correlator.missed_entries(), 0);
+}
+
+TEST(SDTAllocationCorrelatorTest, MissedEntry) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  pid_t pid = 123;
+  pid_t tid = 456;
+  uint64_t exit_time = 1001;
+  uintptr_t ptr = 0xdeadbeef;
+
+  // An exit without a preceding entry yields nothing and counts as missed
+  auto result = correlator.on_malloc_exit(pid, tid, ptr, exit_time);
+  EXPECT_FALSE(result.has_value());
+  EXPECT_EQ(correlator.missed_entries(), 1);
+  EXPECT_EQ(correlator.successful_correlations(), 0);
+}
+
+TEST(SDTAllocationCorrelatorTest, OverwrittenEntry) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  UnwindOutput stack1, stack2;
+  stack1.pid = 123;
+  stack1.tid = 456;
+  stack1.locs.push_back({0x1234, 0x5678, 0x9abc});
+  stack2.pid = 123;
+  stack2.tid = 456;
+  stack2.locs.push_back({0xaaaa, 0xbbbb, 0xcccc});
+
+  pid_t pid = 123;
+  pid_t tid = 456;
+
+  // Record first entry
+  correlator.on_malloc_entry(pid, tid, 1024, 1000, stack1);
+  EXPECT_EQ(correlator.pending_count(), 1);
+
+  // Second entry on the same thread replaces the first (one missed exit)
+  correlator.on_malloc_entry(pid, tid, 2048, 1001, stack2);
+  EXPECT_EQ(correlator.pending_count(), 1);
+  EXPECT_EQ(correlator.missed_exits(), 1);
+
+  // Exit should correlate with second entry
+  auto result = correlator.on_malloc_exit(pid, tid, 0xbeef, 1002);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result->size, 2048);
+}
+
+TEST(SDTAllocationCorrelatorTest, MultipleThreads) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  UnwindOutput stack1, stack2;
+  stack1.pid = 100;
+  stack1.tid = 1;
+  stack1.locs.push_back({0x1111, 0x2222, 0x3333});
+  stack2.pid = 100;
+  stack2.tid = 2;
+  stack2.locs.push_back({0x4444, 0x5555, 0x6666});
+
+  // Two threads of the same process with an allocation in flight each
+  correlator.on_malloc_entry(100, 1, 1024, 1000, stack1);
+  correlator.on_malloc_entry(100, 2, 2048, 1001, stack2);
+  EXPECT_EQ(correlator.pending_count(), 2);
+
+  // Thread 2 exits first
+  auto result2 = correlator.on_malloc_exit(100, 2, 0xaaaa, 1002);
+  ASSERT_TRUE(result2.has_value());
+  EXPECT_EQ(result2->size, 2048);
+  EXPECT_EQ(correlator.pending_count(), 1);
+
+  // Thread 1 exits
+  auto result1 = correlator.on_malloc_exit(100, 1, 0xbbbb, 1003);
+  ASSERT_TRUE(result1.has_value());
+  EXPECT_EQ(result1->size, 1024);
+  EXPECT_EQ(correlator.pending_count(), 0);
+
+  EXPECT_EQ(correlator.successful_correlations(), 2);
+}
+
+TEST(SDTAllocationCorrelatorTest, FreeEntry) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  pid_t pid = 123;
+  pid_t tid = 456;
+  uintptr_t ptr = 0xdeadbeef;
+  uint64_t timestamp = 1000;
+
+  // on_free_entry is a pass-through, just verifying it doesn't crash
+  EXPECT_NO_THROW(correlator.on_free_entry(pid, tid, ptr, timestamp));
+}
+
+TEST(SDTAllocationCorrelatorTest, CleanupStale) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  UnwindOutput stack;
+  stack.pid = 123;
+  stack.tid = 456;
+  stack.locs.push_back({0x1234, 0x5678, 0x9abc});
+
+  // Add entries with timestamps 1000, 2000 and 5000 on three threads
+  correlator.on_malloc_entry(100, 1, 1024, 1000, stack);
+  correlator.on_malloc_entry(100, 2, 2048, 2000, stack);
+  correlator.on_malloc_entry(100, 3, 4096, 5000, stack);
+  EXPECT_EQ(correlator.pending_count(), 3);
+
+  // Cleanup entries older than max_age (2000 ns) relative to current time 6000
+  uint64_t max_age = 2000; // 2000 ns
+  size_t cleaned = correlator.cleanup_stale(6000, max_age);
+
+  // Entries at 1000 and 2000 should be cleaned (older than 6000 - 2000 = 4000)
+  EXPECT_EQ(cleaned, 2);
+  EXPECT_EQ(correlator.pending_count(), 1);
+  EXPECT_EQ(correlator.stale_cleanups(), 2);
+}
+
+TEST(SDTAllocationCorrelatorTest, NullPointerMallocExit) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  UnwindOutput stack;
+  stack.pid = 123;
+  stack.tid = 456;
+  stack.locs.push_back({0x1234, 0x5678, 0x9abc});
+
+  // Entry with size
+  correlator.on_malloc_entry(123, 456, 1024, 1000, stack);
+  EXPECT_EQ(correlator.pending_count(), 1);
+
+  // Exit with null pointer (malloc failed)
+  auto result = correlator.on_malloc_exit(123, 456, 0, 1001);
+
+  // Should still correlate even with null pointer
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result->ptr, 0);
+  EXPECT_EQ(result->size, 1024);
+  EXPECT_EQ(correlator.pending_count(), 0);
+}
+
+TEST(SDTAllocationCorrelatorTest, ResetStats) {
+  LogHandle handle;
+  SDTAllocationCorrelator correlator;
+
+  UnwindOutput stack;
+  stack.pid = 123;
+  stack.tid = 456;
+  stack.locs.push_back({0x1234, 0x5678, 0x9abc});
+
+  correlator.on_malloc_entry(123, 456, 1024, 1000, stack);
+  correlator.on_malloc_exit(123, 456, 0xbeef, 1001);
+
+  EXPECT_EQ(correlator.total_entries(), 1);
+  EXPECT_EQ(correlator.total_exits(), 1);
+  EXPECT_EQ(correlator.successful_correlations(), 1);
+
+  correlator.reset_stats();
+
+  EXPECT_EQ(correlator.total_entries(), 0);
+  EXPECT_EQ(correlator.total_exits(), 0);
+  EXPECT_EQ(correlator.successful_correlations(), 0);
+}
+
+} // namespace ddprof
diff --git a/test/sdt_probe-ut.cc b/test/sdt_probe-ut.cc
new file mode 100644
index 000000000..914c5c5a0
--- /dev/null
+++ b/test/sdt_probe-ut.cc
@@ -0,0 +1,244 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0. This product includes software
+// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
+// Datadog, Inc.
+
+#include "sdt_probe.hpp"
+#include "loghandle.hpp"
+
+#include <gtest/gtest.h>
+
+namespace ddprof {
+
+TEST(SDTProbeTest, ParseArgumentRegister) {
+  // Register operand: "<size>@%<reg>"
+  auto arg = parse_sdt_argument("8@%rdi");
+  ASSERT_TRUE(arg.has_value());
+  EXPECT_EQ(arg->size, 8);
+  EXPECT_EQ(arg->location, SDTArgLocation::kRegister);
+  EXPECT_EQ(arg->raw_spec, "8@%rdi");
+
+  // Negative size means signed
+  arg = parse_sdt_argument("-4@%esi");
+  ASSERT_TRUE(arg.has_value());
+  EXPECT_EQ(arg->size, -4);
+  EXPECT_EQ(arg->location, SDTArgLocation::kRegister);
+}
+
+TEST(SDTProbeTest, ParseArgumentMemory) {
+  // Memory operand: "<size>@<offset>(%<reg>)"
+  auto arg = parse_sdt_argument("8@8(%rbp)");
+  ASSERT_TRUE(arg.has_value());
+  EXPECT_EQ(arg->size, 8);
+  EXPECT_EQ(arg->location, SDTArgLocation::kMemory);
+  EXPECT_EQ(arg->offset, 8);
+
+  // Negative offset
+  arg = parse_sdt_argument("4@-16(%rsp)");
+  ASSERT_TRUE(arg.has_value());
+  EXPECT_EQ(arg->size, 4);
+  EXPECT_EQ(arg->location, SDTArgLocation::kMemory);
+  EXPECT_EQ(arg->offset, -16);
+}
+
+TEST(SDTProbeTest, ParseArgumentConstant) {
+  // Constant operand: "<size>@$<value>"; the value is reported in `offset`
+  auto arg = parse_sdt_argument("4@$42");
+  ASSERT_TRUE(arg.has_value());
+  EXPECT_EQ(arg->size, 4);
+  EXPECT_EQ(arg->location, SDTArgLocation::kConstant);
+  EXPECT_EQ(arg->offset, 42);
+
+  // Negative constant
+  arg = parse_sdt_argument("8@$-1");
+  ASSERT_TRUE(arg.has_value());
+  EXPECT_EQ(arg->size, 8);
+  EXPECT_EQ(arg->location, SDTArgLocation::kConstant);
+  EXPECT_EQ(arg->offset, -1);
+}
+
+TEST(SDTProbeTest, ParseArgumentInvalid) {
+  // Empty spec, no '@', missing size, missing operand
+  EXPECT_FALSE(parse_sdt_argument("").has_value());
+  EXPECT_FALSE(parse_sdt_argument("invalid").has_value());
+  EXPECT_FALSE(parse_sdt_argument("@%rdi").has_value());
+  EXPECT_FALSE(parse_sdt_argument("8@").has_value());
+}
+
+TEST(SDTProbeTest, X86RegNameToPerf) {
+  // Test x86-64 register name to perf register mapping (no-op elsewhere)
+#ifdef __x86_64__
+  EXPECT_GE(x86_reg_name_to_perf_reg("rax"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("rdi"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("rsi"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("rdx"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("rcx"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("r8"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("r9"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("rsp"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("rbp"), 0);
+
+  // 32-bit register names should also work
+  EXPECT_GE(x86_reg_name_to_perf_reg("eax"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("edi"), 0);
+  EXPECT_GE(x86_reg_name_to_perf_reg("esi"), 0);
+
+  // Invalid register names
+  EXPECT_EQ(x86_reg_name_to_perf_reg("invalid"), -1);
+  EXPECT_EQ(x86_reg_name_to_perf_reg(""), -1);
+#endif
+}
+
+TEST(SDTProbeTest, SDTProbeSetFindProbe) {
+  SDTProbeSet probe_set;
+  probe_set.binary_path = "/test/binary";
+
+  SDTProbe probe1;
+  probe1.provider = "ddprof_malloc";
+  probe1.name = "entry";
+  probe1.address = 0x1000;
+  probe_set.probes.push_back(probe1);
+
+  SDTProbe probe2;
+  probe2.provider = "ddprof_malloc";
+  probe2.name = "exit";
+  probe2.address = 0x2000;
+  probe_set.probes.push_back(probe2);
+
+  SDTProbe probe3;
+  probe3.provider = "ddprof_free";
+  probe3.name = "entry";
+  probe3.address = 0x3000;
+  probe_set.probes.push_back(probe3);
+
+  // Find single probe
+  const SDTProbe *found = probe_set.find_probe("ddprof_malloc", "entry");
+  ASSERT_NE(found, nullptr);
+  EXPECT_EQ(found->address, 0x1000);
+
+  found = probe_set.find_probe("ddprof_malloc", "exit");
+  ASSERT_NE(found, nullptr);
+  EXPECT_EQ(found->address, 0x2000);
+
+  found = probe_set.find_probe("ddprof_free", "entry");
+  ASSERT_NE(found, nullptr);
+  EXPECT_EQ(found->address, 0x3000);
+
+  // Probe not found
+  found = probe_set.find_probe("ddprof_free", "exit");
+  EXPECT_EQ(found, nullptr);
+
+  found = probe_set.find_probe("unknown", "entry");
+  EXPECT_EQ(found, nullptr);
+}
+
+TEST(SDTProbeTest, SDTProbeSetFindProbes) {
+  SDTProbeSet probe_set;
+  probe_set.binary_path = "/test/binary";
+
+  // Add multiple probes with same provider/name (could happen with multiple
+  // call sites)
+  SDTProbe probe1;
+  probe1.provider = "test";
+  probe1.name = "point";
+  probe1.address = 0x1000;
+  probe_set.probes.push_back(probe1);
+
+  SDTProbe probe2;
+  probe2.provider = "test";
+  probe2.name = "point";
+  probe2.address = 0x2000;
+  probe_set.probes.push_back(probe2);
+
+  SDTProbe probe3;
+  probe3.provider = "other";
+  probe3.name = "point";
+  probe3.address = 0x3000;
+  probe_set.probes.push_back(probe3);
+
+  auto probes = probe_set.find_probes("test", "point");
+  EXPECT_EQ(probes.size(), 2);
+
+  probes = probe_set.find_probes("other", "point");
+  EXPECT_EQ(probes.size(), 1);
+
+  probes = probe_set.find_probes("unknown", "unknown");
+  EXPECT_EQ(probes.size(), 0);
+}
+
+TEST(SDTProbeTest, SDTProbeSetHasAllocationProbes) {
+  SDTProbeSet probe_set;
+  probe_set.binary_path = "/test/binary";
+
+  // Empty set should not have allocation probes
+  EXPECT_FALSE(probe_set.has_allocation_probes());
+
+  // Add malloc entry
+  SDTProbe probe1;
+  probe1.provider = "ddprof_malloc";
+  probe1.name = "entry";
+  probe_set.probes.push_back(probe1);
+  EXPECT_FALSE(probe_set.has_allocation_probes());
+
+  // Add malloc exit
+  SDTProbe probe2;
+  probe2.provider = "ddprof_malloc";
+  probe2.name = "exit";
+  probe_set.probes.push_back(probe2);
+  EXPECT_FALSE(probe_set.has_allocation_probes());
+
+  // Add free entry - now we have all required probes
+  SDTProbe probe3;
+  probe3.provider = "ddprof_free";
+  probe3.name = "entry";
+  probe_set.probes.push_back(probe3);
+  EXPECT_TRUE(probe_set.has_allocation_probes());
+}
+
+TEST(SDTProbeTest, GetProbeType) {
+  SDTProbe probe;
+
+  probe.provider = "ddprof_malloc";
+  probe.name = "entry";
+  EXPECT_EQ(SDTProbeSet::get_probe_type(probe), SDTProbeType::kMallocEntry);
+
+  probe.name = "exit";
+  EXPECT_EQ(SDTProbeSet::get_probe_type(probe), SDTProbeType::kMallocExit);
+
+  probe.provider = "ddprof_free";
+  probe.name = "entry";
+  EXPECT_EQ(SDTProbeSet::get_probe_type(probe), SDTProbeType::kFreeEntry);
+
+  probe.name = "exit";
+  EXPECT_EQ(SDTProbeSet::get_probe_type(probe), SDTProbeType::kFreeExit);
+
+  probe.provider = "unknown";
+  probe.name = "unknown";
+  EXPECT_EQ(SDTProbeSet::get_probe_type(probe), SDTProbeType::kUnknown);
+}
+
+TEST(SDTProbeTest, SDTProbeFullName) {
+  SDTProbe probe;
+  probe.provider = "ddprof_malloc";
+  probe.name = "entry";
+  EXPECT_EQ(probe.full_name(), "ddprof_malloc:entry");
+}
+
+// Test parsing SDT probes from a non-existent file
+TEST(SDTProbeTest, ParseNonExistentFile) {
+  auto result = parse_sdt_probes("/non/existent/file");
+  EXPECT_FALSE(result.has_value());
+}
+
+// Test parsing SDT probes from a file without SDT probes
+TEST(SDTProbeTest, ParseFileWithoutProbes) {
+  // Use /bin/ls as a test binary that likely doesn't have our specific probes
+  auto result = parse_sdt_probes("/bin/ls");
+  // May or may not have SDT probes, but shouldn't crash
+  // If it has probes, they won't be our allocation probes
+  if (result.has_value()) {
+    EXPECT_FALSE(result->has_allocation_probes());
+  }
+}
+
+} // namespace ddprof
diff --git a/test/sdt_test_binary.cc b/test/sdt_test_binary.cc
new file mode 100644
index 000000000..d4deebfc8
--- /dev/null
+++ b/test/sdt_test_binary.cc
@@ -0,0 +1,165 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
This product includes software
+// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
+// Datadog, Inc.
+
+// Test binary that includes SDT probes for memory allocation tracking.
+// This simulates a statically linked application with custom allocators
+// that use SDT probes for profiling.
+//
+// Build with: g++ -pthread -o sdt_test_binary sdt_test_binary.cc
+// Requires: systemtap-sdt-dev package for sys/sdt.h
+//
+// Verify probes with: readelf -n sdt_test_binary | grep -A4 stapsdt
+
+#include <sys/sdt.h>
+
+#include <cerrno>
+#include <chrono>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <thread>
+#include <vector>
+
+// Wrapper around malloc that fires SDT probes:
+// ddprof_malloc:entry(size) before the call, ddprof_malloc:exit(ptr) after it.
+void *my_malloc(size_t size) {
+  // Fire entry probe with size argument
+  DTRACE_PROBE1(ddprof_malloc, entry, size);
+
+  void *ptr = std::malloc(size);
+
+  // Fire exit probe with returned pointer
+  DTRACE_PROBE1(ddprof_malloc, exit, ptr);
+
+  return ptr;
+}
+
+// Wrapper around free that fires SDT probes:
+// ddprof_free:entry(ptr) before the call, ddprof_free:exit after it.
+void my_free(void *ptr) {
+  // Fire entry probe with pointer argument
+  DTRACE_PROBE1(ddprof_free, entry, ptr);
+
+  std::free(ptr);
+
+  // Fire exit probe (no arguments needed)
+  DTRACE_PROBE(ddprof_free, exit);
+}
+
+// Wrapper around calloc. Reported through the ddprof_malloc probes, with the
+// requested size given as nmemb * size.
+void *my_calloc(size_t nmemb, size_t size) {
+  size_t total = nmemb * size;
+  DTRACE_PROBE1(ddprof_malloc, entry, total);
+
+  void *ptr = std::calloc(nmemb, size);
+
+  DTRACE_PROBE1(ddprof_malloc, exit, ptr);
+
+  return ptr;
+}
+
+// Wrapper around realloc. Reported as a free of old_ptr (when non-null)
+// followed by a malloc of `size`.
+// NOTE(review): the free probes fire before realloc() runs, so a failed
+// realloc() - which leaves the old block alive - is still reported as a free.
+void *my_realloc(void *old_ptr, size_t size) {
+  // If old_ptr is not null, this is also a free
+  if (old_ptr != nullptr) {
+    DTRACE_PROBE1(ddprof_free, entry, old_ptr);
+    DTRACE_PROBE(ddprof_free, exit);
+  }
+
+  DTRACE_PROBE1(ddprof_malloc, entry, size);
+
+  void *ptr = std::realloc(old_ptr, size);
+
+  DTRACE_PROBE1(ddprof_malloc, exit, ptr);
+
+  return ptr;
+}
+
+// Performs `count` allocations of base_size + (i % 100) * 16 bytes, zeroes
+// each one, then frees all of them: every other block first, the rest after.
+void do_allocations(int count, size_t base_size) {
+  std::vector<void *> ptrs;
+  ptrs.reserve(count);
+
+  for (int i = 0; i < count; ++i) {
+    size_t size = base_size + (i % 100) * 16;
+    void *ptr = my_malloc(size);
+    if (ptr) {
+      std::memset(ptr, 0, size);
+      ptrs.push_back(ptr);
+    }
+  }
+
+  // Free half of the allocations
+  for (size_t i = 0; i < ptrs.size(); i += 2) {
+    my_free(ptrs[i]);
+    ptrs[i] = nullptr;
+  }
+
+  // Free remaining allocations
+  for (void *ptr : ptrs) {
+    if (ptr) {
+      my_free(ptr);
+    }
+  }
+}
+
+// Worker thread function: `iterations` rounds of 10 allocations whose base
+// size depends on thread_id, sleeping 10 ms after each round.
+void worker_thread(int thread_id, int iterations) {
+  for (int i = 0; i < iterations; ++i) {
+    do_allocations(10, 64 + thread_id * 32);
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  }
+}
+
+// Prints the command line help to stdout.
+void print_usage(const char *prog) {
+  std::cout << "Usage: " << prog << " [options]\n"
+            << "Options:\n"
+            << "  -n <count>  Number of allocation cycles (default: 100)\n"
+            << "  -t <count>  Number of worker threads (default: 2)\n"
+            << "  -s <bytes>  Base allocation size (default: 1024)\n"
+            << "  -h          Show this help\n";
+}
+
+// Parses a non-negative decimal integer that fits in an int.
+// Returns false, leaving *value untouched, on empty input, trailing junk, a
+// negative number or overflow.
+static bool parse_non_negative(const char *text, int *value) {
+  errno = 0;
+  char *end = nullptr;
+  const long parsed = std::strtol(text, &end, 10);
+  if (end == text || *end != '\0' || errno == ERANGE || parsed < 0 ||
+      parsed > INT_MAX) {
+    return false;
+  }
+  *value = static_cast<int>(parsed);
+  return true;
+}
+
+// Runs `cycles` allocations on the main thread, then splits `cycles` rounds
+// across `threads` worker threads. Returns 0 on success and 1 when an option
+// value is not a non-negative integer. Unknown options, and options given
+// without a value, are ignored.
+int main(int argc, char *argv[]) {
+  int cycles = 100;
+  int threads = 2;
+  int base = 1024;
+
+  for (int i = 1; i < argc; ++i) {
+    const char *opt = argv[i];
+    if (std::strcmp(opt, "-h") == 0) {
+      print_usage(argv[0]);
+      return 0;
+    }
+
+    int *target = nullptr;
+    if (std::strcmp(opt, "-n") == 0) {
+      target = &cycles;
+    } else if (std::strcmp(opt, "-t") == 0) {
+      target = &threads;
+    } else if (std::strcmp(opt, "-s") == 0) {
+      target = &base;
+    }
+    if (target == nullptr || i + 1 >= argc) {
+      continue;
+    }
+
+    // A negative count would reach vector::reserve() as a huge size_t and
+    // throw std::length_error, so reject bad values up front.
+    if (!parse_non_negative(argv[++i], target)) {
+      std::cerr << "Invalid value for " << opt << ": " << argv[i] << "\n";
+      print_usage(argv[0]);
+      return 1;
+    }
+  }
+  const size_t base_size = static_cast<size_t>(base);
+
+  std::cout << "SDT Test Binary\n"
+            << "Cycles: " << cycles << "\n"
+            << "Threads: " << threads << "\n"
+            << "Base size: " << base_size << "\n";
+
+  // Do some single-threaded allocations first
+  std::cout << "Starting single-threaded allocations...\n";
+  do_allocations(cycles, base_size);
+
+  // Start worker threads
+  std::cout << "Starting " << threads << " worker threads...\n";
+  std::vector<std::thread> workers;
+  workers.reserve(threads);
+
+  // With threads == 0 the loop body, and its division, never runs.
+  for (int i = 0; i < threads; ++i) {
+    workers.emplace_back(worker_thread, i, cycles / threads);
+  }
+
+  // Wait for all threads to complete
+  for (auto &t : workers) {
+    t.join();
+  }
+
+  std::cout << "Done.\n";
+  return 0;
+}