From b52d30cad8df9566fcb41e8428faa067b370c32c Mon Sep 17 00:00:00 2001 From: r1viollet Date: Sat, 18 Nov 2023 08:44:26 +0100 Subject: [PATCH 01/11] Inlining - v2 (WIP) - Add a unit test for inlining - Parse and cache line information --- include/dwfl_internals.hpp | 1 + include/dwfl_symbol.hpp | 1 + include/dwfl_symbol_lookup.hpp | 30 +++++-- include/symbol.hpp | 10 ++- include/unwind_output.hpp | 8 +- src/dwfl_symbol.cc | 52 +---------- src/dwfl_symbol_lookup.cc | 155 ++++++++++++++++++++++++++++----- src/pprof/ddprof_pprof.cc | 5 +- src/unwind_dwfl.cc | 4 +- test/CMakeLists.txt | 3 + test/dwfl_module-ut.cc | 64 +++++++++++++- 11 files changed, 239 insertions(+), 94 deletions(-) diff --git a/include/dwfl_internals.hpp b/include/dwfl_internals.hpp index 06f06b227..6adf7dcea 100644 --- a/include/dwfl_internals.hpp +++ b/include/dwfl_internals.hpp @@ -5,5 +5,6 @@ #pragma once +// #include #include #include diff --git a/include/dwfl_symbol.hpp b/include/dwfl_symbol.hpp index ca0a26217..b6accff4b 100644 --- a/include/dwfl_symbol.hpp +++ b/include/dwfl_symbol.hpp @@ -24,4 +24,5 @@ bool symbol_get_from_dwfl(Dwfl_Module *mod, ProcessAddress_t process_pc, bool compute_elf_range(ElfAddress_t file_pc, const GElf_Sym &elf_sym, ElfAddress_t &start_sym, ElfAddress_t &end_sym); + } // namespace ddprof diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index a17d5a450..5484fd020 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -8,6 +8,7 @@ #include "ddprof_defs.hpp" #include "ddprof_file_info-i.hpp" #include "ddprof_module.hpp" +#include "ddres.hpp" #include "dso.hpp" #include "dso_symbol_lookup.hpp" #include "hash_helper.hpp" @@ -42,16 +43,20 @@ struct DwflSymbolLookupStats { class DwflSymbolLookup { public: + using SymbolRange = std::pair; // build and check env var to know check setting DwflSymbolLookup(); // Get symbol from internal cache or fetch through dwarf - SymbolIdx_t get_or_insert(const DDProfMod &ddprof_mod, SymbolTable &table, + SymbolIdx_t get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, FileInfoId_t file_info_id, ProcessAddress_t process_pc, const Dso &dso); - void erase(FileInfoId_t file_info_id) { _file_info_map.erase(file_info_id); } + void erase(FileInfoId_t file_info_id) { + _file_info_function_map.erase(file_info_id); + } unsigned size() const; @@ -59,6 +64,12 @@ class DwflSymbolLookup { DwflSymbolLookupStats &stats() { return _stats; } private: + struct Line { + uint32_t _line; + ElfAddress_t _addr; + // possibly add a symbol ? + }; + using LineMap = std::map; /// Set through env var (DDPROF_CACHE_SETTING) in case of doubts on cache enum SymbolLookupSetting { K_CACHE_ON = 0, @@ -67,23 +78,30 @@ class DwflSymbolLookup { SymbolLookupSetting _lookup_setting{K_CACHE_ON}; - SymbolIdx_t insert(const DDProfMod &ddprof_mod, SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, + SymbolIdx_t insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, ProcessAddress_t process_pc, const Dso &dso, - SymbolMap &map); + SymbolMap &func_map, LineMap &line_map); // Symbols are ordered by file. // The assumption is that the elf addresses are the same across processes // The unordered map stores symbols per file, // The map stores symbols per address range using FileInfo2SymbolMap = std::unordered_map; + using FileInfo2LineMap = std::unordered_map; using FileInfo2SymbolVT = FileInfo2SymbolMap::value_type; + static DDRes parse_lines(Dwfl *dwfl, const DDProfMod &mod, + ProcessAddress_t process_pc, ElfAddress_t start_sym, + ElfAddress_t end_sym, LineMap &line_map, + SymbolTable &table, SymbolIdx_t current_sym); + static bool symbol_lookup_check(Dwfl_Module *mod, ElfAddress_t process_pc, const Symbol &symbol); // unordered map of DSO elements - FileInfo2SymbolMap _file_info_map; + FileInfo2SymbolMap _file_info_function_map; + FileInfo2LineMap _file_info_inlining_map; DwflSymbolLookupStats _stats; }; diff --git a/include/symbol.hpp b/include/symbol.hpp index 0e54e7a3c..90104e137 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -15,13 +15,14 @@ namespace ddprof { class Symbol { public: - Symbol() : _lineno(0) {} + Symbol() : _func_start_lineno(0), _parent_idx(-1) {} // Warning : Generates some string copies (these are not rvalues) Symbol(std::string symname, std::string demangle_name, uint32_t lineno, std::string srcpath) : _symname(std::move(symname)), _demangle_name(std::move(demangle_name)), - _lineno(lineno), _srcpath(std::move(srcpath)) {} + _func_start_lineno(lineno), _srcpath(std::move(srcpath)), + _parent_idx(-1) {} // OUTPUT OF ADDRINFO std::string _symname; @@ -30,7 +31,10 @@ class Symbol { std::string _demangle_name; // OUTPUT OF LINE INFO - uint32_t _lineno; + uint32_t _func_start_lineno; std::string _srcpath; + + // PARENT FUNCTION + SymbolIdx_t _parent_idx; }; } // namespace ddprof diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 267c7f0c3..ff2c8ad14 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -17,10 +17,10 @@ namespace ddprof { struct FunLoc { - uint64_t ip; // Relative to file, not VMA - SymbolIdx_t _symbol_idx; - MapInfoIdx_t _map_info_idx; - + uint64_t ip{}; + uint32_t _lineno{}; + SymbolIdx_t _symbol_idx{-1}; + MapInfoIdx_t _map_info_idx{-1}; friend auto operator<=>(const FunLoc &, const FunLoc &) = default; }; diff --git a/src/dwfl_symbol.cc b/src/dwfl_symbol.cc index 26142d248..b310cb82c 100644 --- a/src/dwfl_symbol.cc +++ b/src/dwfl_symbol.cc @@ -24,62 +24,14 @@ bool symbol_get_from_dwfl(Dwfl_Module *mod, ProcessAddress_t process_pc, bool symbol_success = false; const char *lsymname = dwfl_module_addrinfo( mod, process_pc, &loffset, &elf_sym, &lshndxp, &lelfp, &lbias); -#ifdef DEBUG - int dwfl_error_value = dwfl_errno(); - if (unlikely(dwfl_error_value)) { - LG_DBG("[DWFL_SYMB] addrinfo error -- Error:%s -- %lx", - dwfl_errmsg(dwfl_error_value), process_pc); - } -#else - dwfl_errno(); -#endif - if (lsymname) { symbol._symname = std::string(lsymname); symbol._demangle_name = Demangler::demangle(symbol._symname); symbol_success = true; } else { - return false; - } - -// #define FLAG_SYMBOL -// A small mechanism to create a trace around the expected function -#ifdef FLAG_SYMBOL - static constexpr std::string_view look_for_symb = "$x"; - if (symbol._demangle_name.find(look_for_symb) != std::string::npos) { - LG_NFO("DGB:: GOING THROUGH EXPECTED FUNC: %s", look_for_symb.data()); - } -#endif - Dwfl_Line *line = dwfl_module_getsrc(mod, process_pc); -#ifdef DEBUG - dwfl_error_value = dwfl_errno(); - if (unlikely(dwfl_error_value)) { - LG_DBG("[DWFL_SYMB] dwfl_src error pc=%lx : Error:%s (Sym=%s)", process_pc, - dwfl_errmsg(dwfl_error_value), symbol._demangle_name.c_str()); - } -#else - dwfl_errno(); -#endif - - if (line) { - int linep; - const char *localsrcpath = - dwfl_lineinfo(line, &process_pc, static_cast(&linep), nullptr, - nullptr, nullptr); - if (localsrcpath) { - symbol._srcpath = std::string(localsrcpath); - symbol._lineno = static_cast(linep); - } -#ifdef DEBUG - dwfl_error_value = dwfl_errno(); - if (unlikely(dwfl_error_value)) { - LG_DBG("[DWFL_SYMB] dwfl_lineinfo error pc=%lx : Error:%s (Sym=%s)", - process_pc, dwfl_errmsg(dwfl_error_value), - symbol._demangle_name.c_str()); - } -#else + // reset error state in case of dwfl error dwfl_errno(); -#endif + symbol_success = false; } return symbol_success; } diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 9c0e51917..4ac021fac 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -34,7 +34,7 @@ DwflSymbolLookup::DwflSymbolLookup() { unsigned DwflSymbolLookup::size() const { unsigned total_nb_elts = 0; std::for_each( - _file_info_map.begin(), _file_info_map.end(), + _file_info_function_map.begin(), _file_info_function_map.end(), [&](FileInfo2SymbolVT const &el) { total_nb_elts += el.second.size(); }); return total_nb_elts; } @@ -44,12 +44,10 @@ unsigned DwflSymbolLookup::size() const { /****************/ // Retrieve existing symbol or attempt to read from dwarf -SymbolIdx_t DwflSymbolLookup::get_or_insert(const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - FileInfoId_t file_info_id, - ProcessAddress_t process_pc, - const Dso &dso) { +SymbolIdx_t DwflSymbolLookup::get_or_insert( + Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, FileInfoId_t file_info_id, + ProcessAddress_t process_pc, const Dso &dso) { ++_stats._calls; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; @@ -57,7 +55,8 @@ SymbolIdx_t DwflSymbolLookup::get_or_insert(const DDProfMod &ddprof_mod, LG_DBG("Looking for : %lx = (%lx - %lx) / dso:%s", elf_pc, process_pc, ddprof_mod._low_addr, dso._filename.c_str()); #endif - SymbolMap &map = _file_info_map[file_info_id]; + SymbolMap &map = _file_info_function_map[file_info_id]; + LineMap &line_map = _file_info_inlining_map[file_info_id]; SymbolMap::FindRes const find_res = map.find_closest(elf_pc); if (find_res.second) { // already found the correct symbol #ifdef DEBUG @@ -77,15 +76,17 @@ SymbolIdx_t DwflSymbolLookup::get_or_insert(const DDProfMod &ddprof_mod, ++_stats._hit; return find_res.first->second.get_symbol_idx(); } - - return insert(ddprof_mod, table, dso_symbol_lookup, process_pc, dso, map); + // todo get line no + return insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, + map, line_map); } -SymbolIdx_t DwflSymbolLookup::insert(const DDProfMod &ddprof_mod, +SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, ProcessAddress_t process_pc, - const Dso &dso, SymbolMap &map) { + const Dso &dso, SymbolMap &func_map, + LineMap &line_map) { Symbol symbol; GElf_Sym elf_sym; @@ -115,7 +116,7 @@ SymbolIdx_t DwflSymbolLookup::insert(const DDProfMod &ddprof_mod, table[symbol_idx]._symname.c_str(), symbol_idx, dso.to_string().c_str()); #endif - map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); return symbol_idx; } @@ -133,31 +134,33 @@ SymbolIdx_t DwflSymbolLookup::insert(const DDProfMod &ddprof_mod, table.push_back(std::move(symbol)); Symbol &sym_ref = table.back(); - if (sym_ref._srcpath.empty()) { - // override with info from dso (this slightly mixes mappings and sources) - // But it helps a lot at Datadog (as mappings are ignored for now in UI) - sym_ref._srcpath = dso.format_filename(); - } + if (sym_ref._srcpath.empty()) {} if (!compute_elf_range(elf_pc, elf_sym, start_sym, end_sym)) { // elf section does not add up to something that makes sense // insert this PC without considering elf section start_sym = elf_pc; end_sym = elf_pc; -#ifdef DEBUG - LG_DBG("elf_range failure --> Insert: %lx,%lx -> %s,%d / shndx=%d", + LG_DBG("elf_range failure --> Insert: %lx,%lx -> %s, %d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); -#endif - map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - return symbol_idx; } #ifdef DEBUG LG_DBG("Insert: %lx,%lx -> %s,%d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); #endif - map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, start_sym, + end_sym, line_map, table, symbol_idx))) { + LG_DBG("Error when parsing line information for %s (%s)", + sym_ref._demangle_name.c_str(), dso._filename.c_str()); + } + if (sym_ref._srcpath.empty()) { + // override with info from dso (this slightly mixes mappings and sources) + // But it helps a lot at Datadog (as mappings are ignored for now in UI) + sym_ref._srcpath = dso.format_filename(); + } + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); return symbol_idx; } } @@ -226,4 +229,108 @@ void DwflSymbolLookupStats::reset() { _errors = 0; } +size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, + ElfAddress_t start_sym) { + size_t low = 0; + size_t high = nlines - 1; + + while (low <= high) { + size_t mid = low + (high - low) / 2; + Dwarf_Line *mid_line = dwarf_onesrcline(lines, mid); + Dwarf_Addr mid_addr; + dwarf_lineaddr(mid_line, &mid_addr); + + if (mid_addr < start_sym) { + low = mid + 1; + } else if (mid_addr > start_sym) { + high = mid - 1; + } else { + return mid; + } + + if (low == high) { + return low; + } + } + + return 0; // Return a default value if no suitable index is found +} + +DDRes DwflSymbolLookup::parse_lines(Dwfl *dwfl, const DDProfMod &mod, + ProcessAddress_t process_pc, + ElfAddress_t start_sym, + ElfAddress_t end_sym, LineMap &line_map, + SymbolTable &table, + SymbolIdx_t current_sym) { + Dwarf_Addr bias; + // This will lazily load the dwarf file + // the decompression can take time + // todo use dw access to avoid opening PID times + Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + Dwarf *dwarf = dwfl_module_getdwarf(mod._mod, &bias); + // ElfAddress_t elf_address = process_pc - bias; + LG_DBG("Start %lx - end %lx", start_sym, end_sym); + + if (!cudie) { + // This will fail in case of no debug info + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + + Dwarf_Lines *lines; + size_t nlines; + if (dwarf_getsrclines(cudie, &lines, &nlines) != 0) { + LG_DBG("Unable to find source lines"); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + auto it = line_map.begin(); + + // for now we care about first source file + const char *current_line_file = nullptr; + + size_t start_index = binary_search_start_index(lines, nlines, start_sym); + int start_lineno = -1; + int end_lineno = -1; + { + size_t end_index = binary_search_start_index(lines, nlines, end_sym); + Dwarf_Line *line = dwarf_onesrcline(lines, end_index); + + if (dwarf_lineno(line, &end_lineno) == -1) { + end_lineno = std::numeric_limits::max(); + } + } + + Symbol &ref_sym = table[current_sym]; + + // todo: different files + for (size_t i = start_index; i < nlines; ++i) { + Dwarf_Line *line = dwarf_onesrcline(lines, i); + Dwarf_Addr line_addr; + int lineno; + dwarf_lineaddr(line, &line_addr); + if (line_addr < start_sym) { + continue; + } + if (line_addr > end_sym) { + break; + } + current_line_file = dwarf_linesrc(line, nullptr, nullptr); + if (current_line_file && ref_sym._srcpath.empty()) { + ref_sym._srcpath = std::string(current_line_file); + } + if (dwarf_lineno(line, &lineno) == -1) { + lineno = 0; + } else if (!ref_sym._func_start_lineno) { + start_lineno = lineno; + ref_sym._func_start_lineno = lineno; + } + // ... Process line information here ... + LG_DBG("Dwarf file = %s / %d / %lx", current_line_file, lineno, line_addr); + it = line_map.insert(it, + std::pair( + static_cast(line_addr), + Line{static_cast(lineno), line_addr})); + } + return {}; +} + } // namespace ddprof diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index fe2d5ac61..c96b9ede6 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -27,8 +27,7 @@ void write_function(const Symbol &symbol, ddog_prof_Function *ffi_func) { ffi_func->name = to_CharSlice(symbol._demangle_name); ffi_func->system_name = to_CharSlice(symbol._symname); ffi_func->filename = to_CharSlice(symbol._srcpath); - // Not filed (can be computed if needed using the start range from elf) - ffi_func->start_line = 0; + ffi_func->start_line = symbol._func_start_lineno; } void write_mapping(const MapInfo &mapinfo, ddog_prof_Mapping *ffi_mapping) { @@ -44,7 +43,7 @@ void write_location(const FunLoc *loc, const MapInfo &mapinfo, write_mapping(mapinfo, &ffi_location->mapping); write_function(symbol, &ffi_location->function); ffi_location->address = loc->ip; - ffi_location->line = symbol._lineno; + ffi_location->line = loc->_lineno; } constexpr int k_max_value_types = diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index c533400ab..e8c10a329 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -210,13 +210,11 @@ int frame_cb(Dwfl_Frame *dwfl_frame, void *arg) { DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, const DDProfMod &ddprof_mod, FileInfoId_t file_info_id) { - SymbolHdr &unwind_symbol_hdr = us->symbol_hdr; - // get or create the dwfl symbol SymbolIdx_t const symbol_idx = unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( - ddprof_mod, unwind_symbol_hdr._symbol_table, + us->_dwfl_wrapper->_dwfl, ddprof_mod, unwind_symbol_hdr._symbol_table, unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso); MapInfoIdx_t const map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d3ee6c5c9..0e7c92f26 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -204,6 +204,9 @@ add_unit_test( ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc + ../src/dwfl_symbol_lookup.cc + ../src/dso_symbol_lookup.cc + ../src/symbol_map.cc ../src/demangler/demangler.cc ../src/dso.cc ../src/dso_hdr.cc diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index dee9682c7..1ebf4902d 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -11,6 +11,9 @@ #include "dwfl_symbol.hpp" #include "loghandle.hpp" +#include "dwfl_symbol_lookup.hpp" +#include "symbol_table.hpp" + #include #include #include @@ -38,7 +41,7 @@ int count_fds(pid_t pid) { } return fd_count; } - +#ifdef TEMP_REMOVE TEST(DwflModule, inconsistency_test) { pid_t my_pid = getpid(); int nb_fds_start = count_fds(my_pid); @@ -182,5 +185,64 @@ TEST(DwflModule, short_lived) { } } } +#endif +__attribute__((always_inline)) inline ElfAddress_t deeper_function() { + ElfAddress_t ip = _THIS_IP_; + LG_DBG("I'm deep!!"); + return ip; +} + +__attribute__((always_inline)) inline ElfAddress_t inlined_function() { + ElfAddress_t ip = deeper_function(); + LG_DBG("I'm going up!!"); + return ip; +} +ElfAddress_t my_custom_function() { + ElfAddress_t ip = inlined_function(); + LG_DBG("The actual ip = %lx", ip); + return ip; +} + +TEST(DwflModule, inlined_func) { + pid_t my_pid = getpid(); + LogHandle handle; + // Load DSOs from our unit test + ElfAddress_t ip = my_custom_function(); + DsoHdr dso_hdr; + ddprof::SymbolTable table; + DwflSymbolLookup symbol_lookup; + DsoSymbolLookup dso_lookup; + DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); + // Check that we found the DSO matching this IP + ASSERT_TRUE(find_res.second); + { + DwflWrapper dwfl_wrapper; + // retrieve the map associated to pid + DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; + for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { + Dso &dso = it->second; + if (!has_relevant_path(dso._type) || !dso.is_executable()) { + continue; // skip non exec / non standard (anon/vdso...) + } + FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); + ASSERT_TRUE(file_info_id > k_file_info_error); + + const FileInfoValue &file_info_value = + dso_hdr.get_file_info_value(file_info_id); + DDProfMod *ddprof_mod = nullptr; + auto res = dwfl_wrapper.register_mod(dso._start, + dso_hdr.get_elf_range(dso_map, it), + file_info_value, &ddprof_mod); + + ASSERT_TRUE(IsDDResOK(res)); + ASSERT_TRUE(ddprof_mod->_mod); + if (find_res.first == it) { + SymbolIdx_t symbol_idx = + symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, + dso_lookup, file_info_id, ip, dso); + } + } + } +} } // namespace ddprof From b608c9a3e53720f21a48927381d343217228f35d Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 23 Nov 2023 08:58:18 +0100 Subject: [PATCH 02/11] Introduce a nested symbol structure for inlined symbols Work in progress. --- include/dwfl_internals.hpp | 2 +- include/dwfl_symbol_lookup.hpp | 27 ++- include/symbol.hpp | 4 +- include/symbol_map.hpp | 47 ++++ src/dwfl_symbol_lookup.cc | 429 +++++++++++++++++++++++---------- src/symbol_map.cc | 39 ++- test/dwfl_module-ut.cc | 8 +- test/symbol_map-ut.cc | 23 ++ 8 files changed, 435 insertions(+), 144 deletions(-) diff --git a/include/dwfl_internals.hpp b/include/dwfl_internals.hpp index 6adf7dcea..969c203eb 100644 --- a/include/dwfl_internals.hpp +++ b/include/dwfl_internals.hpp @@ -5,6 +5,6 @@ #pragma once -// #include +#include #include #include diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index 5484fd020..952bbb13f 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -63,13 +63,19 @@ class DwflSymbolLookup { const DwflSymbolLookupStats &stats() const { return _stats; } DwflSymbolLookupStats &stats() { return _stats; } -private: struct Line { - uint32_t _line; - ElfAddress_t _addr; - // possibly add a symbol ? + uint32_t _line{}; + ElfAddress_t _addr{}; + SymbolIdx_t _symbol_idx{-1}; }; using LineMap = std::map; + using InlineMap = NestedSymbolMap; + struct SymbolWrapper { + LineMap _line_map; + SymbolMap _symbol_map; + InlineMap _inline_map; + }; +private: /// Set through env var (DDPROF_CACHE_SETTING) in case of doubts on cache enum SymbolLookupSetting { K_CACHE_ON = 0, @@ -81,26 +87,21 @@ class DwflSymbolLookup { SymbolIdx_t insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, ProcessAddress_t process_pc, const Dso &dso, - SymbolMap &func_map, LineMap &line_map); + SymbolWrapper &symbol_wrapper); // Symbols are ordered by file. // The assumption is that the elf addresses are the same across processes // The unordered map stores symbols per file, // The map stores symbols per address range - using FileInfo2SymbolMap = std::unordered_map; + using FileInfo2SymbolWrapper = std::unordered_map; using FileInfo2LineMap = std::unordered_map; - using FileInfo2SymbolVT = FileInfo2SymbolMap::value_type; - - static DDRes parse_lines(Dwfl *dwfl, const DDProfMod &mod, - ProcessAddress_t process_pc, ElfAddress_t start_sym, - ElfAddress_t end_sym, LineMap &line_map, - SymbolTable &table, SymbolIdx_t current_sym); + using FileInfo2SymbolVT = FileInfo2SymbolWrapper::value_type; static bool symbol_lookup_check(Dwfl_Module *mod, ElfAddress_t process_pc, const Symbol &symbol); // unordered map of DSO elements - FileInfo2SymbolMap _file_info_function_map; + FileInfo2SymbolWrapper _file_info_function_map; FileInfo2LineMap _file_info_inlining_map; DwflSymbolLookupStats _stats; }; diff --git a/include/symbol.hpp b/include/symbol.hpp index 90104e137..f634e4e6e 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -19,10 +19,10 @@ class Symbol { // Warning : Generates some string copies (these are not rvalues) Symbol(std::string symname, std::string demangle_name, uint32_t lineno, - std::string srcpath) + std::string srcpath, int parent_idx = -1) : _symname(std::move(symname)), _demangle_name(std::move(demangle_name)), _func_start_lineno(lineno), _srcpath(std::move(srcpath)), - _parent_idx(-1) {} + _parent_idx(parent_idx) {} // OUTPUT OF ADDRINFO std::string _symname; diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index 00ffb7bf8..1ca6984ad 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -7,6 +7,7 @@ #include #include "ddprof_defs.hpp" +#include namespace ddprof { @@ -32,6 +33,8 @@ class SymbolSpan { SymbolIdx_t _symbol_idx; }; + + class SymbolMap : private std::map { public: using Map = std::map; @@ -55,4 +58,48 @@ class SymbolMap : private std::map { FindRes find_closest(Offset_t norm_pc); }; + +class NestedSymbolSpan { +public: + NestedSymbolSpan() : _end(0), _symbol_idx(-1), _parent_addr(0) {} + NestedSymbolSpan(Offset_t end, SymbolIdx_t symbol_idx, + ElfAddress_t parent = 0) + : _end(end), _symbol_idx(symbol_idx), _parent_addr(parent) {} + void set_end(Offset_t end) { + if (end > _end) { + _end = end; + } + } + [[nodiscard]] Offset_t get_end() const { return _end; } + [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } + [[nodiscard]] ElfAddress_t get_parent_addr() const{ + return _parent_addr; + } + +private: + Offset_t _end; + SymbolIdx_t _symbol_idx; + ElfAddress_t _parent_addr; +}; +class NestedSymbolMap : private std::map { +public: + using Map = std::map; + using It = Map::iterator; + using ConstIt = Map::const_iterator; + using FindRes = std::pair; + using ValueType = Map::value_type; + using Map::begin; + using Map::clear; + using Map::emplace; + using Map::emplace_hint; + using Map::empty; + using Map::end; + using Map::erase; + using Map::size; + + FindRes find_closest(Offset_t norm_pc); + + static bool is_within(const Offset_t &norm_pc, const ValueType &kv); +}; + } // namespace ddprof diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 4ac021fac..990e1eb00 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -14,10 +14,61 @@ #include #include #include +#include // For std::iota +#include +#include #include +#define DEBUG + + namespace ddprof { +namespace { +struct DieInformation { + struct Function { + ElfAddress_t start_addr{}; + ElfAddress_t end_addr{}; + const char *func_name{}; + int parent_pos{-1}; // position within the die vector + SymbolIdx_t symbol_idx = -1; + }; + std::vector die_mem_vec{}; +}; + +struct DieSearchParam { + Dwarf_Addr addr; + Dwarf_Die *die_mem; +}; + +size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, + ElfAddress_t start_sym) { + size_t low = 0; + size_t high = nlines - 1; + + while (low <= high) { + size_t mid = low + (high - low) / 2; + Dwarf_Line *mid_line = dwarf_onesrcline(lines, mid); + Dwarf_Addr mid_addr; + dwarf_lineaddr(mid_line, &mid_addr); + + if (mid_addr < start_sym) { + low = mid + 1; + } else if (mid_addr > start_sym) { + high = mid - 1; + } else { + return mid; + } + + if (low == high) { + return low; + } + } + + return 0; // Return a default value if no suitable index is found +} +} // namespace + DwflSymbolLookup::DwflSymbolLookup() { if (const char *env_p = std::getenv("DDPROF_CACHE_SETTING")) { if (strcmp(env_p, "VALIDATE") == 0) { @@ -35,14 +86,10 @@ unsigned DwflSymbolLookup::size() const { unsigned total_nb_elts = 0; std::for_each( _file_info_function_map.begin(), _file_info_function_map.end(), - [&](FileInfo2SymbolVT const &el) { total_nb_elts += el.second.size(); }); + [&](FileInfo2SymbolVT const &el) { total_nb_elts += el.second._symbol_map.size(); }); return total_nb_elts; } -/****************/ -/* Range implem */ -/****************/ - // Retrieve existing symbol or attempt to read from dwarf SymbolIdx_t DwflSymbolLookup::get_or_insert( Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, @@ -55,8 +102,8 @@ SymbolIdx_t DwflSymbolLookup::get_or_insert( LG_DBG("Looking for : %lx = (%lx - %lx) / dso:%s", elf_pc, process_pc, ddprof_mod._low_addr, dso._filename.c_str()); #endif - SymbolMap &map = _file_info_function_map[file_info_id]; - LineMap &line_map = _file_info_inlining_map[file_info_id]; + SymbolWrapper &symbol_wrapper = _file_info_function_map[file_info_id]; + SymbolMap &map = symbol_wrapper._symbol_map; SymbolMap::FindRes const find_res = map.find_closest(elf_pc); if (find_res.second) { // already found the correct symbol #ifdef DEBUG @@ -76,24 +123,206 @@ SymbolIdx_t DwflSymbolLookup::get_or_insert( ++_stats._hit; return find_res.first->second.get_symbol_idx(); } - // todo get line no - return insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, - map, line_map); + return insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper); +} + +/* die_find callback for non-inlined function search */ +static int die_search_func_cb(Dwarf_Die *fn_die, void *data) { + DieSearchParam *ad = reinterpret_cast(data); + if (dwarf_tag(fn_die) == DW_TAG_subprogram && dwarf_haspc(fn_die, ad->addr)) { + memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); + return DWARF_CB_ABORT; + } + return DWARF_CB_OK; +} + +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) { + DieSearchParam ad; + ad.addr = addr; + ad.die_mem = die_mem; + /* dwarf_getscopes can't find subprogram. */ + if (!dwarf_getfuncs(cu_die, die_search_func_cb, &ad, 0)) + return NULL; + else + return die_mem; +} + +// return index to added element, else returns -1 +static int store_die_information(Dwarf_Die *sc_die, int parent_index, + DieInformation &data) { + // function or inlined function + int tag_val = dwarf_tag(sc_die); + if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { + DieInformation::Function function{}; + // die_name is usually the raw function name (no mangling info) + // link name can have mangling info + function.func_name = dwarf_diename(sc_die); + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { + if (attr) { + Dwarf_Addr ret_value; + if (dwarf_formaddr(attr, &ret_value) == 0) { + function.start_addr = ret_value; + } + } + } + // end is stored as a unsigned (not as a pointer) + if ((attr = dwarf_attr(sc_die, DW_AT_high_pc, &attr_mem))) { + if (attr) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.end_addr = function.start_addr + return_uval; + } + } + } + + function.parent_pos = parent_index; + data.die_mem_vec.push_back(std::move(function)); + return (data.die_mem_vec.size() - 1); + } + return -1; +} + +static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, + int parent_index, + DieInformation &die_info, + Dwarf_Die *die_mem) { + Dwarf_Die child_die; + int ret; + ret = dwarf_child(current_die, die_mem); + if (ret != 0) + return nullptr; + do { + int current_idx = store_die_information(die_mem, parent_index, die_info); + // todo: can their be childs outside of subprograms / inlined funcs ? + // Current assumption is that that is impossible + if (current_idx != -1) { + find_functions_in_child_die(die_mem, current_idx, die_info, &child_die); + } + } while (dwarf_siblingof(die_mem, die_mem) == 0); + return nullptr; +} + +static DDRes parse_die_information(Dwfl *dwfl, ProcessAddress_t process_pc, + DieInformation &die_information) { + Dwarf_Addr bias; + Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + ElfAddress_t elf_addr = process_pc - bias; + Dwarf_Die die_mem; + Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); + if (sc_die == nullptr) { + DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, "Unable to retrieve sc_die"); + } + // store parent function at index 0 + if (store_die_information(sc_die, -1, die_information) == -1) { + DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, + "Unable to store the parent function"); + } + + find_functions_in_child_die(sc_die, 0, die_information, &die_mem); + + for (auto &el : die_information.die_mem_vec) { + LG_DBG("Inlined func = %s / start=%lx / end=%lx / parent=%d", + el.func_name, el.start_addr, el.end_addr, el.parent_pos); + } + return {}; +} + +int findClosestAfter(const std::vector &values, int reference) { + int closest = -1; + for (int val : values) { + if (val > reference && (closest == -1 || val < closest)) { + closest = val; + } + } + return closest; +} + +static DDRes parse_lines(Dwfl *dwfl, const DDProfMod &mod, + ProcessAddress_t process_pc, SymbolMap &func_map, + DwflSymbolLookup::LineMap &line_map, + SymbolTable &table, DieInformation &die_information) { + Dwarf_Addr bias; + // This will lazily load the dwarf file + // the decompression can take time + // todo use dw access to avoid opening PID times + + Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + + if (!cudie) { + // This will fail in case of no debug info + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + + Dwarf_Lines *lines; + size_t nlines; + if (dwarf_getsrclines(cudie, &lines, &nlines) != 0) { + LG_DBG("Unable to find source lines"); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + auto it = line_map.begin(); + for (size_t func_pos = 0; func_pos ( + static_cast(current_func.start_addr), + DwflSymbolLookup::Line{static_cast(lineno), + current_func.start_addr, + current_func.symbol_idx})); + + int end_index = binary_search_start_index(lines, + nlines, + current_func.end_addr); + + line = dwarf_onesrcline(lines, end_index); + if (dwarf_lineno(line, &lineno) == -1) { + lineno = 0; + } + line_map.insert( + it, + std::pair( + static_cast(current_func.end_addr), + DwflSymbolLookup::Line{static_cast(lineno), + current_func.end_addr, + current_func.symbol_idx})); + } + return {}; } SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, ProcessAddress_t process_pc, - const Dso &dso, SymbolMap &func_map, - LineMap &line_map) { + const Dso &dso, + SymbolWrapper &symbol_wrapper) { Symbol symbol; GElf_Sym elf_sym; Offset_t lbias; + SymbolMap &func_map = symbol_wrapper._symbol_map; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; - if (!symbol_get_from_dwfl(ddprof_mod._mod, process_pc, symbol, elf_sym, lbias)) { ++_stats._no_dwfl_symbols; @@ -145,22 +374,76 @@ SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); } - +#define DEBUG #ifdef DEBUG + LG_DBG("-------------------------------"); LG_DBG("Insert: %lx,%lx -> %s,%d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); #endif - if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, start_sym, - end_sym, line_map, table, symbol_idx))) { + auto res_emplace = func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + LineMap &line_map = symbol_wrapper._line_map; + DieInformation die_information; + if (!IsDDResOK(parse_die_information(dwfl, process_pc, die_information))) { + LG_DBG("Error when parsing die information for %s (%s)", + sym_ref._demangle_name.c_str(), dso._filename.c_str()); + return symbol_idx; + } + if (die_information.die_mem_vec[0].end_addr > end_sym) { + // assume dwarf is more accurate + res_emplace.first->second.set_end(die_information.die_mem_vec[0].end_addr); + } + // Create associated symbols + assert(die_information.die_mem_vec.size()); + if (die_information.die_mem_vec.size() <= 1) { + // no inlining info + return symbol_idx; + } + // todo: ensure we group the inlined symbols to avoid dupes + // todo move this part + die_information.die_mem_vec[0].symbol_idx = symbol_idx; + + + // TODO: Think of what we should do with functions that do not have start + // or end + NestedSymbolMap &inline_map = symbol_wrapper._inline_map; + for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { + DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; + table.emplace_back( + Symbol({}, + current_func.func_name, + 0, + {}, + // Symbol associated to inlined function will be pos + symbol idx + current_func.parent_pos + symbol_idx)); + current_func.symbol_idx = table.size() - 1; + // add to the lookup + inline_map.emplace(current_func.start_addr, + NestedSymbolSpan( + current_func.end_addr, + current_func.symbol_idx, + die_information.die_mem_vec[current_func.parent_pos].start_addr)); + } + // associate line information to die information (includes file info) + if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, func_map, + line_map, table, die_information))) { LG_DBG("Error when parsing line information for %s (%s)", sym_ref._demangle_name.c_str(), dso._filename.c_str()); } - if (sym_ref._srcpath.empty()) { - // override with info from dso (this slightly mixes mappings and sources) - // But it helps a lot at Datadog (as mappings are ignored for now in UI) - sym_ref._srcpath = dso.format_filename(); + + for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { + DieInformation::Function &func = die_information.die_mem_vec[pos]; + auto &sym = table[func.symbol_idx]; + if (sym._srcpath.empty()) { + // override with info from dso (this slightly mixes mappings and sources) + // But it helps a lot at Datadog (as mappings are ignored for now in UI) + sym._srcpath = dso.format_filename(); + } + } + NestedSymbolMap::FindRes const find_res = inline_map.find_closest(elf_pc); + if (find_res.second) { + // in case we match an inlined symbol + return find_res.first->second.get_symbol_idx(); } - func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); return symbol_idx; } } @@ -229,108 +512,4 @@ void DwflSymbolLookupStats::reset() { _errors = 0; } -size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, - ElfAddress_t start_sym) { - size_t low = 0; - size_t high = nlines - 1; - - while (low <= high) { - size_t mid = low + (high - low) / 2; - Dwarf_Line *mid_line = dwarf_onesrcline(lines, mid); - Dwarf_Addr mid_addr; - dwarf_lineaddr(mid_line, &mid_addr); - - if (mid_addr < start_sym) { - low = mid + 1; - } else if (mid_addr > start_sym) { - high = mid - 1; - } else { - return mid; - } - - if (low == high) { - return low; - } - } - - return 0; // Return a default value if no suitable index is found -} - -DDRes DwflSymbolLookup::parse_lines(Dwfl *dwfl, const DDProfMod &mod, - ProcessAddress_t process_pc, - ElfAddress_t start_sym, - ElfAddress_t end_sym, LineMap &line_map, - SymbolTable &table, - SymbolIdx_t current_sym) { - Dwarf_Addr bias; - // This will lazily load the dwarf file - // the decompression can take time - // todo use dw access to avoid opening PID times - Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); - Dwarf *dwarf = dwfl_module_getdwarf(mod._mod, &bias); - // ElfAddress_t elf_address = process_pc - bias; - LG_DBG("Start %lx - end %lx", start_sym, end_sym); - - if (!cudie) { - // This will fail in case of no debug info - return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); - } - - Dwarf_Lines *lines; - size_t nlines; - if (dwarf_getsrclines(cudie, &lines, &nlines) != 0) { - LG_DBG("Unable to find source lines"); - return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); - } - auto it = line_map.begin(); - - // for now we care about first source file - const char *current_line_file = nullptr; - - size_t start_index = binary_search_start_index(lines, nlines, start_sym); - int start_lineno = -1; - int end_lineno = -1; - { - size_t end_index = binary_search_start_index(lines, nlines, end_sym); - Dwarf_Line *line = dwarf_onesrcline(lines, end_index); - - if (dwarf_lineno(line, &end_lineno) == -1) { - end_lineno = std::numeric_limits::max(); - } - } - - Symbol &ref_sym = table[current_sym]; - - // todo: different files - for (size_t i = start_index; i < nlines; ++i) { - Dwarf_Line *line = dwarf_onesrcline(lines, i); - Dwarf_Addr line_addr; - int lineno; - dwarf_lineaddr(line, &line_addr); - if (line_addr < start_sym) { - continue; - } - if (line_addr > end_sym) { - break; - } - current_line_file = dwarf_linesrc(line, nullptr, nullptr); - if (current_line_file && ref_sym._srcpath.empty()) { - ref_sym._srcpath = std::string(current_line_file); - } - if (dwarf_lineno(line, &lineno) == -1) { - lineno = 0; - } else if (!ref_sym._func_start_lineno) { - start_lineno = lineno; - ref_sym._func_start_lineno = lineno; - } - // ... Process line information here ... - LG_DBG("Dwarf file = %s / %d / %lx", current_line_file, lineno, line_addr); - it = line_map.insert(it, - std::pair( - static_cast(line_addr), - Line{static_cast(lineno), line_addr})); - } - return {}; -} - } // namespace ddprof diff --git a/src/symbol_map.cc b/src/symbol_map.cc index fb1871ace..8329c4365 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -19,7 +19,6 @@ bool SymbolMap::is_within(const Offset_t &norm_pc, } SymbolMap::FindRes SymbolMap::find_closest(Offset_t norm_pc) { - // First element not less than (can match exactly a start addr) auto it = lower_bound(norm_pc); if (it != end()) { // map is empty @@ -35,7 +34,43 @@ SymbolMap::FindRes SymbolMap::find_closest(Offset_t norm_pc) { return {end(), false}; } // element can not be end (as we reversed or exit) - return {it, SymbolMap::is_within(norm_pc, *it)}; + return {it, is_within(norm_pc, *it)}; +} + + +NestedSymbolMap::FindRes NestedSymbolMap::find_closest(Offset_t norm_pc) { + auto it = lower_bound(norm_pc); + if (it != end()) { // map is empty + if (is_within(norm_pc, *it)) { + return {it, true}; + } + } + if (it != begin()) { + --it; + } + // Iteratively check the current symbol and its parent symbols + while (it != end()) { + if (is_within(norm_pc, *it)) { + return {it, true}; + } + // Traverse to the parent symbol if available + if (it->second.get_parent_addr() != 0) { + it = find(it->second.get_parent_addr()); + } else { + break; // No parent, stop the search + } + } + return {end(), false}; +} + +bool NestedSymbolMap::is_within(const Offset_t &norm_pc, const NestedSymbolMap::ValueType &kv) { + if (norm_pc < kv.first) { + return false; + } + if (norm_pc > kv.second.get_end()) { + return false; + } + return true; } } // namespace ddprof diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 1ebf4902d..ef5728da9 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -187,12 +187,15 @@ TEST(DwflModule, short_lived) { } #endif __attribute__((always_inline)) inline ElfAddress_t deeper_function() { + // Without these instructions we can fall in the calling function + LG_DBG("Adding some logging instructions"); ElfAddress_t ip = _THIS_IP_; - LG_DBG("I'm deep!!"); + LG_DBG("I'm capturing the ip = %lx", ip); return ip; } __attribute__((always_inline)) inline ElfAddress_t inlined_function() { + LG_DBG("Before the call to deeper func!!"); ElfAddress_t ip = deeper_function(); LG_DBG("I'm going up!!"); return ip; @@ -241,6 +244,9 @@ TEST(DwflModule, inlined_func) { SymbolIdx_t symbol_idx = symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, dso_lookup, file_info_id, ip, dso); + const auto &sym = table[symbol_idx]; + LG_DBG("Sym = %s", sym._demangle_name.c_str()); + EXPECT_EQ(sym._demangle_name, "deeper_function"); } } } diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index 0da3ce5d9..2550748b1 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -6,6 +6,7 @@ #include #include "symbol_map.hpp" +#include "loghandle.hpp" namespace ddprof { @@ -22,6 +23,28 @@ TEST(SymbolMap, Map) { SymbolMap map; SymbolSpan span0_1000(0x1000, 12); map.emplace(0, span0_1000); + SymbolMap::FindRes res = map.find_closest(50); + EXPECT_TRUE(res.second); +} + + +TEST(SymbolMap, NestedSymbolMap) { + LogHandle handle; + NestedSymbolMap map; + NestedSymbolSpan span100_1000(0x1000, 0); + map.emplace(0x100, span100_1000); + NestedSymbolSpan span150_300(0x300, 1, 0x100); + map.emplace(0x150, span150_300); + { + NestedSymbolMap::FindRes res = map.find_closest(0x150); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 1); + } + { + NestedSymbolMap::FindRes res = map.find_closest(0x400); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 0); + } } } // namespace ddprof From 709e02cfef167dacbd85e91dcdae0b5081f1ba1a Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 23 Nov 2023 16:29:37 +0100 Subject: [PATCH 03/11] Allow to push vectors of inlined functions --- include/ddres_list.hpp | 1 + include/dwfl_symbol_lookup.hpp | 25 +++- include/symbol.hpp | 8 +- include/symbol_map.hpp | 43 +++--- include/unwind_helpers.hpp | 7 +- src/dwfl_symbol_lookup.cc | 252 ++++++++++++++++++++------------- src/symbol_map.cc | 37 ++++- src/unwind_dwfl.cc | 10 +- src/unwind_helpers.cc | 16 ++- test/dwfl_module-ut.cc | 7 +- test/symbol_map-ut.cc | 52 ++++++- 11 files changed, 310 insertions(+), 148 deletions(-) diff --git a/include/ddres_list.hpp b/include/ddres_list.hpp index d2f512e73..59ee55621 100644 --- a/include/ddres_list.hpp +++ b/include/ddres_list.hpp @@ -21,6 +21,7 @@ enum { DD_COMMON_START_RANGE = 1000, DD_NATIVE_START_RANGE = 2000 }; #define NATIVE_ERROR_TABLE(X) \ X(DWFL_LIB_ERROR, "error withing dwfl library") \ + X(NO_DWARF, "No dwarf information available") \ X(UW_CACHE_ERROR, "error from unwinding cache") \ X(UW_ERROR, "error from unwinding code") \ X(UW_MAX_DEPTH, "max depth reached in unwinding") \ diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index 952bbb13f..d370a31fa 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -48,11 +48,13 @@ class DwflSymbolLookup { DwflSymbolLookup(); // Get symbol from internal cache or fetch through dwarf - SymbolIdx_t get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - FileInfoId_t file_info_id, - ProcessAddress_t process_pc, const Dso &dso); + void get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + FileInfoId_t file_info_id, + ProcessAddress_t process_pc, + const Dso &dso, + std::vector &symbol_indices); void erase(FileInfoId_t file_info_id) { _file_info_function_map.erase(file_info_id); @@ -89,6 +91,19 @@ class DwflSymbolLookup { ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper); + static DDRes insert_inlining_info(Dwfl *dwfl, + const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + ProcessAddress_t process_pc, + const Dso &dso, + SymbolWrapper &symbol_wrapper, + SymbolIdx_t parent_sym_idx); + + static void get_inlined(const SymbolWrapper &symbol_wrapper, + ElfAddress_t elf_pc, + std::vector &inlined_symbols); + // Symbols are ordered by file. // The assumption is that the elf addresses are the same across processes // The unordered map stores symbols per file, diff --git a/include/symbol.hpp b/include/symbol.hpp index f634e4e6e..040781490 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -15,14 +15,13 @@ namespace ddprof { class Symbol { public: - Symbol() : _func_start_lineno(0), _parent_idx(-1) {} + Symbol() : _func_start_lineno(0) {} // Warning : Generates some string copies (these are not rvalues) Symbol(std::string symname, std::string demangle_name, uint32_t lineno, std::string srcpath, int parent_idx = -1) : _symname(std::move(symname)), _demangle_name(std::move(demangle_name)), - _func_start_lineno(lineno), _srcpath(std::move(srcpath)), - _parent_idx(parent_idx) {} + _func_start_lineno(lineno), _srcpath(std::move(srcpath)) {} // OUTPUT OF ADDRINFO std::string _symname; @@ -33,8 +32,5 @@ class Symbol { // OUTPUT OF LINE INFO uint32_t _func_start_lineno; std::string _srcpath; - - // PARENT FUNCTION - SymbolIdx_t _parent_idx; }; } // namespace ddprof diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index 1ca6984ad..747e9ec75 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -59,34 +59,40 @@ class SymbolMap : private std::map { }; -class NestedSymbolSpan { +class NestedSymbolValue { public: - NestedSymbolSpan() : _end(0), _symbol_idx(-1), _parent_addr(0) {} - NestedSymbolSpan(Offset_t end, SymbolIdx_t symbol_idx, - ElfAddress_t parent = 0) - : _end(end), _symbol_idx(symbol_idx), _parent_addr(parent) {} - void set_end(Offset_t end) { - if (end > _end) { - _end = end; - } - } - [[nodiscard]] Offset_t get_end() const { return _end; } + NestedSymbolValue() : _symbol_idx(-1), _parent_addr(0) {} + NestedSymbolValue(SymbolIdx_t symbol_idx, ElfAddress_t parent = 0) + : _symbol_idx(symbol_idx), _parent_addr(parent) {} [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } [[nodiscard]] ElfAddress_t get_parent_addr() const{ return _parent_addr; } - private: - Offset_t _end; SymbolIdx_t _symbol_idx; ElfAddress_t _parent_addr; }; -class NestedSymbolMap : private std::map { + +struct NestedSymbolKey { + ElfAddress_t start; + ElfAddress_t end; + NestedSymbolKey(ElfAddress_t s, ElfAddress_t e) : start(s), end(e) {} + bool operator<(const NestedSymbolKey & other) const { + if (start != other.start) { + return start < other.start; + } + // Sort by end address in descending order if start addresses are equal + return end > other.end; + } +}; + + +class NestedSymbolMap : private std::map { public: - using Map = std::map; + using Map = std::map; using It = Map::iterator; using ConstIt = Map::const_iterator; - using FindRes = std::pair; + using FindRes = std::pair; using ValueType = Map::value_type; using Map::begin; using Map::clear; @@ -97,7 +103,10 @@ class NestedSymbolMap : private std::map { using Map::erase; using Map::size; - FindRes find_closest(Offset_t norm_pc); + FindRes find_parent(ConstIt it, + Offset_t norm_pc) const; + + FindRes find_closest(Offset_t norm_pc) const; static bool is_within(const Offset_t &norm_pc, const ValueType &kv); }; diff --git a/include/unwind_helpers.hpp b/include/unwind_helpers.hpp index 7788dd57c..977b34a61 100644 --- a/include/unwind_helpers.hpp +++ b/include/unwind_helpers.hpp @@ -17,7 +17,12 @@ struct UnwindState; bool is_max_stack_depth_reached(const UnwindState &us); -DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, +DDRes add_frame(std::vector symbol_indices, + MapInfoIdx_t map_idx, ElfAddress_t pc, + UnwindState *us); + +DDRes add_frame(SymbolIdx_t symbol_idx, + MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us); void add_common_frame(UnwindState *us, SymbolErrors lookup_case); diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 990e1eb00..b73eda243 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -91,10 +91,11 @@ unsigned DwflSymbolLookup::size() const { } // Retrieve existing symbol or attempt to read from dwarf -SymbolIdx_t DwflSymbolLookup::get_or_insert( +void DwflSymbolLookup::get_or_insert( Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, FileInfoId_t file_info_id, - ProcessAddress_t process_pc, const Dso &dso) { + ProcessAddress_t process_pc, const Dso &dso, + std::vector &symbol_indices) { ++_stats._calls; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; @@ -121,9 +122,19 @@ SymbolIdx_t DwflSymbolLookup::get_or_insert( } } ++_stats._hit; - return find_res.first->second.get_symbol_idx(); + symbol_indices.push_back(find_res.first->second.get_symbol_idx()); } - return insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper); + else { + // insert symbols using elf info + SymbolIdx_t elf_sym_idx = insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper); + symbol_indices.push_back(elf_sym_idx); + // parse associated dwarf info + insert_inlining_info(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper, elf_sym_idx); + } + get_inlined(symbol_wrapper, + elf_pc, + symbol_indices); + return; } /* die_find callback for non-inlined function search */ @@ -152,32 +163,32 @@ Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, static int store_die_information(Dwarf_Die *sc_die, int parent_index, DieInformation &data) { // function or inlined function - int tag_val = dwarf_tag(sc_die); - if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { - DieInformation::Function function{}; - // die_name is usually the raw function name (no mangling info) - // link name can have mangling info - function.func_name = dwarf_diename(sc_die); - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { - if (attr) { - Dwarf_Addr ret_value; - if (dwarf_formaddr(attr, &ret_value) == 0) { - function.start_addr = ret_value; - } + DieInformation::Function function{}; + // die_name is usually the raw function name (no mangling info) + // link name can have mangling info + function.func_name = dwarf_diename(sc_die); + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { + if (attr) { + Dwarf_Addr ret_value; + if (dwarf_formaddr(attr, &ret_value) == 0) { + function.start_addr = ret_value; } } - // end is stored as a unsigned (not as a pointer) - if ((attr = dwarf_attr(sc_die, DW_AT_high_pc, &attr_mem))) { - if (attr) { - Dwarf_Word return_uval; - if (dwarf_formudata(attr, &return_uval) == 0) { - function.end_addr = function.start_addr + return_uval; - } + } + // end is stored as a unsigned (not as a pointer) + if ((attr = dwarf_attr(sc_die, DW_AT_high_pc, &attr_mem))) { + if (attr) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.end_addr = function.start_addr + return_uval; } } - + } + // we often can find duplicates within the dwarf information + // some of the tags don't have the start and end info + if (function.start_addr && function.end_addr) { function.parent_pos = parent_index; data.die_mem_vec.push_back(std::move(function)); return (data.die_mem_vec.size() - 1); @@ -195,12 +206,15 @@ static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, if (ret != 0) return nullptr; do { - int current_idx = store_die_information(die_mem, parent_index, die_info); - // todo: can their be childs outside of subprograms / inlined funcs ? - // Current assumption is that that is impossible - if (current_idx != -1) { - find_functions_in_child_die(die_mem, current_idx, die_info, &child_die); + int tag_val = dwarf_tag(die_mem); + int next_parent_idx = parent_index; + if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { + int current_idx = store_die_information(die_mem, parent_index, die_info); + next_parent_idx = (current_idx != -1 ? current_idx:next_parent_idx); } + // + // todo: optimize the exploration to avoid going through soo many elements + find_functions_in_child_die(die_mem, next_parent_idx, die_info, &child_die); } while (dwarf_siblingof(die_mem, die_mem) == 0); return nullptr; } @@ -209,6 +223,9 @@ static DDRes parse_die_information(Dwfl *dwfl, ProcessAddress_t process_pc, DieInformation &die_information) { Dwarf_Addr bias; Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + if (cudie == nullptr) { + DDRES_RETURN_WARN_LOG(DD_WHAT_NO_DWARF, "Unable to retrieve cu die"); + } ElfAddress_t elf_addr = process_pc - bias; Dwarf_Die die_mem; Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); @@ -217,10 +234,10 @@ static DDRes parse_die_information(Dwfl *dwfl, ProcessAddress_t process_pc, } // store parent function at index 0 if (store_die_information(sc_die, -1, die_information) == -1) { - DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, - "Unable to store the parent function"); + LG_DBG("Unable to store the parent function"); + // todo figure this out. (example : dl_iterate_phdr) + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } - find_functions_in_child_die(sc_die, 0, die_information, &die_mem); for (auto &el : die_information.die_mem_vec) { @@ -230,7 +247,7 @@ static DDRes parse_die_information(Dwfl *dwfl, ProcessAddress_t process_pc, return {}; } -int findClosestAfter(const std::vector &values, int reference) { +int find_closest_after(const std::vector &values, int reference) { int closest = -1; for (int val : values) { if (val > reference && (closest == -1 || val < closest)) { @@ -310,13 +327,82 @@ static DDRes parse_lines(Dwfl *dwfl, const DDProfMod &mod, return {}; } +DDRes DwflSymbolLookup::insert_inlining_info(Dwfl *dwfl, + const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + ProcessAddress_t process_pc, + const Dso &dso, + SymbolWrapper &symbol_wrapper, + SymbolIdx_t parent_sym_idx) { + SymbolMap &func_map = symbol_wrapper._symbol_map; + DieInformation die_information; + Symbol &parent_sym = table[parent_sym_idx]; + if (!IsDDResOK(parse_die_information(dwfl, process_pc, die_information))) { + LG_DBG("Error when parsing die information for %s (%s)", + parent_sym._demangle_name.c_str(), dso._filename.c_str()); + return ddres_warn(DD_WHAT_NO_DWARF); + } + // todo : is there value in extending the elf span using dwarf info ? + // Create associated symbols + assert(die_information.die_mem_vec.size()); + if (die_information.die_mem_vec.size() <= 1) { + // no inlining info + return {}; + } + // todo: ensure we group the inlined symbols to avoid dupes + // todo move this part + die_information.die_mem_vec[0].symbol_idx = parent_sym_idx; + + NestedSymbolMap &inline_map = symbol_wrapper._inline_map; + for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { + DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; + table.emplace_back( + Symbol({}, + current_func.func_name, + 0, // no line for now + {})); + current_func.symbol_idx = table.size() - 1; + // add to the lookup + assert(current_func.parent_pos != -1); + // Parent function (position 0) is not added to the inlined functions + ElfAddress_t start_addr_parent = current_func.parent_pos? + die_information.die_mem_vec[current_func.parent_pos].start_addr:0; + LG_DBG("adding %lx - %lx: %s", + current_func.start_addr, + current_func.end_addr, + current_func.func_name); + inline_map.emplace( + NestedSymbolKey{current_func.start_addr, current_func.end_addr}, + NestedSymbolValue(current_func.symbol_idx, + start_addr_parent)); + } + // associate line information to die information (includes file info) + LineMap &line_map = symbol_wrapper._line_map; + if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, func_map, + line_map, table, die_information))) { + LG_DBG("Error when parsing line information for %s (%s)", + parent_sym._demangle_name.c_str(), dso._filename.c_str()); + } + + for (unsigned pos = 0; pos < die_information.die_mem_vec.size(); ++pos) { + DieInformation::Function &func = die_information.die_mem_vec[pos]; + auto &sym = table[func.symbol_idx]; + if (sym._srcpath.empty()) { + // override with info from dso (this slightly mixes mappings and sources) + // But it helps a lot at Datadog (as mappings are ignored for now in UI) + sym._srcpath = dso.format_filename(); + } + } + return {}; +} + SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper) { - Symbol symbol; GElf_Sym elf_sym; Offset_t lbias; @@ -380,74 +466,42 @@ SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, LG_DBG("Insert: %lx,%lx -> %s,%d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); #endif - auto res_emplace = func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - LineMap &line_map = symbol_wrapper._line_map; - DieInformation die_information; - if (!IsDDResOK(parse_die_information(dwfl, process_pc, die_information))) { - LG_DBG("Error when parsing die information for %s (%s)", - sym_ref._demangle_name.c_str(), dso._filename.c_str()); - return symbol_idx; - } - if (die_information.die_mem_vec[0].end_addr > end_sym) { - // assume dwarf is more accurate - res_emplace.first->second.set_end(die_information.die_mem_vec[0].end_addr); - } - // Create associated symbols - assert(die_information.die_mem_vec.size()); - if (die_information.die_mem_vec.size() <= 1) { - // no inlining info - return symbol_idx; - } - // todo: ensure we group the inlined symbols to avoid dupes - // todo move this part - die_information.die_mem_vec[0].symbol_idx = symbol_idx; - - - // TODO: Think of what we should do with functions that do not have start - // or end - NestedSymbolMap &inline_map = symbol_wrapper._inline_map; - for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { - DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; - table.emplace_back( - Symbol({}, - current_func.func_name, - 0, - {}, - // Symbol associated to inlined function will be pos + symbol idx - current_func.parent_pos + symbol_idx)); - current_func.symbol_idx = table.size() - 1; - // add to the lookup - inline_map.emplace(current_func.start_addr, - NestedSymbolSpan( - current_func.end_addr, - current_func.symbol_idx, - die_information.die_mem_vec[current_func.parent_pos].start_addr)); - } - // associate line information to die information (includes file info) - if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, func_map, - line_map, table, die_information))) { - LG_DBG("Error when parsing line information for %s (%s)", - sym_ref._demangle_name.c_str(), dso._filename.c_str()); - } + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { - DieInformation::Function &func = die_information.die_mem_vec[pos]; - auto &sym = table[func.symbol_idx]; - if (sym._srcpath.empty()) { - // override with info from dso (this slightly mixes mappings and sources) - // But it helps a lot at Datadog (as mappings are ignored for now in UI) - sym._srcpath = dso.format_filename(); - } - } - NestedSymbolMap::FindRes const find_res = inline_map.find_closest(elf_pc); - if (find_res.second) { - // in case we match an inlined symbol - return find_res.first->second.get_symbol_idx(); - } return symbol_idx; } } +void DwflSymbolLookup::get_inlined(const SymbolWrapper &symbol_wrapper, + ElfAddress_t elf_pc, + std::vector &inlined_symbols) { + const InlineMap& inline_map = symbol_wrapper._inline_map; + + NestedSymbolMap::FindRes find_res = inline_map.find_closest(elf_pc); + std::vector temp_symbols; + LG_DBG("Looking for %lx (%u)- matched %lx - %lx\n", + elf_pc, inline_map.size(), + find_res.second?find_res.first->first.start:0, + find_res.second?find_res.first->first.end:0); + + while (find_res.second) { + temp_symbols.push_back(find_res.first->second.get_symbol_idx()); + if (find_res.first->second.get_parent_addr()) { + find_res = inline_map.find_parent( + find_res.first, find_res.first->second.get_parent_addr()); + assert(find_res.second); + } else { + break; + } + } + + // Reverse iterate through temp_symbols and add them to inlined_symbols + for (auto it = temp_symbols.rbegin(); it != temp_symbols.rend(); ++it) { + LG_DBG("Adding %d \n", *it); + inlined_symbols.push_back(*it); + } +} + bool DwflSymbolLookup::symbol_lookup_check(Dwfl_Module *mod, Dwarf_Addr process_pc, const Symbol &symbol) { diff --git a/src/symbol_map.cc b/src/symbol_map.cc index 8329c4365..56cde76f2 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -4,6 +4,8 @@ // Datadog, Inc. #include "symbol_map.hpp" +#include +#include namespace ddprof { @@ -37,9 +39,27 @@ SymbolMap::FindRes SymbolMap::find_closest(Offset_t norm_pc) { return {it, is_within(norm_pc, *it)}; } +NestedSymbolMap::FindRes NestedSymbolMap::find_parent(NestedSymbolMap::ConstIt it, + Offset_t norm_pc) const { + // Handle case where start addresses are equal + // We do not want to return the same element, we are looking for parent + if (it != begin() && it->first.start == norm_pc) { + --it; + if (is_within(norm_pc, *it)) { + return {it, true}; + } + } + while (it != begin() && it->first.start > norm_pc) { + --it; + } + return {it, is_within(norm_pc, *it)}; +} -NestedSymbolMap::FindRes NestedSymbolMap::find_closest(Offset_t norm_pc) { - auto it = lower_bound(norm_pc); +NestedSymbolMap::FindRes NestedSymbolMap::find_closest(Offset_t norm_pc) const { + // Use the element with the lowest end possible, to ensure we find the + // deepest element + auto it = lower_bound(NestedSymbolKey{norm_pc, + 0}); if (it != end()) { // map is empty if (is_within(norm_pc, *it)) { return {it, true}; @@ -55,7 +75,14 @@ NestedSymbolMap::FindRes NestedSymbolMap::find_closest(Offset_t norm_pc) { } // Traverse to the parent symbol if available if (it->second.get_parent_addr() != 0) { - it = find(it->second.get_parent_addr()); + FindRes res = find_parent(it, it->second.get_parent_addr()); + if (res.second) { + it = res.first; + } + else { + assert(0); + break; + } } else { break; // No parent, stop the search } @@ -64,10 +91,10 @@ NestedSymbolMap::FindRes NestedSymbolMap::find_closest(Offset_t norm_pc) { } bool NestedSymbolMap::is_within(const Offset_t &norm_pc, const NestedSymbolMap::ValueType &kv) { - if (norm_pc < kv.first) { + if (norm_pc < kv.first.start) { return false; } - if (norm_pc > kv.second.get_end()) { + if (norm_pc > kv.first.end) { return false; } return true; diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index e8c10a329..95257277e 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -212,13 +212,13 @@ DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, const DDProfMod &ddprof_mod, FileInfoId_t file_info_id) { SymbolHdr &unwind_symbol_hdr = us->symbol_hdr; // get or create the dwfl symbol - SymbolIdx_t const symbol_idx = - unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( - us->_dwfl_wrapper->_dwfl, ddprof_mod, unwind_symbol_hdr._symbol_table, - unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso); + std::vector symbol_indices{}; + unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( + us->_dwfl_wrapper->_dwfl, ddprof_mod, unwind_symbol_hdr._symbol_table, + unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso, symbol_indices); MapInfoIdx_t const map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); - return add_frame(symbol_idx, map_idx, pc, us); + return add_frame(symbol_indices, map_idx, pc, us); } // check for runtime symbols provided in /tmp files diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index b475ec3ec..dc0b23e5a 100644 --- a/src/unwind_helpers.cc +++ b/src/unwind_helpers.cc @@ -25,7 +25,20 @@ bool is_max_stack_depth_reached(const UnwindState &us) { return us.output.locs.size() + 2 >= kMaxStackDepth; } -DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, + +DDRes add_frame(std::vector symbol_indices, + MapInfoIdx_t map_idx, + ElfAddress_t pc, + UnwindState *us) { + + for (auto const el : symbol_indices) { + DDRES_CHECK_FWD(add_frame(el, map_idx, pc, us)); + } + return {}; +} + +DDRes add_frame(SymbolIdx_t symbol_idx, + MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us) { UnwindOutput *output = &us->output; if (output->locs.size() >= kMaxStackDepth) { @@ -48,7 +61,6 @@ DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, us->symbol_hdr._symbol_table[current._symbol_idx]._symname.c_str()); #endif output->locs.push_back(current); - return {}; } diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index ef5728da9..5d3771e26 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -241,10 +241,11 @@ TEST(DwflModule, inlined_func) { ASSERT_TRUE(IsDDResOK(res)); ASSERT_TRUE(ddprof_mod->_mod); if (find_res.first == it) { - SymbolIdx_t symbol_idx = + std::vector symbol_indices; symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, - dso_lookup, file_info_id, ip, dso); - const auto &sym = table[symbol_idx]; + dso_lookup, file_info_id, ip, dso, + symbol_indices); + const auto &sym = table[symbol_indices.back()]; LG_DBG("Sym = %s", sym._demangle_name.c_str()); EXPECT_EQ(sym._demangle_name, "deeper_function"); } diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index 2550748b1..4bb4aebc4 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -28,13 +28,16 @@ TEST(SymbolMap, Map) { } -TEST(SymbolMap, NestedSymbolMap) { +TEST(NestedSymbolMap, simple) { LogHandle handle; NestedSymbolMap map; - NestedSymbolSpan span100_1000(0x1000, 0); - map.emplace(0x100, span100_1000); - NestedSymbolSpan span150_300(0x300, 1, 0x100); - map.emplace(0x150, span150_300); + NestedSymbolValue span100_1000(0, 0); + map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); + NestedSymbolValue span150_300(1, 0x100); + map.emplace(NestedSymbolKey{0x150,0x300}, span150_300); + for (auto &el:map) { + LG_DBG("Idx = %d", el.second.get_symbol_idx()); + } { NestedSymbolMap::FindRes res = map.find_closest(0x150); EXPECT_TRUE(res.second); @@ -47,4 +50,43 @@ TEST(SymbolMap, NestedSymbolMap) { } } +TEST(NestedSymbolMap, same_addr) { + LogHandle handle; + NestedSymbolMap map; + NestedSymbolValue span100_1000(0); + map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); + NestedSymbolValue span100_300(1, 0x100); + map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); + for (auto &el:map) { + LG_DBG("Idx = %d", el.second.get_symbol_idx()); + } + + { // always return the deeper element + NestedSymbolMap::FindRes res = map.find_closest(0x100); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 1); + } +} + +// todo : fix bug on same start different end with multiple +TEST(NestedSymbolMap, InlinedFunctionLookup) { + NestedSymbolMap map; + // Insert main function + map.emplace(NestedSymbolKey{0x1180, 0x128a}, NestedSymbolValue(34, 0)); + // Insert inlined functions as per the log + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(1, 0x1180)); + map.emplace(NestedSymbolKey{0x11bd, 0x11c4}, NestedSymbolValue(2, 0x1180)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(3, 0x1180)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(4, 0x1180)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(5, 0x1180)); + map.emplace(NestedSymbolKey{0x11d0, 0x1203}, NestedSymbolValue(6, 0x1180)); + map.emplace(NestedSymbolKey{0x11fe, 0x11fe}, NestedSymbolValue(7, 0x11d0)); + // ... (Add other inlined functions similarly) + + // Test for a specific address + NestedSymbolMap::FindRes res = map.find_closest(0x11e0); + ASSERT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 6); // Expecting the most specific (deepest) symbol for this address +} + } // namespace ddprof From b6ee35fdb7fcccd41bc5c0645d9bc5379136b358 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Fri, 24 Nov 2023 10:18:52 +0100 Subject: [PATCH 04/11] Simplify the logics to store inlined functions --- include/dwfl_symbol_lookup.hpp | 38 ++++--- include/symbol_map.hpp | 18 ++-- include/unwind_helpers.hpp | 8 +- src/dwfl_symbol_lookup.cc | 189 ++++++++++++++++----------------- src/symbol_map.cc | 58 ++++------ src/unwind_dwfl.cc | 3 +- src/unwind_helpers.cc | 10 +- test/dwfl_module-ut.cc | 4 +- test/symbol_map-ut.cc | 28 +++-- 9 files changed, 162 insertions(+), 194 deletions(-) diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index d370a31fa..4caf2325c 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -49,12 +49,9 @@ class DwflSymbolLookup { // Get symbol from internal cache or fetch through dwarf void get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - FileInfoId_t file_info_id, - ProcessAddress_t process_pc, - const Dso &dso, - std::vector &symbol_indices); + SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, + FileInfoId_t file_info_id, ProcessAddress_t process_pc, + const Dso &dso, std::vector &symbol_indices); void erase(FileInfoId_t file_info_id) { _file_info_function_map.erase(file_info_id); @@ -77,6 +74,7 @@ class DwflSymbolLookup { SymbolMap _symbol_map; InlineMap _inline_map; }; + private: /// Set through env var (DDPROF_CACHE_SETTING) in case of doubts on cache enum SymbolLookupSetting { @@ -86,29 +84,29 @@ class DwflSymbolLookup { SymbolLookupSetting _lookup_setting{K_CACHE_ON}; - SymbolIdx_t insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, - SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, - ProcessAddress_t process_pc, const Dso &dso, - SymbolWrapper &symbol_wrapper); - - static DDRes insert_inlining_info(Dwfl *dwfl, - const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - ProcessAddress_t process_pc, - const Dso &dso, - SymbolWrapper &symbol_wrapper, - SymbolIdx_t parent_sym_idx); + SymbolMap::ValueType &insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + ProcessAddress_t process_pc, const Dso &dso, + SymbolWrapper &symbol_wrapper); + + static DDRes insert_inlining_info(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + ProcessAddress_t process_pc, const Dso &dso, + SymbolWrapper &symbol_wrapper, + SymbolMap::ValueType &parent_func); static void get_inlined(const SymbolWrapper &symbol_wrapper, ElfAddress_t elf_pc, + const SymbolMap::ValueType &parent_sym, std::vector &inlined_symbols); // Symbols are ordered by file. // The assumption is that the elf addresses are the same across processes // The unordered map stores symbols per file, // The map stores symbols per address range - using FileInfo2SymbolWrapper = std::unordered_map; + using FileInfo2SymbolWrapper = + std::unordered_map; using FileInfo2LineMap = std::unordered_map; using FileInfo2SymbolVT = FileInfo2SymbolWrapper::value_type; diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index 747e9ec75..d3f9d114d 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -33,8 +33,6 @@ class SymbolSpan { SymbolIdx_t _symbol_idx; }; - - class SymbolMap : private std::map { public: using Map = std::map; @@ -58,16 +56,14 @@ class SymbolMap : private std::map { FindRes find_closest(Offset_t norm_pc); }; - class NestedSymbolValue { public: NestedSymbolValue() : _symbol_idx(-1), _parent_addr(0) {} NestedSymbolValue(SymbolIdx_t symbol_idx, ElfAddress_t parent = 0) : _symbol_idx(symbol_idx), _parent_addr(parent) {} [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } - [[nodiscard]] ElfAddress_t get_parent_addr() const{ - return _parent_addr; - } + [[nodiscard]] ElfAddress_t get_parent_addr() const { return _parent_addr; } + private: SymbolIdx_t _symbol_idx; ElfAddress_t _parent_addr; @@ -77,7 +73,7 @@ struct NestedSymbolKey { ElfAddress_t start; ElfAddress_t end; NestedSymbolKey(ElfAddress_t s, ElfAddress_t e) : start(s), end(e) {} - bool operator<(const NestedSymbolKey & other) const { + bool operator<(const NestedSymbolKey &other) const { if (start != other.start) { return start < other.start; } @@ -86,7 +82,6 @@ struct NestedSymbolKey { } }; - class NestedSymbolMap : private std::map { public: using Map = std::map; @@ -103,10 +98,11 @@ class NestedSymbolMap : private std::map { using Map::erase; using Map::size; - FindRes find_parent(ConstIt it, - Offset_t norm_pc) const; + FindRes find_parent(ConstIt it, const NestedSymbolKey &parent_bound, + Offset_t norm_pc) const; - FindRes find_closest(Offset_t norm_pc) const; + FindRes find_closest(Offset_t norm_pc, + const NestedSymbolKey &parent_bound) const; static bool is_within(const Offset_t &norm_pc, const ValueType &kv); }; diff --git a/include/unwind_helpers.hpp b/include/unwind_helpers.hpp index 977b34a61..67458b873 100644 --- a/include/unwind_helpers.hpp +++ b/include/unwind_helpers.hpp @@ -17,12 +17,10 @@ struct UnwindState; bool is_max_stack_depth_reached(const UnwindState &us); -DDRes add_frame(std::vector symbol_indices, - MapInfoIdx_t map_idx, ElfAddress_t pc, - UnwindState *us); +DDRes add_frame(std::vector symbol_indices, MapInfoIdx_t map_idx, + ElfAddress_t pc, UnwindState *us); -DDRes add_frame(SymbolIdx_t symbol_idx, - MapInfoIdx_t map_idx, ElfAddress_t pc, +DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us); void add_common_frame(UnwindState *us, SymbolErrors lookup_case); diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index b73eda243..8d8e40584 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -21,7 +21,6 @@ #define DEBUG - namespace ddprof { namespace { @@ -84,18 +83,21 @@ DwflSymbolLookup::DwflSymbolLookup() { unsigned DwflSymbolLookup::size() const { unsigned total_nb_elts = 0; - std::for_each( - _file_info_function_map.begin(), _file_info_function_map.end(), - [&](FileInfo2SymbolVT const &el) { total_nb_elts += el.second._symbol_map.size(); }); + std::for_each(_file_info_function_map.begin(), _file_info_function_map.end(), + [&](FileInfo2SymbolVT const &el) { + total_nb_elts += el.second._symbol_map.size(); + }); return total_nb_elts; } // Retrieve existing symbol or attempt to read from dwarf -void DwflSymbolLookup::get_or_insert( - Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, FileInfoId_t file_info_id, - ProcessAddress_t process_pc, const Dso &dso, - std::vector &symbol_indices) { +void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, + DsoSymbolLookup &dso_symbol_lookup, + FileInfoId_t file_info_id, + ProcessAddress_t process_pc, + const Dso &dso, + std::vector &symbol_indices) { ++_stats._calls; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; @@ -122,18 +124,22 @@ void DwflSymbolLookup::get_or_insert( } } ++_stats._hit; + // add the inlined symbols on top + get_inlined(symbol_wrapper, elf_pc, *find_res.first, symbol_indices); + // then add the elf symbol symbol_indices.push_back(find_res.first->second.get_symbol_idx()); - } - else { + } else { // insert symbols using elf info - SymbolIdx_t elf_sym_idx = insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper); - symbol_indices.push_back(elf_sym_idx); + SymbolMap::ValueType &elf_sym = + insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, + symbol_wrapper); // parse associated dwarf info - insert_inlining_info(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper, elf_sym_idx); + insert_inlining_info(dwfl, ddprof_mod, table, process_pc, dso, + symbol_wrapper, elf_sym); + get_inlined(symbol_wrapper, elf_pc, elf_sym, symbol_indices); + // add elf symbol after + symbol_indices.push_back(elf_sym.second.get_symbol_idx()); } - get_inlined(symbol_wrapper, - elf_pc, - symbol_indices); return; } @@ -210,7 +216,7 @@ static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, int next_parent_idx = parent_index; if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { int current_idx = store_die_information(die_mem, parent_index, die_info); - next_parent_idx = (current_idx != -1 ? current_idx:next_parent_idx); + next_parent_idx = (current_idx != -1 ? current_idx : next_parent_idx); } // // todo: optimize the exploration to avoid going through soo many elements @@ -241,8 +247,8 @@ static DDRes parse_die_information(Dwfl *dwfl, ProcessAddress_t process_pc, find_functions_in_child_die(sc_die, 0, die_information, &die_mem); for (auto &el : die_information.die_mem_vec) { - LG_DBG("Inlined func = %s / start=%lx / end=%lx / parent=%d", - el.func_name, el.start_addr, el.end_addr, el.parent_pos); + LG_DBG("Inlined func = %s / start=%lx / end=%lx / parent=%d", el.func_name, + el.start_addr, el.end_addr, el.parent_pos); } return {}; } @@ -279,14 +285,13 @@ static DDRes parse_lines(Dwfl *dwfl, const DDProfMod &mod, LG_DBG("Unable to find source lines"); return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } - auto it = line_map.begin(); - for (size_t func_pos = 0; func_pos ( - static_cast(current_func.start_addr), - DwflSymbolLookup::Line{static_cast(lineno), - current_func.start_addr, - current_func.symbol_idx})); - - int end_index = binary_search_start_index(lines, - nlines, - current_func.end_addr); - - line = dwarf_onesrcline(lines, end_index); - if (dwarf_lineno(line, &lineno) == -1) { - lineno = 0; - } - line_map.insert( - it, - std::pair( - static_cast(current_func.end_addr), - DwflSymbolLookup::Line{static_cast(lineno), - current_func.end_addr, - current_func.symbol_idx})); } + // todo loop on all lines to get real line numbers and cache them + // auto it = line_map.begin(); + // line_map.insert(it, + // std::pair( + // static_cast(current_func.end_addr), + // DwflSymbolLookup::Line{static_cast(lineno), + // current_func.end_addr, + // current_func.symbol_idx})); + return {}; } -DDRes DwflSymbolLookup::insert_inlining_info(Dwfl *dwfl, - const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - ProcessAddress_t process_pc, - const Dso &dso, - SymbolWrapper &symbol_wrapper, - SymbolIdx_t parent_sym_idx) { +DDRes DwflSymbolLookup::insert_inlining_info( + Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, + ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper, + SymbolMap::ValueType &parent_func) { + + SymbolIdx_t parent_sym_idx = parent_func.second.get_symbol_idx(); + SymbolMap &func_map = symbol_wrapper._symbol_map; DieInformation die_information; Symbol &parent_sym = table[parent_sym_idx]; - if (!IsDDResOK(parse_die_information(dwfl, process_pc, die_information))) { + if (!IsDDResOK(parse_die_information(dwfl, process_pc, die_information)) || + die_information.die_mem_vec.size() == 0) { LG_DBG("Error when parsing die information for %s (%s)", parent_sym._demangle_name.c_str(), dso._filename.c_str()); return ddres_warn(DD_WHAT_NO_DWARF); } - // todo : is there value in extending the elf span using dwarf info ? // Create associated symbols - assert(die_information.die_mem_vec.size()); + // todo : is there value in extending the elf span using dwarf info ? + if (parent_func.second.get_end() < die_information.die_mem_vec[0].end_addr) { + LG_DBG("Extending end of parent func from %lx to %lx", + parent_func.second.get_end(), + die_information.die_mem_vec[0].end_addr); + parent_func.second.set_end(die_information.die_mem_vec[0].end_addr); + } + if (die_information.die_mem_vec.size() <= 1) { // no inlining info return {}; } + // todo: ensure we group the inlined symbols to avoid dupes // todo move this part die_information.die_mem_vec[0].symbol_idx = parent_sym_idx; @@ -357,30 +355,26 @@ DDRes DwflSymbolLookup::insert_inlining_info(Dwfl *dwfl, NestedSymbolMap &inline_map = symbol_wrapper._inline_map; for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; - table.emplace_back( - Symbol({}, - current_func.func_name, - 0, // no line for now - {})); + table.emplace_back(Symbol({}, current_func.func_name, + 0, // no line for now + {})); current_func.symbol_idx = table.size() - 1; // add to the lookup assert(current_func.parent_pos != -1); // Parent function (position 0) is not added to the inlined functions - ElfAddress_t start_addr_parent = current_func.parent_pos? - die_information.die_mem_vec[current_func.parent_pos].start_addr:0; - LG_DBG("adding %lx - %lx: %s", - current_func.start_addr, - current_func.end_addr, - current_func.func_name); + ElfAddress_t start_addr_parent = current_func.parent_pos + ? die_information.die_mem_vec[current_func.parent_pos].start_addr + : 0; + LG_DBG("adding %lx - %lx: %s", current_func.start_addr, + current_func.end_addr, current_func.func_name); inline_map.emplace( NestedSymbolKey{current_func.start_addr, current_func.end_addr}, - NestedSymbolValue(current_func.symbol_idx, - start_addr_parent)); + NestedSymbolValue(current_func.symbol_idx, start_addr_parent)); } // associate line information to die information (includes file info) LineMap &line_map = symbol_wrapper._line_map; - if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, func_map, - line_map, table, die_information))) { + if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, func_map, line_map, + table, die_information))) { LG_DBG("Error when parsing line information for %s (%s)", parent_sym._demangle_name.c_str(), dso._filename.c_str()); } @@ -397,12 +391,11 @@ DDRes DwflSymbolLookup::insert_inlining_info(Dwfl *dwfl, return {}; } -SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, - SymbolTable &table, - DsoSymbolLookup &dso_symbol_lookup, - ProcessAddress_t process_pc, - const Dso &dso, - SymbolWrapper &symbol_wrapper) { +SymbolMap::ValueType & +DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, + SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, + ProcessAddress_t process_pc, const Dso &dso, + SymbolWrapper &symbol_wrapper) { Symbol symbol; GElf_Sym elf_sym; Offset_t lbias; @@ -431,8 +424,10 @@ SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, table[symbol_idx]._symname.c_str(), symbol_idx, dso.to_string().c_str()); #endif - func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - return symbol_idx; + auto res_emplace = + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + assert(res_emplace.second); + return *(res_emplace.first); } if (lbias != ddprof_mod._sym_bias) { @@ -466,38 +461,34 @@ SymbolIdx_t DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, LG_DBG("Insert: %lx,%lx -> %s,%d / shndx=%d", start_sym, end_sym, sym_ref._symname.c_str(), symbol_idx, elf_sym.st_shndx); #endif - func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); - - return symbol_idx; + auto res_emplace = + func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); + assert(res_emplace.second); + return *(res_emplace.first); } } void DwflSymbolLookup::get_inlined(const SymbolWrapper &symbol_wrapper, ElfAddress_t elf_pc, + const SymbolMap::ValueType &parent_sym, std::vector &inlined_symbols) { - const InlineMap& inline_map = symbol_wrapper._inline_map; + const InlineMap &inline_map = symbol_wrapper._inline_map; - NestedSymbolMap::FindRes find_res = inline_map.find_closest(elf_pc); + NestedSymbolKey parent_key{parent_sym.first, parent_sym.second.get_end()}; + NestedSymbolMap::FindRes find_res = + inline_map.find_closest(elf_pc, parent_key); std::vector temp_symbols; - LG_DBG("Looking for %lx (%u)- matched %lx - %lx\n", - elf_pc, inline_map.size(), - find_res.second?find_res.first->first.start:0, - find_res.second?find_res.first->first.end:0); + LG_DBG("Looking for %lx (%lu)- matched %lx - %lx\n", elf_pc, + inline_map.size(), find_res.second ? find_res.first->first.start : 0, + find_res.second ? find_res.first->first.end : 0); while (find_res.second) { temp_symbols.push_back(find_res.first->second.get_symbol_idx()); - if (find_res.first->second.get_parent_addr()) { - find_res = inline_map.find_parent( - find_res.first, find_res.first->second.get_parent_addr()); - assert(find_res.second); - } else { - break; - } + find_res = inline_map.find_parent(find_res.first, parent_key, elf_pc); } // Reverse iterate through temp_symbols and add them to inlined_symbols for (auto it = temp_symbols.rbegin(); it != temp_symbols.rend(); ++it) { - LG_DBG("Adding %d \n", *it); inlined_symbols.push_back(*it); } } diff --git a/src/symbol_map.cc b/src/symbol_map.cc index 56cde76f2..7ad2d537d 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -4,8 +4,8 @@ // Datadog, Inc. #include "symbol_map.hpp" -#include #include +#include namespace ddprof { @@ -39,58 +39,40 @@ SymbolMap::FindRes SymbolMap::find_closest(Offset_t norm_pc) { return {it, is_within(norm_pc, *it)}; } -NestedSymbolMap::FindRes NestedSymbolMap::find_parent(NestedSymbolMap::ConstIt it, - Offset_t norm_pc) const { - // Handle case where start addresses are equal - // We do not want to return the same element, we are looking for parent - if (it != begin() && it->first.start == norm_pc) { +// parent span acts as a bound +NestedSymbolMap::FindRes +NestedSymbolMap::find_parent(NestedSymbolMap::ConstIt it, + const NestedSymbolKey &parent_bound, + Offset_t norm_pc) const { + while (it != begin()) { --it; + if (it->first < parent_bound) { + return {end(), false}; + } if (is_within(norm_pc, *it)) { return {it, true}; } } - while (it != begin() && it->first.start > norm_pc) { - --it; - } - return {it, is_within(norm_pc, *it)}; + return {end(), false}; } -NestedSymbolMap::FindRes NestedSymbolMap::find_closest(Offset_t norm_pc) const { +NestedSymbolMap::FindRes +NestedSymbolMap::find_closest(Offset_t norm_pc, + const NestedSymbolKey &parent_bound) const { // Use the element with the lowest end possible, to ensure we find the // deepest element - auto it = lower_bound(NestedSymbolKey{norm_pc, - 0}); - if (it != end()) { // map is empty + auto it = lower_bound(NestedSymbolKey{norm_pc, 0}); + if (it != end()) { // map not empty + LG_DBG("Found %lx - %lx ", it->first.start, it->first.end); if (is_within(norm_pc, *it)) { return {it, true}; } } - if (it != begin()) { - --it; - } - // Iteratively check the current symbol and its parent symbols - while (it != end()) { - if (is_within(norm_pc, *it)) { - return {it, true}; - } - // Traverse to the parent symbol if available - if (it->second.get_parent_addr() != 0) { - FindRes res = find_parent(it, it->second.get_parent_addr()); - if (res.second) { - it = res.first; - } - else { - assert(0); - break; - } - } else { - break; // No parent, stop the search - } - } - return {end(), false}; + return find_parent(it, parent_bound, norm_pc); } -bool NestedSymbolMap::is_within(const Offset_t &norm_pc, const NestedSymbolMap::ValueType &kv) { +bool NestedSymbolMap::is_within(const Offset_t &norm_pc, + const NestedSymbolMap::ValueType &kv) { if (norm_pc < kv.first.start) { return false; } diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index 95257277e..dd9c66424 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -215,7 +215,8 @@ DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, std::vector symbol_indices{}; unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( us->_dwfl_wrapper->_dwfl, ddprof_mod, unwind_symbol_hdr._symbol_table, - unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso, symbol_indices); + unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso, + symbol_indices); MapInfoIdx_t const map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); return add_frame(symbol_indices, map_idx, pc, us); diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index dc0b23e5a..58123c666 100644 --- a/src/unwind_helpers.cc +++ b/src/unwind_helpers.cc @@ -25,11 +25,8 @@ bool is_max_stack_depth_reached(const UnwindState &us) { return us.output.locs.size() + 2 >= kMaxStackDepth; } - -DDRes add_frame(std::vector symbol_indices, - MapInfoIdx_t map_idx, - ElfAddress_t pc, - UnwindState *us) { +DDRes add_frame(std::vector symbol_indices, MapInfoIdx_t map_idx, + ElfAddress_t pc, UnwindState *us) { for (auto const el : symbol_indices) { DDRES_CHECK_FWD(add_frame(el, map_idx, pc, us)); @@ -37,8 +34,7 @@ DDRes add_frame(std::vector symbol_indices, return {}; } -DDRes add_frame(SymbolIdx_t symbol_idx, - MapInfoIdx_t map_idx, ElfAddress_t pc, +DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us) { UnwindOutput *output = &us->output; if (output->locs.size() >= kMaxStackDepth) { diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 5d3771e26..01ac0cf39 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -242,8 +242,8 @@ TEST(DwflModule, inlined_func) { ASSERT_TRUE(ddprof_mod->_mod); if (find_res.first == it) { std::vector symbol_indices; - symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, - dso_lookup, file_info_id, ip, dso, + symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, + dso_lookup, file_info_id, ip, dso, symbol_indices); const auto &sym = table[symbol_indices.back()]; LG_DBG("Sym = %s", sym._demangle_name.c_str()); diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index 4bb4aebc4..2775e3867 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -5,8 +5,8 @@ #include -#include "symbol_map.hpp" #include "loghandle.hpp" +#include "symbol_map.hpp" namespace ddprof { @@ -20,6 +20,8 @@ TEST(SymbolMap, Span) { } TEST(SymbolMap, Map) { + LogHandle handle; + LG_DBG("Size of SymbolMap::ValueType: %lu", sizeof(SymbolMap::ValueType)); SymbolMap map; SymbolSpan span0_1000(0x1000, 12); map.emplace(0, span0_1000); @@ -27,24 +29,24 @@ TEST(SymbolMap, Map) { EXPECT_TRUE(res.second); } - TEST(NestedSymbolMap, simple) { + NestedSymbolKey parent_key{0x50, 0x1000}; LogHandle handle; NestedSymbolMap map; NestedSymbolValue span100_1000(0, 0); map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); NestedSymbolValue span150_300(1, 0x100); - map.emplace(NestedSymbolKey{0x150,0x300}, span150_300); - for (auto &el:map) { + map.emplace(NestedSymbolKey{0x150, 0x300}, span150_300); + for (auto &el : map) { LG_DBG("Idx = %d", el.second.get_symbol_idx()); } { - NestedSymbolMap::FindRes res = map.find_closest(0x150); + NestedSymbolMap::FindRes res = map.find_closest(0x150, parent_key); EXPECT_TRUE(res.second); EXPECT_EQ(res.first->second.get_symbol_idx(), 1); } { - NestedSymbolMap::FindRes res = map.find_closest(0x400); + NestedSymbolMap::FindRes res = map.find_closest(0x400, parent_key); EXPECT_TRUE(res.second); EXPECT_EQ(res.first->second.get_symbol_idx(), 0); } @@ -53,16 +55,17 @@ TEST(NestedSymbolMap, simple) { TEST(NestedSymbolMap, same_addr) { LogHandle handle; NestedSymbolMap map; + NestedSymbolKey parent_key{0x50, 0x1000}; NestedSymbolValue span100_1000(0); map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); NestedSymbolValue span100_300(1, 0x100); map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); - for (auto &el:map) { + for (auto &el : map) { LG_DBG("Idx = %d", el.second.get_symbol_idx()); } { // always return the deeper element - NestedSymbolMap::FindRes res = map.find_closest(0x100); + NestedSymbolMap::FindRes res = map.find_closest(0x100, parent_key); EXPECT_TRUE(res.second); EXPECT_EQ(res.first->second.get_symbol_idx(), 1); } @@ -70,6 +73,7 @@ TEST(NestedSymbolMap, same_addr) { // todo : fix bug on same start different end with multiple TEST(NestedSymbolMap, InlinedFunctionLookup) { + LogHandle handle; NestedSymbolMap map; // Insert main function map.emplace(NestedSymbolKey{0x1180, 0x128a}, NestedSymbolValue(34, 0)); @@ -81,12 +85,14 @@ TEST(NestedSymbolMap, InlinedFunctionLookup) { map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(5, 0x1180)); map.emplace(NestedSymbolKey{0x11d0, 0x1203}, NestedSymbolValue(6, 0x1180)); map.emplace(NestedSymbolKey{0x11fe, 0x11fe}, NestedSymbolValue(7, 0x11d0)); - // ... (Add other inlined functions similarly) + map.emplace(NestedSymbolKey{0x11d0, 0x11d0}, NestedSymbolValue(8, 0x11d0)); + NestedSymbolKey parent_key{0x1180, 0x1300}; // Test for a specific address - NestedSymbolMap::FindRes res = map.find_closest(0x11e0); + NestedSymbolMap::FindRes res = map.find_closest(0x11e0, parent_key); ASSERT_TRUE(res.second); - EXPECT_EQ(res.first->second.get_symbol_idx(), 6); // Expecting the most specific (deepest) symbol for this address + EXPECT_EQ(res.first->second.get_symbol_idx(), + 6); // Expecting the most specific (deepest) symbol for this address } } // namespace ddprof From 06bbb3f31daee379520df66902a6690af9f2477b Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 27 Nov 2023 12:43:54 +0100 Subject: [PATCH 05/11] - Use die information to retrieve file names - Minor refactor : introduce a dwarf helper file --- include/dwarf_helpers.hpp | 13 +++ include/symbol_map.hpp | 7 +- src/dwarf_helpers.cc | 216 ++++++++++++++++++++++++++++++++++++++ src/dwfl_symbol_lookup.cc | 166 ++++++++++++++++------------- test/CMakeLists.txt | 3 + test/dwfl_module-ut.cc | 2 +- test/symbol_map-ut.cc | 24 ++--- 7 files changed, 338 insertions(+), 93 deletions(-) create mode 100644 include/dwarf_helpers.hpp create mode 100644 src/dwarf_helpers.cc diff --git a/include/dwarf_helpers.hpp b/include/dwarf_helpers.hpp new file mode 100644 index 000000000..f20aaaf8e --- /dev/null +++ b/include/dwarf_helpers.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include "dwfl_internals.hpp" + +#include + +namespace ddprof { + +// debug attribute functions +const char *get_attribute_name(int attrCode); +int print_attribute(Dwarf_Attribute *attr, void *arg); + +} // namespace ddprof diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index d3f9d114d..6237be733 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -58,15 +58,12 @@ class SymbolMap : private std::map { class NestedSymbolValue { public: - NestedSymbolValue() : _symbol_idx(-1), _parent_addr(0) {} - NestedSymbolValue(SymbolIdx_t symbol_idx, ElfAddress_t parent = 0) - : _symbol_idx(symbol_idx), _parent_addr(parent) {} + NestedSymbolValue() : _symbol_idx(-1) {} + NestedSymbolValue(SymbolIdx_t symbol_idx) : _symbol_idx(symbol_idx) {} [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } - [[nodiscard]] ElfAddress_t get_parent_addr() const { return _parent_addr; } private: SymbolIdx_t _symbol_idx; - ElfAddress_t _parent_addr; }; struct NestedSymbolKey { diff --git a/src/dwarf_helpers.cc b/src/dwarf_helpers.cc new file mode 100644 index 000000000..166f695f9 --- /dev/null +++ b/src/dwarf_helpers.cc @@ -0,0 +1,216 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#include "dwarf_helpers.hpp" + +#include "logger.hpp" + +#include +#include + +namespace ddprof { + +const char *get_attribute_name(int attrCode) { + // Should not get init unless called + // Something like following awk can help generate this map: + // cat file_with_dwarf_attributes.txt | + // awk '!/\/\*/ { print "{ "$1", \""$1"\"},"}' + static const std::map attributeNameMap = { + {DW_AT_sibling, "DW_AT_sibling"}, + {DW_AT_location, "DW_AT_location"}, + {DW_AT_name, "DW_AT_name"}, + {DW_AT_ordering, "DW_AT_ordering"}, + {DW_AT_byte_size, "DW_AT_byte_size"}, + {DW_AT_bit_size, "DW_AT_bit_size"}, + {DW_AT_stmt_list, "DW_AT_stmt_list"}, + {DW_AT_low_pc, "DW_AT_low_pc"}, + {DW_AT_high_pc, "DW_AT_high_pc"}, + {DW_AT_language, "DW_AT_language"}, + {DW_AT_discr, "DW_AT_discr"}, + {DW_AT_discr_value, "DW_AT_discr_value"}, + {DW_AT_visibility, "DW_AT_visibility"}, + {DW_AT_import, "DW_AT_import"}, + {DW_AT_string_length, "DW_AT_string_length"}, + {DW_AT_common_reference, "DW_AT_common_reference"}, + {DW_AT_comp_dir, "DW_AT_comp_dir"}, + {DW_AT_const_value, "DW_AT_const_value"}, + {DW_AT_containing_type, "DW_AT_containing_type"}, + {DW_AT_default_value, "DW_AT_default_value"}, + {DW_AT_inline, "DW_AT_inline"}, + {DW_AT_is_optional, "DW_AT_is_optional"}, + {DW_AT_lower_bound, "DW_AT_lower_bound"}, + {DW_AT_producer, "DW_AT_producer"}, + {DW_AT_prototyped, "DW_AT_prototyped"}, + {DW_AT_return_addr, "DW_AT_return_addr"}, + {DW_AT_start_scope, "DW_AT_start_scope"}, + {DW_AT_bit_stride, "DW_AT_bit_stride"}, + {DW_AT_upper_bound, "DW_AT_upper_bound"}, + {DW_AT_abstract_origin, "DW_AT_abstract_origin"}, + {DW_AT_accessibility, "DW_AT_accessibility"}, + {DW_AT_address_class, "DW_AT_address_class"}, + {DW_AT_artificial, "DW_AT_artificial"}, + {DW_AT_base_types, "DW_AT_base_types"}, + {DW_AT_calling_convention, "DW_AT_calling_convention"}, + {DW_AT_count, "DW_AT_count"}, + {DW_AT_data_member_location, "DW_AT_data_member_location"}, + {DW_AT_decl_column, "DW_AT_decl_column"}, + {DW_AT_decl_file, "DW_AT_decl_file"}, + {DW_AT_decl_line, "DW_AT_decl_line"}, + {DW_AT_declaration, "DW_AT_declaration"}, + {DW_AT_discr_list, "DW_AT_discr_list"}, + {DW_AT_encoding, "DW_AT_encoding"}, + {DW_AT_external, "DW_AT_external"}, + {DW_AT_frame_base, "DW_AT_frame_base"}, + {DW_AT_friend, "DW_AT_friend"}, + {DW_AT_identifier_case, "DW_AT_identifier_case"}, + {DW_AT_namelist_item, "DW_AT_namelist_item"}, + {DW_AT_priority, "DW_AT_priority"}, + {DW_AT_segment, "DW_AT_segment"}, + {DW_AT_specification, "DW_AT_specification"}, + {DW_AT_static_link, "DW_AT_static_link"}, + {DW_AT_type, "DW_AT_type"}, + {DW_AT_use_location, "DW_AT_use_location"}, + {DW_AT_variable_parameter, "DW_AT_variable_parameter"}, + {DW_AT_virtuality, "DW_AT_virtuality"}, + {DW_AT_vtable_elem_location, "DW_AT_vtable_elem_location"}, + {DW_AT_allocated, "DW_AT_allocated"}, + {DW_AT_associated, "DW_AT_associated"}, + {DW_AT_data_location, "DW_AT_data_location"}, + {DW_AT_byte_stride, "DW_AT_byte_stride"}, + {DW_AT_entry_pc, "DW_AT_entry_pc"}, + {DW_AT_use_UTF8, "DW_AT_use_UTF8"}, + {DW_AT_extension, "DW_AT_extension"}, + {DW_AT_ranges, "DW_AT_ranges"}, + {DW_AT_trampoline, "DW_AT_trampoline"}, + {DW_AT_call_column, "DW_AT_call_column"}, + {DW_AT_call_file, "DW_AT_call_file"}, + {DW_AT_call_line, "DW_AT_call_line"}, + {DW_AT_description, "DW_AT_description"}, + {DW_AT_binary_scale, "DW_AT_binary_scale"}, + {DW_AT_decimal_scale, "DW_AT_decimal_scale"}, + {DW_AT_small, "DW_AT_small"}, + {DW_AT_decimal_sign, "DW_AT_decimal_sign"}, + {DW_AT_digit_count, "DW_AT_digit_count"}, + {DW_AT_picture_string, "DW_AT_picture_string"}, + {DW_AT_mutable, "DW_AT_mutable"}, + {DW_AT_threads_scaled, "DW_AT_threads_scaled"}, + {DW_AT_explicit, "DW_AT_explicit"}, + {DW_AT_object_pointer, "DW_AT_object_pointer"}, + {DW_AT_endianity, "DW_AT_endianity"}, + {DW_AT_elemental, "DW_AT_elemental"}, + {DW_AT_pure, "DW_AT_pure"}, + {DW_AT_recursive, "DW_AT_recursive"}, + {DW_AT_signature, "DW_AT_signature"}, + {DW_AT_main_subprogram, "DW_AT_main_subprogram"}, + {DW_AT_data_bit_offset, "DW_AT_data_bit_offset"}, + {DW_AT_const_expr, "DW_AT_const_expr"}, + {DW_AT_enum_class, "DW_AT_enum_class"}, + {DW_AT_linkage_name, "DW_AT_linkage_name"}, + {DW_AT_string_length_bit_size, "DW_AT_string_length_bit_size"}, + {DW_AT_string_length_byte_size, "DW_AT_string_length_byte_size"}, + {DW_AT_rank, "DW_AT_rank"}, + {DW_AT_str_offsets_base, "DW_AT_str_offsets_base"}, + {DW_AT_addr_base, "DW_AT_addr_base"}, + {DW_AT_rnglists_base, "DW_AT_rnglists_base"}, + {DW_AT_dwo_name, "DW_AT_dwo_name"}, + {DW_AT_reference, "DW_AT_reference"}, + {DW_AT_rvalue_reference, "DW_AT_rvalue_reference"}, + {DW_AT_macros, "DW_AT_macros"}, + {DW_AT_call_all_calls, "DW_AT_call_all_calls"}, + {DW_AT_call_all_source_calls, "DW_AT_call_all_source_calls"}, + {DW_AT_call_all_tail_calls, "DW_AT_call_all_tail_calls"}, + {DW_AT_call_return_pc, "DW_AT_call_return_pc"}, + {DW_AT_call_value, "DW_AT_call_value"}, + {DW_AT_call_origin, "DW_AT_call_origin"}, + {DW_AT_call_parameter, "DW_AT_call_parameter"}, + {DW_AT_call_pc, "DW_AT_call_pc"}, + {DW_AT_call_tail_call, "DW_AT_call_tail_call"}, + {DW_AT_call_target, "DW_AT_call_target"}, + {DW_AT_call_target_clobbered, "DW_AT_call_target_clobbered"}, + {DW_AT_call_data_location, "DW_AT_call_data_location"}, + {DW_AT_call_data_value, "DW_AT_call_data_value"}, + {DW_AT_noreturn, "DW_AT_noreturn"}, + {DW_AT_alignment, "DW_AT_alignment"}, + {DW_AT_export_symbols, "DW_AT_export_symbols"}, + {DW_AT_deleted, "DW_AT_deleted"}, + {DW_AT_defaulted, "DW_AT_defaulted"}, + {DW_AT_loclists_base, "DW_AT_loclists_base"}, + {DW_AT_lo_user, "DW_AT_lo_user"}, + {DW_AT_MIPS_fde, "DW_AT_MIPS_fde"}, + {DW_AT_MIPS_loop_begin, "DW_AT_MIPS_loop_begin"}, + {DW_AT_MIPS_tail_loop_begin, "DW_AT_MIPS_tail_loop_begin"}, + {DW_AT_MIPS_epilog_begin, "DW_AT_MIPS_epilog_begin"}, + {DW_AT_MIPS_loop_unroll_factor, "DW_AT_MIPS_loop_unroll_factor"}, + {DW_AT_MIPS_software_pipeline_depth, + "DW_AT_MIPS_software_pipeline_depth"}, + {DW_AT_MIPS_linkage_name, "DW_AT_MIPS_linkage_name"}, + {DW_AT_MIPS_stride, "DW_AT_MIPS_stride"}, + {DW_AT_MIPS_abstract_name, "DW_AT_MIPS_abstract_name"}, + {DW_AT_MIPS_clone_origin, "DW_AT_MIPS_clone_origin"}, + {DW_AT_MIPS_has_inlines, "DW_AT_MIPS_has_inlines"}, + {DW_AT_MIPS_stride_byte, "DW_AT_MIPS_stride_byte"}, + {DW_AT_MIPS_stride_elem, "DW_AT_MIPS_stride_elem"}, + {DW_AT_MIPS_ptr_dopetype, "DW_AT_MIPS_ptr_dopetype"}, + {DW_AT_MIPS_allocatable_dopetype, "DW_AT_MIPS_allocatable_dopetype"}, + {DW_AT_MIPS_assumed_shape_dopetype, "DW_AT_MIPS_assumed_shape_dopetype"}, + {DW_AT_MIPS_assumed_size, "DW_AT_MIPS_assumed_size"}, + {DW_AT_sf_names, "DW_AT_sf_names"}, + {DW_AT_src_info, "DW_AT_src_info"}, + {DW_AT_mac_info, "DW_AT_mac_info"}, + {DW_AT_src_coords, "DW_AT_src_coords"}, + {DW_AT_body_begin, "DW_AT_body_begin"}, + {DW_AT_body_end, "DW_AT_body_end"}, + {DW_AT_GNU_vector, "DW_AT_GNU_vector"}, + {DW_AT_GNU_guarded_by, "DW_AT_GNU_guarded_by"}, + {DW_AT_GNU_pt_guarded_by, "DW_AT_GNU_pt_guarded_by"}, + {DW_AT_GNU_guarded, "DW_AT_GNU_guarded"}, + {DW_AT_GNU_pt_guarded, "DW_AT_GNU_pt_guarded"}, + {DW_AT_GNU_locks_excluded, "DW_AT_GNU_locks_excluded"}, + {DW_AT_GNU_exclusive_locks_required, + "DW_AT_GNU_exclusive_locks_required"}, + {DW_AT_GNU_shared_locks_required, "DW_AT_GNU_shared_locks_required"}, + {DW_AT_GNU_odr_signature, "DW_AT_GNU_odr_signature"}, + {DW_AT_GNU_template_name, "DW_AT_GNU_template_name"}, + {DW_AT_GNU_call_site_value, "DW_AT_GNU_call_site_value"}, + {DW_AT_GNU_call_site_data_value, "DW_AT_GNU_call_site_data_value"}, + {DW_AT_GNU_call_site_target, "DW_AT_GNU_call_site_target"}, + {DW_AT_GNU_call_site_target_clobbered, + "DW_AT_GNU_call_site_target_clobbered"}, + {DW_AT_GNU_tail_call, "DW_AT_GNU_tail_call"}, + {DW_AT_GNU_all_tail_call_sites, "DW_AT_GNU_all_tail_call_sites"}, + {DW_AT_GNU_all_call_sites, "DW_AT_GNU_all_call_sites"}, + {DW_AT_GNU_all_source_call_sites, "DW_AT_GNU_all_source_call_sites"}, + {DW_AT_GNU_locviews, "DW_AT_GNU_locviews"}, + {DW_AT_GNU_entry_view, "DW_AT_GNU_entry_view"}, + {DW_AT_GNU_macros, "DW_AT_GNU_macros"}, + {DW_AT_GNU_deleted, "DW_AT_GNU_deleted"}, + {DW_AT_GNU_dwo_name, "DW_AT_GNU_dwo_name"}, + {DW_AT_GNU_dwo_id, "DW_AT_GNU_dwo_id"}, + {DW_AT_GNU_ranges_base, "DW_AT_GNU_ranges_base"}, + {DW_AT_GNU_addr_base, "DW_AT_GNU_addr_base"}, + {DW_AT_GNU_pubnames, "DW_AT_GNU_pubnames"}, + {DW_AT_GNU_pubtypes, "DW_AT_GNU_pubtypes"}, + {DW_AT_GNU_numerator, "DW_AT_GNU_numerator"}, + {DW_AT_GNU_denominator, "DW_AT_GNU_denominator"}, + {DW_AT_GNU_bias, "DW_AT_GNU_bias"}, + {DW_AT_hi_user, "DW_AT_hi_user"}, + }; + auto it = attributeNameMap.find(attrCode); + if (it != attributeNameMap.end()) { + return it->second.c_str(); + } + return "Unknown Attribute"; +} + +int print_attribute(Dwarf_Attribute *attr, void *arg) { + // Extract information from the attribute and print it + // For example, you might want to print the attribute's name and value + // The implementation depends on how you want to display the attributes + LG_DBG("Attribute code %x(%s) - form %d", attr->code, + get_attribute_name(attr->code), attr->form); + // Return a non-zero value to continue iterating through attributes + return 0; +} +} // namespace ddprof diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 8d8e40584..71f77d796 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -6,6 +6,7 @@ #include "dwfl_symbol_lookup.hpp" #include "ddprof_module.hpp" +#include "dwarf_helpers.hpp" #include "dwfl_hdr.hpp" #include "dwfl_internals.hpp" #include "dwfl_symbol.hpp" @@ -29,6 +30,8 @@ struct DieInformation { ElfAddress_t start_addr{}; ElfAddress_t end_addr{}; const char *func_name{}; + const char *file_name{}; + int line_number{0}; int parent_pos{-1}; // position within the die vector SymbolIdx_t symbol_idx = -1; }; @@ -167,12 +170,16 @@ Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, // return index to added element, else returns -1 static int store_die_information(Dwarf_Die *sc_die, int parent_index, - DieInformation &data) { + DieInformation &data, + Dwarf_Files *dwarf_files) { + dwarf_getattrs(sc_die, print_attribute, nullptr, 0); + // function or inlined function DieInformation::Function function{}; // die_name is usually the raw function name (no mangling info) // link name can have mangling info function.func_name = dwarf_diename(sc_die); + LG_DBG("Storing func = %s", function.func_name); Dwarf_Attribute attr_mem; Dwarf_Attribute *attr; if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { @@ -192,20 +199,46 @@ static int store_die_information(Dwarf_Die *sc_die, int parent_index, } } } - // we often can find duplicates within the dwarf information - // some of the tags don't have the start and end info - if (function.start_addr && function.end_addr) { - function.parent_pos = parent_index; - data.die_mem_vec.push_back(std::move(function)); - return (data.die_mem_vec.size() - 1); + // some of the functions don't have the start and end info + if (!function.start_addr || !function.end_addr) { + return -1; + } + + // for inlined functions, we do not often get decl tag + if (dwarf_files && + ((attr = dwarf_attr(sc_die, DW_AT_decl_file, &attr_mem)) || + (attr = dwarf_attr(sc_die, DW_AT_call_file, &attr_mem)))) { + Dwarf_Word fileIdx = 0; + if (dwarf_formudata(attr, &fileIdx) == 0) { + // Assuming 'files' is a structure holding file information + // for the current compilation unit, obtained beforehand + const char *file = dwarf_filesrc(dwarf_files, fileIdx, NULL, NULL); + // Store or process the file name + function.file_name = file; + LG_DBG("File associated with function: %s", file); + } } - return -1; + + if ((attr = dwarf_attr(sc_die, DW_AT_decl_line, &attr_mem)) || + (attr = dwarf_attr(sc_die, DW_AT_call_line, &attr_mem))) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.line_number = return_uval; + LG_DBG("Line number associated with function: %u", function.line_number); + } + } + + // we often can find duplicates within the dwarf information + function.parent_pos = parent_index; + data.die_mem_vec.push_back(std::move(function)); + return (data.die_mem_vec.size() - 1); } static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, int parent_index, DieInformation &die_info, - Dwarf_Die *die_mem) { + Dwarf_Die *die_mem, + Dwarf_Files *dwarf_files) { Dwarf_Die child_die; int ret; ret = dwarf_child(current_die, die_mem); @@ -215,69 +248,54 @@ static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, int tag_val = dwarf_tag(die_mem); int next_parent_idx = parent_index; if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { - int current_idx = store_die_information(die_mem, parent_index, die_info); + int current_idx = + store_die_information(die_mem, parent_index, die_info, dwarf_files); next_parent_idx = (current_idx != -1 ? current_idx : next_parent_idx); } // // todo: optimize the exploration to avoid going through soo many elements - find_functions_in_child_die(die_mem, next_parent_idx, die_info, &child_die); + // Child dies can have functions, even without being a child of another func + find_functions_in_child_die(die_mem, next_parent_idx, die_info, &child_die, + dwarf_files); } while (dwarf_siblingof(die_mem, die_mem) == 0); return nullptr; } -static DDRes parse_die_information(Dwfl *dwfl, ProcessAddress_t process_pc, +static DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, DieInformation &die_information) { - Dwarf_Addr bias; - Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + Dwarf_Files *files = nullptr; + size_t nfiles = 0; if (cudie == nullptr) { DDRES_RETURN_WARN_LOG(DD_WHAT_NO_DWARF, "Unable to retrieve cu die"); } - ElfAddress_t elf_addr = process_pc - bias; + // cached within the CU + if (dwarf_getsrcfiles(cudie, &files, &nfiles) != 0) { + files = nullptr; + } Dwarf_Die die_mem; Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); if (sc_die == nullptr) { DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, "Unable to retrieve sc_die"); } // store parent function at index 0 - if (store_die_information(sc_die, -1, die_information) == -1) { + if (store_die_information(sc_die, -1, die_information, files) == -1) { LG_DBG("Unable to store the parent function"); - // todo figure this out. (example : dl_iterate_phdr) + // On some functions we are unable to find start / end info return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } - find_functions_in_child_die(sc_die, 0, die_information, &die_mem); + find_functions_in_child_die(sc_die, 0, die_information, &die_mem, files); for (auto &el : die_information.die_mem_vec) { - LG_DBG("Inlined func = %s / start=%lx / end=%lx / parent=%d", el.func_name, - el.start_addr, el.end_addr, el.parent_pos); + LG_DBG("Inlined func start=%lx / end=%lx / Sym = %s / parent=%d", + el.start_addr, el.end_addr, el.func_name, el.parent_pos); } return {}; } -int find_closest_after(const std::vector &values, int reference) { - int closest = -1; - for (int val : values) { - if (val > reference && (closest == -1 || val < closest)) { - closest = val; - } - } - return closest; -} - -static DDRes parse_lines(Dwfl *dwfl, const DDProfMod &mod, +static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, ProcessAddress_t process_pc, SymbolMap &func_map, DwflSymbolLookup::LineMap &line_map, SymbolTable &table, DieInformation &die_information) { - Dwarf_Addr bias; - // This will lazily load the dwarf file - // the decompression can take time - // todo use dw access to avoid opening PID times - - Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); - - if (!cudie) { - // This will fail in case of no debug info - return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); - } Dwarf_Lines *lines; size_t nlines; @@ -324,59 +342,63 @@ DDRes DwflSymbolLookup::insert_inlining_info( SymbolMap::ValueType &parent_func) { SymbolIdx_t parent_sym_idx = parent_func.second.get_symbol_idx(); - + Dwarf_Addr bias; + Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); + if (!cudie) { + Symbol &parent_sym = table[parent_sym_idx]; + LG_DBG("No debug information for %s (%s)", + parent_sym._demangle_name.c_str(), dso._filename.c_str()); + parent_sym._srcpath = dso.format_filename(); + return ddres_warn(DD_WHAT_NO_DWARF); + } + ElfAddress_t elf_addr = process_pc - bias; SymbolMap &func_map = symbol_wrapper._symbol_map; DieInformation die_information; - Symbol &parent_sym = table[parent_sym_idx]; - if (!IsDDResOK(parse_die_information(dwfl, process_pc, die_information)) || + if (!IsDDResOK(parse_die_information(cudie, elf_addr, die_information)) || die_information.die_mem_vec.size() == 0) { + Symbol &parent_sym = table[parent_sym_idx]; LG_DBG("Error when parsing die information for %s (%s)", parent_sym._demangle_name.c_str(), dso._filename.c_str()); return ddres_warn(DD_WHAT_NO_DWARF); } - // Create associated symbols - // todo : is there value in extending the elf span using dwarf info ? - if (parent_func.second.get_end() < die_information.die_mem_vec[0].end_addr) { + + // Extend the span of the elf symbol + if ((parent_func.second.get_end() + 1) < + die_information.die_mem_vec[0].end_addr) { LG_DBG("Extending end of parent func from %lx to %lx", parent_func.second.get_end(), die_information.die_mem_vec[0].end_addr); parent_func.second.set_end(die_information.die_mem_vec[0].end_addr); } + die_information.die_mem_vec[0].symbol_idx = parent_sym_idx; - if (die_information.die_mem_vec.size() <= 1) { - // no inlining info - return {}; + // update parent file name + if (die_information.die_mem_vec[0].file_name) { + auto &sym = table[parent_sym_idx]; + sym._srcpath = die_information.die_mem_vec[0].file_name; } - // todo: ensure we group the inlined symbols to avoid dupes - // todo move this part - die_information.die_mem_vec[0].symbol_idx = parent_sym_idx; - NestedSymbolMap &inline_map = symbol_wrapper._inline_map; for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; - table.emplace_back(Symbol({}, current_func.func_name, - 0, // no line for now - {})); + table.emplace_back( + Symbol({}, current_func.func_name ? current_func.func_name : "undef", + current_func.line_number, + current_func.file_name ? current_func.file_name : "")); current_func.symbol_idx = table.size() - 1; // add to the lookup - assert(current_func.parent_pos != -1); - // Parent function (position 0) is not added to the inlined functions - ElfAddress_t start_addr_parent = current_func.parent_pos - ? die_information.die_mem_vec[current_func.parent_pos].start_addr - : 0; LG_DBG("adding %lx - %lx: %s", current_func.start_addr, current_func.end_addr, current_func.func_name); inline_map.emplace( NestedSymbolKey{current_func.start_addr, current_func.end_addr}, - NestedSymbolValue(current_func.symbol_idx, start_addr_parent)); + NestedSymbolValue(current_func.symbol_idx)); } + // associate line information to die information (includes file info) LineMap &line_map = symbol_wrapper._line_map; - if (IsDDResNotOK(parse_lines(dwfl, ddprof_mod, process_pc, func_map, line_map, - table, die_information))) { - LG_DBG("Error when parsing line information for %s (%s)", - parent_sym._demangle_name.c_str(), dso._filename.c_str()); + if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, process_pc, func_map, + line_map, table, die_information))) { + LG_DBG("Error when parsing line information (%s)", dso._filename.c_str()); } for (unsigned pos = 0; pos < die_information.die_mem_vec.size(); ++pos) { @@ -477,20 +499,14 @@ void DwflSymbolLookup::get_inlined(const SymbolWrapper &symbol_wrapper, NestedSymbolKey parent_key{parent_sym.first, parent_sym.second.get_end()}; NestedSymbolMap::FindRes find_res = inline_map.find_closest(elf_pc, parent_key); - std::vector temp_symbols; LG_DBG("Looking for %lx (%lu)- matched %lx - %lx\n", elf_pc, inline_map.size(), find_res.second ? find_res.first->first.start : 0, find_res.second ? find_res.first->first.end : 0); while (find_res.second) { - temp_symbols.push_back(find_res.first->second.get_symbol_idx()); + inlined_symbols.push_back(find_res.first->second.get_symbol_idx()); find_res = inline_map.find_parent(find_res.first, parent_key, elf_pc); } - - // Reverse iterate through temp_symbols and add them to inlined_symbols - for (auto it = temp_symbols.rbegin(); it != temp_symbols.rend(); ++it) { - inlined_symbols.push_back(*it); - } } bool DwflSymbolLookup::symbol_lookup_check(Dwfl_Module *mod, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0e7c92f26..c71ada269 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -201,6 +201,7 @@ add_unit_test( dwfl_module-ut dwfl_module-ut.cc ../src/build_id.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc @@ -232,6 +233,7 @@ add_unit_test( ../src/dso.cc ../src/dso_hdr.cc ../src/dso_symbol_lookup.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc @@ -272,6 +274,7 @@ set(ALLOCATION_TRACKER_UT_SRCS ../src/dso.cc ../src/dso_hdr.cc ../src/dso_symbol_lookup.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 01ac0cf39..0393b7c02 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -245,7 +245,7 @@ TEST(DwflModule, inlined_func) { symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, dso_lookup, file_info_id, ip, dso, symbol_indices); - const auto &sym = table[symbol_indices.back()]; + const auto &sym = table[symbol_indices[0]]; LG_DBG("Sym = %s", sym._demangle_name.c_str()); EXPECT_EQ(sym._demangle_name, "deeper_function"); } diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index 2775e3867..63051b7b0 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -33,9 +33,9 @@ TEST(NestedSymbolMap, simple) { NestedSymbolKey parent_key{0x50, 0x1000}; LogHandle handle; NestedSymbolMap map; - NestedSymbolValue span100_1000(0, 0); + NestedSymbolValue span100_1000(0); map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); - NestedSymbolValue span150_300(1, 0x100); + NestedSymbolValue span150_300(1); map.emplace(NestedSymbolKey{0x150, 0x300}, span150_300); for (auto &el : map) { LG_DBG("Idx = %d", el.second.get_symbol_idx()); @@ -58,7 +58,7 @@ TEST(NestedSymbolMap, same_addr) { NestedSymbolKey parent_key{0x50, 0x1000}; NestedSymbolValue span100_1000(0); map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); - NestedSymbolValue span100_300(1, 0x100); + NestedSymbolValue span100_300(1); map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); for (auto &el : map) { LG_DBG("Idx = %d", el.second.get_symbol_idx()); @@ -76,16 +76,16 @@ TEST(NestedSymbolMap, InlinedFunctionLookup) { LogHandle handle; NestedSymbolMap map; // Insert main function - map.emplace(NestedSymbolKey{0x1180, 0x128a}, NestedSymbolValue(34, 0)); + map.emplace(NestedSymbolKey{0x1180, 0x128a}, NestedSymbolValue(34)); // Insert inlined functions as per the log - map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(1, 0x1180)); - map.emplace(NestedSymbolKey{0x11bd, 0x11c4}, NestedSymbolValue(2, 0x1180)); - map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(3, 0x1180)); - map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(4, 0x1180)); - map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(5, 0x1180)); - map.emplace(NestedSymbolKey{0x11d0, 0x1203}, NestedSymbolValue(6, 0x1180)); - map.emplace(NestedSymbolKey{0x11fe, 0x11fe}, NestedSymbolValue(7, 0x11d0)); - map.emplace(NestedSymbolKey{0x11d0, 0x11d0}, NestedSymbolValue(8, 0x11d0)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(1)); + map.emplace(NestedSymbolKey{0x11bd, 0x11c4}, NestedSymbolValue(2)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(3)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(4)); + map.emplace(NestedSymbolKey{0x11bd, 0x11bd}, NestedSymbolValue(5)); + map.emplace(NestedSymbolKey{0x11d0, 0x1203}, NestedSymbolValue(6)); + map.emplace(NestedSymbolKey{0x11fe, 0x11fe}, NestedSymbolValue(7)); + map.emplace(NestedSymbolKey{0x11d0, 0x11d0}, NestedSymbolValue(8)); NestedSymbolKey parent_key{0x1180, 0x1300}; // Test for a specific address From 776aec44ce73b1712ea7e9b392c2a788bb1a79ce Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 27 Nov 2023 22:10:08 +0100 Subject: [PATCH 06/11] Add line parsing logics --- src/dwarf_helpers.cc | 2 +- src/dwfl_symbol_lookup.cc | 130 ++++++++++++++++++++++++++------------ src/symbol_map.cc | 1 - test/CMakeLists.txt | 6 +- 4 files changed, 94 insertions(+), 45 deletions(-) diff --git a/src/dwarf_helpers.cc b/src/dwarf_helpers.cc index 166f695f9..ea05aac03 100644 --- a/src/dwarf_helpers.cc +++ b/src/dwarf_helpers.cc @@ -13,7 +13,7 @@ namespace ddprof { const char *get_attribute_name(int attrCode) { - // Should not get init unless called + // Should not get init unless cablled // Something like following awk can help generate this map: // cat file_with_dwarf_attributes.txt | // awk '!/\/\*/ { print "{ "$1", \""$1"\"},"}' diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 71f77d796..46a0f0956 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -132,6 +132,7 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, // then add the elf symbol symbol_indices.push_back(find_res.first->second.get_symbol_idx()); } else { + const size_t previous_table_size = table.size(); // insert symbols using elf info SymbolMap::ValueType &elf_sym = insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, @@ -142,6 +143,16 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, get_inlined(symbol_wrapper, elf_pc, elf_sym, symbol_indices); // add elf symbol after symbol_indices.push_back(elf_sym.second.get_symbol_idx()); + // For newly added symbols, insure we don't leave a blank file name + for (unsigned i = previous_table_size; i < table.size(); ++i) { + auto &sym = table[i]; + if (sym._srcpath.empty()) { + // override with info from dso (this slightly mixes mappings and + // sources) But it helps a lot at Datadog (as mappings are ignored for + // now in UI) + sym._srcpath = dso.format_filename(); + } + } } return; } @@ -172,14 +183,14 @@ Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, static int store_die_information(Dwarf_Die *sc_die, int parent_index, DieInformation &data, Dwarf_Files *dwarf_files) { +#ifdef DEEP_DEBUG dwarf_getattrs(sc_die, print_attribute, nullptr, 0); - +#endif // function or inlined function DieInformation::Function function{}; // die_name is usually the raw function name (no mangling info) // link name can have mangling info function.func_name = dwarf_diename(sc_die); - LG_DBG("Storing func = %s", function.func_name); Dwarf_Attribute attr_mem; Dwarf_Attribute *attr; if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { @@ -215,7 +226,6 @@ static int store_die_information(Dwarf_Die *sc_die, int parent_index, const char *file = dwarf_filesrc(dwarf_files, fileIdx, NULL, NULL); // Store or process the file name function.file_name = file; - LG_DBG("File associated with function: %s", file); } } @@ -224,7 +234,6 @@ static int store_die_information(Dwarf_Die *sc_die, int parent_index, Dwarf_Word return_uval; if (dwarf_formudata(attr, &return_uval) == 0) { function.line_number = return_uval; - LG_DBG("Line number associated with function: %u", function.line_number); } } @@ -265,9 +274,7 @@ static DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, DieInformation &die_information) { Dwarf_Files *files = nullptr; size_t nfiles = 0; - if (cudie == nullptr) { - DDRES_RETURN_WARN_LOG(DD_WHAT_NO_DWARF, "Unable to retrieve cu die"); - } + assert(cudie); // cached within the CU if (dwarf_getsrcfiles(cudie, &files, &nfiles) != 0) { files = nullptr; @@ -275,7 +282,8 @@ static DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, Dwarf_Die die_mem; Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); if (sc_die == nullptr) { - DDRES_RETURN_WARN_LOG(DD_WHAT_DWFL_LIB_ERROR, "Unable to retrieve sc_die"); + LG_DBG("Unable to retrieve sc_die at %lx", elf_addr); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } // store parent function at index 0 if (store_die_information(sc_die, -1, die_information, files) == -1) { @@ -293,16 +301,19 @@ static DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, } static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, - ProcessAddress_t process_pc, SymbolMap &func_map, - DwflSymbolLookup::LineMap &line_map, + ProcessAddress_t process_pc, + DwflSymbolLookup::SymbolWrapper &symbol_wrapper, SymbolTable &table, DieInformation &die_information) { + DwflSymbolLookup::LineMap &line_map = symbol_wrapper._line_map; + DwflSymbolLookup::InlineMap &inline_map = symbol_wrapper._inline_map; Dwarf_Lines *lines; size_t nlines; if (dwarf_getsrclines(cudie, &lines, &nlines) != 0) { LG_DBG("Unable to find source lines"); return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } +#ifdef V1 for (size_t func_pos = 0; func_pos < die_information.die_mem_vec.size(); ++func_pos) { const DieInformation::Function ¤t_func = @@ -321,21 +332,80 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, } const char *current_file = dwarf_linesrc(line, nullptr, nullptr); if (current_file) { + LG_DBG("Start file = %s vs %s", current_file, ref_sym._srcpath.c_str()); ref_sym._srcpath = std::string(current_file); } } - // todo loop on all lines to get real line numbers and cache them - // auto it = line_map.begin(); - // line_map.insert(it, - // std::pair( - // static_cast(current_func.end_addr), - // DwflSymbolLookup::Line{static_cast(lineno), - // current_func.end_addr, - // current_func.symbol_idx})); +#endif + const DieInformation::Function &parent_func = die_information.die_mem_vec[0]; + NestedSymbolKey parent_bound{parent_func.start_addr, parent_func.end_addr}; + int start_index = + binary_search_start_index(lines, nlines, parent_bound.start); + std::unordered_map> + file_frequencies; + + auto it = line_map.begin(); + for (size_t line_index = start_index; line_index < nlines; ++line_index) { + Dwarf_Line *line = dwarf_onesrcline(lines, line_index); + Dwarf_Addr line_addr; + dwarf_lineaddr(line, &line_addr); + if (line_addr > parent_bound.end) { + break; + } + + int lineno; + if (dwarf_lineno(line, &lineno) == -1) { + lineno = 0; // Handle the case where line number is not available + } + + // Determine the associated function for this line + const auto leaf_func = inline_map.find_closest(line_addr, parent_bound); + SymbolIdx_t symbol_idx; + if (!leaf_func.second) { + symbol_idx = parent_func.symbol_idx; + } else { + symbol_idx = leaf_func.first->second.get_symbol_idx(); + } + Symbol &ref_sym = table[symbol_idx]; + // Update the source path if necessary + const char *current_file = dwarf_linesrc(line, nullptr, nullptr); + file_frequencies[symbol_idx][current_file]++; + + LG_DBG("Associate %d / %s to %s (vs %s)", lineno, + current_file ? current_file : "undef", + ref_sym._demangle_name.c_str(), ref_sym._srcpath.c_str()); + + // Insert into the line map + it = line_map.insert( + it, + std::make_pair(static_cast(line_addr), + DwflSymbolLookup::Line{static_cast(lineno), + line_addr, symbol_idx})); + } + for (auto &symbol_entry : file_frequencies) { + SymbolIdx_t symbol_idx = symbol_entry.first; + Symbol &ref_sym = table[symbol_idx]; + const char *most_frequent_file = nullptr; + int max_frequency = 0; + + for (auto &file_entry : symbol_entry.second) { + if (file_entry.second > max_frequency) { + max_frequency = file_entry.second; + most_frequent_file = file_entry.first; + } + } + + if (most_frequent_file) { + LG_DBG("Associate to sym %s file %s", ref_sym._demangle_name.c_str(), + most_frequent_file); + ref_sym._srcpath = most_frequent_file; + } + } return {}; } +// todo: put dso part in parent DDRes DwflSymbolLookup::insert_inlining_info( Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper, @@ -348,11 +418,9 @@ DDRes DwflSymbolLookup::insert_inlining_info( Symbol &parent_sym = table[parent_sym_idx]; LG_DBG("No debug information for %s (%s)", parent_sym._demangle_name.c_str(), dso._filename.c_str()); - parent_sym._srcpath = dso.format_filename(); return ddres_warn(DD_WHAT_NO_DWARF); } ElfAddress_t elf_addr = process_pc - bias; - SymbolMap &func_map = symbol_wrapper._symbol_map; DieInformation die_information; if (!IsDDResOK(parse_die_information(cudie, elf_addr, die_information)) || die_information.die_mem_vec.size() == 0) { @@ -387,29 +455,16 @@ DDRes DwflSymbolLookup::insert_inlining_info( current_func.file_name ? current_func.file_name : "")); current_func.symbol_idx = table.size() - 1; // add to the lookup - LG_DBG("adding %lx - %lx: %s", current_func.start_addr, - current_func.end_addr, current_func.func_name); inline_map.emplace( NestedSymbolKey{current_func.start_addr, current_func.end_addr}, NestedSymbolValue(current_func.symbol_idx)); } // associate line information to die information (includes file info) - LineMap &line_map = symbol_wrapper._line_map; - if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, process_pc, func_map, - line_map, table, die_information))) { + if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, process_pc, symbol_wrapper, + table, die_information))) { LG_DBG("Error when parsing line information (%s)", dso._filename.c_str()); } - - for (unsigned pos = 0; pos < die_information.die_mem_vec.size(); ++pos) { - DieInformation::Function &func = die_information.die_mem_vec[pos]; - auto &sym = table[func.symbol_idx]; - if (sym._srcpath.empty()) { - // override with info from dso (this slightly mixes mappings and sources) - // But it helps a lot at Datadog (as mappings are ignored for now in UI) - sym._srcpath = dso.format_filename(); - } - } return {}; } @@ -440,11 +495,6 @@ DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, dso_symbol_lookup.get_or_insert(elf_pc, dso, table); #else SymbolIdx_t const symbol_idx = dso_symbol_lookup.get_or_insert(dso, table); -#endif -#ifdef DEBUG - LG_NTC("Insert (dwfl failure): %lx,%lx -> %s,%d,%s", start_sym, end_sym, - table[symbol_idx]._symname.c_str(), symbol_idx, - dso.to_string().c_str()); #endif auto res_emplace = func_map.emplace(start_sym, SymbolSpan(end_sym, symbol_idx)); diff --git a/src/symbol_map.cc b/src/symbol_map.cc index 7ad2d537d..8eb4fa65a 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -63,7 +63,6 @@ NestedSymbolMap::find_closest(Offset_t norm_pc, // deepest element auto it = lower_bound(NestedSymbolKey{norm_pc, 0}); if (it != end()) { // map not empty - LG_DBG("Found %lx - %lx ", it->first.start, it->first.end); if (is_within(norm_pc, *it)) { return {it, true}; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c71ada269..413d74b25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -201,7 +201,7 @@ add_unit_test( dwfl_module-ut dwfl_module-ut.cc ../src/build_id.cc - ../src/dwarf_helpers.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc @@ -233,7 +233,7 @@ add_unit_test( ../src/dso.cc ../src/dso_hdr.cc ../src/dso_symbol_lookup.cc - ../src/dwarf_helpers.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc @@ -274,7 +274,7 @@ set(ALLOCATION_TRACKER_UT_SRCS ../src/dso.cc ../src/dso_hdr.cc ../src/dso_symbol_lookup.cc - ../src/dwarf_helpers.cc + ../src/dwarf_helpers.cc ../src/dwfl_hdr.cc ../src/ddprof_module_lib.cc ../src/dwfl_symbol.cc From 6ab3aee196852ccd9289001d2072919e5096395c Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 30 Nov 2023 14:01:26 +0100 Subject: [PATCH 07/11] Inlining Add line information to dwfl symbol lookup logic --- docs/Build.md | 2 +- include/dwfl_symbol_lookup.hpp | 24 ++-- include/symbol_map.hpp | 11 +- include/unwind_helpers.hpp | 2 + include/unwind_output.hpp | 2 +- include/unwind_output_hash.hpp | 2 +- setup_env.sh | 6 + src/dwfl_symbol_lookup.cc | 244 ++++++++++++++++++--------------- src/pprof/ddprof_pprof.cc | 6 +- src/symbol_map.cc | 28 ++++ src/unwind_dwfl.cc | 12 +- src/unwind_helpers.cc | 14 +- test/dwfl_module-ut.cc | 6 +- test/savecontext-ut.cc | 2 +- test/unwind_output_mock.hpp | 2 +- 15 files changed, 220 insertions(+), 143 deletions(-) diff --git a/docs/Build.md b/docs/Build.md index 6b0d4a9e8..e841ed3ab 100644 --- a/docs/Build.md +++ b/docs/Build.md @@ -47,7 +47,7 @@ Using the build image will guarantee this. ```bash CXX=clang++-17 CC=clang-17 source ./setup_env.sh MkBuildDir ClangDeb - DebCMake -DENABLE_CLANG_TIDY=ON ../ +DebCMake -DENABLE_CLANG_TIDY=ON ../ ``` ### Updating libdatadog diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index 4caf2325c..0fae3d883 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -14,6 +14,7 @@ #include "hash_helper.hpp" #include "symbol_map.hpp" #include "symbol_table.hpp" +#include "unwind_output.hpp" #include #include @@ -51,7 +52,7 @@ class DwflSymbolLookup { void get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, DsoSymbolLookup &dso_symbol_lookup, FileInfoId_t file_info_id, ProcessAddress_t process_pc, - const Dso &dso, std::vector &symbol_indices); + const Dso &dso, std::vector &func_locs); void erase(FileInfoId_t file_info_id) { _file_info_function_map.erase(file_info_id); @@ -62,12 +63,8 @@ class DwflSymbolLookup { const DwflSymbolLookupStats &stats() const { return _stats; } DwflSymbolLookupStats &stats() { return _stats; } - struct Line { - uint32_t _line{}; - ElfAddress_t _addr{}; - SymbolIdx_t _symbol_idx{-1}; - }; - using LineMap = std::map; + // todo: we can have a better type than symbol idx for the line + using LineMap = SymbolMap; using InlineMap = NestedSymbolMap; struct SymbolWrapper { LineMap _line_map; @@ -90,16 +87,21 @@ class DwflSymbolLookup { ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper); + static void add_fun_loc(DwflSymbolLookup::SymbolWrapper &symbol_wrapper, + const SymbolMap::ValueType &parent_sym, + ElfAddress_t elf_pc, ProcessAddress_t process_pc, + std::vector &func_locs); + static DDRes insert_inlining_info(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper, SymbolMap::ValueType &parent_func); - static void get_inlined(const SymbolWrapper &symbol_wrapper, - ElfAddress_t elf_pc, - const SymbolMap::ValueType &parent_sym, - std::vector &inlined_symbols); + static NestedSymbolMap::FindRes + get_inlined(SymbolWrapper &symbol_wrapper, ElfAddress_t process_pc, + ElfAddress_t elf_pc, const SymbolMap::ValueType &parent_sym, + std::vector &func_locs); // Symbols are ordered by file. // The assumption is that the elf addresses are the same across processes diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index 6237be733..ed3151809 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -58,12 +58,15 @@ class SymbolMap : private std::map { class NestedSymbolValue { public: - NestedSymbolValue() : _symbol_idx(-1) {} - NestedSymbolValue(SymbolIdx_t symbol_idx) : _symbol_idx(symbol_idx) {} + NestedSymbolValue() : _symbol_idx(-1), _call_line_number(0) {} + NestedSymbolValue(SymbolIdx_t symbol_idx, int call_line_number = 0) + : _symbol_idx(symbol_idx), _call_line_number(call_line_number) {} [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } + [[nodiscard]] int get_call_line_number() const { return _call_line_number; } private: SymbolIdx_t _symbol_idx; + int _call_line_number; }; struct NestedSymbolKey { @@ -101,6 +104,10 @@ class NestedSymbolMap : private std::map { FindRes find_closest(Offset_t norm_pc, const NestedSymbolKey &parent_bound) const; + FindRes find_closest_hint(Offset_t norm_pc, + const NestedSymbolKey &parent_bound, + ConstIt hint) const; + static bool is_within(const Offset_t &norm_pc, const ValueType &kv); }; diff --git a/include/unwind_helpers.hpp b/include/unwind_helpers.hpp index 67458b873..9639caf7e 100644 --- a/include/unwind_helpers.hpp +++ b/include/unwind_helpers.hpp @@ -17,6 +17,8 @@ struct UnwindState; bool is_max_stack_depth_reached(const UnwindState &us); +DDRes add_frame(const std::vector &fun_locs, UnwindState *us); + DDRes add_frame(std::vector symbol_indices, MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us); diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index ff2c8ad14..8996435c9 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -17,7 +17,7 @@ namespace ddprof { struct FunLoc { - uint64_t ip{}; + uint64_t _ip{}; uint32_t _lineno{}; SymbolIdx_t _symbol_idx{-1}; MapInfoIdx_t _map_info_idx{-1}; diff --git a/include/unwind_output_hash.hpp b/include/unwind_output_hash.hpp index c5fc1d4c6..f7452b8db 100644 --- a/include/unwind_output_hash.hpp +++ b/include/unwind_output_hash.hpp @@ -15,7 +15,7 @@ struct UnwindOutputHash { hash_combine(seed, uo.pid); hash_combine(seed, uo.tid); for (const auto &fl : uo.locs) { - hash_combine(seed, fl.ip); + hash_combine(seed, fl._ip); hash_combine(seed, fl._symbol_idx); hash_combine(seed, fl._map_info_idx); } diff --git a/setup_env.sh b/setup_env.sh index 069189d51..f616bd6a4 100755 --- a/setup_env.sh +++ b/setup_env.sh @@ -76,6 +76,12 @@ CmakeWithOptions() { eval ${cmake_cmd} } + +RelDebCMake() { + local BUILD_TYPE=RelWithDebInfo + CmakeWithOptions ${BUILD_TYPE} $@ +} + RelCMake() { local BUILD_TYPE=Release CmakeWithOptions ${BUILD_TYPE} $@ diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 46a0f0956..6b629b34b 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -19,6 +19,7 @@ #include #include #include +#include #define DEBUG @@ -31,7 +32,8 @@ struct DieInformation { ElfAddress_t end_addr{}; const char *func_name{}; const char *file_name{}; - int line_number{0}; + int decl_line_number{0}; + int call_line_number{0}; int parent_pos{-1}; // position within the die vector SymbolIdx_t symbol_idx = -1; }; @@ -67,7 +69,7 @@ size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, } } - return 0; // Return a default value if no suitable index is found + return nlines; // Return a default value if no suitable index is found } } // namespace @@ -93,6 +95,30 @@ unsigned DwflSymbolLookup::size() const { return total_nb_elts; } +void DwflSymbolLookup::add_fun_loc( + DwflSymbolLookup::SymbolWrapper &symbol_wrapper, + const SymbolMap::ValueType &parent_sym, ElfAddress_t elf_pc, + ProcessAddress_t process_pc, std::vector &func_locs) { + auto last_inlined = + get_inlined(symbol_wrapper, process_pc, elf_pc, parent_sym, func_locs); + uint32_t line = 0; + if (last_inlined.second) { + line = last_inlined.first->second.get_call_line_number(); + } else { + // line can be associated to parent + const auto line_find = symbol_wrapper._line_map.find_closest(elf_pc); + if (line_find.second) { + line = line_find.first->second.get_symbol_idx(); + } + } + LG_DBG("Adding parent = %d", parent_sym.second.get_symbol_idx()); + func_locs.emplace_back( + FunLoc{._ip = process_pc, + ._lineno = line, + ._symbol_idx = parent_sym.second.get_symbol_idx(), + ._map_info_idx = -1}); +} + // Retrieve existing symbol or attempt to read from dwarf void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, @@ -100,10 +126,9 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, FileInfoId_t file_info_id, ProcessAddress_t process_pc, const Dso &dso, - std::vector &symbol_indices) { + std::vector &func_locs) { ++_stats._calls; ElfAddress_t const elf_pc = process_pc - ddprof_mod._sym_bias; - #ifdef DEBUG LG_DBG("Looking for : %lx = (%lx - %lx) / dso:%s", elf_pc, process_pc, ddprof_mod._low_addr, dso._filename.c_str()); @@ -113,7 +138,7 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolMap::FindRes const find_res = map.find_closest(elf_pc); if (find_res.second) { // already found the correct symbol #ifdef DEBUG - LG_DBG("Match : %lx,%lx -> %s,%d", find_res.first->first, + LG_DBG("Match: %lx,%lx -> %s,%d", find_res.first->first, find_res.first->second.get_end(), table[find_res.first->second.get_symbol_idx()]._symname.c_str(), find_res.first->second.get_symbol_idx()); @@ -127,10 +152,7 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, } } ++_stats._hit; - // add the inlined symbols on top - get_inlined(symbol_wrapper, elf_pc, *find_res.first, symbol_indices); - // then add the elf symbol - symbol_indices.push_back(find_res.first->second.get_symbol_idx()); + add_fun_loc(symbol_wrapper, *find_res.first, elf_pc, process_pc, func_locs); } else { const size_t previous_table_size = table.size(); // insert symbols using elf info @@ -140,9 +162,6 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, // parse associated dwarf info insert_inlining_info(dwfl, ddprof_mod, table, process_pc, dso, symbol_wrapper, elf_sym); - get_inlined(symbol_wrapper, elf_pc, elf_sym, symbol_indices); - // add elf symbol after - symbol_indices.push_back(elf_sym.second.get_symbol_idx()); // For newly added symbols, insure we don't leave a blank file name for (unsigned i = previous_table_size; i < table.size(); ++i) { auto &sym = table[i]; @@ -153,6 +172,7 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, sym._srcpath = dso.format_filename(); } } + add_fun_loc(symbol_wrapper, elf_sym, elf_pc, process_pc, func_locs); } return; } @@ -184,8 +204,10 @@ static int store_die_information(Dwarf_Die *sc_die, int parent_index, DieInformation &data, Dwarf_Files *dwarf_files) { #ifdef DEEP_DEBUG + // dwarf_dieoffset is good to figure out what element we are working on dwarf_getattrs(sc_die, print_attribute, nullptr, 0); #endif + // function or inlined function DieInformation::Function function{}; // die_name is usually the raw function name (no mangling info) @@ -215,28 +237,37 @@ static int store_die_information(Dwarf_Die *sc_die, int parent_index, return -1; } - // for inlined functions, we do not often get decl tag + // declaration files come with an indirection + // dwarf_attr_integrate follows the indirections + // for inlined functions, we could cache this access (as we are making several + // of them) if (dwarf_files && - ((attr = dwarf_attr(sc_die, DW_AT_decl_file, &attr_mem)) || - (attr = dwarf_attr(sc_die, DW_AT_call_file, &attr_mem)))) { + ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_file, &attr_mem)))) { Dwarf_Word fileIdx = 0; if (dwarf_formudata(attr, &fileIdx) == 0) { - // Assuming 'files' is a structure holding file information - // for the current compilation unit, obtained beforehand const char *file = dwarf_filesrc(dwarf_files, fileIdx, NULL, NULL); // Store or process the file name function.file_name = file; } } - if ((attr = dwarf_attr(sc_die, DW_AT_decl_line, &attr_mem)) || - (attr = dwarf_attr(sc_die, DW_AT_call_line, &attr_mem))) { + if ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_line, &attr_mem))) { Dwarf_Word return_uval; if (dwarf_formudata(attr, &return_uval) == 0) { - function.line_number = return_uval; + function.decl_line_number = return_uval; } } + if ((attr = dwarf_attr(sc_die, DW_AT_call_line, &attr_mem))) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.call_line_number = return_uval; + } + } + // other fields of interest + // - DW_AT_call_file + // - DW_AT_call_line to define parent line + // we often can find duplicates within the dwarf information function.parent_pos = parent_index; data.die_mem_vec.push_back(std::move(function)); @@ -287,21 +318,20 @@ static DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, } // store parent function at index 0 if (store_die_information(sc_die, -1, die_information, files) == -1) { - LG_DBG("Unable to store the parent function"); + LG_DBG("Incomplete die information for parent function"); // On some functions we are unable to find start / end info return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } find_functions_in_child_die(sc_die, 0, die_information, &die_mem, files); for (auto &el : die_information.die_mem_vec) { - LG_DBG("Inlined func start=%lx / end=%lx / Sym = %s / parent=%d", - el.start_addr, el.end_addr, el.func_name, el.parent_pos); + LG_DBG("Inlined func start=%lx / end=%lx / Sym = %s / file=%s", + el.start_addr, el.end_addr, el.func_name, el.file_name); } return {}; } static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, - ProcessAddress_t process_pc, DwflSymbolLookup::SymbolWrapper &symbol_wrapper, SymbolTable &table, DieInformation &die_information) { @@ -309,42 +339,27 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, DwflSymbolLookup::InlineMap &inline_map = symbol_wrapper._inline_map; Dwarf_Lines *lines; size_t nlines; + const DieInformation::Function &parent_func = die_information.die_mem_vec[0]; + SymbolIdx_t symbol_idx = parent_func.symbol_idx; + const Symbol *ref_sym = &table[symbol_idx]; + if (dwarf_getsrclines(cudie, &lines, &nlines) != 0) { - LG_DBG("Unable to find source lines"); + LG_DBG("Unable to find source lines for %s", ref_sym->_symname.c_str()); return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } -#ifdef V1 - for (size_t func_pos = 0; func_pos < die_information.die_mem_vec.size(); - ++func_pos) { - const DieInformation::Function ¤t_func = - die_information.die_mem_vec[func_pos]; - - int start_index = - binary_search_start_index(lines, nlines, current_func.start_addr); - // retrieve dwarf source line - Symbol &ref_sym = table[current_func.symbol_idx]; - Dwarf_Line *line = dwarf_onesrcline(lines, start_index); - int lineno; - if (dwarf_lineno(line, &lineno) == -1) { - lineno = 0; - } else { - ref_sym._func_start_lineno = lineno; - } - const char *current_file = dwarf_linesrc(line, nullptr, nullptr); - if (current_file) { - LG_DBG("Start file = %s vs %s", current_file, ref_sym._srcpath.c_str()); - ref_sym._srcpath = std::string(current_file); - } - } -#endif - const DieInformation::Function &parent_func = die_information.die_mem_vec[0]; NestedSymbolKey parent_bound{parent_func.start_addr, parent_func.end_addr}; int start_index = binary_search_start_index(lines, nlines, parent_bound.start); - std::unordered_map> - file_frequencies; - - auto it = line_map.begin(); + if (start_index >= nlines) { + LG_DBG("Unable to match lines for %s", ref_sym->_symname.c_str()); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + auto hint_line = line_map.end(); + NestedSymbolMap::ConstIt hint_inline = inline_map.begin(); + Dwarf_Addr previous_addr = 0; + NestedSymbolMap::FindRes current_func{inline_map.end(), false}; + // store closest line per file (to avoid missmatches) + std::unordered_map closest_lines; for (size_t line_index = start_index; line_index < nlines; ++line_index) { Dwarf_Line *line = dwarf_onesrcline(lines, line_index); Dwarf_Addr line_addr; @@ -352,60 +367,51 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, if (line_addr > parent_bound.end) { break; } - int lineno; if (dwarf_lineno(line, &lineno) == -1) { lineno = 0; // Handle the case where line number is not available } - - // Determine the associated function for this line - const auto leaf_func = inline_map.find_closest(line_addr, parent_bound); - SymbolIdx_t symbol_idx; - if (!leaf_func.second) { - symbol_idx = parent_func.symbol_idx; - } else { - symbol_idx = leaf_func.first->second.get_symbol_idx(); - } - Symbol &ref_sym = table[symbol_idx]; // Update the source path if necessary const char *current_file = dwarf_linesrc(line, nullptr, nullptr); - file_frequencies[symbol_idx][current_file]++; - - LG_DBG("Associate %d / %s to %s (vs %s)", lineno, - current_file ? current_file : "undef", - ref_sym._demangle_name.c_str(), ref_sym._srcpath.c_str()); - - // Insert into the line map - it = line_map.insert( - it, - std::make_pair(static_cast(line_addr), - DwflSymbolLookup::Line{static_cast(lineno), - line_addr, symbol_idx})); - } - for (auto &symbol_entry : file_frequencies) { - SymbolIdx_t symbol_idx = symbol_entry.first; - Symbol &ref_sym = table[symbol_idx]; - const char *most_frequent_file = nullptr; - int max_frequency = 0; - - for (auto &file_entry : symbol_entry.second) { - if (file_entry.second > max_frequency) { - max_frequency = file_entry.second; - most_frequent_file = file_entry.first; - } - } - if (most_frequent_file) { - LG_DBG("Associate to sym %s file %s", ref_sym._demangle_name.c_str(), - most_frequent_file); - ref_sym._srcpath = most_frequent_file; + if (previous_addr && line_addr != previous_addr) { + if (hint_line != line_map.end() && + hint_line->second.get_symbol_idx() == + closest_lines[ref_sym->_srcpath]) { + // extend previous element + hint_line->second.set_end(previous_addr); + } else { + // New line element + hint_line = line_map.emplace_hint( + hint_line, + std::make_pair( + previous_addr, + SymbolSpan{line_addr - 1, closest_lines[ref_sym->_srcpath]})); + } +#ifdef DEBUG + LG_DBG("Associate %d (%lx->%lx) / %s to %s (vs %s)", + closest_lines[ref_sym->_srcpath], previous_addr, line_addr - 1, + current_file ? current_file : "undef", + ref_sym->_demangle_name.c_str(), ref_sym->_srcpath.c_str()); +#endif + // todo: this can be more efficient with hint + current_func = inline_map.find_closest(line_addr, parent_bound); + if (!current_func.second) { + symbol_idx = parent_func.symbol_idx; + } else { + symbol_idx = current_func.first->second.get_symbol_idx(); + hint_inline = current_func.first; + } + ref_sym = &table[symbol_idx]; } + // keep line, if it matches the symbol + // todo can be optimized to avoid conversion + closest_lines[std::string(current_file)] = lineno; + previous_addr = line_addr; } - return {}; } -// todo: put dso part in parent DDRes DwflSymbolLookup::insert_inlining_info( Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper, @@ -425,7 +431,7 @@ DDRes DwflSymbolLookup::insert_inlining_info( if (!IsDDResOK(parse_die_information(cudie, elf_addr, die_information)) || die_information.die_mem_vec.size() == 0) { Symbol &parent_sym = table[parent_sym_idx]; - LG_DBG("Error when parsing die information for %s (%s)", + LG_DBG("Unable to extract die information for %s (%s)", parent_sym._demangle_name.c_str(), dso._filename.c_str()); return ddres_warn(DD_WHAT_NO_DWARF); } @@ -449,19 +455,20 @@ DDRes DwflSymbolLookup::insert_inlining_info( NestedSymbolMap &inline_map = symbol_wrapper._inline_map; for (unsigned pos = 1; pos < die_information.die_mem_vec.size(); ++pos) { DieInformation::Function ¤t_func = die_information.die_mem_vec[pos]; + current_func.symbol_idx = table.size(); table.emplace_back( Symbol({}, current_func.func_name ? current_func.func_name : "undef", - current_func.line_number, + current_func.decl_line_number, current_func.file_name ? current_func.file_name : "")); - current_func.symbol_idx = table.size() - 1; // add to the lookup inline_map.emplace( NestedSymbolKey{current_func.start_addr, current_func.end_addr}, - NestedSymbolValue(current_func.symbol_idx)); + NestedSymbolValue(current_func.symbol_idx, + current_func.call_line_number)); } // associate line information to die information (includes file info) - if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, process_pc, symbol_wrapper, + if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, symbol_wrapper, table, die_information))) { LG_DBG("Error when parsing line information (%s)", dso._filename.c_str()); } @@ -540,23 +547,34 @@ DwflSymbolLookup::insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, } } -void DwflSymbolLookup::get_inlined(const SymbolWrapper &symbol_wrapper, - ElfAddress_t elf_pc, - const SymbolMap::ValueType &parent_sym, - std::vector &inlined_symbols) { +NestedSymbolMap::FindRes DwflSymbolLookup::get_inlined( + SymbolWrapper &symbol_wrapper, ElfAddress_t process_pc, ElfAddress_t elf_pc, + const SymbolMap::ValueType &parent_sym, std::vector &func_locs) { const InlineMap &inline_map = symbol_wrapper._inline_map; NestedSymbolKey parent_key{parent_sym.first, parent_sym.second.get_end()}; - NestedSymbolMap::FindRes find_res = + NestedSymbolMap::FindRes find_inline = inline_map.find_closest(elf_pc, parent_key); - LG_DBG("Looking for %lx (%lu)- matched %lx - %lx\n", elf_pc, - inline_map.size(), find_res.second ? find_res.first->first.start : 0, - find_res.second ? find_res.first->first.end : 0); - - while (find_res.second) { - inlined_symbols.push_back(find_res.first->second.get_symbol_idx()); - find_res = inline_map.find_parent(find_res.first, parent_key, elf_pc); + NestedSymbolMap::FindRes last_found = {inline_map.end(), false}; + while (find_inline.second) { + uint32_t line = 0; + if (last_found.second) { + line = last_found.first->second.get_call_line_number(); + } else { + auto find_line = symbol_wrapper._line_map.find_closest(elf_pc); + if (find_line.second) { + line = find_line.first->second.get_symbol_idx(); + } + } + func_locs.emplace_back( + FunLoc{._ip = process_pc, + ._lineno = line, + ._symbol_idx = find_inline.first->second.get_symbol_idx(), + ._map_info_idx = -1}); + find_inline = inline_map.find_parent(find_inline.first, parent_key, elf_pc); + last_found = find_inline; } + return last_found; } bool DwflSymbolLookup::symbol_lookup_check(Dwfl_Module *mod, diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index c96b9ede6..5516838f7 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -42,7 +42,7 @@ void write_location(const FunLoc *loc, const MapInfo &mapinfo, const Symbol &symbol, ddog_prof_Location *ffi_location) { write_mapping(mapinfo, &ffi_location->mapping); write_function(symbol, &ffi_location->function); - ffi_location->address = loc->ip; + ffi_location->address = loc->_ip; ffi_location->line = loc->_lineno; } @@ -393,14 +393,14 @@ void ddprof_print_sample(const UnwindOutput &uw_output, buf += ";"; } if (sym._symname.empty()) { - if (loc_it->ip == 0) { + if (loc_it->_ip == 0) { std::string_view const path{sym._srcpath}; auto pos = path.rfind('/'); buf += "("; buf += path.substr(pos == std::string_view::npos ? 0 : pos + 1); buf += ")"; } else { - absl::StrAppendFormat(&buf, "%#x", loc_it->ip); + absl::StrAppendFormat(&buf, "%#x", loc_it->_ip); } } else { std::string_view const func{sym._symname}; diff --git a/src/symbol_map.cc b/src/symbol_map.cc index 8eb4fa65a..c2f5ff019 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -81,4 +81,32 @@ bool NestedSymbolMap::is_within(const Offset_t &norm_pc, return true; } +NestedSymbolMap::FindRes NestedSymbolMap::find_closest_hint( + Offset_t norm_pc, const NestedSymbolKey &parent_bound, ConstIt hint) const { + if (hint == end() || hint == begin()) { + return find_closest(norm_pc, parent_bound); + } + const NestedSymbolKey leaf_element{norm_pc, 0}; + const NestedSymbolKey high_bound{parent_bound.end, 0}; + + if (hint->first < leaf_element) { + // If the current element is less than or equal to norm_pc, move forward + for (auto it = hint; it != end() && it->first.start <= norm_pc; ++it) { + // Find the lower bound + if (leaf_element < it->first) { + // find first element that contains this + return find_parent(it, parent_bound, norm_pc); + } + if (high_bound < it->first) { + break; + } + } + } else { + auto it = ++hint; + // always move forward to make sure we can return current element + return find_parent(it, parent_bound, norm_pc); + } + return {end(), false}; +} + } // namespace ddprof diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index dd9c66424..593ae4f1a 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -52,7 +52,7 @@ bool is_infinite_loop(UnwindState *us) { for (unsigned i = 1; i < nb_frames_to_check; ++i) { FunLoc const &n_minus_one_loc = output.locs[nb_locs - i]; FunLoc const &n_minus_two_loc = output.locs[nb_locs - i - 1]; - if (n_minus_one_loc.ip != n_minus_two_loc.ip) { + if (n_minus_one_loc._ip != n_minus_two_loc._ip) { return false; } } @@ -212,14 +212,16 @@ DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, const DDProfMod &ddprof_mod, FileInfoId_t file_info_id) { SymbolHdr &unwind_symbol_hdr = us->symbol_hdr; // get or create the dwfl symbol - std::vector symbol_indices{}; + std::vector fun_locs{}; unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( us->_dwfl_wrapper->_dwfl, ddprof_mod, unwind_symbol_hdr._symbol_table, - unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso, - symbol_indices); + unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso, fun_locs); MapInfoIdx_t const map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); - return add_frame(symbol_indices, map_idx, pc, us); + for (auto &el : fun_locs) { + el._map_info_idx = map_idx; + } + return add_frame(fun_locs, us); } // check for runtime symbols provided in /tmp files diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index 58123c666..516162c9a 100644 --- a/src/unwind_helpers.cc +++ b/src/unwind_helpers.cc @@ -25,6 +25,18 @@ bool is_max_stack_depth_reached(const UnwindState &us) { return us.output.locs.size() + 2 >= kMaxStackDepth; } +DDRes add_frame(const std::vector &fun_locs, UnwindState *us) { + for (auto const &el : fun_locs) { + UnwindOutput *output = &us->output; + if (output->locs.size() >= kMaxStackDepth) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_MAX_DEPTH, + "Max stack depth reached"); // avoid overflow + } + output->locs.push_back(el); + } + return {}; +} + DDRes add_frame(std::vector symbol_indices, MapInfoIdx_t map_idx, ElfAddress_t pc, UnwindState *us) { @@ -43,7 +55,7 @@ DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, } FunLoc current; current._symbol_idx = symbol_idx; - current.ip = pc; + current._ip = pc; if (map_idx == -1) { // just add an empty element for mapping info current._map_info_idx = us->symbol_hdr._common_mapinfo_lookup.get_or_insert( diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 0393b7c02..ef8f61e4f 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -241,11 +241,11 @@ TEST(DwflModule, inlined_func) { ASSERT_TRUE(IsDDResOK(res)); ASSERT_TRUE(ddprof_mod->_mod); if (find_res.first == it) { - std::vector symbol_indices; + std::vector fun_locs; symbol_lookup.get_or_insert(dwfl_wrapper._dwfl, *ddprof_mod, table, dso_lookup, file_info_id, ip, dso, - symbol_indices); - const auto &sym = table[symbol_indices[0]]; + fun_locs); + const auto &sym = table[fun_locs[0]._symbol_idx]; LG_DBG("Sym = %s", sym._demangle_name.c_str()); EXPECT_EQ(sym._demangle_name, "deeper_function"); } diff --git a/test/savecontext-ut.cc b/test/savecontext-ut.cc index a4f0dee00..42232d957 100644 --- a/test/savecontext-ut.cc +++ b/test/savecontext-ut.cc @@ -111,7 +111,7 @@ TEST(getcontext, unwind_from_sighandler) { for (size_t iloc = 0; iloc < state.output.locs.size(); ++iloc) { auto &symbol = symbol_table[state.output.locs[iloc]._symbol_idx]; printf("%zu: %s %lx \n", iloc, symbol._demangle_name.c_str(), - state.output.locs[iloc].ip); + state.output.locs[iloc]._ip); } auto get_symbol = [&](int idx) { return symbol_table[state.output.locs[idx]._symbol_idx]; diff --git a/test/unwind_output_mock.hpp b/test/unwind_output_mock.hpp index 69742f504..4b7cd56d4 100644 --- a/test/unwind_output_mock.hpp +++ b/test/unwind_output_mock.hpp @@ -48,7 +48,7 @@ static inline void fill_unwind_output_1(UnwindOutput &uw_output) { std::vector &locs = uw_output.locs; for (unsigned i = 0; i < uw_output.locs.size(); ++i) { - locs[i].ip = 42 + i; + locs[i]._ip = 42 + i; locs[i]._symbol_idx = i; locs[i]._map_info_idx = i; } From 38acbae6184c69490464d364f5061e1a7625c992 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 30 Nov 2023 15:24:18 +0100 Subject: [PATCH 08/11] inlining - minor refactor Introduce dwarf helpers to simplify some of the structure --- include/dwarf_helpers.hpp | 17 ++++ src/dwarf_helpers.cc | 161 +++++++++++++++++++++++++++++++++ src/dwfl_symbol_lookup.cc | 183 +------------------------------------- 3 files changed, 181 insertions(+), 180 deletions(-) diff --git a/include/dwarf_helpers.hpp b/include/dwarf_helpers.hpp index f20aaaf8e..3118ee849 100644 --- a/include/dwarf_helpers.hpp +++ b/include/dwarf_helpers.hpp @@ -1,13 +1,30 @@ #pragma once +#include "ddprof_defs.hpp" +#include "ddres_def.hpp" #include "dwfl_internals.hpp" #include namespace ddprof { +struct DieInformation { + struct Function { + ElfAddress_t start_addr{}; + ElfAddress_t end_addr{}; + const char *func_name{}; + const char *file_name{}; + int decl_line_number{0}; + int call_line_number{0}; + int parent_pos{-1}; // position within the die vector + SymbolIdx_t symbol_idx = -1; + }; + std::vector die_mem_vec{}; +}; // debug attribute functions const char *get_attribute_name(int attrCode); int print_attribute(Dwarf_Attribute *attr, void *arg); +DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, + DieInformation &die_information); } // namespace ddprof diff --git a/src/dwarf_helpers.cc b/src/dwarf_helpers.cc index ea05aac03..f352a0374 100644 --- a/src/dwarf_helpers.cc +++ b/src/dwarf_helpers.cc @@ -5,13 +5,174 @@ #include "dwarf_helpers.hpp" +#include "ddres.hpp" #include "logger.hpp" +#include #include #include namespace ddprof { +struct DieSearchParam { + Dwarf_Addr addr; + Dwarf_Die *die_mem; +}; + +/* die_find callback for non-inlined function search */ +static int die_search_func_cb(Dwarf_Die *fn_die, void *data) { + DieSearchParam *ad = reinterpret_cast(data); + if (dwarf_tag(fn_die) == DW_TAG_subprogram && dwarf_haspc(fn_die, ad->addr)) { + memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); + return DWARF_CB_ABORT; + } + return DWARF_CB_OK; +} + +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) { + DieSearchParam ad; + ad.addr = addr; + ad.die_mem = die_mem; + /* dwarf_getscopes can't find subprogram. */ + if (!dwarf_getfuncs(cu_die, die_search_func_cb, &ad, 0)) + return NULL; + else + return die_mem; +} + +// return index to added element, else returns -1 +static int store_die_information(Dwarf_Die *sc_die, int parent_index, + DieInformation &data, + Dwarf_Files *dwarf_files) { +#ifdef DEEP_DEBUG + // dwarf_dieoffset is good to figure out what element we are working on + dwarf_getattrs(sc_die, print_attribute, nullptr, 0); +#endif + + // function or inlined function + DieInformation::Function function{}; + // die_name is usually the raw function name (no mangling info) + // link name can have mangling info + function.func_name = dwarf_diename(sc_die); + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { + if (attr) { + Dwarf_Addr ret_value; + if (dwarf_formaddr(attr, &ret_value) == 0) { + function.start_addr = ret_value; + } + } + } + // end is stored as a unsigned (not as a pointer) + if ((attr = dwarf_attr(sc_die, DW_AT_high_pc, &attr_mem))) { + if (attr) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.end_addr = function.start_addr + return_uval; + } + } + } + // some of the functions don't have the start and end info + if (!function.start_addr || !function.end_addr) { + return -1; + } + + // declaration files come with an indirection + // dwarf_attr_integrate follows the indirections + // for inlined functions, we could cache this access (as we are making several + // of them) + if (dwarf_files && + ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_file, &attr_mem)))) { + Dwarf_Word fileIdx = 0; + if (dwarf_formudata(attr, &fileIdx) == 0) { + const char *file = dwarf_filesrc(dwarf_files, fileIdx, NULL, NULL); + // Store or process the file name + function.file_name = file; + } + } + + if ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_line, &attr_mem))) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.decl_line_number = return_uval; + } + } + + if ((attr = dwarf_attr(sc_die, DW_AT_call_line, &attr_mem))) { + Dwarf_Word return_uval; + if (dwarf_formudata(attr, &return_uval) == 0) { + function.call_line_number = return_uval; + } + } + // other fields of interest + // - DW_AT_call_file + // - DW_AT_call_line to define parent line + + // we often can find duplicates within the dwarf information + function.parent_pos = parent_index; + data.die_mem_vec.push_back(std::move(function)); + return (data.die_mem_vec.size() - 1); +} + +static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, + int parent_index, + DieInformation &die_info, + Dwarf_Die *die_mem, + Dwarf_Files *dwarf_files) { + Dwarf_Die child_die; + int ret; + ret = dwarf_child(current_die, die_mem); + if (ret != 0) + return nullptr; + do { + int tag_val = dwarf_tag(die_mem); + int next_parent_idx = parent_index; + if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { + int current_idx = + store_die_information(die_mem, parent_index, die_info, dwarf_files); + next_parent_idx = (current_idx != -1 ? current_idx : next_parent_idx); + } + // + // todo: optimize the exploration to avoid going through soo many elements + // Child dies can have functions, even without being a child of another func + find_functions_in_child_die(die_mem, next_parent_idx, die_info, &child_die, + dwarf_files); + } while (dwarf_siblingof(die_mem, die_mem) == 0); + return nullptr; +} + +DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, + DieInformation &die_information) { + Dwarf_Files *files = nullptr; + size_t nfiles = 0; + assert(cudie); + // cached within the CU + if (dwarf_getsrcfiles(cudie, &files, &nfiles) != 0) { + files = nullptr; + } + Dwarf_Die die_mem; + Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); + if (sc_die == nullptr) { + LG_DBG("Unable to retrieve sc_die at %lx", elf_addr); + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + // store parent function at index 0 + if (store_die_information(sc_die, -1, die_information, files) == -1) { + LG_DBG("Incomplete die information for parent function"); + // On some functions we are unable to find start / end info + return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); + } + find_functions_in_child_die(sc_die, 0, die_information, &die_mem, files); + + for (auto &el : die_information.die_mem_vec) { + LG_DBG("Inlined func start=%lx / end=%lx / Sym = %s / file=%s", + el.start_addr, el.end_addr, el.func_name, el.file_name); + } + return {}; +} + const char *get_attribute_name(int attrCode) { // Should not get init unless cablled // Something like following awk can help generate this map: diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 6b629b34b..3c8830918 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -5,12 +5,8 @@ #include "dwfl_symbol_lookup.hpp" -#include "ddprof_module.hpp" #include "dwarf_helpers.hpp" -#include "dwfl_hdr.hpp" -#include "dwfl_internals.hpp" #include "dwfl_symbol.hpp" -#include "logger.hpp" #include #include @@ -18,7 +14,6 @@ #include // For std::iota #include #include -#include #include #define DEBUG @@ -26,24 +21,6 @@ namespace ddprof { namespace { -struct DieInformation { - struct Function { - ElfAddress_t start_addr{}; - ElfAddress_t end_addr{}; - const char *func_name{}; - const char *file_name{}; - int decl_line_number{0}; - int call_line_number{0}; - int parent_pos{-1}; // position within the die vector - SymbolIdx_t symbol_idx = -1; - }; - std::vector die_mem_vec{}; -}; - -struct DieSearchParam { - Dwarf_Addr addr; - Dwarf_Die *die_mem; -}; size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, ElfAddress_t start_sym) { @@ -177,160 +154,6 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, return; } -/* die_find callback for non-inlined function search */ -static int die_search_func_cb(Dwarf_Die *fn_die, void *data) { - DieSearchParam *ad = reinterpret_cast(data); - if (dwarf_tag(fn_die) == DW_TAG_subprogram && dwarf_haspc(fn_die, ad->addr)) { - memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); - return DWARF_CB_ABORT; - } - return DWARF_CB_OK; -} - -Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, - Dwarf_Die *die_mem) { - DieSearchParam ad; - ad.addr = addr; - ad.die_mem = die_mem; - /* dwarf_getscopes can't find subprogram. */ - if (!dwarf_getfuncs(cu_die, die_search_func_cb, &ad, 0)) - return NULL; - else - return die_mem; -} - -// return index to added element, else returns -1 -static int store_die_information(Dwarf_Die *sc_die, int parent_index, - DieInformation &data, - Dwarf_Files *dwarf_files) { -#ifdef DEEP_DEBUG - // dwarf_dieoffset is good to figure out what element we are working on - dwarf_getattrs(sc_die, print_attribute, nullptr, 0); -#endif - - // function or inlined function - DieInformation::Function function{}; - // die_name is usually the raw function name (no mangling info) - // link name can have mangling info - function.func_name = dwarf_diename(sc_die); - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - if ((attr = dwarf_attr(sc_die, DW_AT_low_pc, &attr_mem))) { - if (attr) { - Dwarf_Addr ret_value; - if (dwarf_formaddr(attr, &ret_value) == 0) { - function.start_addr = ret_value; - } - } - } - // end is stored as a unsigned (not as a pointer) - if ((attr = dwarf_attr(sc_die, DW_AT_high_pc, &attr_mem))) { - if (attr) { - Dwarf_Word return_uval; - if (dwarf_formudata(attr, &return_uval) == 0) { - function.end_addr = function.start_addr + return_uval; - } - } - } - // some of the functions don't have the start and end info - if (!function.start_addr || !function.end_addr) { - return -1; - } - - // declaration files come with an indirection - // dwarf_attr_integrate follows the indirections - // for inlined functions, we could cache this access (as we are making several - // of them) - if (dwarf_files && - ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_file, &attr_mem)))) { - Dwarf_Word fileIdx = 0; - if (dwarf_formudata(attr, &fileIdx) == 0) { - const char *file = dwarf_filesrc(dwarf_files, fileIdx, NULL, NULL); - // Store or process the file name - function.file_name = file; - } - } - - if ((attr = dwarf_attr_integrate(sc_die, DW_AT_decl_line, &attr_mem))) { - Dwarf_Word return_uval; - if (dwarf_formudata(attr, &return_uval) == 0) { - function.decl_line_number = return_uval; - } - } - - if ((attr = dwarf_attr(sc_die, DW_AT_call_line, &attr_mem))) { - Dwarf_Word return_uval; - if (dwarf_formudata(attr, &return_uval) == 0) { - function.call_line_number = return_uval; - } - } - // other fields of interest - // - DW_AT_call_file - // - DW_AT_call_line to define parent line - - // we often can find duplicates within the dwarf information - function.parent_pos = parent_index; - data.die_mem_vec.push_back(std::move(function)); - return (data.die_mem_vec.size() - 1); -} - -static Dwarf_Die *find_functions_in_child_die(Dwarf_Die *current_die, - int parent_index, - DieInformation &die_info, - Dwarf_Die *die_mem, - Dwarf_Files *dwarf_files) { - Dwarf_Die child_die; - int ret; - ret = dwarf_child(current_die, die_mem); - if (ret != 0) - return nullptr; - do { - int tag_val = dwarf_tag(die_mem); - int next_parent_idx = parent_index; - if (tag_val == DW_TAG_subprogram || tag_val == DW_TAG_inlined_subroutine) { - int current_idx = - store_die_information(die_mem, parent_index, die_info, dwarf_files); - next_parent_idx = (current_idx != -1 ? current_idx : next_parent_idx); - } - // - // todo: optimize the exploration to avoid going through soo many elements - // Child dies can have functions, even without being a child of another func - find_functions_in_child_die(die_mem, next_parent_idx, die_info, &child_die, - dwarf_files); - } while (dwarf_siblingof(die_mem, die_mem) == 0); - return nullptr; -} - -static DDRes parse_die_information(Dwarf_Die *cudie, ElfAddress_t elf_addr, - DieInformation &die_information) { - Dwarf_Files *files = nullptr; - size_t nfiles = 0; - assert(cudie); - // cached within the CU - if (dwarf_getsrcfiles(cudie, &files, &nfiles) != 0) { - files = nullptr; - } - Dwarf_Die die_mem; - Dwarf_Die *sc_die = die_find_realfunc(cudie, elf_addr, &die_mem); - if (sc_die == nullptr) { - LG_DBG("Unable to retrieve sc_die at %lx", elf_addr); - return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); - } - // store parent function at index 0 - if (store_die_information(sc_die, -1, die_information, files) == -1) { - LG_DBG("Incomplete die information for parent function"); - // On some functions we are unable to find start / end info - return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); - } - find_functions_in_child_die(sc_die, 0, die_information, &die_mem, files); - - for (auto &el : die_information.die_mem_vec) { - LG_DBG("Inlined func start=%lx / end=%lx / Sym = %s / file=%s", - el.start_addr, el.end_addr, el.func_name, el.file_name); - } - return {}; -} - static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, DwflSymbolLookup::SymbolWrapper &symbol_wrapper, SymbolTable &table, DieInformation &die_information) { @@ -348,7 +171,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, return ddres_warn(DD_WHAT_DWFL_LIB_ERROR); } NestedSymbolKey parent_bound{parent_func.start_addr, parent_func.end_addr}; - int start_index = + size_t start_index = binary_search_start_index(lines, nlines, parent_bound.start); if (start_index >= nlines) { LG_DBG("Unable to match lines for %s", ref_sym->_symname.c_str()); @@ -468,8 +291,8 @@ DDRes DwflSymbolLookup::insert_inlining_info( } // associate line information to die information (includes file info) - if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, symbol_wrapper, - table, die_information))) { + if (IsDDResNotOK(parse_lines(cudie, ddprof_mod, symbol_wrapper, table, + die_information))) { LG_DBG("Error when parsing line information (%s)", dso._filename.c_str()); } return {}; From 55262a368ce4800afd02d28fc29b11e164a9c56c Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 30 Nov 2023 20:47:59 +0100 Subject: [PATCH 09/11] Inlining - minor optim Improve lookup of inlined functions when parsing lines --- src/dwfl_symbol_lookup.cc | 6 +++--- src/symbol_map.cc | 15 +++++++++------ test/symbol_map-ut.cc | 27 ++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 3c8830918..899cd02be 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -217,8 +217,8 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, current_file ? current_file : "undef", ref_sym->_demangle_name.c_str(), ref_sym->_srcpath.c_str()); #endif - // todo: this can be more efficient with hint - current_func = inline_map.find_closest(line_addr, parent_bound); + current_func = inline_map.find_closest_hint(line_addr, parent_bound, + current_func.first); if (!current_func.second) { symbol_idx = parent_func.symbol_idx; } else { @@ -228,7 +228,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, ref_sym = &table[symbol_idx]; } // keep line, if it matches the symbol - // todo can be optimized to avoid conversion + // todo can be optimized to avoid conversion to string closest_lines[std::string(current_file)] = lineno; previous_addr = line_addr; } diff --git a/src/symbol_map.cc b/src/symbol_map.cc index c2f5ff019..a065de9e5 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -88,25 +88,28 @@ NestedSymbolMap::FindRes NestedSymbolMap::find_closest_hint( } const NestedSymbolKey leaf_element{norm_pc, 0}; const NestedSymbolKey high_bound{parent_bound.end, 0}; + NestedSymbolMap::FindRes res{end(), false}; + auto it = hint; if (hint->first < leaf_element) { // If the current element is less than or equal to norm_pc, move forward - for (auto it = hint; it != end() && it->first.start <= norm_pc; ++it) { - // Find the lower bound + for (; it != end(); ++it) { + // we will test when looping back from highest to lowest element if (leaf_element < it->first) { // find first element that contains this - return find_parent(it, parent_bound, norm_pc); + break; } + // we could be looking for an out of bound element ? if (high_bound < it->first) { break; } } + return find_parent(it, parent_bound, norm_pc); } else { - auto it = ++hint; + ++it; // always move forward to make sure we can return current element - return find_parent(it, parent_bound, norm_pc); } - return {end(), false}; + return find_parent(it, parent_bound, norm_pc); } } // namespace ddprof diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index 63051b7b0..fc042e650 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -60,9 +60,6 @@ TEST(NestedSymbolMap, same_addr) { map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); NestedSymbolValue span100_300(1); map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); - for (auto &el : map) { - LG_DBG("Idx = %d", el.second.get_symbol_idx()); - } { // always return the deeper element NestedSymbolMap::FindRes res = map.find_closest(0x100, parent_key); @@ -95,4 +92,28 @@ TEST(NestedSymbolMap, InlinedFunctionLookup) { 6); // Expecting the most specific (deepest) symbol for this address } +TEST(NestedSymbolMap, closest_hint) { + LogHandle handle; + NestedSymbolMap map; + NestedSymbolKey parent_key{0x50, 0x1000}; + NestedSymbolValue span100_1000(0); + map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); + NestedSymbolValue span100_300(1); + map.emplace(NestedSymbolKey{0x100, 0x300}, span100_300); + NestedSymbolValue span300_400(2); + map.emplace(NestedSymbolKey{0x300, 0x400}, span300_400); + + + { // always return the deeper element + NestedSymbolMap::FindRes res = map.find_closest(0x100, parent_key); + EXPECT_TRUE(res.second); + EXPECT_EQ(res.first->second.get_symbol_idx(), 1); + + NestedSymbolMap::FindRes res_2 = + map.find_closest_hint(0x350, parent_key, res.first); + EXPECT_TRUE(res_2.second); + EXPECT_EQ(res_2.first->second.get_symbol_idx(), 2); + } +} + } // namespace ddprof From 3ddddd2086e20342ecdcee93496cc64289829bcd Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 30 Nov 2023 21:50:49 +0100 Subject: [PATCH 10/11] Inlining Add an input option to deactivate inlining --- include/ddprof_cli.hpp | 1 + include/ddprof_context.hpp | 1 + include/dwfl_symbol_lookup.hpp | 10 +++++----- include/symbol_hdr.hpp | 4 ++-- include/symbol_map.hpp | 1 + include/unwind_state.hpp | 4 ++-- src/ddprof_cli.cc | 6 ++++++ src/ddprof_context_lib.cc | 2 ++ src/ddprof_worker.cc | 3 ++- src/dwfl_symbol_lookup.cc | 16 +++++++++------- src/symbol_map.cc | 2 +- test/ddprof_exporter-ut.cc | 3 ++- test/ddprof_pprof-ut.cc | 3 ++- test/symbol_map-ut.cc | 10 +++++----- 14 files changed, 41 insertions(+), 25 deletions(-) diff --git a/include/ddprof_cli.hpp b/include/ddprof_cli.hpp index 0b20b4d15..e02895dea 100644 --- a/include/ddprof_cli.hpp +++ b/include/ddprof_cli.hpp @@ -41,6 +41,7 @@ struct DDProfCLI { // Profiling options int pid{0}; bool global{false}; + bool inlining{true}; std::chrono::seconds upload_period; unsigned worker_period; // worker_period std::vector events; diff --git a/include/ddprof_context.hpp b/include/ddprof_context.hpp index 4396cbe79..6b35928c4 100644 --- a/include/ddprof_context.hpp +++ b/include/ddprof_context.hpp @@ -21,6 +21,7 @@ namespace ddprof { struct DDProfContext { struct { bool enable{true}; + bool inlining{true}; std::chrono::seconds upload_period{}; bool fault_info{true}; int nice{-1}; diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index 0fae3d883..f1c094090 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -46,7 +46,7 @@ class DwflSymbolLookup { public: using SymbolRange = std::pair; // build and check env var to know check setting - DwflSymbolLookup(); + DwflSymbolLookup(bool inlining = true); // Get symbol from internal cache or fetch through dwarf void get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, @@ -87,10 +87,9 @@ class DwflSymbolLookup { ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper); - static void add_fun_loc(DwflSymbolLookup::SymbolWrapper &symbol_wrapper, - const SymbolMap::ValueType &parent_sym, - ElfAddress_t elf_pc, ProcessAddress_t process_pc, - std::vector &func_locs); + void add_fun_loc(DwflSymbolLookup::SymbolWrapper &symbol_wrapper, + const SymbolMap::ValueType &parent_sym, ElfAddress_t elf_pc, + ProcessAddress_t process_pc, std::vector &func_locs); static DDRes insert_inlining_info(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolTable &table, @@ -119,6 +118,7 @@ class DwflSymbolLookup { FileInfo2SymbolWrapper _file_info_function_map; FileInfo2LineMap _file_info_inlining_map; DwflSymbolLookupStats _stats; + bool _inlining; }; } // namespace ddprof diff --git a/include/symbol_hdr.hpp b/include/symbol_hdr.hpp index 621a8b402..7b17f1bab 100644 --- a/include/symbol_hdr.hpp +++ b/include/symbol_hdr.hpp @@ -19,8 +19,8 @@ namespace ddprof { struct SymbolHdr { - explicit SymbolHdr(std::string_view path_to_proc = "") - : _runtime_symbol_lookup(path_to_proc) {} + explicit SymbolHdr(std::string_view path_to_proc = "", bool inlining = true) + : _dwfl_symbol_lookup(inlining), _runtime_symbol_lookup(path_to_proc) {} void display_stats() const { _dwfl_symbol_lookup.stats().display(_dwfl_symbol_lookup.size()); _dso_symbol_lookup.stats_display(); diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index ed3151809..40182cb3b 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -11,6 +11,7 @@ namespace ddprof { +// template class SymbolSpan { public: SymbolSpan() : _end(0), _symbol_idx(-1) {} diff --git a/include/unwind_state.hpp b/include/unwind_state.hpp index 02947c685..51c7c7497 100644 --- a/include/unwind_state.hpp +++ b/include/unwind_state.hpp @@ -39,8 +39,8 @@ struct UnwindRegisters { /// Single structure with everything necessary in unwinding. The structure is /// given through callbacks struct UnwindState { - explicit UnwindState(int dd_profiling_fd = -1) - : dso_hdr("", dd_profiling_fd) { + explicit UnwindState(int dd_profiling_fd = -1, bool inlining = true) + : dso_hdr("", dd_profiling_fd), symbol_hdr("", inlining) { output.clear(); output.locs.reserve(kMaxStackDepth); } diff --git a/src/ddprof_cli.cc b/src/ddprof_cli.cc index 7d38caee3..b0fa95ea7 100644 --- a/src/ddprof_cli.cc +++ b/src/ddprof_cli.cc @@ -163,6 +163,10 @@ int DDProfCLI::parse(int argc, const char *argv[]) { ->group("Profiling settings") ->excludes(pid_opt) ->excludes(exec_option); + + app.add_option("--inlining", inlining, "Add inlining information.\n") + ->group("Profiling settings") + ->default_val(true); app.add_flag("--timeline,-t", timeline, "Enables Timeline view in the Datadog UI.\n" @@ -469,6 +473,8 @@ void DDProfCLI::print() const { if (!enable) { PRINT_NFO(" - enable: %s", enable ? "true" : "false"); } + PRINT_NFO(" - inlining: %s", inlining ? "true" : "false"); + if (!cpu_affinity.empty()) { PRINT_NFO(" - cpu_affinity: %s", cpu_affinity.c_str()); } diff --git a/src/ddprof_context_lib.cc b/src/ddprof_context_lib.cc index 68a06c423..91148a7ce 100644 --- a/src/ddprof_context_lib.cc +++ b/src/ddprof_context_lib.cc @@ -72,6 +72,8 @@ void copy_cli_values(const DDProfCLI &ddprof_cli, DDProfContext &ctx) { ctx.params.pid = ddprof_cli.pid; } ctx.params.upload_period = ddprof_cli.upload_period; + ctx.params.inlining = ddprof_cli.inlining; + // todo : naming ? ctx.params.worker_period = ddprof_cli.worker_period; // Advanced diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 82994f7a3..1541dc085 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -339,7 +339,8 @@ DDRes worker_library_init(DDProfContext &ctx, // Make sure worker index is initialized correctly ctx.worker_ctx.i_current_pprof = 0; ctx.worker_ctx.exp_tid = {0}; - ctx.worker_ctx.us = new UnwindState(ctx.params.dd_profiling_fd); + ctx.worker_ctx.us = + new UnwindState(ctx.params.dd_profiling_fd, ctx.params.inlining); std::fill(ctx.worker_ctx.lost_events_per_watcher.begin(), ctx.worker_ctx.lost_events_per_watcher.end(), 0UL); diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index 899cd02be..ff211cb9b 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -50,7 +50,7 @@ size_t binary_search_start_index(Dwarf_Lines *lines, size_t nlines, } } // namespace -DwflSymbolLookup::DwflSymbolLookup() { +DwflSymbolLookup::DwflSymbolLookup(bool inlining) : _inlining(inlining) { if (const char *env_p = std::getenv("DDPROF_CACHE_SETTING")) { if (strcmp(env_p, "VALIDATE") == 0) { // Allows to compare the accuracy of the cache @@ -76,8 +76,9 @@ void DwflSymbolLookup::add_fun_loc( DwflSymbolLookup::SymbolWrapper &symbol_wrapper, const SymbolMap::ValueType &parent_sym, ElfAddress_t elf_pc, ProcessAddress_t process_pc, std::vector &func_locs) { - auto last_inlined = - get_inlined(symbol_wrapper, process_pc, elf_pc, parent_sym, func_locs); + const auto last_inlined = _inlining + ? get_inlined(symbol_wrapper, process_pc, elf_pc, parent_sym, func_locs) + : NestedSymbolMap::FindRes{symbol_wrapper._inline_map.end(), false}; uint32_t line = 0; if (last_inlined.second) { line = last_inlined.first->second.get_call_line_number(); @@ -88,7 +89,6 @@ void DwflSymbolLookup::add_fun_loc( line = line_find.first->second.get_symbol_idx(); } } - LG_DBG("Adding parent = %d", parent_sym.second.get_symbol_idx()); func_locs.emplace_back( FunLoc{._ip = process_pc, ._lineno = line, @@ -136,9 +136,11 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, SymbolMap::ValueType &elf_sym = insert(dwfl, ddprof_mod, table, dso_symbol_lookup, process_pc, dso, symbol_wrapper); - // parse associated dwarf info - insert_inlining_info(dwfl, ddprof_mod, table, process_pc, dso, - symbol_wrapper, elf_sym); + if (_inlining) { + // parse associated dwarf info + insert_inlining_info(dwfl, ddprof_mod, table, process_pc, dso, + symbol_wrapper, elf_sym); + } // For newly added symbols, insure we don't leave a blank file name for (unsigned i = previous_table_size; i < table.size(); ++i) { auto &sym = table[i]; diff --git a/src/symbol_map.cc b/src/symbol_map.cc index a065de9e5..61f79df84 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -96,7 +96,7 @@ NestedSymbolMap::FindRes NestedSymbolMap::find_closest_hint( for (; it != end(); ++it) { // we will test when looping back from highest to lowest element if (leaf_element < it->first) { - // find first element that contains this + // We reached an element that was higher break; } // we could be looking for an out of bound element ? diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 411c1cb15..4adba557f 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -20,7 +20,8 @@ namespace ddprof { // todo : cut this dependency -DwflSymbolLookup::DwflSymbolLookup() : _lookup_setting(K_CACHE_ON) {} +DwflSymbolLookup::DwflSymbolLookup(bool inlining) + : _lookup_setting(K_CACHE_ON) {} // Mock int get_nb_hw_thread() { return 2; } diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index b3a128887..e24ca0e7d 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -22,7 +22,8 @@ namespace ddprof { // todo : cut this dependency -DwflSymbolLookup::DwflSymbolLookup() : _lookup_setting(K_CACHE_ON) {} +DwflSymbolLookup::DwflSymbolLookup(bool inlining) + : _lookup_setting(K_CACHE_ON) {} TEST(DDProfPProf, init_profiles) { DDProfPProf pprof; diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index fc042e650..d05872385 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -21,7 +21,6 @@ TEST(SymbolMap, Span) { TEST(SymbolMap, Map) { LogHandle handle; - LG_DBG("Size of SymbolMap::ValueType: %lu", sizeof(SymbolMap::ValueType)); SymbolMap map; SymbolSpan span0_1000(0x1000, 12); map.emplace(0, span0_1000); @@ -37,9 +36,6 @@ TEST(NestedSymbolMap, simple) { map.emplace(NestedSymbolKey{0x100, 0x1000}, span100_1000); NestedSymbolValue span150_300(1); map.emplace(NestedSymbolKey{0x150, 0x300}, span150_300); - for (auto &el : map) { - LG_DBG("Idx = %d", el.second.get_symbol_idx()); - } { NestedSymbolMap::FindRes res = map.find_closest(0x150, parent_key); EXPECT_TRUE(res.second); @@ -103,7 +99,6 @@ TEST(NestedSymbolMap, closest_hint) { NestedSymbolValue span300_400(2); map.emplace(NestedSymbolKey{0x300, 0x400}, span300_400); - { // always return the deeper element NestedSymbolMap::FindRes res = map.find_closest(0x100, parent_key); EXPECT_TRUE(res.second); @@ -113,6 +108,11 @@ TEST(NestedSymbolMap, closest_hint) { map.find_closest_hint(0x350, parent_key, res.first); EXPECT_TRUE(res_2.second); EXPECT_EQ(res_2.first->second.get_symbol_idx(), 2); + + NestedSymbolMap::FindRes res_3 = + map.find_closest_hint(0x900, parent_key, res_2.first); + EXPECT_TRUE(res_3.second); + EXPECT_EQ(res_3.first->second.get_symbol_idx(), 0); } } From a881160c20ea01b0f512e294bf6e59da581bcf94 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 30 Nov 2023 22:37:36 +0100 Subject: [PATCH 11/11] Inlining - refactoring Ensuring the line map has a well defined type --- include/dwfl_symbol_lookup.hpp | 1 - include/symbol_map.hpp | 62 ++++++++++++++++++++++++++-------- src/ddprof_cli.cc | 2 +- src/dwfl_symbol_lookup.cc | 32 ++++++++---------- src/runtime_symbol_lookup.cc | 6 ++-- src/symbol_map.cc | 30 ---------------- test/symbol_map-ut.cc | 4 +-- 7 files changed, 69 insertions(+), 68 deletions(-) diff --git a/include/dwfl_symbol_lookup.hpp b/include/dwfl_symbol_lookup.hpp index f1c094090..349732c40 100644 --- a/include/dwfl_symbol_lookup.hpp +++ b/include/dwfl_symbol_lookup.hpp @@ -64,7 +64,6 @@ class DwflSymbolLookup { DwflSymbolLookupStats &stats() { return _stats; } // todo: we can have a better type than symbol idx for the line - using LineMap = SymbolMap; using InlineMap = NestedSymbolMap; struct SymbolWrapper { LineMap _line_map; diff --git a/include/symbol_map.hpp b/include/symbol_map.hpp index 40182cb3b..005783560 100644 --- a/include/symbol_map.hpp +++ b/include/symbol_map.hpp @@ -11,12 +11,10 @@ namespace ddprof { -// template -class SymbolSpan { +template class TSpan { public: - SymbolSpan() : _end(0), _symbol_idx(-1) {} - SymbolSpan(Offset_t end, SymbolIdx_t symbol_idx) - : _end(end), _symbol_idx(symbol_idx) {} + TSpan() : _end(0), _value(DefaultValue) {} + TSpan(Offset_t end, T value) : _end(end), _value(value) {} // push end further void set_end(Offset_t end) { if (end > _end) { @@ -25,23 +23,27 @@ class SymbolSpan { } [[nodiscard]] Offset_t get_end() const { return _end; } - [[nodiscard]] SymbolIdx_t get_symbol_idx() const { return _symbol_idx; } + [[nodiscard]] T get_value() const { return _value; } private: // symbol end within the segment (considering file offset) Offset_t _end; // element inside internal symbol cache - SymbolIdx_t _symbol_idx; + T _value; }; -class SymbolMap : private std::map { +using SymbolSpan = TSpan; +using LineSpan = TSpan; + +template +class SpanMap : private std::map { public: - using Map = std::map; - using It = Map::iterator; - using ConstIt = Map::const_iterator; + using Map = std::map; + using It = typename Map::iterator; + using ConstIt = typename Map::const_iterator; using FindRes = std::pair; using ValueType = - Map::value_type; // key value pair ElfAddress_t, SymbolSpanMap + typename Map::value_type; // key value pair ElfAddress_t, SymbolSpanMap // Functions we forward from underlying map type using Map::begin; @@ -53,10 +55,39 @@ class SymbolMap : private std::map { using Map::erase; using Map::size; - static bool is_within(const Offset_t &norm_pc, const ValueType &kv); - FindRes find_closest(Offset_t norm_pc); + bool is_within(const Offset_t &norm_pc, const SpanMap::ValueType &kv) { + if (norm_pc < kv.first) { + return false; + } + if (norm_pc > kv.second.get_end()) { + return false; + } + return true; + } + + FindRes find_closest(Offset_t norm_pc) { + // First element not less than (can match exactly a start addr) + auto it = Map::lower_bound(norm_pc); + if (it != end()) { // map is empty + if (SpanMap::is_within(norm_pc, *it)) { + return {it, true}; + } + } + + // previous element is more likely to contain our addr + if (it != begin()) { + --it; + } else { // map is empty + return {end(), false}; + } + // element can not be end (as we reversed or exit) + return {it, is_within(norm_pc, *it)}; + } }; +using SymbolMap = SpanMap; +using LineMap = SpanMap; + class NestedSymbolValue { public: NestedSymbolValue() : _symbol_idx(-1), _call_line_number(0) {} @@ -99,9 +130,12 @@ class NestedSymbolMap : private std::map { using Map::erase; using Map::size; + // todo: possible improvement to return a table of all elements matching + FindRes find_parent(ConstIt it, const NestedSymbolKey &parent_bound, Offset_t norm_pc) const; + // returns the element that is the most leaf FindRes find_closest(Offset_t norm_pc, const NestedSymbolKey &parent_bound) const; diff --git a/src/ddprof_cli.cc b/src/ddprof_cli.cc index b0fa95ea7..019c31e1d 100644 --- a/src/ddprof_cli.cc +++ b/src/ddprof_cli.cc @@ -163,7 +163,7 @@ int DDProfCLI::parse(int argc, const char *argv[]) { ->group("Profiling settings") ->excludes(pid_opt) ->excludes(exec_option); - + app.add_option("--inlining", inlining, "Add inlining information.\n") ->group("Profiling settings") ->default_val(true); diff --git a/src/dwfl_symbol_lookup.cc b/src/dwfl_symbol_lookup.cc index ff211cb9b..93f63609b 100644 --- a/src/dwfl_symbol_lookup.cc +++ b/src/dwfl_symbol_lookup.cc @@ -86,14 +86,13 @@ void DwflSymbolLookup::add_fun_loc( // line can be associated to parent const auto line_find = symbol_wrapper._line_map.find_closest(elf_pc); if (line_find.second) { - line = line_find.first->second.get_symbol_idx(); + line = line_find.first->second.get_value(); } } - func_locs.emplace_back( - FunLoc{._ip = process_pc, - ._lineno = line, - ._symbol_idx = parent_sym.second.get_symbol_idx(), - ._map_info_idx = -1}); + func_locs.emplace_back(FunLoc{._ip = process_pc, + ._lineno = line, + ._symbol_idx = parent_sym.second.get_value(), + ._map_info_idx = -1}); } // Retrieve existing symbol or attempt to read from dwarf @@ -117,14 +116,14 @@ void DwflSymbolLookup::get_or_insert(Dwfl *dwfl, const DDProfMod &ddprof_mod, #ifdef DEBUG LG_DBG("Match: %lx,%lx -> %s,%d", find_res.first->first, find_res.first->second.get_end(), - table[find_res.first->second.get_symbol_idx()]._symname.c_str(), - find_res.first->second.get_symbol_idx()); + table[find_res.first->second.get_value()]._symname.c_str(), + find_res.first->second.get_value()); #endif // cache validation mechanism: force dwfl lookup to compare with matched // symbols if (_lookup_setting == K_CACHE_VALIDATE) { if (symbol_lookup_check(ddprof_mod._mod, process_pc, - table[find_res.first->second.get_symbol_idx()])) { + table[find_res.first->second.get_value()])) { ++_stats._errors; } } @@ -160,7 +159,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, DwflSymbolLookup::SymbolWrapper &symbol_wrapper, SymbolTable &table, DieInformation &die_information) { - DwflSymbolLookup::LineMap &line_map = symbol_wrapper._line_map; + LineMap &line_map = symbol_wrapper._line_map; DwflSymbolLookup::InlineMap &inline_map = symbol_wrapper._inline_map; Dwarf_Lines *lines; size_t nlines; @@ -184,7 +183,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, Dwarf_Addr previous_addr = 0; NestedSymbolMap::FindRes current_func{inline_map.end(), false}; // store closest line per file (to avoid missmatches) - std::unordered_map closest_lines; + std::unordered_map closest_lines; for (size_t line_index = start_index; line_index < nlines; ++line_index) { Dwarf_Line *line = dwarf_onesrcline(lines, line_index); Dwarf_Addr line_addr; @@ -201,8 +200,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, if (previous_addr && line_addr != previous_addr) { if (hint_line != line_map.end() && - hint_line->second.get_symbol_idx() == - closest_lines[ref_sym->_srcpath]) { + hint_line->second.get_value() == closest_lines[ref_sym->_srcpath]) { // extend previous element hint_line->second.set_end(previous_addr); } else { @@ -211,7 +209,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, hint_line, std::make_pair( previous_addr, - SymbolSpan{line_addr - 1, closest_lines[ref_sym->_srcpath]})); + LineSpan{line_addr - 1, closest_lines[ref_sym->_srcpath]})); } #ifdef DEBUG LG_DBG("Associate %d (%lx->%lx) / %s to %s (vs %s)", @@ -231,7 +229,7 @@ static DDRes parse_lines(Dwarf_Die *cudie, const DDProfMod &mod, } // keep line, if it matches the symbol // todo can be optimized to avoid conversion to string - closest_lines[std::string(current_file)] = lineno; + closest_lines[std::string(current_file)] = static_cast(lineno); previous_addr = line_addr; } return {}; @@ -242,7 +240,7 @@ DDRes DwflSymbolLookup::insert_inlining_info( ProcessAddress_t process_pc, const Dso &dso, SymbolWrapper &symbol_wrapper, SymbolMap::ValueType &parent_func) { - SymbolIdx_t parent_sym_idx = parent_func.second.get_symbol_idx(); + SymbolIdx_t parent_sym_idx = parent_func.second.get_value(); Dwarf_Addr bias; Dwarf_Die *cudie = dwfl_addrdie(dwfl, process_pc, &bias); if (!cudie) { @@ -388,7 +386,7 @@ NestedSymbolMap::FindRes DwflSymbolLookup::get_inlined( } else { auto find_line = symbol_wrapper._line_map.find_closest(elf_pc); if (find_line.second) { - line = find_line.first->second.get_symbol_idx(); + line = find_line.first->second.get_value(); } } func_locs.emplace_back( diff --git a/src/runtime_symbol_lookup.cc b/src/runtime_symbol_lookup.cc index 8c696ebd9..3279712b3 100644 --- a/src/runtime_symbol_lookup.cc +++ b/src/runtime_symbol_lookup.cc @@ -74,7 +74,7 @@ bool RuntimeSymbolLookup::insert_or_replace(std::string_view symbol, "jit"); } else { // todo managing range erase (we can overal with other syms) - SymbolIdx_t const existing = find_res.first->second.get_symbol_idx(); + SymbolIdx_t const existing = find_res.first->second.get_value(); #ifdef DEBUG LG_DBG("Existyng sym -- %s (%lx-%lx)", symbol_table[existing]._demangle_name.c_str(), find_res.first->first, @@ -194,7 +194,7 @@ RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, if (!find_res.second) { flag_lookup_failure(symbol_info, jitdump_path); } - return find_res.second ? find_res.first->second.get_symbol_idx() : -1; + return find_res.second ? find_res.first->second.get_value() : -1; } SymbolIdx_t RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, @@ -211,7 +211,7 @@ SymbolIdx_t RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, if (!find_res.second) { flag_lookup_failure(symbol_info, "perfmap"); } - return find_res.second ? find_res.first->second.get_symbol_idx() : -1; + return find_res.second ? find_res.first->second.get_value() : -1; } } // namespace ddprof diff --git a/src/symbol_map.cc b/src/symbol_map.cc index 61f79df84..cf06774ce 100644 --- a/src/symbol_map.cc +++ b/src/symbol_map.cc @@ -9,36 +9,6 @@ namespace ddprof { -bool SymbolMap::is_within(const Offset_t &norm_pc, - const SymbolMap::ValueType &kv) { - if (norm_pc < kv.first) { - return false; - } - if (norm_pc > kv.second.get_end()) { - return false; - } - return true; -} - -SymbolMap::FindRes SymbolMap::find_closest(Offset_t norm_pc) { - // First element not less than (can match exactly a start addr) - auto it = lower_bound(norm_pc); - if (it != end()) { // map is empty - if (SymbolMap::is_within(norm_pc, *it)) { - return {it, true}; - } - } - - // previous element is more likely to contain our addr - if (it != begin()) { - --it; - } else { // map is empty - return {end(), false}; - } - // element can not be end (as we reversed or exit) - return {it, is_within(norm_pc, *it)}; -} - // parent span acts as a bound NestedSymbolMap::FindRes NestedSymbolMap::find_parent(NestedSymbolMap::ConstIt it, diff --git a/test/symbol_map-ut.cc b/test/symbol_map-ut.cc index d05872385..6d7facc34 100644 --- a/test/symbol_map-ut.cc +++ b/test/symbol_map-ut.cc @@ -13,10 +13,10 @@ namespace ddprof { TEST(SymbolMap, Span) { SymbolSpan span1; EXPECT_EQ(span1.get_end(), 0); - EXPECT_EQ(span1.get_symbol_idx(), -1); + EXPECT_EQ(span1.get_value(), -1); SymbolSpan span2(0x1000, 12); EXPECT_EQ(span2.get_end(), 0x1000); - EXPECT_EQ(span2.get_symbol_idx(), 12); + EXPECT_EQ(span2.get_value(), 12); } TEST(SymbolMap, Map) {