| /** |
| * @file bfd_support.cpp |
| * BFD muck we have to deal with. |
| * |
| * @remark Copyright 2005 OProfile authors |
| * @remark Read the file COPYING |
| * |
| * @author John Levon |
| */ |
| |
| #include "bfd_support.h" |
| |
| #include "op_bfd.h" |
| #include "op_fileio.h" |
| #include "op_config.h" |
| #include "string_manip.h" |
| #include "file_manip.h" |
| #include "cverb.h" |
| #include "locate_images.h" |
| |
| #include <cstdlib> |
| #include <cstring> |
| #include <cassert> |
| #include <iostream> |
| #include <fstream> |
| #include <sstream> |
| #include <string> |
| #include <cstring> |
| #include <cstdlib> |
| |
| using namespace std; |
| |
| extern verbose vbfd; |
| |
| namespace { |
| |
| |
| void check_format(string const & file, bfd ** ibfd) |
| { |
| if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) { |
| cverb << vbfd << "BFD format failure for " << file << endl; |
| bfd_close(*ibfd); |
| *ibfd = NULL; |
| } |
| } |
| |
| |
| bool separate_debug_file_exists(string & name, unsigned long const crc, |
| extra_images const & extra) |
| { |
| unsigned long file_crc = 0; |
| // The size of 2 * 1024 elements for the buffer is arbitrary. |
| char buffer[2 * 1024]; |
| |
| image_error img_ok; |
| string const image_path = extra.find_image_path(name, img_ok, true); |
| |
| if (img_ok != image_ok) |
| return false; |
| |
| name = image_path; |
| |
| ifstream file(image_path.c_str()); |
| if (!file) |
| return false; |
| |
| cverb << vbfd << "found " << name; |
| while (file) { |
| file.read(buffer, sizeof(buffer)); |
| file_crc = calc_crc32(file_crc, |
| reinterpret_cast<unsigned char *>(&buffer[0]), |
| file.gcount()); |
| } |
| cverb << vbfd << " with crc32 = " << hex << file_crc << endl; |
| return crc == file_crc; |
| } |
| |
| |
| bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32) |
| { |
| asection * sect; |
| |
| cverb << vbfd << "fetching .gnu_debuglink section" << endl; |
| sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink"); |
| |
| if (sect == NULL) |
| return false; |
| |
| bfd_size_type debuglink_size = bfd_section_size(ibfd, sect); |
| char contents[debuglink_size]; |
| cverb << vbfd |
| << ".gnu_debuglink section has size " << debuglink_size << endl; |
| |
| if (!bfd_get_section_contents(ibfd, sect, |
| reinterpret_cast<unsigned char *>(contents), |
| static_cast<file_ptr>(0), debuglink_size)) { |
| bfd_perror("bfd_get_section_contents:get_debug:"); |
| exit(2); |
| } |
| |
| /* CRC value is stored after the filename, aligned up to 4 bytes. */ |
| size_t filename_len = strlen(contents); |
| size_t crc_offset = filename_len + 1; |
| crc_offset = (crc_offset + 3) & ~3; |
| |
| crc32 = bfd_get_32(ibfd, |
| reinterpret_cast<bfd_byte *>(contents + crc_offset)); |
| filename = string(contents, filename_len); |
| cverb << vbfd << ".gnu_debuglink filename is " << filename << endl; |
| return true; |
| } |
| |
| |
| /** |
| * With Objective C, we'll get strings like: |
| * |
| * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range |
| * |
| * for the symbol name, and: |
| * -[GSUnicodeString rangeOfCharacterFromSet:options:range:] |
| * |
| * for the function name, so we have to do some looser matching |
| * than for other languages (unfortunately, it's not possible |
| * to demangle Objective C symbols). |
| */ |
| bool objc_match(string const & sym, string const & method) |
| { |
| if (method.length() < 3) |
| return false; |
| |
| string mangled; |
| |
| if (is_prefix(method, "-[")) { |
| mangled += "_i_"; |
| } else if (is_prefix(method, "+[")) { |
| mangled += "_c_"; |
| } else { |
| return false; |
| } |
| |
| string::const_iterator it = method.begin() + 2; |
| string::const_iterator const end = method.end(); |
| |
| bool found_paren = false; |
| |
| for (; it != end; ++it) { |
| switch (*it) { |
| case ' ': |
| mangled += '_'; |
| if (!found_paren) |
| mangled += '_'; |
| break; |
| case ':': |
| mangled += '_'; |
| break; |
| case ')': |
| case ']': |
| break; |
| case '(': |
| found_paren = true; |
| mangled += '_'; |
| break; |
| default: |
| mangled += *it; |
| } |
| } |
| |
| return sym == mangled; |
| } |
| |
| |
| /* |
| * With a binary image where some objects are missing debug |
| * info, we can end up attributing to a completely different |
| * function (#484660): bfd_nearest_line() will happily move from one |
| * symbol to the nearest one it can find with debug information. |
| * To mitigate this problem, we check that the symbol name |
| * matches the returned function name. |
| * |
| * However, this check fails in some cases it shouldn't: |
| * Objective C, and C++ static inline functions (as discussed in |
| * GCC bugzilla #11774). So, we have a looser check that |
| * accepts merely a substring, plus some magic for Objective C. |
| * |
| * If even the loose check fails, then we give up. |
| */ |
| bool is_correct_function(string const & function, string const & name) |
| { |
| if (name == function) |
| return true; |
| |
| if (objc_match(name, function)) |
| return true; |
| |
| // warn the user if we had to use the loose check |
| if (name.find(function) != string::npos) { |
| static bool warned = false; |
| if (!warned) { |
| cerr << "warning: some functions compiled without " |
| << "debug information may have incorrect source " |
| << "line attributions" << endl; |
| warned = true; |
| } |
| cverb << vbfd << "is_correct_function(" << function << ", " |
| << name << ") fuzzy match." << endl; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| /* |
| * binutils 2.12 and below have a small bug where functions without a |
| * debug entry at the prologue start do not give a useful line number |
| * from bfd_find_nearest_line(). This can happen with certain gcc |
| * versions such as 2.95. |
| * |
| * We work around this problem by scanning forward for a vma with valid |
| * linenr info, if we can't get a valid line number. Problem uncovered |
| * by Norbert Kaufmann. The work-around decreases, on the tincas |
| * application, the number of failure to retrieve linenr info from 835 |
| * to 173. Most of the remaining are c++ inline functions mainly from |
| * the STL library. Fix #529622 |
| */ |
| void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms, |
| string const & name, bfd_vma pc, |
| char const ** filename, unsigned int * line) |
| { |
| char const * cfilename; |
| char const * function; |
| unsigned int linenr; |
| |
| // FIXME: looking at debug info for all gcc version shows than |
| // the same problems can -perhaps- occur for epilog code: find a |
| // samples files with samples in epilog and try opreport -l -g |
| // on it, check it also with opannotate. |
| |
| // first restrict the search on a sensible range of vma, 16 is |
| // an intuitive value based on epilog code look |
| size_t max_search = 16; |
| size_t section_size = bfd_section_size(abfd, section); |
| if (pc + max_search > section_size) |
| max_search = section_size - pc; |
| |
| for (size_t i = 1; i < max_search; ++i) { |
| bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i, |
| &cfilename, &function, |
| &linenr); |
| |
| if (ret && cfilename && function && linenr != 0 |
| && is_correct_function(function, name)) { |
| *filename = cfilename; |
| *line = linenr; |
| return; |
| } |
| } |
| } |
| |
| |
| } // namespace anon |
| |
| |
| bfd * open_bfd(string const & file) |
| { |
| /* bfd keeps its own reference to the filename char *, |
| * so it must have a lifetime longer than the ibfd */ |
| bfd * ibfd = bfd_openr(file.c_str(), NULL); |
| if (!ibfd) { |
| cverb << vbfd << "bfd_openr failed for " << file << endl; |
| return NULL; |
| } |
| |
| check_format(file, &ibfd); |
| |
| return ibfd; |
| } |
| |
| |
| bfd * fdopen_bfd(string const & file, int fd) |
| { |
| /* bfd keeps its own reference to the filename char *, |
| * so it must have a lifetime longer than the ibfd */ |
| bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd); |
| if (!ibfd) { |
| cverb << vbfd << "bfd_openr failed for " << file << endl; |
| return NULL; |
| } |
| |
| check_format(file, &ibfd); |
| |
| return ibfd; |
| } |
| |
| |
| bool find_separate_debug_file(bfd * ibfd, string const & filepath_in, |
| string & debug_filename, extra_images const & extra) |
| { |
| string filepath(filepath_in); |
| string basename; |
| unsigned long crc32; |
| |
| if (!get_debug_link_info(ibfd, basename, crc32)) |
| return false; |
| |
| // Work out the image file's directory prefix |
| string filedir = op_dirname(filepath); |
| // Make sure it starts with / |
| if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/') |
| filedir += '/'; |
| |
| string first_try(filedir + ".debug/" + basename); |
| string second_try(DEBUGDIR + filedir + basename); |
| string third_try(filedir + basename); |
| |
| cverb << vbfd << "looking for debugging file " << basename |
| << " with crc32 = " << hex << crc32 << endl; |
| |
| if (separate_debug_file_exists(first_try, crc32, extra)) |
| debug_filename = first_try; |
| else if (separate_debug_file_exists(second_try, crc32, extra)) |
| debug_filename = second_try; |
| else if (separate_debug_file_exists(third_try, crc32, extra)) |
| debug_filename = third_try; |
| else |
| return false; |
| |
| return true; |
| } |
| |
| |
| bool interesting_symbol(asymbol * sym) |
| { |
| // #717720 some binutils are miscompiled by gcc 2.95, one of the |
| // typical symptom can be catched here. |
| if (!sym->section) { |
| ostringstream os; |
| os << "Your version of binutils seems to have a bug.\n" |
| << "Read http://oprofile.sf.net/faq/#binutilsbug\n"; |
| throw op_runtime_error(os.str()); |
| } |
| |
| if (!(sym->section->flags & SEC_CODE)) |
| return false; |
| |
| // returning true for fix up in op_bfd_symbol() |
| if (!sym->name || sym->name[0] == '\0') |
| return true; |
| /* ARM assembler internal mapping symbols aren't interesting */ |
| if ((strcmp("$a", sym->name) == 0) || |
| (strcmp("$t", sym->name) == 0) || |
| (strcmp("$d", sym->name) == 0)) |
| return false; |
| |
| // C++ exception stuff |
| if (sym->name[0] == '.' && sym->name[1] == 'L') |
| return false; |
| |
| /* This case cannot be moved to boring_symbol(), |
| * because that's only used for duplicate VMAs, |
| * and sometimes this symbol appears at an address |
| * different from all other symbols. |
| */ |
| if (!strcmp("gcc2_compiled.", sym->name)) |
| return false; |
| |
| if (sym->flags & BSF_SECTION_SYM) |
| return false; |
| |
| if (!(sym->section->flags & SEC_LOAD)) |
| return false; |
| |
| return true; |
| } |
| |
| |
| bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second) |
| { |
| if (first.name() == "Letext") |
| return true; |
| else if (second.name() == "Letext") |
| return false; |
| |
| if (first.name().substr(0, 2) == "??") |
| return true; |
| else if (second.name().substr(0, 2) == "??") |
| return false; |
| |
| if (first.hidden() && !second.hidden()) |
| return true; |
| else if (!first.hidden() && second.hidden()) |
| return false; |
| |
| if (first.name()[0] == '_' && second.name()[0] != '_') |
| return true; |
| else if (first.name()[0] != '_' && second.name()[0] == '_') |
| return false; |
| |
| if (first.weak() && !second.weak()) |
| return true; |
| else if (!first.weak() && second.weak()) |
| return false; |
| |
| return false; |
| } |
| |
| |
| bool bfd_info::has_debug_info() const |
| { |
| if (!valid()) |
| return false; |
| |
| for (asection const * sect = abfd->sections; sect; sect = sect->next) { |
| if (sect->flags & SEC_DEBUGGING) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
/**
 * Release the synthetic symbol table with free() and then close the
 * BFD through close().
 */
bfd_info::~bfd_info()
{
	// synth_syms is released with free(); free(NULL) is a no-op
	free(synth_syms);
	close();
}
| |
| |
| void bfd_info::close() |
| { |
| if (abfd) |
| bfd_close(abfd); |
| } |
| |
| /** |
| * This function is only called when processing symbols retrieved from a |
| * debuginfo file that is separate from the actual runtime binary image. |
| * Separate debuginfo files may be needed in two different cases: |
| * 1) the real image is completely stripped, where there is no symbol |
| information at all |
| * 2) the real image has debuginfo stripped, and the user is requesting "-g" |
| * (src file/line num info) |
| * After all symbols are gathered up, there will be some filtering/removal of |
| * unnecessary symbols. In particular, the bfd_info::interesting_symbol() |
| * function filters out symbols whose section's flag value does not include |
| * SEC_LOAD. This filtering is required, so it must be retained. However, |
| * we run into a problem with symbols from debuginfo files, since the |
| * section flag does NOT include SEC_LOAD. To solve this problem, the |
| * translate_debuginfo_syms function maps the debuginfo symbol's sections to |
| * that of their corresponding real image. |
| */ |
| void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms) |
| { |
| unsigned int img_sect_cnt = 0; |
| bfd * image_bfd = image_bfd_info->abfd; |
| multimap<string, bfd_section *> image_sections; |
| |
| for (bfd_section * sect = image_bfd->sections; |
| sect && img_sect_cnt < image_bfd->section_count; |
| sect = sect->next) { |
| // A comment section marks the end of the needed sections |
| if (strstr(sect->name, ".comment") == sect->name) |
| break; |
| image_sections.insert(pair<string, bfd_section *>(sect->name, sect)); |
| img_sect_cnt++; |
| } |
| |
| asymbol * sym = dbg_syms[0]; |
| string prev_sect_name = ""; |
| bfd_section * matched_section = NULL; |
| for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) { |
| bool section_switch; |
| |
| if (strcmp(prev_sect_name.c_str(), sym->section->name)) { |
| section_switch = true; |
| prev_sect_name = sym->section->name; |
| } else { |
| section_switch = false; |
| } |
| if (sym->section->owner && sym->section->owner == abfd) { |
| if (section_switch ) { |
| matched_section = NULL; |
| multimap<string, bfd_section *>::iterator it; |
| pair<multimap<string, bfd_section *>::iterator, |
| multimap<string, bfd_section *>::iterator> range; |
| |
| range = image_sections.equal_range(sym->section->name); |
| for (it = range.first; it != range.second; it++) { |
| if ((*it).second->vma == sym->section->vma) { |
| matched_section = (*it).second; |
| break; |
| } |
| } |
| } |
| if (matched_section) { |
| sym->section = matched_section; |
| sym->the_bfd = image_bfd; |
| } |
| } |
| } |
| } |
| |
| #if SYNTHESIZE_SYMBOLS |
| bool bfd_info::get_synth_symbols() |
| { |
| extern const bfd_target bfd_elf64_powerpc_vec; |
| extern const bfd_target bfd_elf64_powerpcle_vec; |
| bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec) |
| || (abfd->xvec == &bfd_elf64_powerpcle_vec); |
| |
| if (!is_elf64_powerpc_target) |
| return false; |
| |
| void * buf; |
| uint tmp; |
| long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp); |
| if (nr_mini_syms < 1) |
| return false; |
| |
| asymbol ** mini_syms = (asymbol **)buf; |
| buf = NULL; |
| bfd * synth_bfd; |
| |
| /* For ppc64, a debuginfo file by itself does not hold enough symbol |
| * information for us to properly attribute samples to symbols. If |
| * the image file's bfd has no symbols (as in a super-stripped library), |
| * then we need to do the extra processing in translate_debuginfo_syms. |
| */ |
| if (image_bfd_info && image_bfd_info->nr_syms == 0) { |
| translate_debuginfo_syms(mini_syms, nr_mini_syms); |
| synth_bfd = image_bfd_info->abfd; |
| } else |
| synth_bfd = abfd; |
| |
| long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd, |
| nr_mini_syms, |
| mini_syms, 0, |
| NULL, &synth_syms); |
| |
| if (nr_synth_syms < 0) { |
| free(mini_syms); |
| return false; |
| } |
| |
| cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl; |
| cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl; |
| |
| nr_syms = nr_mini_syms + nr_synth_syms; |
| syms.reset(new asymbol *[nr_syms + 1]); |
| |
| for (size_t i = 0; i < (size_t)nr_mini_syms; ++i) |
| syms[i] = mini_syms[i]; |
| |
| |
| for (size_t i = 0; i < (size_t)nr_synth_syms; ++i) |
| syms[nr_mini_syms + i] = synth_syms + i; |
| |
| |
| free(mini_syms); |
| |
| // bfd_canonicalize_symtab does this, so shall we |
| syms[nr_syms] = NULL; |
| |
| return true; |
| } |
| #else |
| bool bfd_info::get_synth_symbols() |
| { |
| return false; |
| } |
| #endif /* SYNTHESIZE_SYMBOLS */ |
| |
| |
| void bfd_info::get_symbols() |
| { |
| if (!abfd) |
| return; |
| |
| cverb << vbfd << "bfd_info::get_symbols() for " |
| << bfd_get_filename(abfd) << endl; |
| |
| if (get_synth_symbols()) |
| return; |
| |
| if (bfd_get_file_flags(abfd) & HAS_SYMS) |
| nr_syms = bfd_get_symtab_upper_bound(abfd); |
| |
| cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec |
| << nr_syms << hex << endl; |
| |
| nr_syms /= sizeof(asymbol *); |
| |
| if (nr_syms < 1) |
| return; |
| |
| syms.reset(new asymbol *[nr_syms]); |
| |
| nr_syms = bfd_canonicalize_symtab(abfd, syms.get()); |
| |
| if (image_bfd_info) |
| translate_debuginfo_syms(syms.get(), nr_syms); |
| |
| cverb << vbfd << "bfd_canonicalize_symtab: " << dec |
| << nr_syms << hex << endl; |
| } |
| |
| |
| linenr_info const |
| find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym, |
| bfd_vma offset, bool anon_obj) |
| { |
| char const * function = ""; |
| char const * cfilename = ""; |
| unsigned int linenr = 0; |
| linenr_info info; |
| bfd * abfd; |
| asymbol ** syms; |
| asection * section; |
| bfd_vma pc; |
| bool ret; |
| |
| if (!b.valid()) |
| goto fail; |
| |
| // take care about artificial symbol |
| if (!sym.symbol()) |
| goto fail; |
| |
| abfd = b.abfd; |
| syms = b.syms.get(); |
| if (!syms) |
| goto fail; |
| section = sym.symbol()->section; |
| if (anon_obj) |
| pc = offset - sym.symbol()->section->vma; |
| else |
| pc = (sym.value() + offset) - sym.filepos(); |
| |
| if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) |
| goto fail; |
| |
| if (pc >= bfd_section_size(abfd, section)) |
| goto fail; |
| |
| ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename, |
| &function, &linenr); |
| |
| if (!ret || !cfilename || !function) |
| goto fail; |
| |
| /* |
| * is_correct_function does not handle the case of static inlines, |
| * but if the linenr is non-zero in the inline case, it is the correct |
| * line number. |
| */ |
| if (linenr == 0 && !is_correct_function(function, sym.name())) |
| goto fail; |
| |
| if (linenr == 0) { |
| fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename, |
| &linenr); |
| } |
| |
| info.found = true; |
| info.filename = cfilename; |
| info.line = linenr; |
| return info; |
| |
| fail: |
| info.found = false; |
| // some stl lacks string::clear() |
| info.filename.erase(info.filename.begin(), info.filename.end()); |
| info.line = 0; |
| return info; |
| } |