blob: 69d69383c4c1737fa14432ef9691ee437cf76b27 [file] [log] [blame]
/**
* @file bfd_support.cpp
* BFD muck we have to deal with.
*
* @remark Copyright 2005 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
*/
#include "bfd_support.h"
#include "op_bfd.h"
#include "op_fileio.h"
#include "op_config.h"
#include "string_manip.h"
#include "file_manip.h"
#include "cverb.h"
#include "locate_images.h"
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cstring>
#include <cstdlib>
using namespace std;
extern verbose vbfd;
namespace {
void check_format(string const & file, bfd ** ibfd)
{
if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
cverb << vbfd << "BFD format failure for " << file << endl;
bfd_close(*ibfd);
*ibfd = NULL;
}
}
bool separate_debug_file_exists(string & name, unsigned long const crc,
extra_images const & extra)
{
unsigned long file_crc = 0;
// The size of 2 * 1024 elements for the buffer is arbitrary.
char buffer[2 * 1024];
image_error img_ok;
string const image_path = extra.find_image_path(name, img_ok, true);
if (img_ok != image_ok)
return false;
name = image_path;
ifstream file(image_path.c_str());
if (!file)
return false;
cverb << vbfd << "found " << name;
while (file) {
file.read(buffer, sizeof(buffer));
file_crc = calc_crc32(file_crc,
reinterpret_cast<unsigned char *>(&buffer[0]),
file.gcount());
}
cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
return crc == file_crc;
}
bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
{
asection * sect;
cverb << vbfd << "fetching .gnu_debuglink section" << endl;
sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
if (sect == NULL)
return false;
bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
char contents[debuglink_size];
cverb << vbfd
<< ".gnu_debuglink section has size " << debuglink_size << endl;
if (!bfd_get_section_contents(ibfd, sect,
reinterpret_cast<unsigned char *>(contents),
static_cast<file_ptr>(0), debuglink_size)) {
bfd_perror("bfd_get_section_contents:get_debug:");
exit(2);
}
/* CRC value is stored after the filename, aligned up to 4 bytes. */
size_t filename_len = strlen(contents);
size_t crc_offset = filename_len + 1;
crc_offset = (crc_offset + 3) & ~3;
crc32 = bfd_get_32(ibfd,
reinterpret_cast<bfd_byte *>(contents + crc_offset));
filename = string(contents, filename_len);
cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
return true;
}
/**
* With Objective C, we'll get strings like:
*
* _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
*
* for the symbol name, and:
* -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
*
* for the function name, so we have to do some looser matching
* than for other languages (unfortunately, it's not possible
* to demangle Objective C symbols).
*/
bool objc_match(string const & sym, string const & method)
{
if (method.length() < 3)
return false;
string mangled;
if (is_prefix(method, "-[")) {
mangled += "_i_";
} else if (is_prefix(method, "+[")) {
mangled += "_c_";
} else {
return false;
}
string::const_iterator it = method.begin() + 2;
string::const_iterator const end = method.end();
bool found_paren = false;
for (; it != end; ++it) {
switch (*it) {
case ' ':
mangled += '_';
if (!found_paren)
mangled += '_';
break;
case ':':
mangled += '_';
break;
case ')':
case ']':
break;
case '(':
found_paren = true;
mangled += '_';
break;
default:
mangled += *it;
}
}
return sym == mangled;
}
/*
* With a binary image where some objects are missing debug
* info, we can end up attributing to a completely different
* function (#484660): bfd_nearest_line() will happily move from one
* symbol to the nearest one it can find with debug information.
* To mitigate this problem, we check that the symbol name
* matches the returned function name.
*
* However, this check fails in some cases it shouldn't:
* Objective C, and C++ static inline functions (as discussed in
* GCC bugzilla #11774). So, we have a looser check that
* accepts merely a substring, plus some magic for Objective C.
*
* If even the loose check fails, then we give up.
*/
bool is_correct_function(string const & function, string const & name)
{
if (name == function)
return true;
if (objc_match(name, function))
return true;
// warn the user if we had to use the loose check
if (name.find(function) != string::npos) {
static bool warned = false;
if (!warned) {
cerr << "warning: some functions compiled without "
<< "debug information may have incorrect source "
<< "line attributions" << endl;
warned = true;
}
cverb << vbfd << "is_correct_function(" << function << ", "
<< name << ") fuzzy match." << endl;
return true;
}
return false;
}
/*
* binutils 2.12 and below have a small bug where functions without a
* debug entry at the prologue start do not give a useful line number
* from bfd_find_nearest_line(). This can happen with certain gcc
* versions such as 2.95.
*
* We work around this problem by scanning forward for a vma with valid
* linenr info, if we can't get a valid line number. Problem uncovered
* by Norbert Kaufmann. The work-around decreases, on the tincas
* application, the number of failure to retrieve linenr info from 835
* to 173. Most of the remaining are c++ inline functions mainly from
* the STL library. Fix #529622
*/
void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
string const & name, bfd_vma pc,
char const ** filename, unsigned int * line)
{
char const * cfilename;
char const * function;
unsigned int linenr;
// FIXME: looking at debug info for all gcc version shows than
// the same problems can -perhaps- occur for epilog code: find a
// samples files with samples in epilog and try opreport -l -g
// on it, check it also with opannotate.
// first restrict the search on a sensible range of vma, 16 is
// an intuitive value based on epilog code look
size_t max_search = 16;
size_t section_size = bfd_section_size(abfd, section);
if (pc + max_search > section_size)
max_search = section_size - pc;
for (size_t i = 1; i < max_search; ++i) {
bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
&cfilename, &function,
&linenr);
if (ret && cfilename && function && linenr != 0
&& is_correct_function(function, name)) {
*filename = cfilename;
*line = linenr;
return;
}
}
}
} // namespace anon
bfd * open_bfd(string const & file)
{
/* bfd keeps its own reference to the filename char *,
* so it must have a lifetime longer than the ibfd */
bfd * ibfd = bfd_openr(file.c_str(), NULL);
if (!ibfd) {
cverb << vbfd << "bfd_openr failed for " << file << endl;
return NULL;
}
check_format(file, &ibfd);
return ibfd;
}
bfd * fdopen_bfd(string const & file, int fd)
{
/* bfd keeps its own reference to the filename char *,
* so it must have a lifetime longer than the ibfd */
bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
if (!ibfd) {
cverb << vbfd << "bfd_openr failed for " << file << endl;
return NULL;
}
check_format(file, &ibfd);
return ibfd;
}
bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
string & debug_filename, extra_images const & extra)
{
string filepath(filepath_in);
string basename;
unsigned long crc32;
if (!get_debug_link_info(ibfd, basename, crc32))
return false;
// Work out the image file's directory prefix
string filedir = op_dirname(filepath);
// Make sure it starts with /
if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
filedir += '/';
string first_try(filedir + ".debug/" + basename);
string second_try(DEBUGDIR + filedir + basename);
string third_try(filedir + basename);
cverb << vbfd << "looking for debugging file " << basename
<< " with crc32 = " << hex << crc32 << endl;
if (separate_debug_file_exists(first_try, crc32, extra))
debug_filename = first_try;
else if (separate_debug_file_exists(second_try, crc32, extra))
debug_filename = second_try;
else if (separate_debug_file_exists(third_try, crc32, extra))
debug_filename = third_try;
else
return false;
return true;
}
bool interesting_symbol(asymbol * sym)
{
// #717720 some binutils are miscompiled by gcc 2.95, one of the
// typical symptom can be catched here.
if (!sym->section) {
ostringstream os;
os << "Your version of binutils seems to have a bug.\n"
<< "Read http://oprofile.sf.net/faq/#binutilsbug\n";
throw op_runtime_error(os.str());
}
if (!(sym->section->flags & SEC_CODE))
return false;
// returning true for fix up in op_bfd_symbol()
if (!sym->name || sym->name[0] == '\0')
return true;
/* ARM assembler internal mapping symbols aren't interesting */
if ((strcmp("$a", sym->name) == 0) ||
(strcmp("$t", sym->name) == 0) ||
(strcmp("$d", sym->name) == 0))
return false;
// C++ exception stuff
if (sym->name[0] == '.' && sym->name[1] == 'L')
return false;
/* This case cannot be moved to boring_symbol(),
* because that's only used for duplicate VMAs,
* and sometimes this symbol appears at an address
* different from all other symbols.
*/
if (!strcmp("gcc2_compiled.", sym->name))
return false;
if (sym->flags & BSF_SECTION_SYM)
return false;
if (!(sym->section->flags & SEC_LOAD))
return false;
return true;
}
bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
{
if (first.name() == "Letext")
return true;
else if (second.name() == "Letext")
return false;
if (first.name().substr(0, 2) == "??")
return true;
else if (second.name().substr(0, 2) == "??")
return false;
if (first.hidden() && !second.hidden())
return true;
else if (!first.hidden() && second.hidden())
return false;
if (first.name()[0] == '_' && second.name()[0] != '_')
return true;
else if (first.name()[0] != '_' && second.name()[0] == '_')
return false;
if (first.weak() && !second.weak())
return true;
else if (!first.weak() && second.weak())
return false;
return false;
}
bool bfd_info::has_debug_info() const
{
if (!valid())
return false;
for (asection const * sect = abfd->sections; sect; sect = sect->next) {
if (sect->flags & SEC_DEBUGGING)
return true;
}
return false;
}
bfd_info::~bfd_info()
{
free(synth_syms);
close();
}
void bfd_info::close()
{
if (abfd)
bfd_close(abfd);
}
/**
* This function is only called when processing symbols retrieved from a
* debuginfo file that is separate from the actual runtime binary image.
* Separate debuginfo files may be needed in two different cases:
* 1) the real image is completely stripped, where there is no symbol
information at all
* 2) the real image has debuginfo stripped, and the user is requesting "-g"
* (src file/line num info)
* After all symbols are gathered up, there will be some filtering/removal of
* unnecessary symbols. In particular, the bfd_info::interesting_symbol()
* function filters out symbols whose section's flag value does not include
* SEC_LOAD. This filtering is required, so it must be retained. However,
* we run into a problem with symbols from debuginfo files, since the
* section flag does NOT include SEC_LOAD. To solve this problem, the
* translate_debuginfo_syms function maps the debuginfo symbol's sections to
* that of their corresponding real image.
*/
void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
{
unsigned int img_sect_cnt = 0;
bfd * image_bfd = image_bfd_info->abfd;
multimap<string, bfd_section *> image_sections;
for (bfd_section * sect = image_bfd->sections;
sect && img_sect_cnt < image_bfd->section_count;
sect = sect->next) {
// A comment section marks the end of the needed sections
if (strstr(sect->name, ".comment") == sect->name)
break;
image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
img_sect_cnt++;
}
asymbol * sym = dbg_syms[0];
string prev_sect_name = "";
bfd_section * matched_section = NULL;
for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
bool section_switch;
if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
section_switch = true;
prev_sect_name = sym->section->name;
} else {
section_switch = false;
}
if (sym->section->owner && sym->section->owner == abfd) {
if (section_switch ) {
matched_section = NULL;
multimap<string, bfd_section *>::iterator it;
pair<multimap<string, bfd_section *>::iterator,
multimap<string, bfd_section *>::iterator> range;
range = image_sections.equal_range(sym->section->name);
for (it = range.first; it != range.second; it++) {
if ((*it).second->vma == sym->section->vma) {
matched_section = (*it).second;
break;
}
}
}
if (matched_section) {
sym->section = matched_section;
sym->the_bfd = image_bfd;
}
}
}
}
#if SYNTHESIZE_SYMBOLS
bool bfd_info::get_synth_symbols()
{
extern const bfd_target bfd_elf64_powerpc_vec;
extern const bfd_target bfd_elf64_powerpcle_vec;
bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
if (!is_elf64_powerpc_target)
return false;
void * buf;
uint tmp;
long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
if (nr_mini_syms < 1)
return false;
asymbol ** mini_syms = (asymbol **)buf;
buf = NULL;
bfd * synth_bfd;
/* For ppc64, a debuginfo file by itself does not hold enough symbol
* information for us to properly attribute samples to symbols. If
* the image file's bfd has no symbols (as in a super-stripped library),
* then we need to do the extra processing in translate_debuginfo_syms.
*/
if (image_bfd_info && image_bfd_info->nr_syms == 0) {
translate_debuginfo_syms(mini_syms, nr_mini_syms);
synth_bfd = image_bfd_info->abfd;
} else
synth_bfd = abfd;
long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
nr_mini_syms,
mini_syms, 0,
NULL, &synth_syms);
if (nr_synth_syms < 0) {
free(mini_syms);
return false;
}
cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
nr_syms = nr_mini_syms + nr_synth_syms;
syms.reset(new asymbol *[nr_syms + 1]);
for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
syms[i] = mini_syms[i];
for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
syms[nr_mini_syms + i] = synth_syms + i;
free(mini_syms);
// bfd_canonicalize_symtab does this, so shall we
syms[nr_syms] = NULL;
return true;
}
#else
bool bfd_info::get_synth_symbols()
{
return false;
}
#endif /* SYNTHESIZE_SYMBOLS */
void bfd_info::get_symbols()
{
if (!abfd)
return;
cverb << vbfd << "bfd_info::get_symbols() for "
<< bfd_get_filename(abfd) << endl;
if (get_synth_symbols())
return;
if (bfd_get_file_flags(abfd) & HAS_SYMS)
nr_syms = bfd_get_symtab_upper_bound(abfd);
cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
<< nr_syms << hex << endl;
nr_syms /= sizeof(asymbol *);
if (nr_syms < 1)
return;
syms.reset(new asymbol *[nr_syms]);
nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
if (image_bfd_info)
translate_debuginfo_syms(syms.get(), nr_syms);
cverb << vbfd << "bfd_canonicalize_symtab: " << dec
<< nr_syms << hex << endl;
}
linenr_info const
find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
bfd_vma offset, bool anon_obj)
{
char const * function = "";
char const * cfilename = "";
unsigned int linenr = 0;
linenr_info info;
bfd * abfd;
asymbol ** syms;
asection * section;
bfd_vma pc;
bool ret;
if (!b.valid())
goto fail;
// take care about artificial symbol
if (!sym.symbol())
goto fail;
abfd = b.abfd;
syms = b.syms.get();
if (!syms)
goto fail;
section = sym.symbol()->section;
if (anon_obj)
pc = offset - sym.symbol()->section->vma;
else
pc = (sym.value() + offset) - sym.filepos();
if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
goto fail;
if (pc >= bfd_section_size(abfd, section))
goto fail;
ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
&function, &linenr);
if (!ret || !cfilename || !function)
goto fail;
/*
* is_correct_function does not handle the case of static inlines,
* but if the linenr is non-zero in the inline case, it is the correct
* line number.
*/
if (linenr == 0 && !is_correct_function(function, sym.name()))
goto fail;
if (linenr == 0) {
fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
&linenr);
}
info.found = true;
info.filename = cfilename;
info.line = linenr;
return info;
fail:
info.found = false;
// some stl lacks string::clear()
info.filename.erase(info.filename.begin(), info.filename.end());
info.line = 0;
return info;
}