// blob: c06945e483b94b83c6b225fbd7b307316199db09 [file] [log] [blame]
// -*- mode: c++ -*-
// Copyright 2011 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Author: Jim Blandy <[email protected]> <[email protected]>
// dump_syms.cc: Create a symbol file for use with minidumps
#ifdef HAVE_CONFIG_H
#include <config.h> // Must come first
#endif
#include "common/mac/dump_syms.h"
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <ostream>
#include <string>
#include <vector>
#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/dwarf_range_list_handler.h"
#include "common/mac/file_id.h"
#include "common/mac/arch_utilities.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/path_helper.h"
#include "common/scoped_ptr.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"
#include "common/symbol_data.h"
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif // CPU_TYPE_ARM
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (static_cast<cpu_type_t>(16777228))
#endif // CPU_TYPE_ARM64
using google_breakpad::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::DwarfRangeListHandler;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::FileID;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using google_breakpad::scoped_ptr;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;
namespace {
// Collect one path per entry found in |directory|, skipping the "." and ".."
// pseudo-entries. Each path is |directory| (with a '/' appended if it does
// not already end in one) followed by the entry's name. An empty vector is
// returned when the directory cannot be opened.
vector<string> list_directory(const string& directory) {
  vector<string> found;
  DIR* handle = opendir(directory.c_str());
  if (handle == NULL)
    return found;

  // Build the prefix that every entry name gets appended to.
  string prefix = directory;
  if (prefix[prefix.length() - 1] != '/')
    prefix += '/';

  for (struct dirent* item = readdir(handle); item != NULL;
       item = readdir(handle)) {
    const bool is_self = strcmp(item->d_name, ".") == 0;
    const bool is_parent = strcmp(item->d_name, "..") == 0;
    if (is_self || is_parent)
      continue;
    found.push_back(prefix + item->d_name);
  }

  closedir(handle);
  return found;
}
}
namespace google_breakpad {
// Load the object file named by |filename| into memory and pass it on to
// ReadData(). |filename| may name an object file directly, or a directory
// containing Contents/Resources/DWARF (a dSYM bundle), in which case the
// single file found in that subdirectory is used. Any previously selected
// object file is cleared. Problems are reported on stderr and make the
// function return false; otherwise the result of ReadData() is returned.
bool DumpSymbols::Read(const string& filename) {
  selected_object_file_ = nullptr;
  struct stat st;
  if (stat(filename.c_str(), &st) == -1) {
    fprintf(stderr, "Could not access object file %s: %s\n",
            filename.c_str(), strerror(errno));
    return false;
  }

  from_disk_ = true;

  // Does this filename refer to a dSYM bundle?
  string contents_path = filename + "/Contents/Resources/DWARF";
  string object_filename;
  if (S_ISDIR(st.st_mode) &&
      access(contents_path.c_str(), F_OK) == 0) {
    // If there's one file under Contents/Resources/DWARF then use that,
    // otherwise bail out.
    const vector<string> entries = list_directory(contents_path);
    if (entries.size() == 0) {
      fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
              filename.c_str());
      return false;
    }
    if (entries.size() > 1) {
      fprintf(stderr, "Too many DWARF files in bundle: %s\n",
              filename.c_str());
      return false;
    }
    object_filename = entries[0];
  } else {
    object_filename = filename;
  }

  // Read the file's contents into memory. Every failure path below must
  // clear |read_ok|; otherwise ReadData() would be handed a null buffer and
  // the recorded error text would never be printed.
  bool read_ok = true;
  string error;
  scoped_array<uint8_t> contents;
  off_t total = 0;
  if (stat(object_filename.c_str(), &st) == -1) {
    read_ok = false;
    error = strerror(errno);
  } else {
    FILE* f = fopen(object_filename.c_str(), "rb");
    if (!f) {
      read_ok = false;
      error = strerror(errno);
    } else {
      contents.reset(new uint8_t[st.st_size]);
      // fread() may return short counts; keep going until the whole file is
      // in, EOF is hit, or an error is flagged on the stream.
      while (total < st.st_size && !feof(f)) {
        size_t read = fread(&contents[0] + total, 1, st.st_size - total, f);
        if (read == 0) {
          if (ferror(f)) {
            read_ok = false;
            error = strerror(errno);
          }
          break;
        }
        total += read;
      }
      fclose(f);
    }
  }

  if (!read_ok) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            object_filename.c_str(), error.c_str());
    return false;
  }
  // ReadData() stores the released pointer in contents_.
  return ReadData(contents.release(), total, object_filename);
}
bool DumpSymbols::ReadData(uint8_t* contents, size_t size,
const std::string& filename) {
contents_.reset(contents);
size_ = size;
object_filename_ = filename;
// Get the list of object files present in the file.
FatReader::Reporter fat_reporter(object_filename_);
FatReader fat_reader(&fat_reporter);
if (!fat_reader.Read(contents_.get(), size)) {
return false;
}
// Get our own copy of fat_reader's object file list.
size_t object_files_count;
const SuperFatArch* object_files =
fat_reader.object_files(&object_files_count);
if (object_files_count == 0) {
fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
object_filename_.c_str());
return false;
}
object_files_.resize(object_files_count);
memcpy(&object_files_[0], object_files,
sizeof(SuperFatArch) * object_files_count);
return true;
}
bool DumpSymbols::SetArchitecture(const ArchInfo& info) {
// Find the best match for the architecture the user requested.
const SuperFatArch* best_match =
FindBestMatchForArchitecture(info.cputype, info.cpusubtype);
if (!best_match) return false;
// Record the selected object file.
selected_object_file_ = best_match;
return true;
}
// Search object_files_ for an entry matching |cpu_type| and |cpu_subtype|.
// Subtypes are compared with the CPU_SUBTYPE_MASK bits cleared. An entry
// matching both is returned immediately. Failing that, a diagnostic is
// printed to stderr and the first entry whose CPU type alone matches is
// returned; if there is none, the result is nullptr.
SuperFatArch* DumpSymbols::FindBestMatchForArchitecture(
    cpu_type_t cpu_type,
    cpu_subtype_t cpu_subtype) {
  const auto wanted_subtype = cpu_subtype & ~CPU_SUBTYPE_MASK;
  SuperFatArch* fallback = nullptr;

  for (SuperFatArch& candidate : object_files_) {
    if (static_cast<cpu_type_t>(candidate.cputype) != cpu_type)
      continue;

    const auto candidate_subtype =
        static_cast<cpu_subtype_t>(candidate.cpusubtype) & ~CPU_SUBTYPE_MASK;
    if (candidate_subtype == wanted_subtype)
      return &candidate;  // Exact match: nothing more to look for.

    // Same CPU type, different subtype: remember only the first of these.
    if (fallback == nullptr)
      fallback = &candidate;
  }

  // Reaching this point means no entry matched both type and subtype.
  fprintf(stderr,
          "Failed to find an exact match for an object file with cpu "
          "type: %d and cpu subtype: %d.\n",
          cpu_type, cpu_subtype);
  if (fallback == nullptr)
    return nullptr;

  fprintf(stderr, "Using %s as the closest match.\n",
          GetNameFromCPUType(fallback->cputype, fallback->cpusubtype));
  return fallback;
}
// Produce the identifier string for the selected object file: the Mach-O
// identifier computed by FileID, rendered as text with every '-' removed and
// a trailing "0" appended. If no object file has been selected yet,
// CreateEmptyModule() is used to select one. Returns an empty string on
// failure.
string DumpSymbols::Identifier() {
  scoped_ptr<FileID> file_id;
  file_id.reset(from_disk_ ? new FileID(object_filename_.c_str())
                           : new FileID(contents_.get(), size_));

  unsigned char identifier_bytes[16];
  scoped_ptr<Module> module;
  if (!selected_object_file_ && !CreateEmptyModule(module))
    return string();

  const cpu_type_t cpu_type = selected_object_file_->cputype;
  const cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
  const bool have_id =
      file_id->MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes);
  if (!have_id) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            object_filename_.c_str());
    return string();
  }

  char identifier_string[40];
  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
                                    sizeof(identifier_string));

  // Strip the dashes out of the textual form.
  string compacted(identifier_string);
  size_t dash = compacted.find('-');
  while (dash != string::npos) {
    compacted.erase(dash, 1);
    dash = compacted.find('-', dash);
  }

  // The pdb for these IDs has an extra byte, so to make everything uniform
  // put a 0 on the end of mac IDs.
  compacted.push_back('0');
  return compacted;
}
// RangesHandler implementation used by the dumper: it runs a
// RangeListReader over the requested range list and lets a
// DwarfRangeListHandler append the decoded ranges to the caller's vector.
class DumpSymbols::DumperRangesHandler
    : public DwarfCUToModule::RangesHandler {
 public:
  // |reader| is stored as-is and used for every ReadRanges() call.
  DumperRangesHandler(ByteReader* reader) : reader_(reader) { }

  // Decode the range list identified by |form| and |data| within |cu_info|,
  // delivering the results into |ranges|. Returns whatever
  // RangeListReader::ReadRanges() returns.
  bool ReadRanges(enum DwarfForm form,
                  uint64_t data,
                  RangeListReader::CURangesInfo* cu_info,
                  vector<Module::Range>* ranges) {
    DwarfRangeListHandler sink(ranges);
    RangeListReader list_reader(reader_, cu_info, &sink);
    const bool ok = list_reader.ReadRanges(form, data);
    return ok;
  }

 private:
  ByteReader* reader_;
};
// A line-to-module loader that accepts line number info parsed by
// LineInfo and populates a Module and a line vector
// with the results.
class DumpSymbols::DumperLineToModule:
    public DwarfCUToModule::LineToModuleHandler {
 public:
  // Create a line-to-module converter using BYTE_READER.
  DumperLineToModule(ByteReader* byte_reader)
      : byte_reader_(byte_reader) { }

  // Remember the compilation directory of the unit about to be processed;
  // it is handed to DwarfLineToModule by the next ReadProgram() call.
  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  // Parse the line number program at PROGRAM (LENGTH bytes) and record the
  // results in MODULE, LINES and FILES. STRING_SECTION and
  // LINE_STRING_SECTION (with their lengths) are forwarded to LineInfo
  // rather than being dropped, so that names stored as offsets into those
  // sections can be looked up.
  // NOTE(review): assumes LineInfo's constructor takes the string and
  // line-string buffers in this order - confirm against dwarf2reader.h.
  void ReadProgram(const uint8_t* program,
                   uint64_t length,
                   const uint8_t* string_section,
                   uint64_t string_section_length,
                   const uint8_t* line_string_section,
                   uint64_t line_string_section_length,
                   Module* module,
                   vector<Module::Line>* lines,
                   std::map<uint32_t, Module::File*>* files) {
    DwarfLineToModule handler(module, compilation_dir_, lines, files);
    LineInfo parser(program, length, byte_reader_,
                    string_section, string_section_length,
                    line_string_section, line_string_section_length,
                    &handler);
    parser.Start();
  }

 private:
  string compilation_dir_;
  ByteReader* byte_reader_;  // WEAK
};
// Create a Module with no debugging data for the selected object file and
// store it in |module|. If no object file has been selected yet, one is
// chosen here: the only one present, or else the one matching the local
// architecture. Also sets selected_object_name_, the name used in error
// messages. Returns false (leaving |module| untouched) if no object file can
// be selected, the architecture name is unknown, or no identifier can be
// computed.
bool DumpSymbols::CreateEmptyModule(scoped_ptr<Module>& module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own.
      ArchInfo local_arch = GetLocalArchInfo();
      if (!SetArchitecture(local_arch)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                object_filename_.c_str());
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const char* selected_arch_name = GetNameFromCPUType(
      selected_object_file_->cputype, selected_object_file_->cpusubtype);

  // In certain cases, it is possible that architecture info can't be reliably
  // determined, e.g. new architectures that breakpad is unaware of. In that
  // case, avoid crashing and return false instead.
  if (selected_arch_name == kUnknownArchName) {
    return false;
  }

  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = object_filename_;
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // Must be '+=': a bare '+' builds a temporary and throws it away, leaving
    // the name ending in ", architecture " with nothing after it.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  string module_name;
  if (!module_name_.empty()) {
    module_name = module_name_;
  } else {
    module_name = google_breakpad::BaseName(object_filename_);
  }

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;

  // Create a module to hold the debugging information.
  module.reset(new Module(module_name, "mac", selected_arch_name, identifier,
                          "", enable_multiple_, prefer_extern_name_));
  return true;
}
// Process the split DWARF (DWO/DWP) data that |reader|'s compilation unit
// refers to, adding what is found to |module|. Sections the split file lacks
// (.debug_addr, .debug_line, .debug_line_str) are borrowed from |reader|.
// If the split unit itself asks for split DWARF processing, this function
// recurses on it. |endianness|, |handle_inter_cu_refs| and |handle_inline|
// are passed through to the readers and handlers created here.
void DumpSymbols::StartProcessSplitDwarf(
    google_breakpad::CompilationUnit* reader,
    Module* module,
    google_breakpad::Endianness endianness,
    bool handle_inter_cu_refs,
    bool handle_inline) const {
  std::string split_file;
  google_breakpad::SectionMap split_sections;
  google_breakpad::ByteReader split_byte_reader(endianness);
  uint64_t cu_offset = 0;
  // Stop when the split file could not be set up: in that case |split_file|,
  // |split_sections| and |cu_offset| hold nothing usable.
  // NOTE(review): assumes ProcessSplitDwarf() returns true on success -
  // confirm against CompilationUnit::ProcessSplitDwarf.
  if (!reader->ProcessSplitDwarf(split_file, split_sections, split_byte_reader,
                                 cu_offset))
    return;

  DwarfCUToModule::FileContext file_context(split_file, module,
                                            handle_inter_cu_refs);
  for (const auto& section : split_sections)
    file_context.AddSectionToSectionMap(section.first, section.second.first,
                                        section.second.second);

  // Because DWP/DWO file doesn't have .debug_addr/.debug_line/.debug_line_str,
  // its debug info will refer to .debug_addr/.debug_line in the main binary.
  if (file_context.section_map().find(".debug_addr") ==
      file_context.section_map().end())
    file_context.AddSectionToSectionMap(".debug_addr", reader->GetAddrBuffer(),
                                        reader->GetAddrBufferLen());
  if (file_context.section_map().find(".debug_line") ==
      file_context.section_map().end())
    file_context.AddSectionToSectionMap(".debug_line", reader->GetLineBuffer(),
                                        reader->GetLineBufferLen());
  if (file_context.section_map().find(".debug_line_str") ==
      file_context.section_map().end())
    file_context.AddSectionToSectionMap(".debug_line_str",
                                        reader->GetLineStrBuffer(),
                                        reader->GetLineStrBufferLen());

  DumperRangesHandler ranges_handler(&split_byte_reader);
  DumperLineToModule line_to_module(&split_byte_reader);
  DwarfCUToModule::WarningReporter reporter(split_file, cu_offset);
  DwarfCUToModule root_handler(
      &file_context, &line_to_module, &ranges_handler, &reporter, handle_inline,
      reader->GetLowPC(), reader->GetAddrBase(), reader->HasSourceLineInfo(),
      reader->GetSourceLineOffset());
  google_breakpad::DIEDispatcher die_dispatcher(&root_handler);
  google_breakpad::CompilationUnit split_reader(
      split_file, file_context.section_map(), cu_offset, &split_byte_reader,
      &die_dispatcher);
  split_reader.SetSplitDwarf(reader->GetAddrBase(), reader->GetDWOID());
  split_reader.Start();

  // Normally, it won't happen unless we have transitive reference.
  if (split_reader.ShouldProcessSplitDwarf()) {
    StartProcessSplitDwarf(&split_reader, module, endianness,
                           handle_inter_cu_refs, handle_inline);
  }
}
// Parse the DWARF data held in |dwarf_sections| and add it to |module|.
// Every compilation unit in the __debug_info section is read in turn; units
// that request split DWARF processing are handed to
// StartProcessSplitDwarf(). Prints a message and returns early when there is
// no __debug_info section. Byte order is taken from |macho_reader|.
void DumpSymbols::ReadDwarf(google_breakpad::Module* module,
                            const mach_o::Reader& macho_reader,
                            const mach_o::SectionMap& dwarf_sections,
                            bool handle_inter_cu_refs) const {
  // Pick the byte order that matches the object file.
  const google_breakpad::Endianness endianness =
      macho_reader.big_endian() ? ENDIANNESS_BIG : ENDIANNESS_LITTLE;
  ByteReader byte_reader(endianness);

  // Per-file context, loaded with every section we were given.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module,
                                            handle_inter_cu_refs);
  for (const auto& named_section : dwarf_sections) {
    file_context.AddSectionToSectionMap(named_section.first,
                                        named_section.second.contents.start,
                                        named_section.second.contents.Size());
  }

  // Nothing can be done without a __debug_info section.
  const auto debug_info_entry =
      file_context.section_map().find("__debug_info");
  if (debug_info_entry == file_context.section_map().end()) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return;
  }
  const uint64_t debug_info_length = debug_info_entry->second.second;

  // Helpers shared by all compilation units: line program loader and
  // range list reader.
  DumperLineToModule line_to_module(&byte_reader);
  DumperRangesHandler ranges_handler(&byte_reader);

  const bool handle_inline = symbol_data_ & INLINES;

  // Step through __debug_info one compilation unit at a time.
  uint64_t offset = 0;
  while (offset < debug_info_length) {
    // Root DIE handler that fills in |module|.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module,
                                 &ranges_handler, &reporter, handle_inline);
    // Dwarf2Handler that drives the DIE handler above.
    DIEDispatcher die_dispatcher(&root_handler);
    // Parser for the unit located at |offset|.
    CompilationUnit dwarf_reader(selected_object_name_,
                                 file_context.section_map(),
                                 offset,
                                 &byte_reader,
                                 &die_dispatcher);

    // Start() processes the unit; its return value is added to |offset| to
    // reach the next one.
    offset += dwarf_reader.Start();

    if (dwarf_reader.ShouldProcessSplitDwarf()) {
      StartProcessSplitDwarf(&dwarf_reader, module, endianness,
                             handle_inter_cu_refs, handle_inline);
    }
  }
}
// Parse the call frame information stored in |section| and add it to
// |module|. |eh_frame| is forwarded to CallFrameInfo. Register names are
// chosen from |macho_reader|'s CPU type; for any CPU type other than x86,
// x86_64, ARM or ARM64 a message is printed and false is returned. Returns
// true once the parser has been run.
bool DumpSymbols::ReadCFI(google_breakpad::Module* module,
                          const mach_o::Reader& macho_reader,
                          const mach_o::Section& section,
                          bool eh_frame) const {
  // Choose the register name table for this file's architecture.
  vector<string> register_names;
  const auto cpu = macho_reader.cpu_type();
  if (cpu == CPU_TYPE_X86) {
    register_names = DwarfCFIToModule::RegisterNames::I386();
  } else if (cpu == CPU_TYPE_X86_64) {
    register_names = DwarfCFIToModule::RegisterNames::X86_64();
  } else if (cpu == CPU_TYPE_ARM) {
    register_names = DwarfCFIToModule::RegisterNames::ARM();
  } else if (cpu == CPU_TYPE_ARM64) {
    register_names = DwarfCFIToModule::RegisterNames::ARM64();
  } else {
    const char* arch_name = GetNameFromCPUType(macho_reader.cpu_type(),
                                               macho_reader.cpu_subtype());
    fprintf(
        stderr,
        "%s: cannot convert DWARF call frame information for architecture "
        "'%s' (%d, %d) to Breakpad symbol file: no register name table\n",
        selected_object_name_.c_str(), arch_name, macho_reader.cpu_type(),
        macho_reader.cpu_subtype());
    return false;
  }

  // Location and extent of the CFI bytes.
  const uint8_t* cfi_start = section.contents.start;
  const size_t cfi_length = section.contents.Size();

  // Handler side: turns parsed CFI into Module records.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);

  // Reader configured for the file's byte order and address width.
  const bool is_big_endian = macho_reader.big_endian();
  ByteReader byte_reader(is_big_endian ? ENDIANNESS_BIG : ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);

  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi_start);

  // Parser side.
  CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                         section.section_name);
  CallFrameInfo parser(cfi_start, cfi_length,
                       &byte_reader, &handler, &dwarf_reporter,
                       eh_frame);
  parser.Start();
  return true;
}
// Load command handler that feeds the debugging data found in a Mach-O
// file's load commands into a Module.
class DumpSymbols::LoadCommandDumper
    : public mach_o::Reader::LoadCommandHandler {
 public:
  // Build a handler for load commands coming from READER's file. Data is
  // added to MODULE; DUMPER supplies the ReadCFI/ReadDwarf routines.
  // SYMBOL_DATA selects which kinds of data are read, and
  // HANDLE_INTER_CU_REFS is passed along to ReadDwarf.
  LoadCommandDumper(const DumpSymbols& dumper,
                    google_breakpad::Module* module,
                    const mach_o::Reader& reader,
                    SymbolData symbol_data,
                    bool handle_inter_cu_refs)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs) {
  }

  // Callbacks for the segment and symbol table load commands.
  bool SegmentCommand(const mach_o::Segment& segment);
  bool SymtabCommand(const ByteBuffer& entries, const ByteBuffer& strings);

 private:
  const DumpSymbols& dumper_;
  google_breakpad::Module* module_;  // WEAK
  const mach_o::Reader& reader_;
  const SymbolData symbol_data_;
  const bool handle_inter_cu_refs_;
};
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment& segment) {
mach_o::SectionMap section_map;
if (!reader_.MapSegmentSections(segment, &section_map))
return false;
if (segment.name == "__TEXT") {
module_->SetLoadAddress(segment.vmaddr);
if (symbol_data_ & CFI) {
mach_o::SectionMap::const_iterator eh_frame =
section_map.find("__eh_frame");
if (eh_frame != section_map.end()) {
// If there is a problem reading this, don't treat it as a fatal error.
dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
}
}
return true;
}
if (segment.name == "__DWARF") {
if ((symbol_data_ & SYMBOLS_AND_FILES) || (symbol_data_ & INLINES)) {
dumper_.ReadDwarf(module_, reader_, section_map, handle_inter_cu_refs_);
}
if (symbol_data_ & CFI) {
mach_o::SectionMap::const_iterator debug_frame
= section_map.find("__debug_frame");
if (debug_frame != section_map.end()) {
// If there is a problem reading this, don't treat it as a fatal error.
dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
}
}
}
return true;
}
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer& entries,
const ByteBuffer& strings) {
StabsToModule stabs_to_module(module_);
// Mac OS X STABS are never "unitized", and the size of the 'value' field
// matches the address size of the executable.
StabsReader stabs_reader(entries.start, entries.Size(),
strings.start, strings.Size(),
reader_.big_endian(),
reader_.bits_64() ? 8 : 4,
true,
&stabs_to_module);
if (!stabs_reader.Process())
return false;
stabs_to_module.Finalize();
return true;
}
// Build a Module for the selected object file and fill it by walking the
// file's load commands. On success the new Module is stored in
// |*out_module|, released from the local scoped_ptr, and true is returned.
// On failure |*out_module| is left untouched and false is returned.
bool DumpSymbols::ReadSymbolData(Module** out_module) {
  scoped_ptr<Module> module;
  if (!CreateEmptyModule(module))
    return false;

  // Parse the selected object file, which starts |offset| bytes into the
  // loaded data.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  const uint8_t* object_start = &contents_[0] + selected_object_file_->offset;
  const bool parsed = reader.Read(object_start,
                                  selected_object_file_->size,
                                  selected_object_file_->cputype,
                                  selected_object_file_->cpusubtype);
  if (!parsed)
    return false;

  // Let the dumper react to each load command in the file.
  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
                                        symbol_data_, handle_inter_cu_refs_);
  const bool walked = reader.WalkLoadCommands(&load_command_dumper);
  if (!walked)
    return false;

  *out_module = module.release();
  return true;
}
// Create an empty Module for the selected object file and write it to
// |stream| using symbol_data_. Returns false if the module cannot be
// created; otherwise returns the result of Module::Write().
bool DumpSymbols::WriteSymbolFileHeader(std::ostream& stream) {
  scoped_ptr<Module> module;
  const bool created = CreateEmptyModule(module);
  if (created)
    return module->Write(stream, symbol_data_);
  return false;
}
} // namespace google_breakpad