blob: a25671a9b947f90570742a0a3808113ea80bf7a9 [file] [log] [blame]
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
#include "ElfReader.h"
#include <elf.h>
#include <endian.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <zlib.h>
#include "log.h"
#include "util.h"
using namespace std;
namespace rr {
class ElfReaderImplBase {
public:
ElfReaderImplBase(ElfReader& r) : r(r), ok_(false) {}
virtual ~ElfReaderImplBase() {}
virtual SymbolTable read_symbols(const char* symtab, const char* strtab) = 0;
virtual DynamicSection read_dynamic() = 0;
virtual Debuglink read_debuglink() = 0;
virtual Debugaltlink read_debugaltlink() = 0;
virtual string read_buildid() = 0;
virtual string read_interp() = 0;
virtual bool addr_to_offset(uintptr_t addr, uintptr_t& offset) = 0;
virtual SectionOffsets find_section_file_offsets(const char* name) = 0;
virtual const vector<uint8_t>* decompress_section(SectionOffsets offsets) = 0;
bool ok() { return ok_; }
protected:
ElfReader& r;
vector<unique_ptr<vector<uint8_t>>> decompressed_sections;
bool ok_;
};
template <typename Arch> class ElfReaderImpl : public ElfReaderImplBase {
public:
ElfReaderImpl(ElfReader& r);
virtual SymbolTable read_symbols(const char* symtab,
const char* strtab) override;
virtual DynamicSection read_dynamic() override;
virtual Debuglink read_debuglink() override;
virtual Debugaltlink read_debugaltlink() override;
virtual string read_buildid() override;
virtual string read_interp() override;
virtual bool addr_to_offset(uintptr_t addr, uintptr_t& offset) override;
virtual SectionOffsets find_section_file_offsets(const char* name) override;
virtual const vector<uint8_t>* decompress_section(SectionOffsets offsets) override;
private:
const typename Arch::ElfShdr* find_section(const char* n);
const typename Arch::ElfPhdr* find_programheader(uint32_t pt);
const typename Arch::ElfEhdr* elfheader;
const typename Arch::ElfPhdr* programheader;
const typename Arch::ElfShdr* sections;
size_t programheader_size;
size_t sections_size;
vector<char> section_names;
};
template <typename Arch>
unique_ptr<ElfReaderImplBase> elf_reader_impl_arch(ElfReader& r) {
return unique_ptr<ElfReaderImplBase>(new ElfReaderImpl<Arch>(r));
}
unique_ptr<ElfReaderImplBase> elf_reader_impl(ElfReader& r,
SupportedArch arch) {
RR_ARCH_FUNCTION(elf_reader_impl_arch, arch, r);
}
template <typename Arch>
ElfReaderImpl<Arch>::ElfReaderImpl(ElfReader& r) : ElfReaderImplBase(r) {
elfheader = r.read<typename Arch::ElfEhdr>(0);
if (!elfheader || memcmp(elfheader, ELFMAG, SELFMAG) != 0 ||
elfheader->e_ident[EI_CLASS] != Arch::elfclass ||
elfheader->e_ident[EI_DATA] != Arch::elfendian ||
elfheader->e_machine != Arch::elfmachine ||
elfheader->e_shentsize != sizeof(typename Arch::ElfShdr) ||
elfheader->e_phentsize != sizeof(typename Arch::ElfPhdr) ||
elfheader->e_shstrndx >= elfheader->e_shnum) {
LOG(debug) << "Invalid ELF file: invalid header";
return;
}
programheader =
r.read<typename Arch::ElfPhdr>(elfheader->e_phoff, elfheader->e_phnum);
if (!programheader || !elfheader->e_phnum) {
LOG(debug) << "Invalid ELF file: no program headers";
return;
}
programheader_size = elfheader->e_phnum;
sections =
r.read<typename Arch::ElfShdr>(elfheader->e_shoff, elfheader->e_shnum);
if (!sections || !elfheader->e_shnum) {
LOG(debug) << "Invalid ELF file: no sections";
return;
}
sections_size = elfheader->e_shnum;
auto& section_names_section = sections[elfheader->e_shstrndx];
const char* section_names_ptr = r.read<char>(section_names_section.sh_offset,
section_names_section.sh_size);
if (!section_names_ptr || !section_names_section.sh_size) {
LOG(debug) << "Invalid ELF file: can't read section names";
return;
}
// Ensure final 0
section_names.resize(section_names_section.sh_size);
memcpy(section_names.data(), section_names_ptr, section_names.size());
section_names[section_names.size() - 1] = 0;
ok_ = true;
}
template <typename Arch>
const typename Arch::ElfPhdr* ElfReaderImpl<Arch>::find_programheader(uint32_t pt) {
const typename Arch::ElfPhdr* ph = nullptr;
for (size_t i = 0; i < programheader_size; ++i) {
auto& p = programheader[i];
if (p.p_type == pt) {
ph = &p;
}
}
if (!ph) {
LOG(debug) << "Missing program header " << pt;
}
return ph;
}
template <typename Arch>
const typename Arch::ElfShdr* ElfReaderImpl<Arch>::find_section(const char* n) {
const typename Arch::ElfShdr* section = nullptr;
for (size_t i = 0; i < sections_size; ++i) {
auto& s = sections[i];
if (s.sh_name >= section_names.size()) {
LOG(debug) << "Invalid ELF file: invalid name offset for section " << i;
continue;
}
const char* name = section_names.data() + s.sh_name;
if (strcmp(name, n) == 0) {
if (section) {
LOG(debug) << "Invalid ELF file: duplicate symbol section " << n;
return nullptr;
}
section = &s;
}
}
if (!section) {
LOG(debug) << "Missing section " << n;
}
return section;
}
template <typename Arch>
SectionOffsets ElfReaderImpl<Arch>::find_section_file_offsets(
const char* name) {
SectionOffsets offsets = { 0, 0, false };
const typename Arch::ElfShdr* section = find_section(name);
if (!section) {
return offsets;
}
offsets.start = section->sh_offset;
offsets.end = section->sh_offset + section->sh_size;
offsets.compressed = !!(section->sh_flags & SHF_COMPRESSED);
return offsets;
}
template <typename Arch>
const vector<uint8_t>* ElfReaderImpl<Arch>::decompress_section(SectionOffsets offsets) {
DEBUG_ASSERT(offsets.compressed);
auto hdr = r.read<typename Arch::ElfChdr>(offsets.start);
if (!hdr) {
LOG(warn) << "section at " << offsets.start
<< " is marked compressed but is too small";
return nullptr;
}
size_t decompressed_size = 0;
if (hdr->ch_type == ELFCOMPRESS_ZLIB) {
decompressed_size = hdr->ch_size;
offsets.start += sizeof(typename Arch::ElfChdr);
} else {
auto legacy_hdr = r.read_bytes(offsets.start, 4);
if (!memcmp("ZLIB", legacy_hdr, 4)) {
auto be_size = r.read<uint64_t>(offsets.start + 4);
decompressed_size = be64toh(*be_size);
offsets.start += 12;
} else {
LOG(warn) << "section at " << offsets.start
<< " is marked compressed but uses unrecognized"
<< " type " << HEX(hdr->ch_type);
return nullptr;
}
}
unique_ptr<vector<uint8_t>> v(new vector<uint8_t>());
v->resize(decompressed_size);
z_stream stream;
memset(&stream, 0, sizeof(stream));
int result = inflateInit(&stream);
if (result != Z_OK) {
FATAL() << "inflateInit failed!";
return nullptr;
}
stream.avail_in = offsets.end - offsets.start;
stream.next_in = (unsigned char*)r.read_bytes(offsets.start, stream.avail_in);
stream.next_out = &v->front();
stream.avail_out = v->size();
result = inflate(&stream, Z_FINISH);
if (result != Z_STREAM_END) {
FATAL() << "inflate failed!";
return nullptr;
}
result = inflateEnd(&stream);
if (result != Z_OK) {
FATAL() << "inflateEnd failed!";
return nullptr;
}
decompressed_sections.push_back(std::move(v));
return decompressed_sections.back().get();
}
template <typename Arch>
SymbolTable ElfReaderImpl<Arch>::read_symbols(const char* symtab,
const char* strtab) {
SymbolTable result;
if (!ok()) {
return result;
}
const typename Arch::ElfShdr* symbols = find_section(symtab);
if (!symbols) {
return result;
}
const typename Arch::ElfShdr* strings = find_section(strtab);
if (!strings) {
return result;
}
if (symbols->sh_entsize != sizeof(typename Arch::ElfSym)) {
LOG(debug) << "Invalid ELF file: incorrect symbol size "
<< symbols->sh_entsize;
return result;
}
if (symbols->sh_size % symbols->sh_entsize) {
LOG(debug) << "Invalid ELF file: incorrect symbol section size "
<< symbols->sh_size;
return result;
}
if (strings->sh_size == 0) {
LOG(debug) << "Invalid ELF file: empty string table";
return result;
}
size_t symbol_list_size = symbols->sh_size / symbols->sh_entsize;
auto symbol_list = r.read<typename Arch::ElfSym>(
symbols->sh_offset, symbol_list_size);
if (!symbol_list) {
LOG(debug) << "Invalid ELF file: can't read symbols " << symtab;
return result;
}
auto strtab_ptr = r.read<char>(strings->sh_offset, strings->sh_size);
if (!strtab_ptr) {
LOG(debug) << "Invalid ELF file: can't read strings " << strtab;
return result;
}
result.strtab.resize(strings->sh_size);
memcpy(result.strtab.data(), strtab_ptr, result.strtab.size());
result.strtab[result.strtab.size() - 1] = 0;
result.symbols.resize(symbol_list_size);
for (size_t i = 0; i < symbol_list_size; ++i) {
auto& s = symbol_list[i];
if (s.st_shndx >= sections_size) {
// Don't leave this entry uninitialized
result.symbols[i] = SymbolTable::Symbol(0, 0);
continue;
}
result.symbols[i] = SymbolTable::Symbol(s.st_value, s.st_name);
}
return result;
}
template <typename Arch> DynamicSection ElfReaderImpl<Arch>::read_dynamic() {
DynamicSection result;
if (!ok()) {
return result;
}
const typename Arch::ElfShdr* dynamic = find_section(".dynamic");
if (!dynamic) {
return result;
}
const typename Arch::ElfShdr* dynstr = find_section(".dynstr");
if (!dynstr) {
return result;
}
if (dynamic->sh_entsize != sizeof(typename Arch::ElfDyn)) {
LOG(debug) << "Invalid ELF file: incorrect .dynamic size "
<< dynamic->sh_entsize;
return result;
}
if (!dynamic->sh_size) {
return result;
}
if (dynamic->sh_size % dynamic->sh_entsize) {
LOG(debug) << "Invalid ELF file: incorrect .dynamic section size "
<< dynamic->sh_size;
return result;
}
if (dynstr->sh_size == 0) {
LOG(debug) << "Invalid ELF file: empty string table";
return result;
}
size_t dyn_list_size = dynamic->sh_size / dynamic->sh_entsize;
auto dyn_list = r.read<typename Arch::ElfDyn>(
dynamic->sh_offset, dyn_list_size);
if (!dyn_list) {
LOG(debug) << "Invalid ELF file: can't read .dynamic";
return result;
}
auto strtab = r.read<char>(dynstr->sh_offset, dynstr->sh_size);
if (!strtab) {
LOG(debug) << "Invalid ELF file: can't read .dynstr";
return result;
}
result.strtab.resize(dynstr->sh_size);
memcpy(result.strtab.data(), strtab, result.strtab.size());
result.strtab[result.strtab.size() - 1] = 0;
result.entries.resize(dyn_list_size);
for (size_t i = 0; i < dyn_list_size; ++i) {
auto& s = dyn_list[i];
result.entries[i] = DynamicSection::Entry(s.d_tag, s.d_val);
}
return result;
}
static bool null_terminated(const char* p, size_t size, string& out) {
size_t len = strnlen(p, size);
if (len == size) {
LOG(warn) << "Invalid file name";
return false;
}
out = string(p, len);
return true;
}
template <typename Arch> Debuglink ElfReaderImpl<Arch>::read_debuglink() {
Debuglink result;
if (!ok()) {
return result;
}
const typename Arch::ElfShdr* debuglink = find_section(".gnu_debuglink");
if (!debuglink) {
return result;
}
if (debuglink->sh_size < 8) {
LOG(warn) << "Invalid ELF file: unexpected .gnu_debuglink length";
return result;
}
size_t crc_offset = debuglink->sh_size - 4;
if (!r.read_into(debuglink->sh_offset + crc_offset, &result.crc)) {
LOG(warn) << "Invalid ELF file: can't read .gnu_debuglink crc checksum";
return result;
}
const char* file_name = r.read<char>(debuglink->sh_offset, crc_offset);
if (!file_name) {
LOG(warn) << "Invalid ELF file: can't read .gnu_debuglink file_name";
return result;
}
null_terminated(file_name, crc_offset, result.file_name);
return result;
}
template <typename Arch> Debugaltlink ElfReaderImpl<Arch>::read_debugaltlink() {
Debugaltlink result;
if (!ok()) {
return result;
}
const typename Arch::ElfShdr* debuglink = find_section(".gnu_debugaltlink");
if (!debuglink) {
return result;
}
// Last 20 bytes are the build ID of the target file. Ignore for now.
if (debuglink->sh_size < 21) {
LOG(warn) << "Invalid ELF file: unexpected .gnu_debugaltlink length";
return result;
}
size_t build_id_offset = debuglink->sh_size - 20;
const char* file_name = r.read<char>(debuglink->sh_offset, build_id_offset);
if (!file_name) {
LOG(warn) << "Invalid ELF file: can't read .gnu_debugaltlink file_name";
return result;
}
null_terminated(file_name, build_id_offset, result.file_name);
return result;
}
template <typename Arch>
string ElfReaderImpl<Arch>::read_buildid() {
string result;
if (!ok()) {
return result;
}
for (size_t i = 0; i < sections_size; ++i) {
auto& s = sections[i];
if (s.sh_type != SHT_NOTE) {
continue;
}
auto offset = s.sh_offset;
auto nhdr = r.read<typename Arch::ElfNhdr>(offset);
if (!nhdr) {
LOG(error) << "Failed to read ELF note";
return result;
}
offset += sizeof(*nhdr);
char name[4] = { 0 };
if (!(nhdr->n_namesz == 4 &&
r.read_into(offset, &name) &&
memcmp("GNU", name, 4) == 0 &&
nhdr->n_descsz > 0)) {
continue;
}
// Note members are 4 byte aligned, twiddle bits to round up if necessary.
offset += (nhdr->n_namesz + 3) & ~0x3;
if (nhdr->n_type != NT_GNU_BUILD_ID) {
continue;
}
const uint8_t* id = r.read<uint8_t>(offset, nhdr->n_descsz);
if (!id) {
LOG(error) << "Failed to read ELF note contents";
return result;
}
result.reserve(nhdr->n_descsz);
for (unsigned i = 0; i < nhdr->n_descsz; ++i) {
char byte[3] = { 0 };
snprintf(&byte[0], 3, "%02x", id[i]);
result.append(byte);
}
break;
}
return result;
}
template <typename Arch>
string ElfReaderImpl<Arch>::read_interp() {
string result;
if (!ok()) {
return result;
}
const typename Arch::ElfPhdr* ph = find_programheader(PT_INTERP);
if (!ph) {
return result;
}
const char* file_name = r.read<char>(ph->p_offset, ph->p_filesz);
if (!file_name) {
LOG(warn) << "Invalid ELF file: can't read PT_INTERP";
return result;
}
null_terminated(file_name, ph->p_filesz, result);
return result;
}
template <typename Arch>
bool ElfReaderImpl<Arch>::addr_to_offset(uintptr_t addr, uintptr_t& offset) {
for (size_t i = 0; i < sections_size; ++i) {
const auto& section = sections[i];
// Skip the section if it either "occupies no space in the file" or
// doesn't have a valid address because it does not "occupy memory
// during process execution".
if (section.sh_type == SHT_NOBITS || !(section.sh_flags & SHF_ALLOC)) {
continue;
}
if (addr >= section.sh_addr && addr - section.sh_addr < section.sh_size) {
offset = addr - section.sh_addr + section.sh_offset;
return true;
}
}
return false;
}
ElfReader::ElfReader(SupportedArch arch) : arch_(arch), map(nullptr), size(0) {}
ElfReader::~ElfReader() {}
ElfReaderImplBase& ElfReader::impl() {
if (!impl_) {
impl_ = elf_reader_impl(*this, arch_);
}
return *impl_;
}
SymbolTable ElfReader::read_symbols(const char* symtab, const char* strtab) {
return impl().read_symbols(symtab, strtab);
}
DynamicSection ElfReader::read_dynamic() { return impl().read_dynamic(); }
Debuglink ElfReader::read_debuglink() { return impl().read_debuglink(); }
Debugaltlink ElfReader::read_debugaltlink() { return impl().read_debugaltlink(); }
SectionOffsets ElfReader::find_section_file_offsets(const char* name) {
return impl().find_section_file_offsets(name);
}
DwarfSpan ElfReader::dwarf_section(const char* name, bool known_to_be_compressed) {
SectionOffsets offsets = impl().find_section_file_offsets(name);
offsets.compressed |= known_to_be_compressed;
if (offsets.start && offsets.compressed) {
auto decompressed = impl().decompress_section(offsets);
return DwarfSpan(&decompressed->front(), &decompressed->back());
}
return DwarfSpan(map + offsets.start, map + offsets.end);
}
string ElfReader::read_buildid() { return impl().read_buildid(); }
string ElfReader::read_interp() { return impl().read_interp(); }
bool ElfReader::addr_to_offset(uintptr_t addr, uintptr_t& offset) {
return impl().addr_to_offset(addr, offset);
}
bool ElfReader::ok() { return impl().ok(); }
ElfFileReader::ElfFileReader(ScopedFd& fd, SupportedArch arch) : ElfReader(arch) {
struct stat st;
if (fstat(fd, &st) < 0) {
FATAL() << "Can't stat fd";
}
if (st.st_size > 0) {
map = static_cast<uint8_t*>(mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0));
if (map == MAP_FAILED) {
FATAL() << "Can't map fd";
}
}
size = st.st_size;
}
ElfFileReader::~ElfFileReader() {
if (map) {
munmap(map, size);
}
}
ScopedFd ElfFileReader::open_debug_file(const std::string& elf_file_name) {
if (elf_file_name.empty() || elf_file_name[0] != '/') {
return ScopedFd();
}
Debuglink debuglink = read_debuglink();
if (debuglink.file_name.empty()) {
return ScopedFd();
}
size_t last_slash = elf_file_name.find_last_of('/');
string debug_path = "/usr/lib/debug/";
debug_path += elf_file_name.substr(0, last_slash) + '/' + debuglink.file_name;
ScopedFd debug_fd(debug_path.c_str(), O_RDONLY);
if (!debug_fd.is_open()) {
return ScopedFd();
}
// Verify that the CRC checksum matches, in case the debuginfo and text file
// are in separate packages that are out of sync.
uint32_t crc = 0xffffffff;
while (true) {
unsigned char buf[4096];
ssize_t ret = ::read(debug_fd.get(), buf, sizeof(buf));
if (ret < 0) {
if (errno != EINTR) {
LOG(debug) << "Error reading " << debug_path;
return ScopedFd();
}
} else if (ret == 0) {
break;
} else {
crc = crc32(crc, buf, ret);
}
}
if ((crc ^ 0xffffffff) == debuglink.crc) {
return debug_fd;
}
return ScopedFd();
}
SupportedArch ElfFileReader::identify_arch(ScopedFd& fd) {
/**
* This code is quite lax. That's OK because this is only used to create
* a specific ElfReaderImpl, which does much more thorough checking of the
* header.
*/
static const int header_prefix_size = 20;
char buf[header_prefix_size];
ssize_t ret = read_to_end(fd, 0, buf, sizeof(buf));
if (ret != (ssize_t)sizeof(buf) || buf[5] != 1) {
return NativeArch::arch();
}
switch (buf[18] | (buf[19] << 8)) {
case 0x03:
return x86;
case 0x3e:
return x86_64;
default:
return NativeArch::arch();
}
}
bool ElfFileReader::is_x32_abi(__attribute__((unused)) ScopedFd& fd) {
#if defined(__x86_64__)
static const int header_prefix_size = 20;
char buf[header_prefix_size];
ssize_t ret = read_to_end(fd, 0, buf, sizeof(buf));
if (ret != (ssize_t)sizeof(buf) || buf[5] != 1) {
// Who knows what this is.
return false;
}
if ((buf[18] | (buf[19] << 8)) == 0x3e) {
// x32 ABI programs declare themselves with the amd64 architecture but
// only 4 byte wide pointers.
return buf[4] == 1;
}
#endif
return false;
}
} // namespace rr