| // Copyright (c) 2022, Google LLC |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google LLC nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| // disassembler_objdump.cc: Disassembler that invokes objdump for disassembly. |
| // |
| // Author: Mark Brand |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> // Must come first |
| #endif |
| |
| #include "processor/disassembler_objdump.h" |
| |
| #include <unistd.h> |
| #include <sys/wait.h> |
| |
| #include <array> |
| #include <fstream> |
| #include <iostream> |
| #include <iterator> |
| #include <regex> |
| #include <sstream> |
| #include <vector> |
| |
| #include "common/linux/eintr_wrapper.h" |
| #include "common/linux/scoped_pipe.h" |
| #include "common/linux/scoped_tmpfile.h" |
| #include "processor/logging.h" |
| |
| namespace google_breakpad { |
| namespace { |
| |
// Number of bytes fetched at the instruction address and handed to objdump;
// as the name states, this is the x86 instruction length limit.
constexpr size_t kMaxX86InstructionLength = 15;
| |
// Returns true when `token` is exactly one of the instruction prefixes
// "lock", "rep", "repz" or "repnz" (case-sensitive), false otherwise.
bool IsInstructionPrefix(const string& token) {
  const char* const kPrefixes[] = {"lock", "rep", "repz", "repnz"};
  for (const char* prefix : kPrefixes) {
    if (token == prefix) {
      return true;
    }
  }
  return false;
}
| |
// Returns true when `token` is one of the operand-size keywords that objdump
// emits in front of a memory operand in Intel syntax (for example the
// "DWORD" and "PTR" in "DWORD PTR [esi+0x10]"), false otherwise.
//
// Besides the integer sizes, the far-pointer (FWORD), x87 (TBYTE) and vector
// (OWORD/XMMWORD/YMMWORD/ZMMWORD) size keywords are recognised, so that
// operands such as "XMMWORD PTR [rax]" are not mistaken for the operand
// itself. Matching is case-sensitive.
bool IsOperandSize(const string& token) {
  const char* const kSizeKeywords[] = {
      "BYTE",  "WORD",  "DWORD",   "FWORD",   "QWORD",   "TBYTE",
      "OWORD", "XMMWORD", "YMMWORD", "ZMMWORD", "PTR"};
  for (const char* keyword : kSizeKeywords) {
    if (token == keyword) {
      return true;
    }
  }
  return false;
}
| |
| bool GetSegmentAddressX86(const DumpContext& context, string segment_name, |
| uint64_t& address) { |
| if (segment_name == "ds") { |
| address = context.GetContextX86()->ds; |
| } else if (segment_name == "es") { |
| address = context.GetContextX86()->es; |
| } else if (segment_name == "fs") { |
| address = context.GetContextX86()->fs; |
| } else if (segment_name == "gs") { |
| address = context.GetContextX86()->gs; |
| } else { |
| BPLOG(ERROR) << "Unsupported segment register: " << segment_name; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name, |
| uint64_t& address) { |
| if (segment_name == "ds") { |
| address = 0; |
| } else if (segment_name == "es") { |
| address = 0; |
| } else { |
| BPLOG(ERROR) << "Unsupported segment register: " << segment_name; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool GetSegmentAddress(const DumpContext& context, string segment_name, |
| uint64_t& address) { |
| if (context.GetContextCPU() == MD_CONTEXT_X86) { |
| return GetSegmentAddressX86(context, segment_name, address); |
| } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) { |
| return GetSegmentAddressAMD64(context, segment_name, address); |
| } else { |
| BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n"; |
| return false; |
| } |
| } |
| |
| bool GetRegisterValueX86(const DumpContext& context, string register_name, |
| uint64_t& value) { |
| if (register_name == "eax") { |
| value = context.GetContextX86()->eax; |
| } else if (register_name == "ebx") { |
| value = context.GetContextX86()->ebx; |
| } else if (register_name == "ecx") { |
| value = context.GetContextX86()->ecx; |
| } else if (register_name == "edx") { |
| value = context.GetContextX86()->edx; |
| } else if (register_name == "edi") { |
| value = context.GetContextX86()->edi; |
| } else if (register_name == "esi") { |
| value = context.GetContextX86()->esi; |
| } else if (register_name == "ebp") { |
| value = context.GetContextX86()->ebp; |
| } else if (register_name == "esp") { |
| value = context.GetContextX86()->esp; |
| } else if (register_name == "eip") { |
| value = context.GetContextX86()->eip; |
| } else { |
| BPLOG(ERROR) << "Unsupported register: " << register_name; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool GetRegisterValueAMD64(const DumpContext& context, string register_name, |
| uint64_t& value) { |
| if (register_name == "rax") { |
| value = context.GetContextAMD64()->rax; |
| } else if (register_name == "rbx") { |
| value = context.GetContextAMD64()->rbx; |
| } else if (register_name == "rcx") { |
| value = context.GetContextAMD64()->rcx; |
| } else if (register_name == "rdx") { |
| value = context.GetContextAMD64()->rdx; |
| } else if (register_name == "rdi") { |
| value = context.GetContextAMD64()->rdi; |
| } else if (register_name == "rsi") { |
| value = context.GetContextAMD64()->rsi; |
| } else if (register_name == "rbp") { |
| value = context.GetContextAMD64()->rbp; |
| } else if (register_name == "rsp") { |
| value = context.GetContextAMD64()->rsp; |
| } else if (register_name == "r8") { |
| value = context.GetContextAMD64()->r8; |
| } else if (register_name == "r9") { |
| value = context.GetContextAMD64()->r9; |
| } else if (register_name == "r10") { |
| value = context.GetContextAMD64()->r10; |
| } else if (register_name == "r11") { |
| value = context.GetContextAMD64()->r11; |
| } else if (register_name == "r12") { |
| value = context.GetContextAMD64()->r12; |
| } else if (register_name == "r13") { |
| value = context.GetContextAMD64()->r13; |
| } else if (register_name == "r14") { |
| value = context.GetContextAMD64()->r14; |
| } else if (register_name == "r15") { |
| value = context.GetContextAMD64()->r15; |
| } else if (register_name == "rip") { |
| value = context.GetContextAMD64()->rip; |
| } else { |
| BPLOG(ERROR) << "Unsupported register: " << register_name; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // Lookup the value of `register_name` in `context`, store it into `value` on |
| // success. |
| // Support for non-full-size registers not implemented, since we're only using |
| // this to evaluate address expressions. |
| bool GetRegisterValue(const DumpContext& context, string register_name, |
| uint64_t& value) { |
| if (context.GetContextCPU() == MD_CONTEXT_X86) { |
| return GetRegisterValueX86(context, register_name, value); |
| } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) { |
| return GetRegisterValueAMD64(context, register_name, value); |
| } else { |
| BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n"; |
| return false; |
| } |
| } |
| } // namespace |
| |
| // static |
| bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu, |
| const uint8_t* raw_bytes, |
| unsigned int raw_bytes_len, |
| string& instruction) { |
| // Always initialize outputs |
| instruction = ""; |
| |
| if (!raw_bytes || raw_bytes_len == 0) { |
| // There's no need to perform any operation in this case, as there's |
| // clearly no instruction there. |
| return false; |
| } |
| |
| string architecture; |
| if (cpu == MD_CONTEXT_X86) { |
| architecture = "i386"; |
| } else if (cpu == MD_CONTEXT_AMD64) { |
| architecture = "i386:x86-64"; |
| } else { |
| BPLOG(ERROR) << "Unsupported architecture."; |
| return false; |
| } |
| |
| // Create a temporary file for the raw instruction bytes to pass to |
| // objdump, and write the bytes to the input file. |
| ScopedTmpFile raw_bytes_file; |
| if (!raw_bytes_file.InitData(raw_bytes, raw_bytes_len)) { |
| BPLOG(ERROR) << "Failed creating temporary file."; |
| return false; |
| } |
| |
| // Create a pipe to use to read the disassembly back from objdump. |
| ScopedPipe disassembly_pipe; |
| if (!disassembly_pipe.Init()) { |
| BPLOG(ERROR) << "Failed creating pipe for output."; |
| return false; |
| } |
| |
| pid_t child_pid = fork(); |
| if (child_pid < 0) { |
| BPLOG(ERROR) << "Fork failed."; |
| return false; |
| } |
| |
| if (child_pid == 0) { |
| // In the child process, set up the input and output file descriptors. |
| if (dup2(raw_bytes_file.GetFd(), STDIN_FILENO) < 0 || |
| disassembly_pipe.Dup2WriteFd(STDOUT_FILENO) < 0 || |
| disassembly_pipe.Dup2WriteFd(STDERR_FILENO) < 0) { |
| BPLOG(ERROR) << "Failed dup'ing file descriptors."; |
| exit(-1); |
| } |
| |
| // We need to close the read end of the pipe in the child process so that |
| // when the parent closes it, the pipe is disconnected. |
| disassembly_pipe.CloseReadFd(); |
| |
| // We use "/proc/self/fd/0" here to allow objdump to parse an unnamed file, |
| // since objdump does not have a mode to read from stdin. This cannot be |
| // used with a pipe, since objdump requires that the input is a standard |
| // file. |
| execlp("objdump", "objdump", "-D", "--no-show-raw-insn", "-b", "binary", |
| "-M", "intel", "-m", architecture.c_str(), "/proc/self/fd/0", |
| nullptr); |
| |
| BPLOG(ERROR) << "Failed to exec objdump."; |
| exit(-1); |
| } else { |
| // In the parent process, parse the objdump output. |
| |
| // Match the instruction line, from: |
| // 0: lock cmpxchg DWORD PTR [esi+0x10],eax |
| // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax" |
| std::regex instruction_regex( |
| "^\\s+[0-9a-f]+:\\s+" // " 0:" |
| "((?:\\s*\\S*)+)$"); // "lock cmpxchg..." |
| |
| std::string line; |
| std::smatch match; |
| while (disassembly_pipe.ReadLine(line)) { |
| if (std::regex_match(line, match, instruction_regex)) { |
| instruction = match[1].str(); |
| break; |
| } |
| } |
| |
| // Close the read pipe so that objdump will exit (in case we broke out of |
| // the loop above before reading all of the output). |
| disassembly_pipe.CloseReadFd(); |
| |
| // Now wait for objdump to exit. |
| int status = 0; |
| HANDLE_EINTR(waitpid(child_pid, &status, 0)); |
| |
| if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { |
| BPLOG(ERROR) << "objdump didn't run successfully."; |
| return false; |
| } |
| |
| if (instruction == "") { |
| BPLOG(ERROR) << "Failed to find instruction in objdump output."; |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // static |
| bool DisassemblerObjdump::TokenizeInstruction(const string& instruction, |
| string& operation, string& dest, |
| string& src) { |
| // Always initialize outputs. |
| operation = ""; |
| dest = ""; |
| src = ""; |
| |
| // Split the instruction into tokens by either whitespace or comma. |
| std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*"); |
| std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(), |
| token_regex); |
| |
| bool found_comma = false; |
| for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator(); |
| ++tokens_iter) { |
| auto token = (*tokens_iter)[1].str(); |
| if (operation.size() == 0) { |
| if (IsInstructionPrefix(token)) |
| continue; |
| operation = token; |
| } else if (dest.size() == 0) { |
| if (IsOperandSize(token)) |
| continue; |
| dest = token; |
| } else if (!found_comma) { |
| if (token == ",") { |
| found_comma = true; |
| } else { |
| BPLOG(ERROR) << "Failed to parse operands from objdump output, expected" |
| " comma but found \"" |
| << token << "\""; |
| return false; |
| } |
| } else if (src.size() == 0) { |
| if (IsOperandSize(token)) |
| continue; |
| src = token; |
| } else { |
| if (token == ",") { |
| BPLOG(ERROR) << "Failed to parse operands from objdump output, found " |
| "unexpected comma after last operand."; |
| return false; |
| } else { |
| // We just ignore other junk after the last operand unless it's a |
| // comma, which would indicate we're probably still in the middle |
| // of the operands and something has gone wrong |
| } |
| } |
| } |
| |
| if (found_comma && src.size() == 0) { |
| BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma " |
| "but no src operand."; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // static |
| bool DisassemblerObjdump::CalculateAddress(const DumpContext& context, |
| const string& expression, |
| uint64_t& address) { |
| address = 0; |
| |
| // Extract the components of the expression. |
| // fs:[esi+edi*4+0x80] -> ["fs", "esi", "edi", "4", "-", "0x80"] |
| std::regex expression_regex( |
| "^(?:(\\ws):)?" // "fs:" |
| "\\[(\\w+)" // "[esi" |
| "(?:\\+(\\w+)(?:\\*(\\d+)))?" // "+edi*4" |
| "(?:([\\+-])(0x[0-9a-f]+))?" // "-0x80" |
| "\\]$"); // "]" |
| |
| std::smatch match; |
| if (!std::regex_match(expression, match, expression_regex) || |
| match.size() != 7) { |
| return false; |
| } |
| |
| string segment_name = match[1].str(); |
| string register_name = match[2].str(); |
| string index_name = match[3].str(); |
| string index_stride = match[4].str(); |
| string offset_sign = match[5].str(); |
| string offset = match[6].str(); |
| |
| uint64_t segment_address = 0; |
| uint64_t register_value = 0; |
| uint64_t index_value = 0; |
| uint64_t index_stride_value = 1; |
| uint64_t offset_value = 0; |
| |
| if (segment_name.size() && |
| !GetSegmentAddress(context, segment_name, segment_address)) { |
| return false; |
| } |
| |
| if (!GetRegisterValue(context, register_name, register_value)) { |
| return false; |
| } |
| |
| if (index_name.size() && |
| !GetRegisterValue(context, index_name, index_value)) { |
| return false; |
| } |
| |
| if (index_stride.size()) { |
| index_stride_value = strtoull(index_stride.c_str(), nullptr, 0); |
| } |
| |
| if (offset.size()) { |
| offset_value = strtoull(offset.c_str(), nullptr, 0); |
| } |
| |
| address = |
| segment_address + register_value + (index_value * index_stride_value); |
| if (offset_sign == "+") { |
| address += offset_value; |
| } else if (offset_sign == "-") { |
| address -= offset_value; |
| } |
| |
| return true; |
| } |
| |
| DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu, |
| const MemoryRegion* memory_region, |
| uint64_t address) { |
| if (address < memory_region->GetBase() || |
| memory_region->GetBase() + memory_region->GetSize() <= address) { |
| return; |
| } |
| |
| uint8_t ip_bytes[kMaxX86InstructionLength] = {0}; |
| size_t ip_bytes_length; |
| for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength; |
| ++ip_bytes_length) { |
| // We have to read byte-by-byte here, since we still want to try and |
| // disassemble an instruction even if we don't have enough bytes. |
| if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length, |
| &ip_bytes[ip_bytes_length])) { |
| break; |
| } |
| } |
| |
| string instruction; |
| if (!DisassembleInstruction(cpu, ip_bytes, kMaxX86InstructionLength, |
| instruction)) { |
| return; |
| } |
| |
| if (!TokenizeInstruction(instruction, operation_, dest_, src_)) { |
| return; |
| } |
| } |
| |
| bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context, |
| uint64_t& address) { |
| return CalculateAddress(context, src_, address); |
| } |
| |
| bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context, |
| uint64_t& address) { |
| return CalculateAddress(context, dest_, address); |
| } |
| |
| } // namespace google_breakpad |