| /* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ |
| |
| #ifndef RR_TASK_H_ |
| #define RR_TASK_H_ |
| |
| #include <memory> |
| #include <vector> |
| #include <unordered_map> |
| |
| #include "preload/preload_interface.h" |
| |
| #include "AddressSpace.h" |
| #include "Event.h" |
| #include "ExtraRegisters.h" |
| #include "FdTable.h" |
| #include "PerfCounters.h" |
| #include "Registers.h" |
| #include "TaskishUid.h" |
| #include "ThreadGroup.h" |
| #include "TraceStream.h" |
| #include "WaitStatus.h" |
| #include "core.h" |
| #include "kernel_abi.h" |
| #include "kernel_supplement.h" |
| #include "remote_code_ptr.h" |
| #include "util.h" |
| |
| struct syscallbuf_hdr; |
| struct syscallbuf_record; |
| |
| namespace rr { |
| |
| class AutoRemoteSyscalls; |
| class RecordSession; |
| class ReplaySession; |
| class ScopedFd; |
| class Session; |
| class ThreadGroup; |
| |
| enum CloneFlags { |
| /** |
| * The child gets a semantic copy of all parent resources (and |
| * becomes a new thread group). This is the semantics of the |
| * fork() syscall. |
| */ |
| CLONE_SHARE_NOTHING = 0, |
| /** |
| * Child will share the table of signal dispositions with its |
| * parent. |
| */ |
| CLONE_SHARE_SIGHANDLERS = 1 << 0, |
| /** Child will join its parent's thread group. */ |
| CLONE_SHARE_THREAD_GROUP = 1 << 1, |
| /** Child will share its parent's address space. */ |
| CLONE_SHARE_VM = 1 << 2, |
| /** Child will share its parent's file descriptor table. */ |
| CLONE_SHARE_FILES = 1 << 3, |
| /** Kernel will clear and notify tid futex on task exit. */ |
| CLONE_CLEARTID = 1 << 4, |
| // Set the thread area to what's specified by the |tls| arg. |
| CLONE_SET_TLS = 1 << 5, |
| }; |
| |
| /** |
| * Enumeration of ways to resume execution. See the ptrace manual for |
| * details of the semantics of these. |
| * |
| * We define a new datatype because the PTRACE_SYSEMU* requests aren't |
| * part of the official ptrace API, and we want to use a strong type |
| * for these resume requests to ensure callers don't confuse their |
| * arguments. |
| */ |
| enum ResumeRequest { |
| RESUME_CONT = PTRACE_CONT, |
| RESUME_SINGLESTEP = PTRACE_SINGLESTEP, |
| RESUME_SYSCALL = PTRACE_SYSCALL, |
| RESUME_SYSEMU = NativeArch::PTRACE_SYSEMU, |
| RESUME_SYSEMU_SINGLESTEP = NativeArch::PTRACE_SYSEMU_SINGLESTEP, |
| }; |
| enum WaitRequest { |
| // Don't wait after resuming. |
| RESUME_NONBLOCKING, |
| // After resuming, blocking-waitpid() until tracee status |
| // changes. |
| RESUME_WAIT, |
| // Like RESUME_WAIT, but we're not expecting a PTRACE_EVENT_EXIT |
| // or reap, so return false also in that case. |
| RESUME_WAIT_NO_EXIT |
| }; |
| enum TicksRequest { |
| // We don't expect to see any ticks (though we seem to on the odd buggy |
| // system...). Using this is a small performance optimization because we don't |
| // have to stop and restart the performance counters. This may also avoid |
| // bugs on some systems that report performance counter advances while |
| // in the kernel... |
| RESUME_NO_TICKS = -2, |
| RESUME_UNLIMITED_TICKS = -1, |
| // Positive values are a request for an interrupt |
| // after that number of ticks |
| // Don't request more than this! |
| MAX_TICKS_REQUEST = 2000000000, |
| }; |
| |
| /** Reasons why a SIGTRAP might have been delivered. Multiple reasons can |
| * apply. Also, none can apply, e.g. if someone sent us a SIGTRAP via kill(). |
| */ |
| struct TrapReasons { |
| /* Singlestep completed (RESUME_SINGLESTEP, RESUME_SYSEMU_SINGLESTEP). */ |
| bool singlestep; |
| /* Hardware watchpoint fired. This includes cases where the actual values |
| * did not change (i.e. AddressSpace::has_any_watchpoint_changes may return |
| * false even though this is set). */ |
| bool watchpoint; |
| /* Breakpoint instruction was executed. */ |
| bool breakpoint; |
| }; |
| |
| struct RseqState { |
| remote_ptr<void> ptr; |
| uint32_t abort_prefix_signature; |
| RseqState(remote_ptr<void> ptr, uint32_t abort_prefix_signature) |
| : ptr(ptr), abort_prefix_signature(abort_prefix_signature) {} |
| }; |
| |
| /** |
| * A "task" is a task in the linux usage: the unit of scheduling. (OS |
| * people sometimes call this a "thread control block".) Multiple |
| * tasks may share the same address space and file descriptors, in |
| * which case they're commonly called "threads". Or two tasks may |
| * have their own address spaces and file descriptors, in which case |
| * they're called "processes". Both look the same to rr (on linux), |
| * so no distinction is made here. |
| */ |
| class Task { |
| friend class Session; |
| friend class RecordSession; |
| friend class ReplaySession; |
| |
| public: |
| typedef std::vector<WatchConfig> HardwareWatchpoints; |
| |
| /** |
| * Ptrace-detach the task. |
| */ |
| void detach(); |
| |
| /* |
| * Re-enable the CPUID instruction in this task (if it was previously |
| * disabled to support CPUID emulation) as well as the use of rdtsc. |
| */ |
| void reenable_cpuid_tsc(); |
| |
| /** |
| * Wait for the task to exit, but do not reap/detach yet. |
| */ |
| void wait_exit(); |
| |
| /** |
| * Advance the task to its exit state if it's not already there. |
| * If `wait` is false, then during recording Scheduler::start() must be |
| * called. |
| */ |
| void proceed_to_exit(bool wait = true); |
| |
| /** |
| * Kill this task and wait for it to exit. |
| * N.B.: If may_reap() is false, this may hang. |
| * Returns the WaitStatus of the task at exit (usually SIGKILL, but may not |
| * be if we raced with another exit reason). |
| */ |
| WaitStatus kill(); |
| |
| /** |
| * This must be in an emulated syscall, entered through |
| * |cont_sysemu()| or |cont_sysemu_singlestep()|, but that's |
| * not checked. If so, step over the system call instruction |
| * to "exit" the emulated syscall. |
| */ |
| void finish_emulated_syscall(); |
| |
| size_t syscallbuf_data_size() { |
| return read_mem(REMOTE_PTR_FIELD(syscallbuf_child, num_rec_bytes)) + |
| sizeof(struct syscallbuf_hdr); |
| } |
| |
| /** |
| * Dump attributes of this process, including pending events, |
| * to |out|, which defaults to LOG_FILE. |
| */ |
| void dump(FILE* out = nullptr) const; |
| |
| /** |
| * Called after the first exec in a session, when the session first |
| * enters a consistent state. Prior to that, the task state |
| * can vary based on how rr set up the child process. We have to flush |
| * out any state that might have been affected by that. |
| */ |
| void flush_inconsistent_state(); |
| |
| /** |
| * Return total number of ticks ever executed by this task. |
| * Updates tick count from the current performance counter values if |
| * necessary. |
| */ |
| Ticks tick_count() { return ticks; } |
| |
| /** |
| * Return the path of this fd as /proc/<pid>/fd/<fd> |
| */ |
| std::string proc_fd_path(int fd); |
| |
| /** |
| * Return the path of /proc/<pid>/pagemap |
| */ |
| std::string proc_pagemap_path(); |
| |
| /** |
| * Return the path of /proc/<pid>/stat |
| */ |
| std::string proc_stat_path(); |
| |
| /** |
| * Return the path of /proc/<pid>/exe |
| */ |
| std::string proc_exe_path(); |
| |
| /** |
| * Return the path of the executable (i.e. what |
| * /proc/<pid>/exe points to). |
| */ |
| std::string exe_path(); |
| |
| /** |
| * Stat |fd| in the context of this task's fd table. |
| */ |
| struct stat stat_fd(int fd); |
| /** |
| * Lstat |fd| in the context of this task's fd table. |
| */ |
| struct stat lstat_fd(int fd); |
| /** |
| * Open |fd| in the context of this task's fd table. |
| */ |
| ScopedFd open_fd(int fd, int flags); |
| /** |
| * Get the name of the file referenced by |fd| in the context of this |
| * task's fd table. |
| */ |
| std::string file_name_of_fd(int fd); |
| /** |
| * Get current offset of |fd| |
| */ |
| int64_t fd_offset(int fd); |
| /** |
| * Get pid of pidfd |fd| |
| */ |
| pid_t pid_of_pidfd(int fd); |
| |
| /** |
| * Records the wait status of this task as |status|, e.g. if |
| * |wait()/try_wait()| has returned it. Call this whenever a waitpid |
| * returned activity for this task. |
| * If this returns false, then the task was kicked out of a ptrace-stop |
| * by SIGKILL or equivalent before we could read registers etc. |
| * We will treat this stop as if it never happened; the caller must |
| * act as if there was no stop. |
| * If `status.reaped()` (i.e. fatal signal or normal exit), this always |
| * returns true. |
| */ |
| bool did_waitpid(WaitStatus status); |
| |
| /** |
| * Syscalls have side effects on registers (e.g. setting the flags register). |
| * Perform those side effects on |registers| to make it look like a syscall |
| * happened. |
| */ |
| void canonicalize_regs(SupportedArch syscall_arch); |
| |
| /** |
| * Return the ptrace message pid associated with the current ptrace |
| * event, f.e. the new child's pid at PTRACE_EVENT_CLONE. |
| * Returns -1 if the ptrace returns ESRCH, i.e. the task is not in a |
| * ptrace-stop. |
| */ |
| pid_t get_ptrace_eventmsg_pid(); |
| |
| /** |
| * Return the siginfo at the signal-stop of this. |
| * Not meaningful unless this is actually at a signal stop. |
| */ |
| const siginfo_t& get_siginfo(); |
| |
| /** |
| * Destroy in the tracee task the scratch buffer and syscallbuf (if |
| * syscallbuf_child is non-null). |
| * Both the as_task and the fd_task must be able to execute remote syscalls |
| * and share the address space, resp. the file descriptor table with the |
| * current task. If either of these is null, the corresponding resource is |
| * not destroyed remote (e.g. if there are no other tasks left in the same |
| * address space or file descriptor table). |
| */ |
| void destroy_buffers(Task *as_task, Task *fd_task); |
| void destroy_buffers() { destroy_buffers(this, this); } |
| |
| void did_kill(); |
| |
| void unmap_buffers_for( |
| AutoRemoteSyscalls& remote, Task* t, |
| remote_ptr<struct syscallbuf_hdr> saved_syscallbuf_child); |
| /* Close fds related to `t`'s syscallbuf, in this task's fd table. |
| If `really_close` is true, actually close the kernel fds through `remote`, |
| otherwise only update our FdTable. */ |
| void close_buffers_for(AutoRemoteSyscalls& remote, Task* t, bool really_close); |
| |
| remote_ptr<const struct syscallbuf_record> next_syscallbuf_record(); |
| long stored_record_size(remote_ptr<const struct syscallbuf_record> record); |
| |
| /** Return the current $ip of this. */ |
| remote_code_ptr ip() { return regs().ip(); } |
| |
| /** |
| * Emulate a jump to a new IP, updating the ticks counter as appropriate. |
| */ |
| void emulate_jump(remote_code_ptr); |
| void count_direct_jump() |
| { |
| ticks += PerfCounters::ticks_for_unconditional_direct_branch(this); |
| } |
| |
| /** |
| * Return true if this is at an arm-desched-event or |
| * disarm-desched-event syscall. |
| */ |
| bool is_desched_event_syscall(); |
| |
| /** |
| * Return true when this task is in a traced syscall made by the |
| * syscallbuf code. Callers may assume |is_in_syscallbuf()| |
| * is implied by this. Note that once we've entered the traced syscall, |
| * ip() is immediately after the syscall instruction. |
| */ |
| bool is_in_traced_syscall() { |
| return ip() == |
| as->traced_syscall_ip().increment_by_syscall_insn_length( |
| arch()) || |
| ip() == |
| as->privileged_traced_syscall_ip() |
| .increment_by_syscall_insn_length(arch()); |
| } |
| bool is_at_traced_syscall_entry() { |
| return ip() == as->traced_syscall_ip() || |
| ip() == as->privileged_traced_syscall_ip(); |
| } |
| |
| /** |
| * Return true when this task is in an untraced syscall, i.e. one |
| * initiated by a function in the syscallbuf. Callers may |
| * assume |is_in_syscallbuf()| is implied by this. Note that once we've |
| * entered the traced syscall, ip() is immediately after the syscall |
| * instruction. |
| */ |
| bool is_in_untraced_syscall() { |
| const AddressSpace::SyscallType *t; |
| if (arch() == aarch64 && stop_sig() > 0) { |
| // On aarch64 we can't distinguish untraced syscall entry and exit |
| // when a signal happened |
| t = AddressSpace::rr_page_syscall_from_entry_point(arch(), ip()); |
| } else { |
| t = AddressSpace::rr_page_syscall_from_exit_point(arch(), ip()); |
| } |
| return t && t->traced == AddressSpace::UNTRACED; |
| } |
| |
| bool is_in_rr_page() { |
| auto p = ip().to_data_ptr<void>(); |
| return AddressSpace::rr_page_start() <= p && |
| p < AddressSpace::rr_page_end(); |
| } |
| |
| /** |
| * Return true if |ptrace_event()| is the trace event |
| * generated by the syscallbuf seccomp-bpf when a traced |
| * syscall is entered. |
| */ |
| bool is_ptrace_seccomp_event() const; |
| |
| /** Dump all pending events to the RecordTask INFO log. */ |
| virtual void log_pending_events() const {} |
| |
| /** |
| * Call this hook just before exiting a syscall. Often Task |
| * attributes need to be updated based on the finishing syscall. |
| * Use 'regs' instead of this->regs() because some registers may not be |
| * set properly in the task yet. |
| */ |
| virtual void on_syscall_exit(int syscallno, SupportedArch arch, |
| const Registers& regs); |
| |
| /** |
| * Hook called by `resume_execution`. |
| * Returns `false` if the task is in the process of dying and setup could not |
| * be completed, `true` otherwise. |
| */ |
| virtual bool will_resume_execution(ResumeRequest, WaitRequest, TicksRequest, |
| int /*sig*/) { return true; } |
| /** |
| * Hook called by `did_waitpid`. |
| */ |
| virtual void did_wait() {} |
| /** |
| * Return the pid of the task in its own pid namespace. |
| * Only RecordTasks actually change pid namespaces, but |
| * this value is stored and present during replay too. |
| */ |
| pid_t own_namespace_tid() { return own_namespace_rec_tid; } |
| |
| /** |
| * Assuming ip() is just past a breakpoint instruction, adjust |
| * ip() backwards to point at that breakpoint insn. |
| */ |
| void move_ip_before_breakpoint(); |
| |
| /** |
| * Assuming we've just entered a syscall, exit that syscall and reset |
| * state to reenter the syscall just as it was called the first time. |
| * Returns false if we see the process exit instead. |
| */ |
| bool exit_syscall_and_prepare_restart(); |
| |
| /** |
| * We're currently in user-space with registers set up to perform a system |
| * call. Continue into the kernel and stop where we can modify the syscall |
| * state. |
| * Return `true` if the syscall entry succeeded. |
| * Return `false` if the tracee exited unexpectedly. |
| */ |
| bool enter_syscall(bool allow_exit=false); |
| |
| /** |
| * We have observed entry to a syscall (either by PTRACE_EVENT_SECCOMP or |
| * a syscall, depending on the value of Session::syscall_seccomp_ordering()). |
| * Continue into the kernel to perform the syscall and stop at the |
| * PTRACE_SYSCALL syscall-exit trap. Returns false if we see the process exit |
| * before that; we may or may not be stopped in that case. |
| */ |
| bool exit_syscall(); |
| |
| /** |
| * Return the "task name"; i.e. what |prctl(PR_GET_NAME)| or |
| * /proc/tid/comm say that the task's name is. |
| * |
| * During recording we don't monitor changes to this, we just let |
| * the kernel update it directly. This lets us syscall-buffer PR_SET_NAME. |
| * During replay we monitor changes to this and cache the name in ReplayTask, |
| * hence these methods are virtual. During replay the task's actual name |
| * is "rr:" followed by the original name. |
| */ |
| virtual std::string name() const; |
| |
| virtual void set_name(AutoRemoteSyscalls& remote, const std::string& name); |
| |
| /** |
| * Called for every PR_SET_NAME during replay but not always during recording |
| * (it is not called for syscall-buffered PR_SET_NAME). |
| */ |
| virtual void did_prctl_set_prname(remote_ptr<void>) {} |
| |
| /** |
| * Call this method when this task has just performed an |execve()| |
| * (so we're in the new address space), but before the system call has |
| * returned. |
| * `exe_file` is the name of the executable file in the trace, if there is one, |
| * otherwise the original exe file name --- a best-effort filename we can |
| * pass to gdb for it to read the exe. |
| */ |
| void post_exec(const std::string& exe_file); |
| |
| /** |
| * Call this method when this task has exited a successful execve() syscall. |
| * At this point it is safe to make remote syscalls. |
| * `original_exe_file` is the original file exe file name. |
| */ |
| void post_exec_syscall(const std::string& original_exe_file); |
| |
| /** |
| * Return true if this task has execed. |
| */ |
| bool execed() const; |
| |
| /** |
| * Return true if this task is dead and just waiting to be reaped. |
| */ |
| virtual bool already_exited() const { return false; } |
| |
| virtual bool is_detached_proxy() const { return false; } |
| |
| /** |
| * Read |N| bytes from |child_addr| into |buf|, or don't |
| * return. |
| */ |
| template <size_t N> |
| void read_bytes(remote_ptr<void> child_addr, uint8_t (&buf)[N]) { |
| return read_bytes_helper(child_addr, N, buf); |
| } |
| |
| /** Return the current regs of this. */ |
| const Registers& regs() const; |
| |
| /** Return the extra registers of this. Asserts if the task died. */ |
| const ExtraRegisters& extra_regs(); |
| |
| /** Return the extra registers of this, or null if the task died. */ |
| const ExtraRegisters* extra_regs_fallible(); |
| |
| /** Return the current arch of this. This can change due to exec(). */ |
| SupportedArch arch() const { |
| // Use 'registers' directly instead of calling regs(), since this can |
| // be called while the task is not stopped. |
| return registers.arch(); |
| } |
| |
| /** |
| * Return the debug status (DR6 on x86). The debug status is always cleared |
| * in resume_execution() before we resume, so it always only reflects the |
| * events since the last resume. Must not be called on non-x86 architectures. |
| */ |
| uintptr_t x86_debug_status(); |
| |
| /** |
| * Set the debug status (DR6 on x86). Noop on non-x86 architectures. |
| */ |
| void set_x86_debug_status(uintptr_t status); |
| |
| /** |
| * Determine why a SIGTRAP occurred. On x86, uses x86_debug_status() but doesn't |
| * consume it. |
| */ |
| TrapReasons compute_trap_reasons(); |
| |
| /** |
| * Called on syscall entry to save any registers that we need to keep, but |
| * cannot get from the kernel (r.g. orig_x0 on aarch64). |
| */ |
| void apply_syscall_entry_regs(); |
| |
| /** |
| * Read |val| from |child_addr|. |
| * If the data can't all be read, then if |ok| is non-null |
| * sets *ok to false, otherwise asserts. |
| */ |
| template <typename T> |
| T read_mem(remote_ptr<T> child_addr, bool* ok = nullptr) { |
| typename std::remove_cv<T>::type val; |
| read_bytes_helper(child_addr, sizeof(val), &val, ok); |
| return val; |
| } |
| |
| /** |
| * Read |count| values from |child_addr|. |
| */ |
| template <typename T> |
| std::vector<T> read_mem(remote_ptr<T> child_addr, size_t count, |
| bool* ok = nullptr) { |
| std::vector<T> v; |
| v.resize(count); |
| read_bytes_helper(child_addr, sizeof(T) * count, v.data(), ok); |
| return v; |
| } |
| |
| /** |
| * Read and return the C string located at |child_addr| in |
| * this address space. If the data can't all be read (because the c string to |
| * be read is invalid), then if |ok| is non-null, sets *ok to |
| * false, otherwise asserts. |
| */ |
| std::string read_c_str(remote_ptr<char> child_addr, bool *ok = nullptr); |
| |
| /** |
| * Resume execution |how|, delivering |sig| if nonzero. |
| * After resuming, |wait_how|. In replay, reset hpcs and |
| * request a tick period of tick_period. The default value |
| * of tick_period is 0, which means effectively infinite. |
| * If interrupt_after_elapsed is nonzero, we interrupt the task |
| * after that many seconds have elapsed. |
| * |
| * All tracee execution goes through here. |
| * |
| * If `wait_how` == RESUME_WAIT and we don't complete a |
| * did_waitpid() (e.g. because the tracee was SIGKILLed or |
| * equivalent), this returns false. |
| */ |
| bool resume_execution(ResumeRequest how, WaitRequest wait_how, |
| TicksRequest tick_period, int sig = 0); |
| |
| /** Return the session this is part of. */ |
| Session& session() const { return *session_; } |
| |
| /** Set the tracee's registers to |regs|. Lazy. */ |
| void set_regs(const Registers& regs); |
| |
| /** Ensure registers are flushed back to the underlying task. */ |
| void flush_regs(); |
| |
| /** Set the tracee's extra registers to |regs|. */ |
| void set_extra_regs(const ExtraRegisters& regs); |
| |
| /** Adjust IP for rseq abort if necessary and return true if an abort is required. |
| * Sets *rseq_cs_invalid if it was invalid */ |
| bool should_apply_rseq_abort(EventType event_type, remote_code_ptr* new_ip, |
| bool* invalid_rseq_cs); |
| |
| /** |
| * Read the aarch64 TLS register via ptrace. Returns true on success, false |
| * on failure. On success `result` is set to the tracee's TLS register. |
| * This can only fail when ptrace_if_stopped fails, i.e. the tracee |
| * is on the exit path due to a SIGKILL or equivalent. |
| */ |
| bool read_aarch64_tls_register(uintptr_t *result); |
| void set_aarch64_tls_register(uintptr_t val); |
| |
| /** |
| * Program the debug registers to the vector of watchpoint |
| * configurations in |reg| (also updating the debug control |
| * register appropriately). Return true if all registers were |
| * successfully programmed, false otherwise. Any time false |
| * is returned, the caller is guaranteed that no watchpoint |
| * has been enabled; either all of |regs| is enabled and true |
| * is returned, or none are and false is returned. |
| */ |
| bool set_debug_regs(const HardwareWatchpoints& watchpoints); |
| |
| bool set_aarch64_debug_regs(int which, ARM64Arch::user_hwdebug_state *regs, size_t nregs); |
| bool get_aarch64_debug_regs(int which, ARM64Arch::user_hwdebug_state *regs); |
| |
| uintptr_t get_debug_reg(size_t regno); |
| bool set_x86_debug_reg(size_t regno, uintptr_t value); |
| |
| /** Update the thread area to |addr|. */ |
| void set_thread_area(remote_ptr<X86Arch::user_desc> tls); |
| |
| /** Set the thread area at index `idx` to desc and reflect this |
| * into the OS task. Returns 0 on success, errno otherwise. |
| */ |
| int emulate_set_thread_area(int idx, X86Arch::user_desc desc); |
| |
| /** Get the thread area from the remote process. |
| * Returns 0 on success, errno otherwise. |
| */ |
| int emulate_get_thread_area(int idx, X86Arch::user_desc& desc); |
| |
| const std::vector<X86Arch::user_desc>& thread_areas() { |
| DEBUG_ASSERT(arch() == x86 || arch() == x86_64); |
| return thread_areas_; |
| } |
| |
| void set_status(WaitStatus status) { wait_status = status; } |
| |
| /** |
| * Return true when the task stopped for a ptrace-stop and we |
| * haven't resumed it yet. |
| */ |
| bool is_stopped() const { return is_stopped_; } |
| |
| /** |
| * Setter for `is_stopped_` to update `Scheduler::ntasks_stopped`. |
| */ |
| virtual void set_stopped(bool stopped) { is_stopped_ = stopped; } |
| |
| /** |
| * Return the status of this as of the last successful wait()/try_wait() call. |
| */ |
| WaitStatus status() const { return wait_status; } |
| |
| /** |
| * Return the ptrace event as of the last call to |wait()/try_wait()|. |
| */ |
| int ptrace_event() const { return wait_status.ptrace_event(); } |
| |
| /** |
| * Return the signal that's pending for this as of the last |
| * call to |wait()/try_wait()|. The signal 0 means "no signal". |
| */ |
| int stop_sig() const { return wait_status.stop_sig(); } |
| |
| void clear_wait_status() { wait_status = WaitStatus(); } |
| |
| /** Return the thread group this belongs to. */ |
| std::shared_ptr<ThreadGroup> thread_group() const { return tg; } |
| |
| /** Return the id of this task's recorded thread group. */ |
| pid_t tgid() const; |
| /** Return id of real OS thread group. */ |
| pid_t real_tgid() const; |
| |
| TaskUid tuid() const { return TaskUid(rec_tid, serial); } |
| |
| /** Return the dir of the trace we're using. */ |
| const std::string& trace_dir() const; |
| |
| /** |
| * Get the current "time" measured as ticks on recording trace |
| * events. |task_time()| returns that "time" wrt this task |
| * only. |
| */ |
| uint32_t trace_time() const; |
| |
| /** |
| * Call this to reset syscallbuf_hdr->num_rec_bytes and zero out the data |
| * recorded in the syscall buffer. This makes for more deterministic behavior |
| * especially during replay, where during checkpointing we only save and |
| * restore the recorded data area. |
| */ |
| void reset_syscallbuf(); |
| |
| /** |
| * Return the virtual memory mapping (address space) of this |
| * task. |
| */ |
| AddressSpace::shr_ptr vm() { return as; } |
| |
| FdTable::shr_ptr fd_table() { return fds; } |
| |
| /** |
| * Block until the status of this changes. wait() expects the wait to end |
| * with the process in a stopped() state. If interrupt_after_elapsed >= 0, |
| * interrupt the task after that many seconds have elapsed. If |
| * interrupt_after_elapsed == 0.0, the interrupt will happen immediately. |
| * Returns false if the wait failed because we reached a stop but we got |
| * SIGKILLed (or equivalent) out of it, in which case it is not safe to wait |
| * because that might block indefinitely waiting for us to acknowledge the |
| * PTRACE_EVENT_EXIT of other tasks. |
| */ |
| bool wait(double interrupt_after_elapsed = -1); |
| |
| /** |
| * Currently we don't allow recording across uid changes, so we can |
| * just return rr's uid. |
| */ |
| uid_t getuid() { return ::getuid(); } |
| |
| /** |
| * Write |N| bytes from |buf| to |child_addr|, or don't return. |
| */ |
| template <size_t N> |
| void write_bytes(remote_ptr<void> child_addr, const uint8_t (&buf)[N]) { |
| write_bytes_helper(child_addr, N, buf); |
| } |
| |
| enum WriteFlags { |
| IS_BREAKPOINT_RELATED = 0x1, |
| }; |
| /** |
| * Write |val| to |child_addr|. |
| */ |
| template <typename T> |
| void write_mem(remote_ptr<T> child_addr, const T& val, bool* ok = nullptr, |
| uint32_t flags = 0) { |
| DEBUG_ASSERT(type_has_no_holes<T>()); |
| write_bytes_helper(child_addr, sizeof(val), static_cast<const void*>(&val), |
| ok, flags); |
| } |
| /** |
| * This is not the helper you're looking for. See above: you |
| * probably accidentally wrote |write_mem(addr, &foo)| when |
| * you meant |write_mem(addr, foo)|. |
| */ |
| template <typename T> |
| void write_mem(remote_ptr<T> child_addr, const T* val) = delete; |
| |
| template <typename T> |
| void write_mem(remote_ptr<T> child_addr, const T* val, int count, |
| bool* ok = nullptr) { |
| DEBUG_ASSERT(type_has_no_holes<T>()); |
| write_bytes_helper(child_addr, sizeof(*val) * count, |
| static_cast<const void*>(val), ok); |
| } |
| |
| uint64_t write_ranges(const std::vector<FileMonitor::Range>& ranges, |
| void* data, size_t size); |
| |
| /** |
| * Writes zeroes to the given memory range. |
| * For efficiency tries using MADV_REMOVE via `remote`. Caches |
| * an AutoRemoteSyscalls in `*remote`. |
| */ |
| void write_zeroes(std::unique_ptr<AutoRemoteSyscalls>* remote, remote_ptr<void> addr, size_t size); |
| |
| /** |
| * Don't use these helpers directly; use the safer and more |
| * convenient variants above. |
| * |
| * Read/write the number of bytes that the template wrapper |
| * inferred. |
| */ |
| ssize_t read_bytes_fallible(remote_ptr<void> addr, ssize_t buf_size, |
| void* buf); |
| /** |
| * If the data can't all be read, then if |ok| is non-null, sets *ok to |
| * false, otherwise asserts. |
| */ |
| void read_bytes_helper(remote_ptr<void> addr, ssize_t buf_size, void* buf, |
| bool* ok = nullptr); |
| /** |
| * |flags| is bits from WriteFlags. |
| */ |
| void write_bytes_helper(remote_ptr<void> addr, ssize_t buf_size, |
| const void* buf, bool* ok = nullptr, |
| uint32_t flags = 0); |
| /** |
| * |flags| is bits from WriteFlags. |
| * Returns number of bytes written. |
| */ |
| ssize_t write_bytes_helper_no_notifications(remote_ptr<void> addr, ssize_t buf_size, |
| const void* buf, bool* ok = nullptr, |
| uint32_t flags = 0); |
| |
| SupportedArch detect_syscall_arch(); |
| |
| /** |
| * Call this when performing a clone syscall in this task. Returns |
| * true if the call completed, false if it was interrupted and |
| * needs to be resumed. When the call returns true, the task is |
| * stopped at a PTRACE_EVENT_CLONE or PTRACE_EVENT_FORK. |
| */ |
| bool clone_syscall_is_complete(pid_t* new_pid, SupportedArch syscall_arch); |
| |
| /** |
| * Called when SYS_rrcall_init_preload has happened. |
| */ |
| virtual void at_preload_init(); |
| |
| /** |
| * Open /proc/[tid]/mem fd for our AddressSpace, closing the old one |
| * first. If necessary we force the tracee to open the file |
| * itself and smuggle the fd back to us. |
| * Returns false if the process no longer exists. |
| */ |
| bool open_mem_fd(); |
| |
| /** |
| * Calls open_mem_fd if this task's AddressSpace doesn't already have one. |
| */ |
| void open_mem_fd_if_needed(); |
| |
| /** |
| * Open /proc/[tid]/pagemap fd for our AddressSpace. |
| */ |
| ScopedFd& pagemap_fd(); |
| |
| /** |
| * Perform a PTRACE_INTERRUPT and set up the counter for potential spurious stops |
| * to be detected in `account_for_potential_ptrace_interrupt_stop`. |
| * Returns true if it succeeded, false if we got ESRCH (i.e. the tracee has |
| * disappeared or is not being ptraced; PTRACE_INTERRUPT doesn't require the |
| * tracee to be stopped). |
| */ |
| bool do_ptrace_interrupt(); |
| |
| /** |
| * Sometimes we use PTRACE_INTERRUPT to kick the tracee out of various |
| * undesirable states. Unfortunately, that can (but need not) result in later |
| * undesired GROUP-STOP-SIGTRAP stops which report the PTRACE_INTERRUPT. |
| * This function may be called when examining stops to account for any |
| * such spurious stops. |
| * |
| * Should be called at exactly once for every ptrace stop. |
| * |
| * Returns true if the stop is caused by a PTRACE_INTERRUPT we know about, |
| * false otherwise. |
| */ |
| bool account_for_potential_ptrace_interrupt_stop(WaitStatus status); |
| |
| /* Imagine that task A passes buffer |b| to the read() |
| * syscall. Imagine that, after A is switched out for task B, |
| * task B then writes to |b|. Then B is switched out for A. |
| * Since rr doesn't schedule the kernel code, the result is |
| * nondeterministic. To avoid that class of replay |
| * divergence, we "redirect" (in)outparams passed to may-block |
| * syscalls, to "scratch memory". The kernel writes to |
| * scratch deterministically, and when A (in the example |
| * above) exits its read() syscall, rr copies the scratch data |
| * back to the original buffers, serializing A and B in the |
| * example above. |
| * |
| * Syscalls can "nest" due to signal handlers. If a syscall A |
| * is interrupted by a signal, and the sighandler calls B, |
| * then we can have scratch buffers set up for args of both A |
| * and B. In linux, B won't actually re-enter A; A is exited |
| * with a "will-restart" error code and its args are saved for |
| * when (or if) it's restarted after the signal. But that |
| * doesn't really matter wrt scratch space. (TODO: in the |
| * future, we may be able to use that fact to simplify |
| * things.) |
| * |
| * Because of nesting, at first blush it seems we should push |
| * scratch allocations onto a stack and pop them as syscalls |
| * (or restarts thereof) complete. But under a critical |
| * assumption, we can actually skip that. The critical |
| * assumption is that the kernel writes its (in)outparams |
| * atomically wrt signal interruptions, and only writes them |
| * on successful exit. Each syscall will complete in stack |
| * order, and it's invariant that the syscall processors must |
| * only write back to user buffers *only* the data that was |
| * written by the kernel. So as long as the atomicity |
| * assumption holds, the completion of syscalls higher in the |
| * event stack may overwrite scratch space, but the completion |
| * of each syscall will overwrite those overwrites again, and |
| * that over-overwritten data is exactly and only what we'll |
| * write back to the tracee. |
| * |
| * |scratch_ptr| points at the mapped address in the child, |
| * and |size| is the total available space. */ |
| remote_ptr<void> scratch_ptr; |
| /* The full size of the scratch buffer. |
| * The last page of the scratch buffer is used as an alternate stack |
| * for the syscallbuf code. So the usable size is less than this. |
| */ |
| ssize_t scratch_size; |
| |
| /* The child's desched counter event fd number */ |
| int desched_fd_child; |
| /* The child's cloned_file_data_fd */ |
| int cloned_file_data_fd_child; |
| /* The filename opened by the child's cloned_file_data_fd */ |
| std::string cloned_file_data_fname; |
| // Current rseq state if registered |
| std::unique_ptr<RseqState> rseq_state; |
| |
| PerfCounters hpc; |
| |
| /* This is always the "real" tid of the tracee. For a detached proxy, |
| * it's the proxy tid. */ |
| pid_t tid; |
| /* This is always the recorded tid of the tracee. During |
| * recording, it's synonymous with |tid|, and during replay |
| * it's the tid that was recorded. For a detached proxy, |
| * this is the tid of the detachd process. */ |
| pid_t rec_tid; |
| /* This is the recorded tid of the tracee *in its own pid namespace*. */ |
| pid_t own_namespace_rec_tid; |
| |
| size_t syscallbuf_size; |
| /* Points at the tracee's mapping of the buffer. */ |
| remote_ptr<struct syscallbuf_hdr> syscallbuf_child; |
| |
| remote_ptr<struct preload_globals> preload_globals; |
| typedef uint8_t ThreadLocals[PRELOAD_THREAD_LOCALS_SIZE]; |
| ThreadLocals thread_locals; |
| |
| size_t usable_scratch_size() { |
| return std::max<ssize_t>(0, scratch_size - page_size()); |
| } |
| remote_ptr<void> syscallbuf_alt_stack() { |
| return scratch_ptr.is_null() ? remote_ptr<void>() |
| : scratch_ptr + scratch_size; |
| } |
| void setup_preload_thread_locals(); |
| void setup_preload_thread_locals_from_clone(Task* origin); |
| // If `fetch_full` is false, avoid fetching the full stub_scratch_2 on aarch64 |
| // and only fetch the first two pointers from it. |
| const ThreadLocals& fetch_preload_thread_locals(); |
| void activate_preload_thread_locals(); |
| |
| struct CapturedState { |
| Ticks ticks; |
| Registers regs; |
| ExtraRegisters extra_regs; |
| std::string prname; |
| uintptr_t fdtable_identity; |
| remote_ptr<struct syscallbuf_hdr> syscallbuf_child; |
| size_t syscallbuf_size; |
| size_t num_syscallbuf_bytes; |
| remote_ptr<struct preload_globals> preload_globals; |
| remote_ptr<void> scratch_ptr; |
| ssize_t scratch_size; |
| remote_ptr<void> top_of_stack; |
| std::unique_ptr<RseqState> rseq_state; |
| uint64_t cloned_file_data_offset; |
| ThreadLocals thread_locals; |
| pid_t rec_tid; |
| pid_t own_namespace_rec_tid; |
| uint32_t serial; |
| ThreadGroupUid tguid; |
| int desched_fd_child; |
| int cloned_file_data_fd_child; |
| std::string cloned_file_data_fname; |
| WaitStatus wait_status; |
| // TLS state (architecture specific) |
| // On x86_64 the tls register is part of the general register state (%fs) |
| // On x86 thread_areas is used |
| // on aarch64, tls_register is used |
| uintptr_t tls_register; |
| std::vector<X86Arch::user_desc> thread_areas; |
| }; |
| |
| /** |
| * Lock or unlock the syscallbuf to prevent the preload library from using it. |
| * Only has an effect if the syscallbuf has been initialized. |
| */ |
| void set_syscallbuf_locked(bool locked); |
| |
| // Disable syscall buffering during diversions |
| void set_in_diversion(bool in_diversion) { |
| if (preload_globals) { |
| write_mem(REMOTE_PTR_FIELD(preload_globals, in_diversion), |
| (unsigned char)in_diversion); |
| } |
| set_syscallbuf_locked(in_diversion); |
| } |
| |
| /** |
| * Executes a ptrace() call that expects the task to be in a ptrace-stop. |
| * Errors other than ESRCH are treated as fatal (those are rr bugs). |
| * Only call this when `Task::is_stopped_`. |
| * Even when `is_stopped_` is true, this can return false because the kernel |
| * could have pushed the task out of the ptrace-stop due to SIGKILL or |
| * equivalent (such as `zap_pid_ns_processes`). |
| * |
| * So when this returns false, one of the following is true: |
| * * The tracee is executing towards its PTRACE_EVENT_EXIT stop. This |
| * happens concurrently with rr so it may enter that stop at any time. |
| * But it can also be indefinitely delayed before reaching the exit stop, |
| * e.g. waiting in`zap_pid_ns_processes`. |
| * * In older kernels (before 9a95f78eab70deeb5a4c879c19b841a6af5b66e7) |
| * it is possible for a tracee stopped in PTRACE_EVENT_EXIT to be kicked |
| * out of that stop by another SIGKILL. In that case it is executing towards |
| * or has actually reached the zombie state. In old kernels it can be |
| * blocked indefinitely from reaching the zombie state due to coredumping. |
| * |
| * In either of these cases, the tracee has been killed via SIGKILL or equivalent |
| * and will not execute user code or system calls again. We can assume |
| * its registers won't change again. It won't handle any more signals. |
| */ |
| bool ptrace_if_stopped(int request, remote_ptr<void> addr, void* data); |
| |
| /** |
| * Make the ptrace |request| with |addr| and |data|, return |
| * the ptrace return value. Just a very thin wrapper around the syscall. |
| */ |
| long fallible_ptrace(int request, remote_ptr<void> addr, void* data); |
| |
| bool seen_ptrace_exit_event() const { |
| return seen_ptrace_exit_event_; |
| } |
| |
| void did_handle_ptrace_exit_event(); |
| |
| remote_code_ptr last_execution_resume() const { |
| return address_of_last_execution_resume; |
| } |
| |
| bool was_reaped() const { |
| return was_reaped_; |
| } |
| bool handled_ptrace_exit_event() const { |
| return handled_ptrace_exit_event_; |
| } |
| |
| void os_exec(SupportedArch arch, std::string filename); |
| void os_exec_stub(SupportedArch arch) { |
| os_exec(arch, find_exec_stub(arch)); |
| } |
| |
| /** |
| * Try to make the current task look exactly like some `other` task |
| * by copying that task's address space and other relevant properties, |
| * but without using the os's clone system call. |
| */ |
| void dup_from(Task *task); |
| |
| virtual ~Task(); |
| |
| /** |
| * Fork and exec the initial task. If something goes wrong later |
| * (i.e. an exec does not occur before an exit), an error may be |
| * readable from the other end of the pipe whose write end is error_fd. |
| */ |
| static Task* spawn(Session& session, ScopedFd& error_fd, |
| ScopedFd* sock_fd_out, |
| ScopedFd* sock_fd_receiver_out, |
| int* tracee_socket_fd_number_out, |
| const std::string& exe_path, |
| const std::vector<std::string>& argv, |
| const std::vector<std::string>& envp, pid_t rec_tid = -1); |
| |
| /** |
| * Do PTRACE_SEIZE on this tid with the correct ptrace options. |
| */ |
| static long ptrace_seize(pid_t tid, Session& session); |
| |
| /** |
| * Do a tgkill to send a specific signal to this task. |
| */ |
| void tgkill(int sig); |
| |
| /** |
| * Try to move this task to a signal stop by signaling it with the |
| * syscallbuf desched signal (which is guaranteed not to be blocked). |
| * Returns false if the task exited unexpectedly. |
| */ |
| bool move_to_signal_stop(); |
| |
| // A map from original table to (potentially detached) clone, to preserve |
| // FdTable sharing relationships during a session fork. |
| using ClonedFdTables = std::unordered_map<uintptr_t, FdTable::shr_ptr>; |
| |
| /** |
| * Just forget that this Task exists. Another rr process will manage it. |
| */ |
| void forget(); |
| |
| // Used on aarch64 to detect whether we've recorded x0 and x8 on syscall entry |
| Ticks ticks_at_last_syscall_entry; |
| remote_code_ptr ip_at_last_syscall_entry; |
| // Whether the syscall entry corresponding to `{ticks,ip}_at_last_syscall_entry` |
| // has been recorded in the trace |
| // (used to avoid double recording on unexpected exit) |
| bool last_syscall_entry_recorded; |
| |
| protected: |
| Task(Session& session, pid_t tid, pid_t rec_tid, uint32_t serial, |
| SupportedArch a); |
| |
| enum CloneReason { |
| // Cloning a task in the same session due to tracee fork()/vfork()/clone() |
| TRACEE_CLONE, |
| // Cloning a task into a new session as the leader for a checkpoint |
| SESSION_CLONE_LEADER, |
| // Cloning a task into the same session to recreate threads while |
| // restoring a checkpoint |
| SESSION_CLONE_NONLEADER, |
| }; |
| /** |
| * Return a new Task cloned from |p|. |flags| are a set of |
| * CloneFlags (see above) that determine which resources are |
| * shared or copied to the new child. |new_tid| is the tid |
| * assigned to the new task by the kernel. |new_rec_tid| is |
| * only relevant to replay, and is the pid that was assigned |
| * to the task during recording. |
| */ |
| virtual Task* clone(CloneReason reason, int flags, remote_ptr<void> stack, |
| remote_ptr<void> tls, remote_ptr<int> cleartid_addr, |
| pid_t new_tid, pid_t new_rec_tid, uint32_t new_serial, |
| Session* other_session = nullptr, |
| FdTable::shr_ptr new_fds = nullptr, |
| ThreadGroup::shr_ptr new_tg = nullptr); |
| |
| /** |
| * Internal method called after the first wait() during a clone(). |
| */ |
| virtual void post_wait_clone(Task*, int) {} |
| |
| /** |
| * Internal method called after the clone to fix up the new address space. |
| */ |
| virtual bool post_vm_clone(CloneReason reason, int flags, Task* origin); |
| |
| template <typename Arch> |
| void on_syscall_exit_arch(int syscallno, const Registers& regs); |
| |
| /** Helper function for init_buffers. */ |
| template <typename Arch> void init_buffers_arch(remote_ptr<void> map_hint); |
| |
| /** |
| * Grab state from this task into a structure that we can use to |
| * initialize a new task via os_clone_into/os_fork_into and copy_state. |
| */ |
| CapturedState capture_state(); |
| |
| /** |
| * Make this task look like an identical copy of the task whose state |
| * was captured by capture_task_state(), in |
| * every way relevant to replay. This task should have been |
| * created by calling os_clone_into() or os_fork_into(), |
| * and if it wasn't results are undefined. |
| * |
| * Some task state must be copied into this by injecting and |
| * running syscalls in this task. Other state is metadata |
| * that can simply be copied over in local memory. |
| */ |
| void copy_state(const CapturedState& state); |
| |
| /** |
| * Read tracee memory using PTRACE_PEEKDATA calls. Slow, only use |
| * as fallback. Returns number of bytes actually read. |
| */ |
| ssize_t read_bytes_ptrace(remote_ptr<void> addr, ssize_t buf_size, void* buf); |
| |
| /** |
| * Write tracee memory using PTRACE_POKEDATA calls. Slow, only use |
| * as fallback. Returns number of bytes actually written. |
| */ |
| ssize_t write_bytes_ptrace(remote_ptr<void> addr, ssize_t buf_size, |
| const void* buf); |
| |
| /** |
| * Try writing 'buf' to 'addr' by replacing pages in the tracee |
| * address-space using a temporary file. This may work around PaX issues. |
| */ |
| bool try_replace_pages(remote_ptr<void> addr, ssize_t buf_size, |
| const void* buf); |
| |
| /** |
| * Map the syscallbuffer for this, shared with this process. |
| * |map_hint| is the address where the syscallbuf is expected |
| * to be mapped --- and this is asserted --- or nullptr if |
| * there are no expectations. |
| * Initializes syscallbuf_child. |
| */ |
| KernelMapping init_syscall_buffer(AutoRemoteSyscalls& remote, |
| remote_ptr<void> map_hint); |
| |
| /** |
| * Make the OS-level calls to create a new fork or clone that |
| * will eventually be a copy of this task and return that Task |
| * metadata. These methods are used in concert with |
| * |Task::copy_state()| to create task copies during |
| * checkpointing. |
| * |
| * For |os_fork_into()|, |session| will be tracking the |
| * returned fork child. |
| * |
| * For |os_clone_into()|, |task_leader| is the "main thread" |
| * in the process into which the copy of this task will be |
| * created. |task_leader| will perform the actual OS calls to |
| * create the new child. |
| */ |
| Task* os_fork_into(Session* session, FdTable::shr_ptr new_fds); |
| static Task* os_clone_into(const CapturedState& state, |
| AutoRemoteSyscalls& remote, |
| const ClonedFdTables& cloned_fd_tables, |
| ThreadGroup::shr_ptr new_tg); |
| |
| /** |
| * Return the TraceStream that we're using, if in recording or replay. |
| * Returns null if we're not in record or replay. |
| */ |
| const TraceStream* trace_stream() const; |
| |
| /** |
| * Make the OS-level calls to clone |parent| into |session| |
| * and return the resulting Task metadata for that new |
| * process. This is as opposed to |Task::clone()|, which only |
| * attaches Task metadata to an /existing/ process. |
| * |
| * The new clone will be tracked in |session|. The other |
| * arguments are as for |Task::clone()| above. |
| */ |
| static Task* os_clone(CloneReason reason, Session* session, |
| AutoRemoteSyscalls& remote, pid_t rec_child_tid, |
| uint32_t new_serial, unsigned base_flags, |
| FdTable::shr_ptr new_fds = nullptr, |
| ThreadGroup::shr_ptr new_tg = nullptr, |
| remote_ptr<void> stack = nullptr, |
| remote_ptr<int> ptid = nullptr, |
| remote_ptr<void> tls = nullptr, |
| remote_ptr<int> ctid = nullptr); |
| |
| void work_around_KNL_string_singlestep_bug(); |
| |
| void* preload_thread_locals(); |
| |
| uint32_t serial; |
| // The address space of this task. |
| AddressSpace::shr_ptr as; |
| // The file descriptor table of this task. |
| FdTable::shr_ptr fds; |
| // Count of all ticks seen by this task since tracees became |
| // consistent and the task last wait()ed. |
| Ticks ticks; |
| // Copy of the child registers. |
| // When is_stopped_ or in_unexpected_exit, these are the source of |
| // truth. Otherwise the child is running and the registers could be |
| // changed by the kernel or user-space execution, and the values here |
| // are meaningless. |
| // See also registers_dirty. |
| Registers registers; |
| // Where we last resumed execution |
| remote_code_ptr address_of_last_execution_resume; |
| // Current hardware watchpoint state as programmed into debug registers |
| HardwareWatchpoints current_hardware_watchpoints; |
| ResumeRequest how_last_execution_resumed; |
| // In certain circumstances, due to hardware bugs, we need to fudge the |
| // cx register. If so, we record the original value here. See comments in |
| // Task.cc |
| uint64_t last_resume_orig_cx; |
| // The instruction type we're singlestepping through. |
| TrappedInstruction singlestepping_instruction; |
| // True if we set a breakpoint after a singlestepped CPUID instruction. |
| // We need this in addition to `singlestepping_instruction` because that |
| // might be CPUID but we failed to set the breakpoint. |
| bool did_set_breakpoint_after_cpuid; |
| // True when we know via waitpid() that the task was stopped in |
| // a ptrace-stop and we haven't resumed it. |
| // It is possible that the task has been pushed out of the ptrace-stop |
| // without our knowledge, due to a SIGKILL or equivalent such as |
| // zap_pid_ns_processes. |
| bool is_stopped_; |
| // True when we've been kicked out of a ptrace-stop via SIGKILL or |
| // equivalent. |
| bool in_unexpected_exit; |
| /* True when the seccomp filter has been enabled via prctl(). This happens |
| * in the first system call issued by the initial tracee (after it returns |
| * from kill(SIGSTOP) to synchronize with the tracer). */ |
| bool seccomp_bpf_enabled; |
| // True when 'registers' has changes that haven't been flushed back to the |
| // task yet. |
| bool registers_dirty; |
| // True when changes to the original syscallno in 'registers' have not been |
| // flushed back to the task yet. Some architectures (e.g. AArch64) require a |
| // separate ptrace call for this. |
| bool orig_syscallno_dirty; |
| // When |extra_registers_known|, we have saved our extra registers. |
| ExtraRegisters extra_registers; |
| bool extra_registers_known; |
| // The session we're part of. |
| Session* session_; |
| // The thread group this belongs to. |
| std::shared_ptr<ThreadGroup> tg; |
| // Entries set by |set_thread_area()| or the |tls| argument to |clone()| |
| // (when that's a user_desc). May be more than one due to different |
| // entry_numbers. |
| // x86(_64) only. |
| std::vector<X86Arch::user_desc> thread_areas_; |
| // The |stack| argument passed to |clone()|, which for |
| // "threads" is the top of the user-allocated stack. |
| remote_ptr<void> top_of_stack; |
| // The most recent status of this task as returned by |
| // waitpid(). |
| WaitStatus wait_status; |
| // The most recent siginfo (captured when wait_status shows pending_sig()) |
| siginfo_t pending_siginfo; |
| // True when a PTRACE_EXIT_EVENT has been observed in the wait_status |
| // for this task. |
| bool seen_ptrace_exit_event_; |
| // True when a PTRACE_EXIT_EVENT has been handled for this task. |
| // By handled we mean either RecordSession's handle_ptrace_exit_event was |
| // run (or the replay equivalent) or we recognized that the task is already |
| // dead and we cleaned up our books so we don't try to destroy our buffers |
| // or anything like that in an already deceased task. |
| // We might defer handling the exit (e.g. if there's an ongoing execve). |
| // If this is true, `seen_ptrace_exit_event` must be true. |
| bool handled_ptrace_exit_event_; |
| |
| // A counter for the number of stops for which the stop may have been caused |
| // by PTRACE_INTERRUPT. See description in do_waitpid |
| int expecting_ptrace_interrupt_stop; |
| |
| bool was_reaped_; |
| // Let this Task object be destroyed with no consequences. |
| bool forgotten; |
| |
| Task(Task&) = delete; |
| Task operator=(Task&) = delete; |
| }; |
| |
| } // namespace rr |
| |
| #endif /* RR_TASK_H_ */ |