blob: 2b60536e5438efd2e9001f43d8294c72aee10243 [file] [log] [blame]
Robert O'Callahand0706392014-09-09 12:01:23 +12001/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
Chris Jonesc43ca322014-07-23 11:44:36 -07002
Robert O'Callahan3bb518b2014-09-27 15:35:25 -04003#include "AutoRemoteSyscalls.h"
Chris Jonesc43ca322014-07-23 11:44:36 -07004
Robert O'Callahanfb935342016-06-24 23:26:03 +12005#include <limits.h>
6#include <linux/net.h>
Keno Fischerc0948922016-06-04 22:13:20 -04007#include <sys/socket.h>
Robert O'Callahan6ca06de2018-04-25 00:18:10 +12008#include <sys/types.h>
Robert O'Callahan8782ae92015-07-30 00:14:28 +12009
Bernhard Übelacker8a840232023-03-03 00:04:39 +010010#include <sstream>
11
Robert O'Callahanbd0d2cf2015-04-17 16:20:45 +120012#include "rr/rr.h"
13
Kyle Hueycd145d12019-07-07 20:07:13 -070014#include "RecordSession.h"
Robert O'Callahan5c9348d2017-08-10 23:25:35 +120015#include "RecordTask.h"
Robert O'Callahan3ecaab02015-05-01 14:05:52 +120016#include "ReplaySession.h"
Robert O'Callahan0e9fe832015-03-14 00:09:00 +130017#include "Session.h"
Robert O'Callahaneed69152016-03-19 16:10:49 +130018#include "Task.h"
Robert O'Callahand218a532017-08-07 17:20:51 +120019#include "core.h"
Robert O'Callahand83c3612023-01-02 20:51:14 +130020#include "kernel_abi.h"
Robert O'Callahanfb935342016-06-24 23:26:03 +120021#include "kernel_metadata.h"
22#include "log.h"
Chris Jonesc43ca322014-07-23 11:44:36 -070023#include "util.h"
24
Robert O'Callahan978e59e2014-11-20 16:22:24 +130025using namespace std;
Robert O'Callahan771de352014-10-02 11:52:34 -040026
Robert O'Callahan3ce49c62016-03-22 18:31:02 +130027namespace rr {
28
/**
 * Argument block for the multiplexed socketcall syscall. The kernel is
 * handed the sub-operation as its first argument and a pointer to a block
 * like this one as its second; what each slot means depends on the
 * sub-operation.
 */
template <typename Arch>
struct __attribute__((packed)) socketcall_args {
  typename Arch::signed_long args[3];
};
37
Robert O'Callahan128123a2016-04-26 17:34:44 +120038void AutoRestoreMem::init(const void* mem, ssize_t num_bytes) {
Keno Fischer55e68a92016-09-21 19:53:13 -040039 ASSERT(remote.task(),
40 remote.enable_mem_params() == AutoRemoteSyscalls::ENABLE_MEMORY_PARAMS)
Robert O'Callahan8412c2a2015-07-23 12:08:15 +120041 << "Memory parameters were disabled";
42
Robert O'Callahand0706392014-09-09 12:01:23 +120043 len = num_bytes;
Robert O'Callahanbc8c7ee2014-09-22 10:06:31 -040044 saved_sp = remote.regs().sp();
Chris Jonesc43ca322014-07-23 11:44:36 -070045
Robert O'Callahand0706392014-09-09 12:01:23 +120046 remote.regs().set_sp(remote.regs().sp() - len);
47 remote.task()->set_regs(remote.regs());
Robert O'Callahanbc8c7ee2014-09-22 10:06:31 -040048 addr = remote.regs().sp();
Chris Jonesc43ca322014-07-23 11:44:36 -070049
Robert O'Callahan0a9e58f2014-09-27 18:18:54 -040050 data.resize(len);
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +130051 bool ok = true;
52 remote.task()->read_bytes_helper(addr, len, data.data(), &ok);
Robert O'Callahand0706392014-09-09 12:01:23 +120053 if (mem) {
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +130054 remote.task()->write_bytes_helper(addr, len, mem, &ok);
55 }
56 if (!ok) {
57 addr = nullptr;
Robert O'Callahand0706392014-09-09 12:01:23 +120058 }
Chris Jonesc43ca322014-07-23 11:44:36 -070059}
60
Robert O'Callahand0706392014-09-09 12:01:23 +120061AutoRestoreMem::~AutoRestoreMem() {
Robert O'Callahand218a532017-08-07 17:20:51 +120062 DEBUG_ASSERT(saved_sp == remote.regs().sp() + len);
Chris Jonesc43ca322014-07-23 11:44:36 -070063
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +130064 if (addr) {
65 // XXX what should we do if this task was sigkilled but the address
66 // space is used by other live tasks?
67 remote.task()->write_bytes_helper(addr, len, data.data());
68 }
Robert O'Callahand0706392014-09-09 12:01:23 +120069 remote.regs().set_sp(remote.regs().sp() + len);
Robert O'Callahan8c468fb2023-09-18 19:28:02 +120070 remote.task()->set_regs(remote.regs());
Chris Jonesc43ca322014-07-23 11:44:36 -070071}
72
Robert O'Callahaned45cd62017-08-16 01:29:05 +120073static bool is_SIGTRAP_default_and_unblocked(Task* t) {
Robert O'Callahanfa24c202017-08-12 00:46:23 +120074 if (!t->session().is_recording()) {
Robert O'Callahaned45cd62017-08-16 01:29:05 +120075 return true;
Robert O'Callahanfa24c202017-08-12 00:46:23 +120076 }
Robert O'Callahaned45cd62017-08-16 01:29:05 +120077 RecordTask* rt = static_cast<RecordTask*>(t);
78 return rt->sig_disposition(SIGTRAP) == SIGNAL_DEFAULT &&
79 !rt->is_sig_blocked(SIGTRAP);
Robert O'Callahanfa24c202017-08-12 00:46:23 +120080}
81
Robert O'Callahan8412c2a2015-07-23 12:08:15 +120082AutoRemoteSyscalls::AutoRemoteSyscalls(Task* t,
83 MemParamsEnabled enable_mem_params)
Robert O'Callahand0706392014-09-09 12:01:23 +120084 : t(t),
85 initial_regs(t->regs()),
86 initial_ip(t->ip()),
Robert O'Callahan0e9fe832015-03-14 00:09:00 +130087 initial_sp(t->regs().sp()),
Keno Fischer3bef84d2021-01-13 03:46:57 -050088 initial_at_seccomp(t->ptrace_event() == PTRACE_EVENT_SECCOMP),
Robert O'Callahan955b5e02017-08-12 16:59:09 +120089 restore_wait_status(t->status()),
Robert O'Callahan5c9348d2017-08-10 23:25:35 +120090 new_tid_(-1),
Keno Fischer55e68a92016-09-21 19:53:13 -040091 scratch_mem_was_mapped(false),
Robert O'Callahanfa24c202017-08-12 00:46:23 +120092 use_singlestep_path(false),
Keno Fischer8721ae82022-04-13 23:17:37 +000093 enable_mem_params_(enable_mem_params),
Keno Fischer42728be2022-04-12 00:23:09 +000094 restore_sigmask(false),
Keno Fischer8721ae82022-04-13 23:17:37 +000095 need_sigpending_renable(false) {
Keno Fischer3bef84d2021-01-13 03:46:57 -050096 if (initial_at_seccomp) {
97 // This should only ever happen during recording - we don't use the
98 // seccomp traps during replay.
99 ASSERT(t, t->session().is_recording());
100 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200101 // We support two paths for syscalls:
102 // -- a fast path using a privileged untraced syscall and PTRACE_SINGLESTEP.
103 // This only requires a single task-wait.
104 // -- a slower path using a privileged traced syscall and PTRACE_SYSCALL/
105 // PTRACE_CONT via Task::enter_syscall(). This requires 2 or 3 task-waits
106 // depending on whether the seccomp event fires before the syscall-entry
107 // event.
108 // Use the slow path when running under rr, because the rr recording us
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200109 // needs to see and trace these tracee syscalls, and if they're untraced by
110 // us they're also untraced by the outer rr.
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200111 // Use the slow path if SIGTRAP is blocked or ignored because otherwise
112 // the PTRACE_SINGLESTEP will cause the kernel to unblock it.
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200113 setup_path(t->vm()->has_rr_page() && !running_under_rr() &&
114 is_SIGTRAP_default_and_unblocked(t));
115 if (enable_mem_params == ENABLE_MEMORY_PARAMS) {
116 maybe_fix_stack_pointer();
117 }
Keno Fischer8721ae82022-04-13 23:17:37 +0000118 if (t->status().is_syscall() && t->regs().syscall_may_restart()) {
119 // VERY rare corner case alert: It is possible for the following sequence
120 // of events to occur:
121 //
122 // 1. Thread A is in a blocking may-restart syscall and gets interrupted by a tg-targeted signal
123 // 2. Thread B dequeues the signal
124 // 3. Thread A is in the syscall-exit-stop with TIF_SIGPENDING set (with registers indicating syscall restart)
125 // 4. We get here to perform an AutoRemoteSyscall
126 // 5. During AutoRemoteSyscall, TIF_SIGPENDING gets cleared on return to userspace
127 // 6. We finish the AutoRemoteSyscall and re-apply the registers.
128 // 7. ... As a result, the kernel does not check whether it needs to perform the
129 /// syscall-restart register adjustment because TIF_SIGPENDING is not set.
130 // 8. The -ERESTART error code leaks to userspace.
131 //
132 // Arguably this is a kernel bug, but it's not clear how the behavior should be changed.
133 //
134 // To work around this, we forcibly re-enable TIF_SIGPENDING when cleaning up
135 // AutoRemoteSyscall (see below).
136 need_sigpending_renable = true;
137 }
Keno Fischer42728be2022-04-12 00:23:09 +0000138 if (t->session().is_recording()) {
139 RecordTask *rt = static_cast<RecordTask*>(t);
140 if (rt->schedule_frozen) {
141 // If we're explicitly controlling the schedule, make sure not to accidentally run
142 // any signals that we were not meant to be able to see.
143 restore_sigmask = true;
144 sigmask_to_restore = rt->get_sigmask();
145 sig_set_t all_blocked;
146 memset(&all_blocked, 0xff, sizeof(all_blocked));
Keno Fischerde0f22f2022-07-02 21:24:32 +0000147 // Ignore the process dying here - we'll notice later.
148 (void)rt->set_sigmask(all_blocked);
Keno Fischer42728be2022-04-12 00:23:09 +0000149 }
150 }
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200151}
152
153void AutoRemoteSyscalls::setup_path(bool enable_singlestep_path) {
Kyle Huey26fccb32021-06-28 11:28:11 -0700154#if defined(__aarch64__)
155 // XXXkhuey this fast path doesn't work on AArch64 yet, go slow instead
156 enable_singlestep_path = false;
157#endif
158
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200159 if (!replaced_bytes.empty()) {
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300160 // XXX what to do here to clean up if the task died unexpectedly?
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200161 t->write_mem(remote_ptr<uint8_t>(initial_regs.ip().to_data_ptr<uint8_t>()),
162 replaced_bytes.data(), replaced_bytes.size());
163 }
164
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200165 remote_code_ptr syscall_ip;
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200166 use_singlestep_path = enable_singlestep_path;
Robert O'Callahan4a9e24b2018-09-05 13:57:34 +1200167 if (use_singlestep_path) {
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200168 syscall_ip = AddressSpace::rr_page_syscall_entry_point(
169 AddressSpace::UNTRACED, AddressSpace::PRIVILEGED,
170 AddressSpace::RECORDING_AND_REPLAY, t->arch());
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200171 } else {
172 syscall_ip = t->vm()->traced_syscall_ip();
173 }
174 initial_regs.set_ip(syscall_ip);
Keno Fischer55e68a92016-09-21 19:53:13 -0400175
Keno Fischer3d782362016-06-05 20:01:35 -0400176 // We need to make sure to clear any breakpoints or other alterations of
177 // the syscall instruction we're using. Note that the tracee may have set its
178 // own breakpoints or otherwise modified the instruction, so suspending our
179 // own breakpoint is insufficient.
180 std::vector<uint8_t> syscall = rr::syscall_instruction(t->arch());
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300181 bool ok = true;
Keno Fischer3d782362016-06-05 20:01:35 -0400182 replaced_bytes =
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300183 t->read_mem(initial_regs.ip().to_data_ptr<uint8_t>(), syscall.size(), &ok);
184 if (!ok) {
185 // The task died
186 return;
187 }
Keno Fischera8d922e2016-06-05 22:10:13 -0400188 if (replaced_bytes == syscall) {
189 replaced_bytes.clear();
190 } else {
191 t->write_mem(initial_regs.ip().to_data_ptr<uint8_t>(), syscall.data(),
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300192 syscall.size(), &ok);
Keno Fischera8d922e2016-06-05 22:10:13 -0400193 }
Robert O'Callahan0e9fe832015-03-14 00:09:00 +1300194}
195
Robert O'Callahanf8c2fee2015-08-28 23:41:09 +1200196static bool is_usable_area(const KernelMapping& km) {
197 return (km.prot() & (PROT_READ | PROT_WRITE)) == (PROT_READ | PROT_WRITE) &&
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200198 (km.flags() & MAP_PRIVATE);
Robert O'Callahanf8c2fee2015-08-28 23:41:09 +1200199}
200
Robert O'Callahan0e9fe832015-03-14 00:09:00 +1300201void AutoRemoteSyscalls::maybe_fix_stack_pointer() {
Robert O'Callahan0e6ac7e2016-03-02 17:27:02 +1300202 if (!t->session().done_initial_exec()) {
Robert O'Callahan0e9fe832015-03-14 00:09:00 +1300203 return;
204 }
205
206 remote_ptr<void> last_stack_byte = t->regs().sp() - 1;
207 if (t->vm()->has_mapping(last_stack_byte)) {
208 auto m = t->vm()->mapping_of(last_stack_byte);
Robert O'Callahanf8c2fee2015-08-28 23:41:09 +1200209 if (is_usable_area(m.map) && m.map.start() + 2048 <= t->regs().sp()) {
Robert O'Callahan0e9fe832015-03-14 00:09:00 +1300210 // 'sp' is in a stack region and there's plenty of space there. No need
211 // to fix anything.
212 return;
213 }
214 }
215
Robert O'Callahana48afd12015-07-27 16:15:54 +1200216 MemoryRange found_stack;
Robert O'Callahanf26ca552017-08-03 23:03:46 +1200217 for (const auto& m : t->vm()->maps()) {
Robert O'Callahanf8c2fee2015-08-28 23:41:09 +1200218 if (is_usable_area(m.map)) {
219 found_stack = m.map;
220 break;
Robert O'Callahan0e9fe832015-03-14 00:09:00 +1300221 }
222 };
Robert O'Callahan0e9fe832015-03-14 00:09:00 +1300223
Keno Fischer55e68a92016-09-21 19:53:13 -0400224 if (found_stack.start().is_null()) {
225 AutoRemoteSyscalls remote(t, DISABLE_MEMORY_PARAMS);
226 found_stack =
Robert O'Callahan4bd24672022-03-26 14:23:23 +1300227 MemoryRange(remote.infallible_mmap_syscall_if_alive(
Keno Fischer55e68a92016-09-21 19:53:13 -0400228 remote_ptr<void>(), 4096, PROT_READ | PROT_WRITE,
229 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
230 4096);
Robert O'Callahan4bd24672022-03-26 14:23:23 +1300231 ASSERT(t, !found_stack.start().is_null())
232 << "Tracee unexpectedly died here";
Keno Fischer55e68a92016-09-21 19:53:13 -0400233 scratch_mem_was_mapped = true;
234 }
235
236 fixed_sp = found_stack.end();
Robert O'Callahand218a532017-08-07 17:20:51 +1200237 DEBUG_ASSERT(!fixed_sp.is_null());
Keno Fischer55e68a92016-09-21 19:53:13 -0400238 initial_regs.set_sp(fixed_sp);
Chris Jonesc43ca322014-07-23 11:44:36 -0700239}
240
Robert O'Callahand0706392014-09-09 12:01:23 +1200241AutoRemoteSyscalls::~AutoRemoteSyscalls() { restore_state_to(t); }
Chris Jonesc43ca322014-07-23 11:44:36 -0700242
Robert O'Callahand0706392014-09-09 12:01:23 +1200243void AutoRemoteSyscalls::restore_state_to(Task* t) {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200244 // Check if the task was unexpectedly killed via SIGKILL or equivalent.
245 bool is_exiting = !t->is_stopped() || t->ptrace_event() == PTRACE_EVENT_EXIT ||
246 t->was_reaped();
247
Keno Fischer55e68a92016-09-21 19:53:13 -0400248 // Unmap our scatch region if required
Robert O'Callahan024064f2023-08-23 07:34:49 +1200249 if (scratch_mem_was_mapped && !is_exiting) {
Keno Fischer55e68a92016-09-21 19:53:13 -0400250 AutoRemoteSyscalls remote(t, DISABLE_MEMORY_PARAMS);
Robert O'Callahan1659fd32018-01-24 23:58:32 +1300251 remote.infallible_syscall(syscall_number_for_munmap(arch()),
252 fixed_sp - 4096, 4096);
Keno Fischer55e68a92016-09-21 19:53:13 -0400253 }
Keno Fischera8d922e2016-06-05 22:10:13 -0400254 if (!replaced_bytes.empty()) {
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300255 // XXX how to clean up if the task died and the address space is shared with live task?
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200256 t->write_mem(remote_ptr<uint8_t>(initial_regs.ip().to_data_ptr<uint8_t>()),
Keno Fischera8d922e2016-06-05 22:10:13 -0400257 replaced_bytes.data(), replaced_bytes.size());
258 }
Kyle Huey9d2c6742017-10-18 12:47:44 -0700259 auto regs = initial_regs;
260 regs.set_ip(initial_ip);
261 regs.set_sp(initial_sp);
Robert O'Callahan024064f2023-08-23 07:34:49 +1200262 if (is_exiting) {
263 // Don't restore status; callers need to see the task is exiting.
264 // And the other stuff we don't below won't work.
265 // But do restore registers so it looks like the exit happened in a clean state.
Robert O'Callahan8c468fb2023-09-18 19:28:02 +1200266 t->set_regs(regs);
Robert O'Callahan024064f2023-08-23 07:34:49 +1200267 return;
268 }
269
Yichao Yu5871ae52022-06-17 00:41:38 -0400270 if (t->arch() == aarch64 && regs.syscall_may_restart()) {
271 // On AArch64, the kernel restarts aborted syscalls using an internal `orig_x0`.
272 // This gets overwritten everytime we make a syscall so we need to restore it
273 // if we are at a syscall that may restart.
274 // The kernel `orig_x0` isn't accessible from ptrace AFAICT but fortunately
275 // it does **NOT** get reset on syscall exit so we can actually set it's value
276 // just by making a dummy syscall with the correct x0 value.
277 auto restart_res = regs.syscall_result();
278 regs.set_ip(t->vm()->traced_syscall_ip());
279 // This can be any side-effect-free syscall that doesn't care about arg1.
280 // The kernel sets its `orig_x0` no matter whether the syscall actually needs it.
281 regs.set_syscallno(rr::ARM64Arch::getpid);
282 regs.set_arg1(regs.orig_arg1());
283 t->set_regs(regs);
284 if (t->enter_syscall(true)) {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200285 if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
286 // Tracee died unexpectedly, there is nothing more we can do.
287 // Do not restore the status, we want callers to see that the task died.
288 return;
289 }
Yichao Yu5871ae52022-06-17 00:41:38 -0400290 }
291 regs.set_ip(initial_ip);
292 regs.set_syscallno(regs.original_syscallno());
293 regs.set_syscall_result(restart_res);
294 }
Keno Fischer3bef84d2021-01-13 03:46:57 -0500295 // If we were sitting at a seccomp trap, try to get back there by resuming
296 // here. Since the original register contents caused a seccomp trap,
297 // re-running the syscall with the same registers should put us right back
298 // to this same seccomp trap.
299 if (initial_at_seccomp && t->ptrace_event() != PTRACE_EVENT_SECCOMP) {
Keno Fischer31140d02022-04-13 22:32:52 +0000300 regs.set_ip(initial_ip.decrement_by_syscall_insn_length(t->arch()));
301 regs.set_syscallno(regs.original_syscallno());
302 t->set_regs(regs);
Keno Fischer3bef84d2021-01-13 03:46:57 -0500303 RecordTask* rt = static_cast<RecordTask*>(t);
304 while (true) {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200305 if (!rt->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
306 // Tracee died unexpectedly, there is nothing more we can do.
307 // Do not restore the status, we want callers to see that the task died.
308 return;
309 }
Keno Fischer3bef84d2021-01-13 03:46:57 -0500310 if (rt->ptrace_event())
311 break;
312 rt->stash_sig();
313 }
314 ASSERT(rt, rt->ptrace_event() == PTRACE_EVENT_SECCOMP);
Keno Fischer31140d02022-04-13 22:32:52 +0000315 } else {
316 // Restore stomped registers.
317 t->set_regs(regs);
Keno Fischer3bef84d2021-01-13 03:46:57 -0500318 }
Robert O'Callahan955b5e02017-08-12 16:59:09 +1200319 t->set_status(restore_wait_status);
Keno Fischer42728be2022-04-12 00:23:09 +0000320 if (restore_sigmask) {
321 static_cast<RecordTask*>(t)->set_sigmask(sigmask_to_restore);
322 }
Keno Fischer8721ae82022-04-13 23:17:37 +0000323 if (need_sigpending_renable) {
324 // The purpose of this PTRACE_INTERRUPT is to re-enable TIF_SIGPENDING on
325 // the tracee, without forcing any actual signals on it. Since PTRACE_INTERRUPT
326 // needs to be able to interrupt re-startable system calls, it is required
327 // to set TIF_SIGPENDING, but the fact that this works is of course a very
328 // deep implementation detail.
Robert O'Callahan70395342023-08-19 15:27:44 +1200329 // If this fails then the tracee must be dead or no longer traced, in which
330 // case we no longer care about its TIF_SIGPENDING status.
Keno Fischer25105202022-06-26 23:08:20 +0000331 t->do_ptrace_interrupt();
Keno Fischer8721ae82022-04-13 23:17:37 +0000332 }
Chris Jonesc43ca322014-07-23 11:44:36 -0700333}
334
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200335static bool ignore_signal(Task* t) {
336 int sig = t->stop_sig();
337 if (!sig) {
338 return false;
339 }
340 if (t->session().is_replaying()) {
341 if (ReplaySession::is_ignored_signal(sig)) {
342 return true;
343 }
344 } else if (t->session().is_recording()) {
Kyle Hueycd145d12019-07-07 20:07:13 -0700345 auto rt = static_cast<RecordTask*>(t);
346 if (sig != rt->session().syscallbuf_desched_sig()) {
347 rt->stash_sig();
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200348 }
349 return true;
350 }
Robert O'Callahan336edc32022-08-16 08:17:41 +1200351 siginfo_t siginfo;
Robert O'Callahan57d0a802023-08-19 23:05:23 +1200352 errno = 0;
353 t->fallible_ptrace(PTRACE_GETSIGINFO, nullptr, &siginfo);
354 if (errno) {
Robert O'Callahan336edc32022-08-16 08:17:41 +1200355 ASSERT(t, false) << "Unexpected signal " << signal_name(sig);
Robert O'Callahan57d0a802023-08-19 23:05:23 +1200356 } else {
357 ASSERT(t, false) << "Unexpected signal " << siginfo;
Robert O'Callahan336edc32022-08-16 08:17:41 +1200358 }
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200359 return false;
360}
361
Robert O'Callahanf24e09c2017-08-11 16:36:30 +1200362long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
Keno Fischer4e100882020-03-19 23:19:20 -0400363 LOG(debug) << "syscall " << syscall_name(syscallno, t->arch()) << " " << callregs;
Robert O'Callahana8eac642016-08-01 11:06:20 +1200364
Robert O'Callahan28feb3b2023-08-25 22:09:52 +1200365 if (t->seen_ptrace_exit_event()) {
Robert O'Callahanda7e2e82020-07-27 15:55:45 +1200366 LOG(debug) << "Task is dying, don't try anything.";
367 return -ESRCH;
368 }
369
Robert O'Callahaned45cd62017-08-16 01:29:05 +1200370 if ((int)callregs.arg1() == SIGTRAP && use_singlestep_path &&
371 (is_sigaction_syscall(syscallno, t->arch()) ||
372 is_rt_sigaction_syscall(syscallno, t->arch()) ||
373 is_signal_syscall(syscallno, t->arch()))) {
374 // Don't use the fast path if we're about to set up a signal handler
375 // for SIGTRAP!
376 LOG(debug) << "Disabling singlestep path due to SIGTRAP sigaction";
377 setup_path(false);
378 callregs.set_ip(initial_regs.ip());
379 }
380
Keno Fischer04f22d92020-05-21 23:18:41 +0000381 callregs.set_original_syscallno(syscallno);
Robert O'Callahand0706392014-09-09 12:01:23 +1200382 callregs.set_syscallno(syscallno);
383 t->set_regs(callregs);
Chris Jonesc43ca322014-07-23 11:44:36 -0700384
Keno Fischer31140d02022-04-13 22:32:52 +0000385 bool from_seccomp = initial_at_seccomp && t->ptrace_event() == PTRACE_EVENT_SECCOMP;
386 if (use_singlestep_path && !from_seccomp) {
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200387 while (true) {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200388 if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
389 // Tracee was killed, there is nothing more we can do.
390 return -ESRCH;
391 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200392 LOG(debug) << "Used singlestep path; status=" << t->status();
Robert O'Callahanb1435042017-08-11 18:27:49 +1200393 // When a PTRACE_EVENT_EXIT is returned we don't update registers
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200394 if (t->ip() != callregs.ip()) {
395 // We entered the syscall, so stop now
396 break;
397 }
Keno Fischer04f22d92020-05-21 23:18:41 +0000398 if (t->stop_sig() == SIGTRAP && t->get_siginfo().si_code == TRAP_TRACE) {
399 // On aarch64, if we were previously in a syscall-exit stop, continuing
400 // with PTRACE_SINGLESTEP will result in incurring a trap upon execution
401 // of the first instruction in userspace. Ignore such a trap.
402 continue;
403 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200404 if (ignore_signal(t)) {
405 // We were interrupted by a signal before we even entered the syscall
406 continue;
407 }
408 ASSERT(t, false) << "Unexpected status " << t->status();
409 }
410 } else {
Keno Fischer31140d02022-04-13 22:32:52 +0000411 if (from_seccomp) {
Keno Fischer3bef84d2021-01-13 03:46:57 -0500412 LOG(debug) << "Skipping enter_syscall - already at seccomp stop";
413 } else {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200414 if (!t->enter_syscall(true)) {
415 // Tracee was killed, there is nothing more we can do.
416 // Ensure callers see the task death status.
417 return -ESRCH;
418 }
Keno Fischer31140d02022-04-13 22:32:52 +0000419 LOG(debug) << "Used enter_syscall; status=" << t->status();
Keno Fischer3bef84d2021-01-13 03:46:57 -0500420 }
Robert O'Callahan024064f2023-08-23 07:34:49 +1200421 if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
422 // Tracee was killed, there is nothing more we can do.
423 // Ensure callers see the task death status.
Robert O'Callahan2c1a6d42023-08-24 01:23:28 +1200424 return -ESRCH;
Robert O'Callahan955b5e02017-08-12 16:59:09 +1200425 }
Robert O'Callahan024064f2023-08-23 07:34:49 +1200426 LOG(debug) << "syscall exit status=" << t->status();
427 }
428 while (true) {
Robert O'Callahan955b5e02017-08-12 16:59:09 +1200429 if (t->status().is_syscall() ||
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200430 (t->stop_sig() == SIGTRAP &&
431 is_kernel_trap(t->get_siginfo().si_code))) {
432 // If we got a SIGTRAP then we assume that's our singlestep and we're
433 // done.
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200434 break;
435 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200436 if (is_clone_syscall(syscallno, t->arch()) &&
Robert O'Callahan27d47d62017-08-15 13:59:29 +1200437 t->clone_syscall_is_complete(&new_tid_, t->arch())) {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200438 if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
439 // Tracee was killed, there is nothing more we can do.
440 return -ESRCH;
441 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200442 LOG(debug) << "got clone event; new status=" << t->status();
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200443 continue;
444 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200445 if (ignore_signal(t)) {
446 if (t->regs().syscall_may_restart()) {
Yichao Yu3e0378c2022-06-20 11:25:36 -0400447 if (!t->enter_syscall(true)) {
Robert O'Callahan024064f2023-08-23 07:34:49 +1200448 // Tracee was killed, there is nothing more we can do.
449 return -ESRCH;
Yichao Yu3e0378c2022-06-20 11:25:36 -0400450 }
Robert O'Callahan955b5e02017-08-12 16:59:09 +1200451 LOG(debug) << "signal ignored; restarting syscall, status="
452 << t->status();
Robert O'Callahan024064f2023-08-23 07:34:49 +1200453 if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
454 // Tracee was killed, there is nothing more we can do.
455 return -ESRCH;
456 }
Robert O'Callahanfa24c202017-08-12 00:46:23 +1200457 LOG(debug) << "syscall exit status=" << t->status();
458 continue;
459 }
460 LOG(debug) << "signal ignored";
461 // We have been notified of a signal after a non-interruptible syscall
462 // completed. Don't continue, we're done here.
463 break;
464 }
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200465 ASSERT(t, false) << "Unexpected status " << t->status();
Robert O'Callahanf24e09c2017-08-11 16:36:30 +1200466 break;
Robert O'Callahan5c9348d2017-08-10 23:25:35 +1200467 }
Chris Jonesc43ca322014-07-23 11:44:36 -0700468
Robert O'Callahan2c1a6d42023-08-24 01:23:28 +1200469 LOG(debug) << "done, result=" << t->regs().syscall_result();
470 return t->regs().syscall_result();
Chris Jonesc43ca322014-07-23 11:44:36 -0700471}
Nathan Froyde5caec22014-09-11 08:48:34 -0400472
Robert O'Callahan53bd4b32014-09-13 09:54:47 +1200473SupportedArch AutoRemoteSyscalls::arch() const { return t->arch(); }
Robert O'Callahan771de352014-10-02 11:52:34 -0400474
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300475template <typename Arch>
476static void write_socketcall_args(Task* t, remote_ptr<void> remote_mem,
Robert O'Callahan37529cd2014-11-21 17:07:08 +1300477 typename Arch::signed_long arg1,
478 typename Arch::signed_long arg2,
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300479 typename Arch::signed_long arg3,
480 bool* ok) {
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300481 socketcall_args<Arch> sc_args = { { arg1, arg2, arg3 } };
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300482 t->write_mem(remote_mem.cast<socketcall_args<Arch>>(), sc_args, ok);
Robert O'Callahan771de352014-10-02 11:52:34 -0400483}
484
Keno Fischer0ec10682020-04-02 01:03:49 -0400485template <typename Arch>
486struct fd_message {
487 // Unfortunately we need to send at least one byte of data in our
488 // message for it to work
489 char data;
490 typename Arch::iovec msgdata;
491 char cmsgbuf[Arch::cmsg_space(sizeof(int))];
492 typename Arch::msghdr msg;
493 // XXX: Could make this conditional on Arch
494 socketcall_args<Arch> socketcall;
495 void init(remote_ptr<fd_message<Arch>> base) {
496 data = 0;
497 msgdata.iov_base = REMOTE_PTR_FIELD(base, data);
498 msgdata.iov_len = 1;
499 memset(&msg, 0, sizeof(msg));
500 msg.msg_control = REMOTE_PTR_FIELD(base, cmsgbuf);
501 msg.msg_controllen = sizeof(cmsgbuf);
502 msg.msg_iov = REMOTE_PTR_FIELD(base, msgdata);
503 msg.msg_iovlen = 1;
Robert O'Callahanbc6f1232017-08-07 16:14:19 +1200504 }
Keno Fischer0ec10682020-04-02 01:03:49 -0400505 fd_message(remote_ptr<fd_message<Arch>> base) {
506 init(base);
507 }
508 fd_message() {
509 init((uintptr_t)this);
510 }
511 remote_ptr<fd_message<Arch>> remote_this() {
512 return msgdata.iov_base.rptr().as_int();
513 }
514 remote_ptr<typename Arch::msghdr> remote_msg() {
515 return REMOTE_PTR_FIELD(remote_this(), msg);
516 }
517 remote_ptr<socketcall_args<Arch>> remote_sc_args() {
518 return REMOTE_PTR_FIELD(remote_this(), socketcall);
519 }
520 remote_ptr<int> remote_cmsgdata() {
521 return REMOTE_PTR_FIELD(remote_this(), cmsgbuf).as_int() +
522 (uintptr_t)Arch::cmsg_data(NULL);
523 }
524};
Robert O'Callahan771de352014-10-02 11:52:34 -0400525
Robert O'Callahan044b97f2016-05-26 10:43:00 +1200526template <typename Arch>
Keno Fischer0ec10682020-04-02 01:03:49 -0400527static long child_sendmsg(AutoRemoteSyscalls& remote, int child_sock, int fd) {
528 AutoRestoreMem remote_buf(remote, nullptr, sizeof(fd_message<Arch>));
529 fd_message<Arch> msg(remote_buf.get().cast<fd_message<Arch>>());
Robert O'Callahan771de352014-10-02 11:52:34 -0400530 // Pull the puppet strings to have the child send its fd
531 // to us. Similarly to above, we DONT_WAIT on the
532 // call to finish, since it's likely not defined whether the
533 // sendmsg() may block on our recvmsg()ing what the tracee
534 // sent us (in which case we would deadlock with the tracee).
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300535 // We call sendmsg on child socket, but first we have to prepare a lot of
536 // data.
Keno Fischer0ec10682020-04-02 01:03:49 -0400537 auto cmsg = reinterpret_cast<typename Arch::cmsghdr*>(msg.cmsgbuf);
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300538 cmsg->cmsg_len = Arch::cmsg_len(sizeof(fd));
Robert O'Callahan771de352014-10-02 11:52:34 -0400539 cmsg->cmsg_level = SOL_SOCKET;
540 cmsg->cmsg_type = SCM_RIGHTS;
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300541 *static_cast<int*>(Arch::cmsg_data(cmsg)) = fd;
Keno Fischer0ec10682020-04-02 01:03:49 -0400542
543 if (has_socketcall_syscall(Arch::arch())) {
544 socketcall_args<Arch> sc_args = { { child_sock, (typename Arch::signed_long)msg.remote_msg().as_int(), 0 } };
545 msg.socketcall = sc_args;
546 }
547
548 bool ok = true;
549 remote.task()->write_bytes_helper(remote_buf.get().cast<char>(),
550 sizeof(msg), &msg, &ok);
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300551
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300552 if (!ok) {
553 return -ESRCH;
554 }
Keno Fischer0ec10682020-04-02 01:03:49 -0400555 if (!has_socketcall_syscall(Arch::arch())) {
556 return remote.syscall(Arch::sendmsg, child_sock, msg.remote_msg(), 0);
Robert O'Callahan771de352014-10-02 11:52:34 -0400557 }
Keno Fischer0ec10682020-04-02 01:03:49 -0400558 return remote.syscall(Arch::socketcall, SYS_SENDMSG, msg.remote_sc_args());
559}
560
561template <typename Arch>
562static long child_recvmsg(AutoRemoteSyscalls& remote, int child_sock) {
563 AutoRestoreMem remote_buf(remote, nullptr, sizeof(fd_message<Arch>));
564 fd_message<Arch> msg(remote_buf.get().cast<fd_message<Arch>>());
565 bool ok = true;
566
567 if (has_socketcall_syscall(Arch::arch())) {
568 socketcall_args<Arch> sc_args = { { child_sock,
569 (typename Arch::signed_long)msg.remote_msg().as_int(), 0 } };
570 msg.socketcall = sc_args;
571 }
572
573 remote.task()->write_bytes_helper(remote_buf.get().cast<char>(),
574 sizeof(msg), &msg, &ok);
575
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300576 if (!ok) {
Robert O'Callahanc88aaa72023-02-25 16:21:08 +1300577 ASSERT(remote.task(), errno == ESRCH) << "Error writing " << remote_buf.get()
578 << " in " << remote.task()->tid;
Robert O'Callahan30b72a82021-11-30 17:46:18 +1300579 LOG(debug) << "Failed to write memory";
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300580 return -ESRCH;
581 }
Keno Fischer0ec10682020-04-02 01:03:49 -0400582 int ret = 0;
Robert O'Callahan42cf5bb2020-07-30 15:28:43 +1200583 if (has_socketcall_syscall(Arch::arch())) {
Keno Fischer0ec10682020-04-02 01:03:49 -0400584 ret = remote.syscall(Arch::socketcall, SYS_RECVMSG, msg.remote_sc_args());
Robert O'Callahan42cf5bb2020-07-30 15:28:43 +1200585 } else {
586 ret = remote.syscall(Arch::recvmsg, child_sock, msg.remote_msg(), 0);
Keno Fischer0ec10682020-04-02 01:03:49 -0400587 }
588 if (ret < 0) {
Robert O'Callahan30b72a82021-11-30 17:46:18 +1300589 LOG(debug) << "Failed to recvmsg " << ret;
Keno Fischer0ec10682020-04-02 01:03:49 -0400590 return ret;
591 }
592 int their_fd = remote.task()->read_mem(msg.remote_cmsgdata(), &ok);
593 if (!ok) {
Robert O'Callahan30b72a82021-11-30 17:46:18 +1300594 ASSERT(remote.task(), errno == ESRCH);
595 LOG(debug) << "Failed to read msg";
Keno Fischer0ec10682020-04-02 01:03:49 -0400596 return -ESRCH;
597 }
598 return their_fd;
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300599}
Robert O'Callahan771de352014-10-02 11:52:34 -0400600
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300601#define MAX_FDS_READ 2
602
603// Try to read a single-character message from `sock`. Will collect
604// up to MAX_FDS_READ fds in an SCM_RIGHTS control message and return those
Josh Soref9dd66582023-03-01 04:32:13 -0500605// fds. Returns an empty vector if reading the message fails.
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300606static vector<ScopedFd> maybe_receive_fds(ScopedFd& sock, bool blocking = true) {
607 vector<ScopedFd> ret;
608 struct msghdr msg;
609 memset(&msg, 0, sizeof(msg));
610 char ch;
611 struct iovec iov = { &ch, 1 };
612 msg.msg_iov = &iov;
613 msg.msg_iovlen = 1;
614 char cmsgbuf[(CMSG_SPACE(MAX_FDS_READ * sizeof(int)))];
615 msg.msg_control = cmsgbuf;
616 msg.msg_controllen = sizeof(cmsgbuf);
617 int flags = MSG_CMSG_CLOEXEC;
Yichao Yu8af63e42022-06-26 23:37:38 -0400618 if (!blocking) {
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300619 flags |= MSG_DONTWAIT;
Yichao Yu8af63e42022-06-26 23:37:38 -0400620 }
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300621 if (recvmsg(sock, &msg, flags) < 0) {
622 return ret;
Robert O'Callahan771de352014-10-02 11:52:34 -0400623 }
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300624
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300625 struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
626 if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
627 cmsg->cmsg_type != SCM_RIGHTS) {
628 FATAL() << "Invalid cmsg";
629 }
630 int num_fds = (cmsg->cmsg_len - CMSG_LEN(0))/sizeof(int);
631 for (int i = 0; i < num_fds; i++) {
632 int fd;
633 memcpy(&fd, CMSG_DATA(cmsg) + i*sizeof(int), sizeof(int));
634 DEBUG_ASSERT(fd >= 0);
635 ret.push_back(ScopedFd(fd));
636 }
637 return ret;
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300638}
Robert O'Callahan771de352014-10-02 11:52:34 -0400639
Keno Fischer0ec10682020-04-02 01:03:49 -0400640static void sendmsg_socket(ScopedFd& sock, int fd_to_send)
641{
642 fd_message<NativeArch> msg;
643
644 struct msghdr *msgp = (struct msghdr*)&msg.msg;
645 struct cmsghdr* cmsg = CMSG_FIRSTHDR(msgp);
646 cmsg->cmsg_level = SOL_SOCKET;
647 cmsg->cmsg_type = SCM_RIGHTS;
648 cmsg->cmsg_len = CMSG_LEN(sizeof(fd_to_send));
649 *(int*)CMSG_DATA(cmsg) = fd_to_send;
650
651 if (0 > sendmsg(sock, msgp, 0)) {
652 FATAL() << "Failed to send fd";
653 }
654}
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300655
Robert O'Callahand83c3612023-01-02 20:51:14 +1300656static Task* thread_group_leader_for_fds(Task* t) {
657 for (Task* tt : t->fd_table()->task_set()) {
Robert O'Callahan28feb3b2023-08-25 22:09:52 +1200658 if (tt->tgid() == tt->rec_tid && !tt->seen_ptrace_exit_event()) {
Robert O'Callahand83c3612023-01-02 20:51:14 +1300659 return tt;
660 }
661 }
662 return nullptr;
663}
664
Robert O'Callahan3fca38a2014-11-21 09:58:15 +1300665template <typename Arch> ScopedFd AutoRemoteSyscalls::retrieve_fd_arch(int fd) {
Robert O'Callahand83c3612023-01-02 20:51:14 +1300666 ScopedFd ret;
Robert O'Callahand83c3612023-01-02 20:51:14 +1300667 if (!pid_fd.is_open()) {
Robert O'Callahan0e377962023-07-15 22:41:21 +1200668 // Try to use pidfd_getfd to get the fd without round-tripping to the tracee.
669 // pidfd_getfd requires a threadgroup leader, so find one if we can.
Robert O'Callahand83c3612023-01-02 20:51:14 +1300670 Task* tg_leader_for_fds = thread_group_leader_for_fds(t);
671 if (tg_leader_for_fds) {
672 pid_fd = ScopedFd(::syscall(NativeArch::pidfd_open, tg_leader_for_fds->tid, 0));
673 ASSERT(t, pid_fd.is_open() || errno == ENOSYS)
674 << "Error in pidfd_open errno=" << errno_name(errno);
675 }
676 }
Kyle Hueyf5ab8f12023-07-12 23:52:47 -0700677 if (pid_fd.is_open()) {
Robert O'Callahand83c3612023-01-02 20:51:14 +1300678 ret = ScopedFd(::syscall(NativeArch::pidfd_getfd, pid_fd.get(), fd, 0));
679 if (ret.is_open()) {
680 return ret;
681 }
682 ASSERT(t, errno == ENOSYS) << "Failed in pidfd_getfd errno=" << errno_name(errno);
683 }
684
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300685 // Clear out any pending message in the socket.
686 maybe_receive_fds(task()->session().tracee_socket_receiver_fd(), false);
687
Robert O'Callahanf24e09c2017-08-11 16:36:30 +1200688 long child_syscall_result =
Keno Fischer0ec10682020-04-02 01:03:49 -0400689 child_sendmsg<Arch>(*this, task()->session().tracee_fd_number(), fd);
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300690 if (child_syscall_result == -ESRCH) {
Robert O'Callahand83c3612023-01-02 20:51:14 +1300691 return ret;
Robert O'Callahanfd2a2c32019-03-06 16:52:18 +1300692 }
Robert O'Callahanf24e09c2017-08-11 16:36:30 +1200693 ASSERT(t, child_syscall_result > 0) << "Failed to sendmsg() in tracee; err="
694 << errno_name(-child_syscall_result);
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300695 vector<ScopedFd> fds = maybe_receive_fds(task()->session().tracee_socket_fd());
696 ASSERT(t, !fds.empty()) << "Failed to receive fd";
697 ASSERT(t, fds.size() == 1);
Sidharth Kshatriya9dbc8622023-04-21 11:25:47 +0530698 return std::move(fds[0]);
Robert O'Callahan771de352014-10-02 11:52:34 -0400699}
Robert O'Callahanc55b3552014-11-20 16:53:53 +1300700
701ScopedFd AutoRemoteSyscalls::retrieve_fd(int fd) {
702 RR_ARCH_FUNCTION(retrieve_fd_arch, arch(), fd);
703}
Robert O'Callahan3d5e5802014-12-06 20:38:56 -0600704
Keno Fischer0ec10682020-04-02 01:03:49 -0400705template <typename Arch> int AutoRemoteSyscalls::send_fd_arch(const ScopedFd &our_fd) {
Keno Fischer2e595322020-04-04 02:35:45 -0400706 if (!our_fd.is_open()) {
707 return -EBADF;
708 }
709
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300710 // Clear out any pending message from the socket.
711 maybe_receive_fds(task()->session().tracee_socket_receiver_fd(), false);
712
Keno Fischer2e595322020-04-04 02:35:45 -0400713 LOG(debug) << "Sending fd " << our_fd.get() << " via socket fd " << task()->session().tracee_socket_fd().get();
Keno Fischer0ec10682020-04-02 01:03:49 -0400714 sendmsg_socket(task()->session().tracee_socket_fd(), our_fd.get());
715
716 long child_syscall_result =
717 child_recvmsg<Arch>(*this, task()->session().tracee_fd_number());
Robert O'Callahan2c9833f2023-02-04 13:59:47 +1300718 // If the child died before reading the message from the socket,
719 // the message will still be in the socket buffer and will be received
720 // the next time we try to send something to a tracee. That's why
721 // before using tracee_socket_receiver_fd we need to drain up to one message
722 // from it.
723 ASSERT(t, child_syscall_result >= 0 || child_syscall_result == -ESRCH)
724 << "Failed to recvmsg() in tracee; err=" << errno_name(-child_syscall_result);
Keno Fischer0ec10682020-04-02 01:03:49 -0400725 return child_syscall_result;
726}
727
728int AutoRemoteSyscalls::send_fd(const ScopedFd &our_fd) {
729 RR_ARCH_FUNCTION(send_fd_arch, arch(), our_fd);
730}
731
Robert O'Callahane9177282022-03-28 11:59:09 +1300732void AutoRemoteSyscalls::infallible_close_syscall_if_alive(int child_fd) {
733 infallible_syscall_if_alive(syscall_number_for_close(arch()), child_fd);
734}
735
Robert O'Callahanb422fed2022-03-26 12:36:09 +1300736int AutoRemoteSyscalls::infallible_send_fd_if_alive(const ScopedFd &our_fd) {
Robert O'Callahan768e3482022-03-08 23:14:25 +1300737 int child_fd = send_fd(our_fd);
Robert O'Callahanb422fed2022-03-26 12:36:09 +1300738 ASSERT(t, child_fd >= 0 || (child_fd == -ESRCH && !t->session().is_replaying()))
739 << "Failed to send fd; err=" << errno_name(-child_fd);
Robert O'Callahan768e3482022-03-08 23:14:25 +1300740 return child_fd;
741}
742
Robert O'Callahanb8566d22021-11-30 17:51:53 +1300743void AutoRemoteSyscalls::infallible_send_fd_dup(const ScopedFd& our_fd, int dup_to, int dup3_flags) {
Robert O'Callahanb422fed2022-03-26 12:36:09 +1300744 int remote_fd = infallible_send_fd_if_alive(our_fd);
745 ASSERT(t, remote_fd >= 0);
Keno Fischer8208c542020-05-06 22:02:22 -0400746 if (remote_fd != dup_to) {
747 long ret = infallible_syscall(syscall_number_for_dup3(arch()), remote_fd,
Robert O'Callahanb8566d22021-11-30 17:51:53 +1300748 dup_to, dup3_flags);
Keno Fischer8208c542020-05-06 22:02:22 -0400749 ASSERT(task(), ret == dup_to);
Robert O'Callahane9177282022-03-28 11:59:09 +1300750 infallible_close_syscall_if_alive(remote_fd);
Keno Fischer8208c542020-05-06 22:02:22 -0400751 }
752}
753
Robert O'Callahan4bd24672022-03-26 14:23:23 +1300754remote_ptr<void> AutoRemoteSyscalls::infallible_mmap_syscall_if_alive(
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200755 remote_ptr<void> addr, size_t length, int prot, int flags, int child_fd,
Yichao Yu1b74f962022-04-13 18:27:13 -0400756 uint64_t offset_bytes) {
757 ASSERT(t, offset_bytes % page_size() == 0)
758 << "mmap offset (" << offset_bytes << ") must be multiple of page size ("
759 << page_size() << ")";
Robert O'Callahan3ca78b92015-08-03 16:55:22 +1200760 // The first syscall argument is called "arg 1", so
761 // our syscall-arg-index template parameter starts
762 // with "1".
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200763 remote_ptr<void> ret =
764 has_mmap2_syscall(arch())
Robert O'Callahan4bd24672022-03-26 14:23:23 +1300765 ? infallible_syscall_ptr_if_alive(syscall_number_for_mmap2(arch()), addr,
766 length, prot, flags, child_fd,
Yichao Yu1b74f962022-04-13 18:27:13 -0400767 (off_t)offset_bytes / 4096)
Robert O'Callahan4bd24672022-03-26 14:23:23 +1300768 : infallible_syscall_ptr_if_alive(syscall_number_for_mmap(arch()), addr,
769 length, prot, flags, child_fd,
Yichao Yu1b74f962022-04-13 18:27:13 -0400770 offset_bytes);
Robert O'Callahan77644132023-08-24 09:59:14 +1200771 if (flags & MAP_FIXED) {
772 if (ret) {
773 ASSERT(t, addr == ret) << "MAP_FIXED at " << addr << " but got " << ret;
774 } else {
775 if (!t->vm()->has_mapping(addr)) {
776 KernelMapping km = t->vm()->read_kernel_mapping(t, addr);
777 if (km.size()) {
778 ASSERT(t, km.start() == addr && km.size() == ceil_page_size(length));
779 // The mapping was created. Pretend this call succeeded.
780 ret = addr;
781 }
782 }
783 }
Robert O'Callahan3d5e5802014-12-06 20:38:56 -0600784 }
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200785 return ret;
786}
787
Robert O'Callahan77644132023-08-24 09:59:14 +1200788bool AutoRemoteSyscalls::infallible_munmap_syscall_if_alive(
789 remote_ptr<void> addr, size_t length) {
790 long ret = infallible_syscall_if_alive(syscall_number_for_munmap(arch()),
791 addr, length);
792 if (ret) {
793 if (t->vm()->has_mapping(addr)) {
794 KernelMapping km = t->vm()->read_kernel_mapping(t, addr);
795 if (!km.size()) {
796 // The unmap happened but the task must have died before
797 // reporting the status.
798 ret = 0;
799 }
800 }
801 }
802 return !ret;
803}
804
Robert O'Callahan128123a2016-04-26 17:34:44 +1200805int64_t AutoRemoteSyscalls::infallible_lseek_syscall(int fd, int64_t offset,
806 int whence) {
807 switch (arch()) {
808 case x86: {
809 AutoRestoreMem mem(*this, &offset, sizeof(int64_t));
810 infallible_syscall(syscall_number_for__llseek(arch()), fd, offset >> 32,
811 offset, mem.get(), whence);
812 return t->read_mem(mem.get().cast<int64_t>());
813 }
814 case x86_64:
Yichao Yu76991e02022-04-29 17:04:40 -0400815 case aarch64:
Robert O'Callahan128123a2016-04-26 17:34:44 +1200816 return infallible_syscall(syscall_number_for_lseek(arch()), fd, offset,
817 whence);
818 default:
Robert O'Callahanf24e09c2017-08-11 16:36:30 +1200819 ASSERT(t, false) << "Unknown arch";
Robert O'Callahan128123a2016-04-26 17:34:44 +1200820 return -1;
821 }
822}
823
Keno Fischer851c5dc2020-05-16 19:08:45 -0400824void AutoRemoteSyscalls::check_syscall_result(long ret, int syscallno, bool allow_death) {
Robert O'Callahan272c2f42021-09-01 12:00:28 +1200825 if (word_size(t->arch()) == 4) {
826 // Sign-extend ret because it can be a 32-bit negative errno
827 ret = (int)ret;
828 }
Robert O'Callahane6843582022-03-26 12:00:33 +1300829 if (ret == -ESRCH && allow_death && !t->session().is_replaying()) {
Keno Fischer851c5dc2020-05-16 19:08:45 -0400830 return;
831 }
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200832 if (-4096 < ret && ret < 0) {
833 string extra_msg;
834 if (is_open_syscall(syscallno, arch())) {
835 extra_msg = " opening " + t->read_c_str(t->regs().arg1());
836 } else if (is_openat_syscall(syscallno, arch())) {
837 extra_msg = " opening " + t->read_c_str(t->regs().arg2());
Robert O'Callahan79752d02023-08-04 14:53:08 +1200838 } else if (is_mremap_syscall(syscallno, arch()) ||
839 is_mmap_syscall(syscallno, arch())) {
840 AddressSpace::print_process_maps(t);
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200841 }
842 ASSERT(t, false) << "Syscall " << syscall_name(syscallno, arch())
Robert O'Callahan8f07c072023-03-14 21:15:54 +1300843 << " failed with errno " << errno_name(-ret) << extra_msg
844 << " arg1=0x" << hex << t->regs().arg1() << " arg2=0x" << t->regs().arg2()
845 << " arg3=0x" << hex << t->regs().arg3() << " arg4=0x" << t->regs().arg4()
846 << " arg5=0x" << hex << t->regs().arg5() << " arg6=0x" << t->regs().arg6();
Robert O'Callahanb7e40522015-09-15 23:59:32 +1200847 }
Robert O'Callahan3d5e5802014-12-06 20:38:56 -0600848}
Robert O'Callahan3ce49c62016-03-22 18:31:02 +1300849
Keno Fischer114706b2020-04-04 02:30:59 -0400850void AutoRemoteSyscalls::finish_direct_mmap(
851 remote_ptr<void> rec_addr, size_t length,
852 int prot, int flags,
853 const string& backing_file_name,
854 int backing_file_open_flags,
Yichao Yu1b74f962022-04-13 18:27:13 -0400855 off64_t backing_offset_bytes,
Keno Fischer114706b2020-04-04 02:30:59 -0400856 struct stat& real_file, string& real_file_name) {
857 int fd;
858
859 LOG(debug) << "directly mmap'ing " << length << " bytes of "
Yichao Yu1b74f962022-04-13 18:27:13 -0400860 << backing_file_name << " at offset "
861 << HEX(backing_offset_bytes);
Keno Fischer114706b2020-04-04 02:30:59 -0400862
863 ASSERT(task(), !(flags & MAP_GROWSDOWN));
864
865 /* Open in the tracee the file that was mapped during
866 * recording. */
867 {
868 AutoRestoreMem child_str(*this, backing_file_name.c_str());
Robert O'Callahan4bc38772023-05-16 23:18:03 +1200869 if (word_size(t->arch()) == 4) {
870 backing_file_open_flags |= RR_LARGEFILE_32;
871 }
Keno Fischerae5a0752020-05-17 00:08:39 -0400872 fd = infallible_syscall(syscall_number_for_openat(arch()), -1,
Keno Fischer114706b2020-04-04 02:30:59 -0400873 child_str.get().as_int(),
Robert O'Callahan4bc38772023-05-16 23:18:03 +1200874 backing_file_open_flags);
Keno Fischer114706b2020-04-04 02:30:59 -0400875 }
876 /* And mmap that file. */
Robert O'Callahan4bd24672022-03-26 14:23:23 +1300877 infallible_mmap_syscall_if_alive(rec_addr, length,
Keno Fischer114706b2020-04-04 02:30:59 -0400878 /* (We let SHARED|WRITEABLE
879 * mappings go through while
880 * they're not handled properly,
881 * but we shouldn't do that.) */
882 prot, (flags & ~MAP_SYNC) | MAP_FIXED, fd,
883 /* MAP_SYNC is used to request direct mapping
884 * (DAX) from the filesystem for persistent
885 * memory devices (requires
886 * MAP_SHARED_VALIDATE). Drop it for the
887 * backing file. */
Yichao Yu1b74f962022-04-13 18:27:13 -0400888 backing_offset_bytes);
Keno Fischer114706b2020-04-04 02:30:59 -0400889
890 // While it's open, grab the link reference.
891 real_file = task()->stat_fd(fd);
892 real_file_name = task()->file_name_of_fd(fd);
893
894 /* Don't leak the tmp fd. The mmap doesn't need the fd to
895 * stay open. */
Robert O'Callahane9177282022-03-28 11:59:09 +1300896 infallible_close_syscall_if_alive(fd);
Keno Fischer114706b2020-04-04 02:30:59 -0400897}
898
899
Robert O'Callahan3ce49c62016-03-22 18:31:02 +1300900} // namespace rr