/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
#include "Session.h"
#include <linux/limits.h>
#include <linux/unistd.h>
#include <syscall.h>
#include <sys/wait.h>
#include <algorithm>
#include <limits>
#include "rr/rr.h"
#include "AutoRemoteSyscalls.h"
#include "EmuFs.h"
#include "Flags.h"
#include "PerfCounters.h"
#include "RecordSession.h"
#include "RecordTask.h"
#include "Task.h"
#include "ThreadGroup.h"
#include "core.h"
#include "kernel_metadata.h"
#include "log.h"
#include "util.h"
#include "preload/preload_interface.h"
using namespace std;
namespace rr {
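// Pending state for a session clone produced by copy_state_to(). Only the
// cheap work is done eagerly; the state captured here is consumed later by
// finish_initializing() to reconstruct the remaining tasks.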
struct Session::CloneCompletion {
struct AddressSpaceClone {
Task* clone_leader;
Task::CapturedState clone_leader_state;
vector<Task::CapturedState> member_states;
vector<pair<remote_ptr<void>, vector<uint8_t>>> captured_memory;
};
vector<AddressSpaceClone> address_spaces;
Task::ClonedFdTables cloned_fd_tables;
};
Session::Session()
: tracee_socket(make_shared<ScopedFd>()),
tracee_socket_receiver(make_shared<ScopedFd>()),
tracee_socket_fd_number(0),
next_task_serial_(1),
rrcall_base_(RR_CALL_BASE),
syscallbuf_fds_disabled_size_(SYSCALLBUF_FDS_DISABLED_SIZE),
syscall_seccomp_ordering_(PTRACE_SYSCALL_BEFORE_SECCOMP_UNKNOWN),
ticks_semantics_(PerfCounters::default_ticks_semantics()),
done_initial_exec_(false),
visible_execution_(true) {
LOG(debug) << "Session " << this << " created";
}
Session::~Session() {
kill_all_tasks();
LOG(debug) << "Session " << this << " destroyed";
for (auto tg : thread_group_map_) {
tg.second->forget_session();
}
}
Session::Session(const Session& other) {
statistics_ = other.statistics_;
next_task_serial_ = other.next_task_serial_;
done_initial_exec_ = other.done_initial_exec_;
rrcall_base_ = other.rrcall_base_;
syscallbuf_fds_disabled_size_ = other.syscallbuf_fds_disabled_size_;
visible_execution_ = other.visible_execution_;
tracee_socket = other.tracee_socket;
tracee_socket_receiver = other.tracee_socket_receiver;
tracee_socket_fd_number = other.tracee_socket_fd_number;
ticks_semantics_ = other.ticks_semantics_;
original_affinity_ = other.original_affinity_;
}
void Session::on_create(ThreadGroup* tg) { thread_group_map_[tg->tguid()] = tg; }
void Session::on_destroy(ThreadGroup* tg) {
thread_group_map_.erase(tg->tguid());
}
void Session::post_exec() {
/* We just saw a successful exec(), so from now on we know
* that the address space layout for the replay tasks will
* (should!) be the same as for the recorded tasks. So we can
* start validating registers at events. */
assert_fully_initialized();
if (done_initial_exec_) {
return;
}
done_initial_exec_ = true;
DEBUG_ASSERT(tasks().size() == 1);
Task* t = tasks().begin()->second;
t->flush_inconsistent_state();
spawned_task_error_fd_.close();
}
AddressSpace::shr_ptr Session::create_vm(Task* t, const std::string& exe,
uint32_t exec_count) {
assert_fully_initialized();
AddressSpace::shr_ptr as(new AddressSpace(t, exe, exec_count));
as->insert_task(t);
vm_map[as->uid()] = as.get();
return as;
}
AddressSpace::shr_ptr Session::clone(Task* t, AddressSpace::shr_ptr vm) {
assert_fully_initialized();
// If vm already belongs to our session this is a fork; otherwise it's
// a session-clone.
AddressSpace::shr_ptr as;
if (this == vm->session()) {
as = AddressSpace::shr_ptr(
new AddressSpace(this, *vm, t->rec_tid, t->tuid().serial(), 0));
} else {
as = AddressSpace::shr_ptr(new AddressSpace(this, *vm, vm->uid().tid(),
vm->uid().serial(),
vm->uid().exec_count()));
}
vm_map[as->uid()] = as.get();
return as;
}
ThreadGroup::shr_ptr Session::create_initial_tg(Task* t) {
ThreadGroup::shr_ptr tg(
new ThreadGroup(this, nullptr, t->rec_tid, t->rec_tid,
t->tuid().serial()));
tg->insert_task(t);
return tg;
}
ThreadGroup::shr_ptr Session::clone(Task* t, ThreadGroup::shr_ptr tg) {
assert_fully_initialized();
// If tg already belongs to our session this is a fork creating a new
// thread group; otherwise it's a session-clone of an existing thread group.
if (this == tg->session()) {
return ThreadGroup::shr_ptr(
new ThreadGroup(this, tg.get(), t->rec_tid,
t->own_namespace_tid(), t->tuid().serial()));
}
ThreadGroup* parent =
tg->parent() ? find_thread_group(tg->parent()->tguid()) : nullptr;
return ThreadGroup::shr_ptr(
new ThreadGroup(this, parent, tg->tgid,
t->own_namespace_tid(), tg->tguid().serial()));
}
Task* Session::new_task(pid_t tid, pid_t rec_tid, uint32_t serial,
SupportedArch a, const std::string&) {
return new Task(*this, tid, rec_tid, serial, a);
}
vector<AddressSpace*> Session::vms() const {
vector<AddressSpace*> result;
for (auto& vm : vm_map) {
result.push_back(vm.second);
}
return result;
}
Task* Session::clone(Task* p, int flags, remote_ptr<void> stack,
remote_ptr<void> tls, remote_ptr<int> cleartid_addr,
pid_t new_tid, pid_t new_rec_tid) {
assert_fully_initialized();
Task* c = p->clone(Task::TRACEE_CLONE, flags, stack, tls, cleartid_addr,
new_tid, new_rec_tid, next_task_serial());
on_create(c);
return c;
}
Task* Session::find_task(pid_t rec_tid) const {
finish_initializing();
auto it = tasks().find(rec_tid);
return tasks().end() != it ? it->second : nullptr;
}
Task* Session::find_task(const TaskUid& tuid) const {
Task* t = find_task(tuid.tid());
return t && t->tuid() == tuid ? t : nullptr;
}
ThreadGroup* Session::find_thread_group(const ThreadGroupUid& tguid) const {
finish_initializing();
auto it = thread_group_map_.find(tguid);
if (thread_group_map_.end() == it) {
return nullptr;
}
return it->second;
}
ThreadGroup* Session::find_thread_group(pid_t pid) const {
finish_initializing();
for (auto& tg : thread_group_map_) {
if (tg.first.tid() == pid) {
return tg.second;
}
}
return nullptr;
}
AddressSpace* Session::find_address_space(const AddressSpaceUid& vmuid) const {
finish_initializing();
auto it = vm_map.find(vmuid);
if (vm_map.end() == it) {
return nullptr;
}
return it->second;
}
void Session::kill_all_tasks() {
LOG(debug) << "Killing all tasks ...";
for (int pass = 0; pass <= 1; ++pass) {
/* We delete tasks in two passes. First, we kill
* every non-thread-group-leader, then we kill every group leader.
* Linux expects thread group leaders to survive until the last
* member of the thread group has exited, so we accommodate that.
*/
for (auto& v : task_map) {
Task* t = v.second;
bool is_group_leader = t->tid == t->real_tgid();
if (pass == 0 ? is_group_leader : !is_group_leader) {
continue;
}
t->kill();
}
}
while (!task_map.empty()) {
// Deleting a Task removes it from task_map (see on_destroy(Task*)).
Task* t = task_map.rbegin()->second;
delete t;
}
DEBUG_ASSERT(task_map.empty());
}
void Session::on_destroy(AddressSpace* vm) {
DEBUG_ASSERT(vm->task_set().size() == 0);
DEBUG_ASSERT(vm_map.count(vm->uid()) == 1);
vm_map.erase(vm->uid());
}
void Session::on_destroy(Task* t) {
DEBUG_ASSERT(task_map.count(t->rec_tid) == 1);
task_map.erase(t->rec_tid);
}
void Session::on_create(Task* t) { task_map[t->rec_tid] = t; }
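// Create the pipe used to report errors from a freshly spawned tracee.
// The read end is kept in spawned_task_error_fd_ (consumed by
// read_spawned_task_error()); the returned write end is for the spawned task.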
ScopedFd Session::create_spawn_task_error_pipe() {
int fds[2];
if (0 != pipe2(fds, O_CLOEXEC)) {
FATAL();
}
spawned_task_error_fd_ = ScopedFd(fds[0]);
return ScopedFd(fds[1]);
}
string Session::read_spawned_task_error() const {
char buf[1024] = "";
// Leave room for the NUL terminator written below.
ssize_t len = read(spawned_task_error_fd_, buf, sizeof(buf) - 1);
if (len <= 0) {
return string();
}
buf[len] = 0;
return string(buf, len);
}
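// Examine a task that just stopped and build a BreakStatus describing what
// a debugger should be told: a software-breakpoint hit, a completed
// single-step, watchpoint changes, or a pending signal.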
BreakStatus Session::diagnose_debugger_trap(Task* t, RunCommand run_command) {
assert_fully_initialized();
BreakStatus break_status;
break_status.task_context = TaskContext(t);
int stop_sig = t->stop_sig();
if (!stop_sig) {
// This can happen if we were INCOMPLETE because we're close to
// the ticks_target.
return break_status;
}
if (SIGTRAP != stop_sig) {
BreakpointType pending_bp = t->vm()->get_breakpoint_type_at_addr(t->ip());
if (BKPT_USER == pending_bp) {
// A signal was raised /just/ before a trap
// instruction for a SW breakpoint. This is
// observed when debuggers write trap
// instructions into no-exec memory, for
// example the stack.
//
// We report the breakpoint before any signal
// that might have been raised in order to let
// the debugger do something at the breakpoint
// insn; possibly clearing the breakpoint and
// changing the $ip. Otherwise, we expect the
// debugger to clear the breakpoint and resume
// execution, which should raise the original
// signal again.
LOG(debug) << "hit debugger breakpoint BEFORE ip " << t->ip() << " for "
<< t->get_siginfo();
break_status.breakpoint_hit = true;
} else if (stop_sig && stop_sig != PerfCounters::TIME_SLICE_SIGNAL) {
break_status.signal =
unique_ptr<siginfo_t>(new siginfo_t(t->get_siginfo()));
LOG(debug) << "Got signal " << *break_status.signal << " (expected sig "
<< stop_sig << ")";
break_status.signal->si_signo = stop_sig;
}
} else {
TrapReasons trap_reasons = t->compute_trap_reasons();
// Conceal any internal singlestepping
if (trap_reasons.singlestep && is_singlestep(run_command)) {
LOG(debug) << " finished debugger stepi";
break_status.singlestep_complete = true;
}
if (trap_reasons.watchpoint) {
check_for_watchpoint_changes(t, break_status);
}
if (trap_reasons.breakpoint) {
BreakpointType retired_bp =
t->vm()->get_breakpoint_type_for_retired_insn(t->ip());
if (BKPT_USER == retired_bp) {
// SW breakpoint: $ip is just past the
// breakpoint instruction. Move $ip back
// right before it.
t->move_ip_before_breakpoint();
break_status.breakpoint_hit = true;
LOG(debug) << "hit debugger breakpoint at ip " << t->ip();
}
}
}
return break_status;
}
void Session::check_for_watchpoint_changes(Task* t, BreakStatus& break_status) {
assert_fully_initialized();
break_status.watchpoints_hit = t->vm()->consume_watchpoint_changes();
}
void Session::assert_fully_initialized() const {
DEBUG_ASSERT(!clone_completion && "Session not fully initialized");
}
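// Complete a deferred session clone, if one is pending: recreate each clone
// leader's syscallbuf mappings, restore captured memory, then clone the
// remaining member tasks into their address spaces and restore their state.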
void Session::finish_initializing() const {
if (!clone_completion) {
return;
}
Session* self = const_cast<Session*>(this);
for (auto& asleader : clone_completion->address_spaces) {
{
AutoRemoteSyscalls remote(asleader.clone_leader);
for (const auto& m : asleader.clone_leader->vm()->maps()) {
// Creating this mapping was delayed in capture_state for performance
if (m.flags & AddressSpace::Mapping::IS_SYSCALLBUF) {
self->recreate_shared_mmap(remote, m);
}
}
for (auto& mem : asleader.captured_memory) {
asleader.clone_leader->write_bytes_helper(mem.first, mem.second.size(),
mem.second.data());
}
for (auto& asmember : asleader.member_states) {
auto it = thread_group_map_.find(asmember.tguid);
ThreadGroup::shr_ptr tg(it == thread_group_map_.end() ? nullptr :
it->second->shared_from_this());
if (!tg) {
tg = std::make_shared<ThreadGroup>
(self, nullptr, asmember.tguid.tid(), asmember.tguid.tid(), asmember.tguid.serial());
}
Task* t_clone = Task::os_clone_into(
asmember, remote, clone_completion->cloned_fd_tables, tg);
self->on_create(t_clone);
t_clone->copy_state(asmember);
}
}
asleader.clone_leader->copy_state(asleader.clone_leader_state);
}
self->clone_completion = nullptr;
}
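// Remap a shared mapping backed by an emulated file so that it is backed by
// the corresponding file in dest_emu_fs instead of emu_fs, cloning the
// backing EmuFile on first use. The segment must end up at the same address.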
static void remap_shared_mmap(AutoRemoteSyscalls& remote, EmuFs& emu_fs,
EmuFs& dest_emu_fs,
const AddressSpace::Mapping& m_in_mem) {
AddressSpace::Mapping m = m_in_mem;
LOG(debug) << " remapping shared region at " << m.map.start() << "-"
<< m.map.end();
remote.infallible_syscall(syscall_number_for_munmap(remote.arch()),
m.map.start(), m.map.size());
EmuFile::shr_ptr emu_file;
if (dest_emu_fs.has_file_for(m.recorded_map)) {
emu_file = dest_emu_fs.at(m.recorded_map);
} else {
emu_file = dest_emu_fs.clone_file(emu_fs.at(m.recorded_map));
}
// TODO: this duplicates some code in replay_syscall.cc, but
// it's somewhat nontrivial to factor that code out.
int remote_fd = remote.infallible_send_fd_if_alive(emu_file->fd());
if (remote_fd < 0) {
if (remote.task()->vm()->task_set().size() > remote.task()->thread_group()->task_set().size()) {
// XXX not sure how to handle the case where the tracee died after
// we unmapped the area
FATAL() << "Unexpected task death leaving this address space in a bad state";
}
return;
}
struct stat real_file = remote.task()->stat_fd(remote_fd);
string real_file_name = remote.task()->file_name_of_fd(remote_fd);
// XXX this condition is x86/x64-specific, I imagine.
// The remapped segment *must* be remapped at the same address,
// or else many things will go haywire.
auto ret = remote.infallible_mmap_syscall_if_alive(m.map.start(), m.map.size(), m.map.prot(),
(m.map.flags() & ~MAP_ANONYMOUS) | MAP_FIXED,
remote_fd,
m.map.file_offset_bytes());
if (!ret) {
if (remote.task()->vm()->task_set().size() > remote.task()->thread_group()->task_set().size()) {
// XXX not sure how to handle the case where the tracee died after
// we unmapped the area
FATAL() << "Unexpected task death leaving this address space in a bad state";
}
return;
}
// We update the AddressSpace mapping too, since that tracks the real file
// name and we need to update that.
remote.task()->vm()->map(
remote.task(), m.map.start(), m.map.size(), m.map.prot(), m.map.flags(),
m.map.file_offset_bytes(), real_file_name, real_file.st_dev,
real_file.st_ino, nullptr, &m.recorded_map, emu_file);
remote.infallible_close_syscall_if_alive(remote_fd);
}
/*static*/ const char* Session::rr_mapping_prefix() { return "/rr-shared-"; }
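// Create a memory segment shared between rr and the tracee: an unlinked file
// under tmp_dir() is mapped locally read-write, its fd is sent to the tracee
// over the fd socket, and the tracee maps it at required_child_addr (or at a
// freshly chosen free address). Returns an empty KernelMapping if the tracee
// died mid-operation.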
KernelMapping Session::create_shared_mmap(
AutoRemoteSyscalls& remote, size_t size, remote_ptr<void> required_child_addr,
const char* name, int tracee_prot, int tracee_flags,
MonitoredSharedMemory::shr_ptr monitored) {
Task* t = remote.task();
static int nonce = 0;
// Create the segment we'll share with the tracee.
char path[PATH_MAX];
snprintf(path, sizeof(path) - 1, "%s%s%s-%d-%d", tmp_dir(),
rr_mapping_prefix(), name, t->real_tgid(), nonce++);
ScopedFd shmem_fd(path, O_CREAT | O_EXCL | O_RDWR);
ASSERT(t, shmem_fd.is_open());
/* Remove the fs name so that we don't have to worry about
* cleaning up this segment in error conditions. */
unlink(path);
void* map_addr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
MAP_SHARED, shmem_fd, 0);
if (map_addr == MAP_FAILED) {
FATAL() << "Failed to mmap shmem region";
}
resize_shmem_segment(shmem_fd, size);
remote_ptr<void> child_map_addr = required_child_addr;
if (child_map_addr.is_null()) {
if (t->session().is_recording() &&
static_cast<RecordTask*>(t)->enable_chaos_memory_allocations()) {
child_map_addr = t->vm()->chaos_mode_find_free_memory(static_cast<RecordTask*>(t),
size, nullptr);
} else {
child_map_addr = t->vm()->find_free_memory(t, size, RR_PAGE_ADDR,
AddressSpace::FindFreeMemoryPolicy::USE_LAST_FREE_HINT);
if (!child_map_addr) {
FATAL() << "Can't find free memory for shared mmap";
}
}
}
struct stat st;
ASSERT(t, 0 == ::fstat(shmem_fd, &st));
int flags = MAP_SHARED;
if (!required_child_addr.is_null()) {
flags |= MAP_FIXED;
}
int child_shmem_fd = remote.infallible_send_fd_if_alive(shmem_fd);
if (child_shmem_fd < 0) {
return KernelMapping();
}
LOG(debug) << "created shmem segment " << path;
// Map the segment in ours and the tracee's address spaces.
remote_ptr<void> addr = remote.infallible_mmap_syscall_if_alive(
child_map_addr, size, tracee_prot, flags | MAP_FIXED, child_shmem_fd, 0);
if (!addr) {
// tracee unexpectedly died.
// We leak the fd; cleaning it up is probably impossible/unnecessary.
return KernelMapping();
}
// Note the mapping after we successfully created it in the child.
// If the child mapping fails for some reason (e.g. SIGKILL) we still
// want our cache to be correct (and not contain the mapping).
KernelMapping km = t->vm()->map(
t, child_map_addr, size, tracee_prot, flags | tracee_flags, 0,
path, st.st_dev, st.st_ino, nullptr, nullptr, nullptr, map_addr,
std::move(monitored));
remote.infallible_close_syscall_if_alive(child_shmem_fd);
return km;
}
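// Example (hypothetical path): a segment created as
// "/tmp/rr-shared-syscallbuf-1234-7" yields "syscallbuf", i.e. the part
// between rr_mapping_prefix() and the trailing "-<tgid>-<nonce>".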
static char* extract_name(char* name_buffer, size_t buffer_size) {
// Recover the name that was originally chosen by finding the part of the
// name between rr_mapping_prefix and the -%d-%d at the end.
char* path_start = strstr(name_buffer, Session::rr_mapping_prefix());
DEBUG_ASSERT(path_start &&
"Passed something to create_shared_mmap that"
" wasn't a mapping shared between rr and the tracee?");
size_t prefix_len = path_start - name_buffer;
buffer_size -= prefix_len;
name_buffer += prefix_len;
char* name_end = name_buffer + strnlen(name_buffer, buffer_size);
char* name_start = name_buffer + strlen(Session::rr_mapping_prefix());
int hyphens_seen = 0;
while (name_end > name_start) {
--name_end;
if (*name_end == '-') {
++hyphens_seen;
} else if (*name_end == '/') {
DEBUG_ASSERT(false &&
"Passed something to create_shared_mmap that"
" wasn't a mapping shared between rr and the tracee?");
}
if (hyphens_seen == 2) {
break;
}
}
DEBUG_ASSERT(hyphens_seen == 2);
*name_end = '\0';
return name_start;
}
const AddressSpace::Mapping Session::recreate_shared_mmap(
AutoRemoteSyscalls& remote, const AddressSpace::Mapping& m,
PreserveContents preserve, MonitoredSharedMemory::shr_ptr monitored) {
char name[PATH_MAX];
strncpy(name, m.map.fsname().c_str(), sizeof(name) - 1);
name[sizeof(name) - 1] = 0;
uint32_t flags = m.flags;
size_t size = m.map.size();
void* preserved_data = preserve == PRESERVE_CONTENTS ? m.local_addr : nullptr;
if (preserved_data) {
remote.task()->vm()->detach_local_mapping(m.map.start());
}
remote_ptr<void> new_addr =
create_shared_mmap(remote, m.map.size(), m.map.start(),
extract_name(name, sizeof(name)), m.map.prot(), 0,
std::move(monitored))
.start();
AddressSpace::Mapping new_map;
if (new_addr) {
// m may be invalid now
remote.task()->vm()->mapping_flags_of(new_addr) = flags;
new_map = remote.task()->vm()->mapping_of(new_addr);
if (preserved_data) {
memcpy(new_map.local_addr, preserved_data, size);
munmap(preserved_data, size);
}
}
return new_map;
}
AddressSpace::Mapping Session::steal_mapping(
AutoRemoteSyscalls& remote, const AddressSpace::Mapping& m,
MonitoredSharedMemory::shr_ptr monitored) {
// We include the full path of the original mapping in the name of the
// shared mapping, replacing slashes with dashes.
char name[PATH_MAX - 40];
strncpy(name, m.map.fsname().c_str(), sizeof(name)-1);
name[sizeof(name) - 1] = '\0';
for (char* ptr = name; *ptr != '\0'; ++ptr) {
if (*ptr == '/') {
*ptr = '-';
}
}
// Now create the new mapping in its place
remote_ptr<void> start = m.map.start();
size_t sz = m.map.size();
const AddressSpace::Mapping& new_m = remote.task()->vm()->mapping_of(
create_shared_mmap(remote, sz, start, name, m.map.prot(),
m.map.flags() & (MAP_GROWSDOWN | MAP_STACK),
std::move(monitored))
.start());
return new_m;
}
// Replace a MAP_PRIVATE segment by one that is shared between rr and the
// tracee.
void Session::make_private_shared(AutoRemoteSyscalls& remote,
const AddressSpace::Mapping m) {
if (!(m.map.flags() & MAP_PRIVATE)) {
return;
}
// Find a place to map the current segment to temporarily
remote_ptr<void> start = m.map.start();
size_t sz = m.map.size();
remote_ptr<void> free_mem = remote.task()->vm()->find_free_memory(remote.task(), sz);
remote.infallible_syscall(syscall_number_for_mremap(remote.arch()), start, sz,
sz, MREMAP_MAYMOVE | MREMAP_FIXED, free_mem);
remote.task()->vm()->remap(remote.task(), start, sz, free_mem, sz,
MREMAP_MAYMOVE | MREMAP_FIXED);
// AutoRemoteSyscalls may have gotten unlucky and picked the old stack
// segment as its scratch space, so reevaluate that choice.
AutoRemoteSyscalls remote2(remote.task());
AddressSpace::Mapping new_m = steal_mapping(remote2, m);
if (!new_m.local_addr) {
return;
}
// Now copy over the contents. Since we can't just call memcpy in the
// inferior, copy directly from the remote private mapping into the local
// view of the shared mapping. We use the fallible read method because the
// mapping may extend past the end of the backing file; an infallible read
// of the full size would fail on the resulting short read.
remote2.task()->read_bytes_fallible(free_mem, sz, new_m.local_addr);
// Finally unmap the original segment
remote2.infallible_syscall(syscall_number_for_munmap(remote.arch()), free_mem,
sz);
remote.task()->vm()->unmap(remote.task(), free_mem, sz);
}
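// Capture only the meaningful bytes of a syscallbuf mapping: normally the
// header plus num_rec_bytes of records, or the entire mapping if the buffer
// is locked, since an incomplete record may follow the recorded bytes.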
static vector<uint8_t> capture_syscallbuf(const AddressSpace::Mapping& m,
Task* clone_leader) {
remote_ptr<uint8_t> start = m.map.start().cast<uint8_t>();
auto syscallbuf_hdr = start.cast<struct syscallbuf_hdr>();
size_t data_size;
if (clone_leader->read_mem(REMOTE_PTR_FIELD(syscallbuf_hdr, locked))) {
// There may be an incomplete syscall record after num_rec_bytes that
// we need to capture here. We don't know how big that record is,
// so just record the entire buffer. This should not be common.
data_size = m.map.size();
} else {
data_size = clone_leader->read_mem(
REMOTE_PTR_FIELD(syscallbuf_hdr, num_rec_bytes)) +
sizeof(struct syscallbuf_hdr);
}
return clone_leader->read_mem(start, data_size);
}
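// Return the clone of task_to_clone's fd table, creating it on first use.
// Keying by the original table's address ensures that tasks sharing an
// FdTable in the source session also share the clone in the destination.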
static FdTable::shr_ptr& get_or_clone_fd_table(
Task::ClonedFdTables& existing_clones, Task* task_to_clone) {
auto original_fd_table = task_to_clone->fd_table();
FdTable::shr_ptr& existing_clone =
existing_clones[uintptr_t(original_fd_table.get())];
if (!existing_clone) {
existing_clone = original_fd_table->clone();
}
return existing_clone;
}
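// Clone this session's tasks into dest. Only one "clone leader" per address
// space is forked here; the state of every other task (and of syscallbuf
// contents) is captured into dest.clone_completion, to be replayed later by
// dest's finish_initializing(). A sketch of the intended flow, assuming a
// caller holding both sessions (names here are illustrative):
//
//   src.copy_state_to(dest, src_emu_fs, dest_emu_fs);
//   // ... later, any accessor completes the clone lazily:
//   Task* t = dest.find_task(some_tid); // calls finish_initializing()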
void Session::copy_state_to(Session& dest, EmuFs& emu_fs, EmuFs& dest_emu_fs) {
assert_fully_initialized();
DEBUG_ASSERT(!dest.clone_completion);
auto completion = unique_ptr<CloneCompletion>(new CloneCompletion());
auto& cloned_fd_tables = completion->cloned_fd_tables;
for (auto vm : vm_map) {
// Pick an arbitrary task to be group leader. The actual group leader
// might have died already.
Task* group_leader = *vm.second->task_set().begin();
LOG(debug) << " forking tg " << group_leader->tgid()
<< " (real: " << group_leader->real_tgid() << ")";
completion->address_spaces.push_back(CloneCompletion::AddressSpaceClone());
auto& group = completion->address_spaces.back();
group.clone_leader = group_leader->os_fork_into(
&dest, get_or_clone_fd_table(cloned_fd_tables, group_leader));
dest.on_create(group.clone_leader);
LOG(debug) << " forked new group leader " << group.clone_leader->tid;
{
AutoRemoteSyscalls remote(group.clone_leader);
vector<AddressSpace::Mapping> shared_maps_to_clone;
for (const auto& m : group.clone_leader->vm()->maps()) {
// Special case the syscallbuf as a performance optimization. The amount
// of data we need to capture is usually significantly smaller than the
// size of the mapping, so allocating the whole mapping here would be
// wasteful.
if (m.flags & AddressSpace::Mapping::IS_SYSCALLBUF) {
group.captured_memory.push_back(make_pair(
m.map.start(), capture_syscallbuf(m, group.clone_leader)));
} else if (m.local_addr != nullptr) {
ASSERT(group.clone_leader,
m.map.start() == AddressSpace::preload_thread_locals_start());
} else if ((m.recorded_map.flags() & MAP_SHARED) &&
emu_fs.has_file_for(m.recorded_map)) {
shared_maps_to_clone.push_back(m);
}
}
// Do this in a separate loop to avoid iteration invalidation issues
for (const auto& m : shared_maps_to_clone) {
remap_shared_mmap(remote, emu_fs, dest_emu_fs, m);
}
for (auto t : vm.second->task_set()) {
if (group_leader == t) {
continue;
}
LOG(debug) << " cloning " << t->rec_tid;
get_or_clone_fd_table(cloned_fd_tables, t);
group.member_states.push_back(t->capture_state());
}
}
group.clone_leader_state = group_leader->capture_state();
}
dest.clone_completion = std::move(completion);
DEBUG_ASSERT(dest.vms().size() > 0);
}
bool Session::has_cpuid_faulting() {
return !Flags::get().disable_cpuid_faulting && cpuid_faulting_works();
}
int Session::cpu_binding() const {
return const_cast<Session*>(this)->trace_stream()->bound_to_cpu();
}
// Returns true if we succeeded, false if we failed because the
// requested CPU does not exist/is not available.
static bool set_cpu_affinity(int cpu) {
DEBUG_ASSERT(cpu >= 0);
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
if (0 > sched_setaffinity(0, sizeof(mask), &mask)) {
if (errno == EINVAL) {
return false;
}
FATAL() << "Couldn't bind to CPU " << cpu;
}
return true;
}
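// Bind rr (and hence all future tracees) to the session's chosen CPU, saving
// the previous affinity in original_affinity_. During replay, if the recorded
// CPU is unavailable, another CPU may be substituted as long as CPUID
// faulting can hide the difference from the tracee.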
void Session::do_bind_cpu() {
sched_getaffinity(0, sizeof(original_affinity_), &original_affinity_);
int cpu_index = this->cpu_binding();
if (cpu_index >= 0) {
// Set CPU affinity now, after we've created any helper threads
// (so they aren't affected), but before we create any
// tracees (so they are all affected).
// Note that we're binding rr itself to the same CPU as the
// tracees, since this seems to help performance.
if (!set_cpu_affinity(cpu_index)) {
if (has_cpuid_faulting() && !is_recording()) {
cpu_index = choose_cpu(BIND_CPU, cpu_lock);
if (!set_cpu_affinity(cpu_index)) {
FATAL() << "Can't bind to requested CPU " << cpu_index
<< " even after we re-selected it";
}
LOG(warn) << "Bound to CPU " << cpu_index
<< "instead of selected " << trace_stream()->bound_to_cpu()
<< "because the latter is not available;\n"
<< "Hoping tracee doesn't use LSL instruction!";
trace_stream()->set_bound_cpu(cpu_index);
} else {
FATAL() << "Can't bind to requested CPU " << cpu_index
<< ", and CPUID faulting not available";
}
} else if (!is_recording()) {
// Make sure to mark this CPU as in use in the cpu_lock.
(void)choose_cpu((BindCPU)cpu_index, cpu_lock);
}
}
}
} // namespace rr