Update rr prebuilts to build 10827144. am: 72266da4be am: 74924e4573 am: 77e9faec15
Original change: https://android-review.googlesource.com/c/platform/tools/rr_prebuilt/+/2755597
Change-Id: Iea5da26ecc3ea6953c7b68f1c5f8918715da5795
Signed-off-by: Automerger Merge Worker <[email protected]>
diff --git a/rr/android/x86_64/bin/rr b/rr/android/x86_64/bin/rr
new file mode 100755
index 0000000..3128823
--- /dev/null
+++ b/rr/android/x86_64/bin/rr
Binary files differ
diff --git a/rr/android/x86_64/bin/rr-collect-symbols.py b/rr/android/x86_64/bin/rr-collect-symbols.py
new file mode 100755
index 0000000..79c72a7
--- /dev/null
+++ b/rr/android/x86_64/bin/rr-collect-symbols.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+
+import errno
+import glob
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from urllib.request import urlretrieve
+from urllib.error import HTTPError, ContentTooShortError
+
+# Usage: rr-collect-symbols.py <trace-dir> [<url> | <path>]
+#
+# Given a <url>, downloads the zip/.tar.zst file at <url>, uncompresses it,
+# runs "gunzip" on any .gz files, and for any ELF files found whose build-ids
+# match the build-id of an ELF file in the trace, moves it into the trace.
+#
+# Given a <path>, which must contain a .build-id directory with the usual
+# structure (e.g. as Ubuntu and Fedora create under /usr/lib/debug), searches
+# the directory tree for any ELF files whose build-ids match the build-id of
+# an ELF file in the trace and copies them into the trace. <path> defaults to
+# "/usr/lib/debug", which will grab any available system debuginfo files
+# in Ubuntu and Fedora at least.
+#
+# This script assumes that the trace-dir has been packed via `rr pack` so all
+# relevant files actually appear in the trace-dir.
+# It also assumes rr is on the PATH.
+#
+# The debuginfo files are placed in the trace under a "debug" subdirectory,
+# in a ".build-id" subdirectory with the usual structure.
+#
+# If a debuginfo file contains a .gnu_debugaltlink section then we also
+# attempt to find the referenced file and copy it into the trace with the
+# same file name as the .debug file, but with a .sup suffix.
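+#
+# Illustrative example (the trace directory and build-id below are made up,
+# shown only to clarify the layout this script produces):
+#
+#   rr pack ~/.local/share/rr/app-0
+#   rr-collect-symbols.py ~/.local/share/rr/app-0 /usr/lib/debug
+#
+# would leave matching debuginfo in the trace as e.g.
+#
+#   ~/.local/share/rr/app-0/debug/.build-id/ab/cdef0123456789abcdef0123456789abcdef01.debug
+#   ~/.local/share/rr/app-0/debug/.build-id/ab/cdef0123456789abcdef0123456789abcdef01.sup
+#
+# (the .sup file appearing only when the .debug file had a .gnu_debugaltlink).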
+
+if len(sys.argv) < 2:
+ print("Usage: rr-collect-symbols.py <trace-dir> [<url> | <path>]", file=sys.stderr)
+ sys.exit(1)
+trace_dir = sys.argv[1]
+
+if len(sys.argv) < 3:
+ source = "/usr/lib/debug"
+else:
+ source = sys.argv[2]
+
+rr_buildid = subprocess.Popen(["rr", "buildid"],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+
+def build_id_for(file):
+ global rr_buildid
+ rr_buildid.stdin.write(("%s\n"%file).encode('utf-8'))
+ try:
+ rr_buildid.stdin.flush()
+ except BrokenPipeError:
+ print("Can't write to rr, termination code %s"%rr_buildid.returncode, file=sys.stderr)
+ sys.exit(2)
+ return rr_buildid.stdout.readline().rstrip().decode('utf-8')
+
+altref_regex = re.compile(rb"^\s+\[\s*0\]\s+(.*)")
+
+def find_altref(file):
+ proc = subprocess.Popen(["readelf", "-p", ".gnu_debugaltlink", file], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+ try:
+ for line in proc.stdout:
+ m = altref_regex.match(line)
+ if m:
+ return m.group(1).rstrip()
+ finally:
+ proc.wait()
+ return None
+
+def find_altref_for_trace_file(trace_file, altref):
+ proc = subprocess.Popen(["rr", "filename", trace_file], stdout=subprocess.PIPE)
+ try:
+ for line in proc.stdout:
+ file = line.rstrip()
+ altref_file = os.path.join(os.path.dirname(file), altref)
+ if os.path.isfile(altref_file):
+ return altref_file
+ finally:
+ proc.wait()
+ return None
+
+def mkdir_p(path):
+ try:
+ os.makedirs(path)
+ except OSError as exc:
+ if exc.errno == errno.EEXIST and os.path.isdir(path):
+ pass
+ else:
+ raise
+
+# 'dst' must be a complete file name, not a directory.
+def copy_file(src, dst):
+ try:
+ # Remove the destination file in case it's a hard link
+ # or owned by someone else.
+ os.remove(dst)
+ except:
+ pass
+ shutil.copy(src, dst)
+
+# 'dst' must be a complete file name, not a directory
+def create_link(src, dst):
+ try:
+ # Remove the destination file in case it's wrong.
+ os.remove(dst)
+ except:
+ pass
+ os.symlink(src, dst)
+
+def collect_trace_build_ids():
+ ret = {}
+ for file in glob.iglob("%s/mmap_*"%trace_dir):
+ build_id = build_id_for(file)
+ if build_id:
+ ret[build_id] = True
+ altref = find_altref(file)
+ if altref:
+ altref_file = find_altref_for_trace_file(file, altref)
+ if not altref_file:
+ print("WARNING: Can't find alt file %s for %s"%(altref, file))
+ continue
+ dir = "%s/debug/.build-id/%s"%(trace_dir, build_id[:2])
+ mkdir_p(dir)
+ copy_file(altref_file, "%s/%s.sup"%(dir, build_id[2:]))
+ return ret
+
+trace_build_ids = collect_trace_build_ids()
+
+def collect_archive(url):
+ is_tar_zst = url.endswith(".tar.zst")
+ tmp_dir = tempfile.mkdtemp(dir=trace_dir)
+ if is_tar_zst:
+ tmp_file_name = "%s/archive.tar.zst"%tmp_dir
+ else:
+        # Assume it's a ZIP
+ tmp_file_name = "%s/archive.zip"%tmp_dir
+ try:
+ (file, headers) = urlretrieve(url, tmp_file_name)
+ except (HTTPError, ContentTooShortError) as exc:
+ print("Failed to load archive %s: %s"%(url, exc), file=sys.stderr)
+ sys.exit(2)
+ if is_tar_zst:
+ subprocess.check_call(["tar", "-C", tmp_dir, "-I", "zstd", "-xvf", file])
+ else:
+ subprocess.check_call(["unzip", "-d", tmp_dir, file])
+ os.remove(file)
+
+ for root, dirs, files in os.walk(tmp_dir):
+ for name in files:
+ file = os.path.join(root, name)
+ if file.endswith(".gz"):
+ subprocess.check_call(["gunzip", file])
+ file = file[:-3]
+ build_id = build_id_for(file)
+ if build_id and build_id in trace_build_ids:
+ dir = "%s/debug/.build-id/%s"%(trace_dir, build_id[:2])
+ mkdir_p(dir)
+ dst = "%s/%s.debug"%(dir, build_id[2:])
+ os.rename(file, dst)
+ else:
+ os.remove(file)
+
+ shutil.rmtree(tmp_dir)
+
+def collect_filesystem(path):
+ for root, dirs, files in os.walk(path):
+ for name in files:
+ file = os.path.join(root, name)
+ if not os.path.islink(file):
+ build_id = build_id_for(file)
+ if build_id and build_id in trace_build_ids:
+ dir = "%s/debug/.build-id/%s"%(trace_dir, build_id[:2])
+ mkdir_p(dir)
+ copy_file(file, "%s/%s.debug"%(dir, build_id[2:]))
+ altref = find_altref(file)
+ if altref:
+ altref = altref.decode('utf-8')
+ altref_file = os.path.join(os.path.dirname(file), altref)
+ copy_file(altref_file, "%s/%s.sup"%(dir, build_id[2:]))
+ if altref.startswith("../../../.dwz/"):
+ mkdir_p("%s/.dwz"%trace_dir)
+ src = "../debug/.build-id/%s/%s.sup"%(build_id[:2], build_id[2:])
+ create_link(src, "%s/.dwz/%s"%(trace_dir, altref[14:]))
+ elif altref.startswith("../../.dwz/"):
+ mkdir_p("%s/debug/.dwz"%trace_dir)
+ src = "../.build-id/%s/%s.sup"%(build_id[:2], build_id[2:])
+ create_link(src, "%s/debug/.dwz/%s"%(trace_dir, altref[11:]))
+ elif altref.startswith("../.dwz/"):
+ mkdir_p("%s/debug/.build-id/.dwz"%trace_dir)
+ src = "../%s/%s.sup"%(build_id[:2], build_id[2:])
+ create_link(src, "%s/debug/.build-id/.dwz/%s"%(trace_dir, altref[8:]))
+
+if re.search("^[^:/]+:", source):
+ collect_archive(source)
+else:
+ collect_filesystem(source)
+
+rr_buildid.terminate()
diff --git a/rr/android/x86_64/bin/rr_exec_stub b/rr/android/x86_64/bin/rr_exec_stub
new file mode 100755
index 0000000..5136526
--- /dev/null
+++ b/rr/android/x86_64/bin/rr_exec_stub
Binary files differ
diff --git a/rr/android/x86_64/bin/signal-rr-recording.sh b/rr/android/x86_64/bin/signal-rr-recording.sh
new file mode 100755
index 0000000..18a4cfd
--- /dev/null
+++ b/rr/android/x86_64/bin/signal-rr-recording.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+signal=$1
+if [[ "$signal" == "" ]]; then
+ echo "Usage: $0 <signal>" >&2
+ echo "Sends <signal> to all processes being recorded by rr" >&2
+ exit 1
+fi
+
+function signal_descendants { pid=$1
+ for child in `ps -o pid= --ppid $pid`; do
+ echo Sending $signal to $child
+ kill -s $signal $child
+ signal_descendants $child
+ done
+}
+
+for rr_pid in `pidof rr` ; do
+ if cat /proc/$rr_pid/cmdline | tr '\0' '\n' | head -n2 | tail -n1 | grep -qz '\(^record$\)\|/' ; then
+ signal_descendants $rr_pid
+ fi
+done
diff --git a/rr/android/x86_64/lib/rr/librrpage.so b/rr/android/x86_64/lib/rr/librrpage.so
new file mode 100644
index 0000000..1290a2d
--- /dev/null
+++ b/rr/android/x86_64/lib/rr/librrpage.so
Binary files differ
diff --git a/rr/android/x86_64/lib/rr/librrpreload.so b/rr/android/x86_64/lib/rr/librrpreload.so
new file mode 100644
index 0000000..f3e3b1b
--- /dev/null
+++ b/rr/android/x86_64/lib/rr/librrpreload.so
Binary files differ
diff --git a/rr/android/x86_64/share/bash-completion/completions/rr b/rr/android/x86_64/share/bash-completion/completions/rr
new file mode 100755
index 0000000..7325165
--- /dev/null
+++ b/rr/android/x86_64/share/bash-completion/completions/rr
@@ -0,0 +1,29 @@
+# vi:syntax=sh
+#
+# completion script for rr commands (to be sourced)
+
+_rr_subcmd_completion() {
+ local cmd=$1
+ local short_opts=$(rr help $cmd | sed -n 's/\s*-\([a-zA-Z]\),.*/-\1/p')
+ local long_opts=$(rr help $cmd | sed -n 's/.*--\([^= ]*\).*/--\1/p')
+ echo "$short_opts" "$long_opts"
+}
+
+_rr_completion() {
+ COMPREPLY=()
+ local rr_commands="$(rr --list-commands | cut -s -d ' ' -f 3)"
+
+ # completion for rr
+ if [ $COMP_CWORD -eq 1 ]; then
+ COMPREPLY=( $( compgen -W "$rr_commands" -- "${COMP_WORDS[1]}" ) )
+ return
+ fi
+
+ # completion for rr <command>'s options
+ local cmd="$(echo "${COMP_WORDS[1]}" | tr -d '[:space:]')"
+
+ if [ "$(echo $rr_commands | grep -w "$cmd")" ] ; then
+ COMPREPLY=( $( compgen -W "$(_rr_subcmd_completion "$cmd")" -- "${COMP_WORDS[COMP_CWORD]}" ) )
+ fi
+}
+complete -F _rr_completion rr
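+
+# Rough usage sketch (assumes rr is on PATH and this file has been sourced,
+# e.g. via ". .../share/bash-completion/completions/rr"): typing "rr rec<TAB>"
+# completes the subcommand from "rr --list-commands", and "rr record --<TAB>"
+# offers the long options scraped from "rr help record" by
+# _rr_subcmd_completion above.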
diff --git a/rr/android/x86_64/share/rr/32bit-avx.xml b/rr/android/x86_64/share/rr/32bit-avx.xml
new file mode 100644
index 0000000..6eb44fe
--- /dev/null
+++ b/rr/android/x86_64/share/rr/32bit-avx.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.avx">
+ <reg name="ymm0h" bitsize="128" type="uint128"/>
+ <reg name="ymm1h" bitsize="128" type="uint128"/>
+ <reg name="ymm2h" bitsize="128" type="uint128"/>
+ <reg name="ymm3h" bitsize="128" type="uint128"/>
+ <reg name="ymm4h" bitsize="128" type="uint128"/>
+ <reg name="ymm5h" bitsize="128" type="uint128"/>
+ <reg name="ymm6h" bitsize="128" type="uint128"/>
+ <reg name="ymm7h" bitsize="128" type="uint128"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/32bit-core.xml b/rr/android/x86_64/share/rr/32bit-core.xml
new file mode 100644
index 0000000..48c5890
--- /dev/null
+++ b/rr/android/x86_64/share/rr/32bit-core.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.core">
+ <flags id="i386_eflags" size="4">
+ <field name="CF" start="0" end="0"/>
+ <field name="" start="1" end="1"/>
+ <field name="PF" start="2" end="2"/>
+ <field name="AF" start="4" end="4"/>
+ <field name="ZF" start="6" end="6"/>
+ <field name="SF" start="7" end="7"/>
+ <field name="TF" start="8" end="8"/>
+ <field name="IF" start="9" end="9"/>
+ <field name="DF" start="10" end="10"/>
+ <field name="OF" start="11" end="11"/>
+ <field name="NT" start="14" end="14"/>
+ <field name="RF" start="16" end="16"/>
+ <field name="VM" start="17" end="17"/>
+ <field name="AC" start="18" end="18"/>
+ <field name="VIF" start="19" end="19"/>
+ <field name="VIP" start="20" end="20"/>
+ <field name="ID" start="21" end="21"/>
+ </flags>
+
+ <reg name="eax" bitsize="32" type="int32"/>
+ <reg name="ecx" bitsize="32" type="int32"/>
+ <reg name="edx" bitsize="32" type="int32"/>
+ <reg name="ebx" bitsize="32" type="int32"/>
+ <reg name="esp" bitsize="32" type="data_ptr"/>
+ <reg name="ebp" bitsize="32" type="data_ptr"/>
+ <reg name="esi" bitsize="32" type="int32"/>
+ <reg name="edi" bitsize="32" type="int32"/>
+
+ <reg name="eip" bitsize="32" type="code_ptr"/>
+ <reg name="eflags" bitsize="32" type="i386_eflags"/>
+ <reg name="cs" bitsize="32" type="int32"/>
+ <reg name="ss" bitsize="32" type="int32"/>
+ <reg name="ds" bitsize="32" type="int32"/>
+ <reg name="es" bitsize="32" type="int32"/>
+ <reg name="fs" bitsize="32" type="int32"/>
+ <reg name="gs" bitsize="32" type="int32"/>
+
+ <reg name="st0" bitsize="80" type="i387_ext"/>
+ <reg name="st1" bitsize="80" type="i387_ext"/>
+ <reg name="st2" bitsize="80" type="i387_ext"/>
+ <reg name="st3" bitsize="80" type="i387_ext"/>
+ <reg name="st4" bitsize="80" type="i387_ext"/>
+ <reg name="st5" bitsize="80" type="i387_ext"/>
+ <reg name="st6" bitsize="80" type="i387_ext"/>
+ <reg name="st7" bitsize="80" type="i387_ext"/>
+
+ <reg name="fctrl" bitsize="32" type="int" group="float"/>
+ <reg name="fstat" bitsize="32" type="int" group="float"/>
+ <reg name="ftag" bitsize="32" type="int" group="float"/>
+ <reg name="fiseg" bitsize="32" type="int" group="float"/>
+ <reg name="fioff" bitsize="32" type="int" group="float"/>
+ <reg name="foseg" bitsize="32" type="int" group="float"/>
+ <reg name="fooff" bitsize="32" type="int" group="float"/>
+ <reg name="fop" bitsize="32" type="int" group="float"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/32bit-linux.xml b/rr/android/x86_64/share/rr/32bit-linux.xml
new file mode 100644
index 0000000..7139db8
--- /dev/null
+++ b/rr/android/x86_64/share/rr/32bit-linux.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.linux">
+ <reg name="orig_eax" bitsize="32" type="int" regnum="41"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/32bit-pkeys.xml b/rr/android/x86_64/share/rr/32bit-pkeys.xml
new file mode 100644
index 0000000..6f6723c
--- /dev/null
+++ b/rr/android/x86_64/share/rr/32bit-pkeys.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2016-2021 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.pkeys">
+
+ <reg name="pkru" bitsize="32" type="uint32"/>
+
+</feature>
diff --git a/rr/android/x86_64/share/rr/32bit-sse.xml b/rr/android/x86_64/share/rr/32bit-sse.xml
new file mode 100644
index 0000000..03b6421
--- /dev/null
+++ b/rr/android/x86_64/share/rr/32bit-sse.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.sse">
+ <vector id="v4f" type="ieee_single" count="4"/>
+ <vector id="v2d" type="ieee_double" count="2"/>
+ <vector id="v16i8" type="int8" count="16"/>
+ <vector id="v8i16" type="int16" count="8"/>
+ <vector id="v4i32" type="int32" count="4"/>
+ <vector id="v2i64" type="int64" count="2"/>
+ <union id="vec128">
+ <field name="v4_float" type="v4f"/>
+ <field name="v2_double" type="v2d"/>
+ <field name="v16_int8" type="v16i8"/>
+ <field name="v8_int16" type="v8i16"/>
+ <field name="v4_int32" type="v4i32"/>
+ <field name="v2_int64" type="v2i64"/>
+ <field name="uint128" type="uint128"/>
+ </union>
+ <flags id="i386_mxcsr" size="4">
+ <field name="IE" start="0" end="0"/>
+ <field name="DE" start="1" end="1"/>
+ <field name="ZE" start="2" end="2"/>
+ <field name="OE" start="3" end="3"/>
+ <field name="UE" start="4" end="4"/>
+ <field name="PE" start="5" end="5"/>
+ <field name="DAZ" start="6" end="6"/>
+ <field name="IM" start="7" end="7"/>
+ <field name="DM" start="8" end="8"/>
+ <field name="ZM" start="9" end="9"/>
+ <field name="OM" start="10" end="10"/>
+ <field name="UM" start="11" end="11"/>
+ <field name="PM" start="12" end="12"/>
+ <field name="FZ" start="15" end="15"/>
+ </flags>
+
+ <reg name="xmm0" bitsize="128" type="vec128" regnum="32"/>
+ <reg name="xmm1" bitsize="128" type="vec128"/>
+ <reg name="xmm2" bitsize="128" type="vec128"/>
+ <reg name="xmm3" bitsize="128" type="vec128"/>
+ <reg name="xmm4" bitsize="128" type="vec128"/>
+ <reg name="xmm5" bitsize="128" type="vec128"/>
+ <reg name="xmm6" bitsize="128" type="vec128"/>
+ <reg name="xmm7" bitsize="128" type="vec128"/>
+
+ <reg name="mxcsr" bitsize="32" type="i386_mxcsr" group="vector"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/64bit-avx.xml b/rr/android/x86_64/share/rr/64bit-avx.xml
new file mode 100644
index 0000000..5dfe45e
--- /dev/null
+++ b/rr/android/x86_64/share/rr/64bit-avx.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.avx">
+ <reg name="ymm0h" bitsize="128" type="uint128"/>
+ <reg name="ymm1h" bitsize="128" type="uint128"/>
+ <reg name="ymm2h" bitsize="128" type="uint128"/>
+ <reg name="ymm3h" bitsize="128" type="uint128"/>
+ <reg name="ymm4h" bitsize="128" type="uint128"/>
+ <reg name="ymm5h" bitsize="128" type="uint128"/>
+ <reg name="ymm6h" bitsize="128" type="uint128"/>
+ <reg name="ymm7h" bitsize="128" type="uint128"/>
+ <reg name="ymm8h" bitsize="128" type="uint128"/>
+ <reg name="ymm9h" bitsize="128" type="uint128"/>
+ <reg name="ymm10h" bitsize="128" type="uint128"/>
+ <reg name="ymm11h" bitsize="128" type="uint128"/>
+ <reg name="ymm12h" bitsize="128" type="uint128"/>
+ <reg name="ymm13h" bitsize="128" type="uint128"/>
+ <reg name="ymm14h" bitsize="128" type="uint128"/>
+ <reg name="ymm15h" bitsize="128" type="uint128"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/64bit-core.xml b/rr/android/x86_64/share/rr/64bit-core.xml
new file mode 100644
index 0000000..7cd0673
--- /dev/null
+++ b/rr/android/x86_64/share/rr/64bit-core.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.core">
+ <flags id="i386_eflags" size="4">
+ <field name="CF" start="0" end="0"/>
+ <field name="" start="1" end="1"/>
+ <field name="PF" start="2" end="2"/>
+ <field name="AF" start="4" end="4"/>
+ <field name="ZF" start="6" end="6"/>
+ <field name="SF" start="7" end="7"/>
+ <field name="TF" start="8" end="8"/>
+ <field name="IF" start="9" end="9"/>
+ <field name="DF" start="10" end="10"/>
+ <field name="OF" start="11" end="11"/>
+ <field name="NT" start="14" end="14"/>
+ <field name="RF" start="16" end="16"/>
+ <field name="VM" start="17" end="17"/>
+ <field name="AC" start="18" end="18"/>
+ <field name="VIF" start="19" end="19"/>
+ <field name="VIP" start="20" end="20"/>
+ <field name="ID" start="21" end="21"/>
+ </flags>
+
+ <reg name="rax" bitsize="64" type="int64"/>
+ <reg name="rbx" bitsize="64" type="int64"/>
+ <reg name="rcx" bitsize="64" type="int64"/>
+ <reg name="rdx" bitsize="64" type="int64"/>
+ <reg name="rsi" bitsize="64" type="int64"/>
+ <reg name="rdi" bitsize="64" type="int64"/>
+ <reg name="rbp" bitsize="64" type="data_ptr"/>
+ <reg name="rsp" bitsize="64" type="data_ptr"/>
+ <reg name="r8" bitsize="64" type="int64"/>
+ <reg name="r9" bitsize="64" type="int64"/>
+ <reg name="r10" bitsize="64" type="int64"/>
+ <reg name="r11" bitsize="64" type="int64"/>
+ <reg name="r12" bitsize="64" type="int64"/>
+ <reg name="r13" bitsize="64" type="int64"/>
+ <reg name="r14" bitsize="64" type="int64"/>
+ <reg name="r15" bitsize="64" type="int64"/>
+
+ <reg name="rip" bitsize="64" type="code_ptr"/>
+ <reg name="eflags" bitsize="32" type="i386_eflags"/>
+ <reg name="cs" bitsize="32" type="int32"/>
+ <reg name="ss" bitsize="32" type="int32"/>
+ <reg name="ds" bitsize="32" type="int32"/>
+ <reg name="es" bitsize="32" type="int32"/>
+ <reg name="fs" bitsize="32" type="int32"/>
+ <reg name="gs" bitsize="32" type="int32"/>
+
+ <reg name="st0" bitsize="80" type="i387_ext"/>
+ <reg name="st1" bitsize="80" type="i387_ext"/>
+ <reg name="st2" bitsize="80" type="i387_ext"/>
+ <reg name="st3" bitsize="80" type="i387_ext"/>
+ <reg name="st4" bitsize="80" type="i387_ext"/>
+ <reg name="st5" bitsize="80" type="i387_ext"/>
+ <reg name="st6" bitsize="80" type="i387_ext"/>
+ <reg name="st7" bitsize="80" type="i387_ext"/>
+
+ <reg name="fctrl" bitsize="32" type="int" group="float"/>
+ <reg name="fstat" bitsize="32" type="int" group="float"/>
+ <reg name="ftag" bitsize="32" type="int" group="float"/>
+ <reg name="fiseg" bitsize="32" type="int" group="float"/>
+ <reg name="fioff" bitsize="32" type="int" group="float"/>
+ <reg name="foseg" bitsize="32" type="int" group="float"/>
+ <reg name="fooff" bitsize="32" type="int" group="float"/>
+ <reg name="fop" bitsize="32" type="int" group="float"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/64bit-linux.xml b/rr/android/x86_64/share/rr/64bit-linux.xml
new file mode 100644
index 0000000..b4229d0
--- /dev/null
+++ b/rr/android/x86_64/share/rr/64bit-linux.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.linux">
+ <reg name="orig_rax" bitsize="64" type="int" regnum="57"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/64bit-pkeys.xml b/rr/android/x86_64/share/rr/64bit-pkeys.xml
new file mode 100644
index 0000000..6f6723c
--- /dev/null
+++ b/rr/android/x86_64/share/rr/64bit-pkeys.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2016-2021 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.pkeys">
+
+ <reg name="pkru" bitsize="32" type="uint32"/>
+
+</feature>
diff --git a/rr/android/x86_64/share/rr/64bit-seg.xml b/rr/android/x86_64/share/rr/64bit-seg.xml
new file mode 100644
index 0000000..1fa6c9e
--- /dev/null
+++ b/rr/android/x86_64/share/rr/64bit-seg.xml
@@ -0,0 +1,5 @@
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.segments">
+ <reg name="fs_base" bitsize="64" type="data_ptr"/>
+ <reg name="gs_base" bitsize="64" type="data_ptr"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/64bit-sse.xml b/rr/android/x86_64/share/rr/64bit-sse.xml
new file mode 100644
index 0000000..eec4b79
--- /dev/null
+++ b/rr/android/x86_64/share/rr/64bit-sse.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.i386.sse">
+ <vector id="v4f" type="ieee_single" count="4"/>
+ <vector id="v2d" type="ieee_double" count="2"/>
+ <vector id="v16i8" type="int8" count="16"/>
+ <vector id="v8i16" type="int16" count="8"/>
+ <vector id="v4i32" type="int32" count="4"/>
+ <vector id="v2i64" type="int64" count="2"/>
+ <union id="vec128">
+ <field name="v4_float" type="v4f"/>
+ <field name="v2_double" type="v2d"/>
+ <field name="v16_int8" type="v16i8"/>
+ <field name="v8_int16" type="v8i16"/>
+ <field name="v4_int32" type="v4i32"/>
+ <field name="v2_int64" type="v2i64"/>
+ <field name="uint128" type="uint128"/>
+ </union>
+ <flags id="i386_mxcsr" size="4">
+ <field name="IE" start="0" end="0"/>
+ <field name="DE" start="1" end="1"/>
+ <field name="ZE" start="2" end="2"/>
+ <field name="OE" start="3" end="3"/>
+ <field name="UE" start="4" end="4"/>
+ <field name="PE" start="5" end="5"/>
+ <field name="DAZ" start="6" end="6"/>
+ <field name="IM" start="7" end="7"/>
+ <field name="DM" start="8" end="8"/>
+ <field name="ZM" start="9" end="9"/>
+ <field name="OM" start="10" end="10"/>
+ <field name="UM" start="11" end="11"/>
+ <field name="PM" start="12" end="12"/>
+ <field name="FZ" start="15" end="15"/>
+ </flags>
+
+ <reg name="xmm0" bitsize="128" type="vec128" regnum="40"/>
+ <reg name="xmm1" bitsize="128" type="vec128"/>
+ <reg name="xmm2" bitsize="128" type="vec128"/>
+ <reg name="xmm3" bitsize="128" type="vec128"/>
+ <reg name="xmm4" bitsize="128" type="vec128"/>
+ <reg name="xmm5" bitsize="128" type="vec128"/>
+ <reg name="xmm6" bitsize="128" type="vec128"/>
+ <reg name="xmm7" bitsize="128" type="vec128"/>
+ <reg name="xmm8" bitsize="128" type="vec128"/>
+ <reg name="xmm9" bitsize="128" type="vec128"/>
+ <reg name="xmm10" bitsize="128" type="vec128"/>
+ <reg name="xmm11" bitsize="128" type="vec128"/>
+ <reg name="xmm12" bitsize="128" type="vec128"/>
+ <reg name="xmm13" bitsize="128" type="vec128"/>
+ <reg name="xmm14" bitsize="128" type="vec128"/>
+ <reg name="xmm15" bitsize="128" type="vec128"/>
+
+ <reg name="mxcsr" bitsize="32" type="i386_mxcsr" group="vector"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/aarch64-core.xml b/rr/android/x86_64/share/rr/aarch64-core.xml
new file mode 100644
index 0000000..ee6a3a6
--- /dev/null
+++ b/rr/android/x86_64/share/rr/aarch64-core.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2020 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.aarch64.core">
+ <reg name="x0" bitsize="64"/>
+ <reg name="x1" bitsize="64"/>
+ <reg name="x2" bitsize="64"/>
+ <reg name="x3" bitsize="64"/>
+ <reg name="x4" bitsize="64"/>
+ <reg name="x5" bitsize="64"/>
+ <reg name="x6" bitsize="64"/>
+ <reg name="x7" bitsize="64"/>
+ <reg name="x8" bitsize="64"/>
+ <reg name="x9" bitsize="64"/>
+ <reg name="x10" bitsize="64"/>
+ <reg name="x11" bitsize="64"/>
+ <reg name="x12" bitsize="64"/>
+ <reg name="x13" bitsize="64"/>
+ <reg name="x14" bitsize="64"/>
+ <reg name="x15" bitsize="64"/>
+ <reg name="x16" bitsize="64"/>
+ <reg name="x17" bitsize="64"/>
+ <reg name="x18" bitsize="64"/>
+ <reg name="x19" bitsize="64"/>
+ <reg name="x20" bitsize="64"/>
+ <reg name="x21" bitsize="64"/>
+ <reg name="x22" bitsize="64"/>
+ <reg name="x23" bitsize="64"/>
+ <reg name="x24" bitsize="64"/>
+ <reg name="x25" bitsize="64"/>
+ <reg name="x26" bitsize="64"/>
+ <reg name="x27" bitsize="64"/>
+ <reg name="x28" bitsize="64"/>
+ <reg name="x29" bitsize="64"/>
+ <reg name="x30" bitsize="64"/>
+ <reg name="sp" bitsize="64" type="data_ptr"/>
+
+ <reg name="pc" bitsize="64" type="code_ptr"/>
+
+ <flags id="cpsr_flags" size="4">
+ <!-- Stack Pointer. -->
+ <field name="SP" start="0" end="0"/>
+
+ <!-- Exception Level. -->
+ <field name="EL" start="2" end="3"/>
+ <!-- Execution state. -->
+ <field name="nRW" start="4" end="4"/>
+
+ <!-- FIQ interrupt mask. -->
+ <field name="F" start="6" end="6"/>
+ <!-- IRQ interrupt mask. -->
+ <field name="I" start="7" end="7"/>
+ <!-- SError interrupt mask. -->
+ <field name="A" start="8" end="8"/>
+ <!-- Debug exception mask. -->
+ <field name="D" start="9" end="9"/>
+
+ <!-- ARMv8.0-A: Speculative Store Bypass. -->
+ <field name="SSBS" start="12" end="12"/>
+
+ <!-- Illegal Execution state. -->
+ <field name="IL" start="20" end="20"/>
+ <!-- Software Step. -->
+ <field name="SS" start="21" end="21"/>
+ <!-- ARMv8.1-A: Privileged Access Never. -->
+ <field name="PAN" start="22" end="22"/>
+ <!-- ARMv8.2-A: User Access Override. -->
+ <field name="UAO" start="23" end="23"/>
+ <!-- ARMv8.4-A: Data Independent Timing. -->
+ <field name="DIT" start="24" end="24"/>
+ <!-- ARMv8.5-A: Tag Check Override. -->
+ <field name="TCO" start="25" end="25"/>
+
+ <!-- Overflow Condition flag. -->
+ <field name="V" start="28" end="28"/>
+ <!-- Carry Condition flag. -->
+ <field name="C" start="29" end="29"/>
+ <!-- Zero Condition flag. -->
+ <field name="Z" start="30" end="30"/>
+ <!-- Negative Condition flag. -->
+ <field name="N" start="31" end="31"/>
+ </flags>
+ <reg name="cpsr" bitsize="32" type="cpsr_flags"/>
+
+</feature>
diff --git a/rr/android/x86_64/share/rr/aarch64-fpu.xml b/rr/android/x86_64/share/rr/aarch64-fpu.xml
new file mode 100644
index 0000000..eae763c
--- /dev/null
+++ b/rr/android/x86_64/share/rr/aarch64-fpu.xml
@@ -0,0 +1,88 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2020 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.aarch64.fpu">
+ <vector id="v2d" type="ieee_double" count="2"/>
+ <vector id="v2u" type="uint64" count="2"/>
+ <vector id="v2i" type="int64" count="2"/>
+ <vector id="v4f" type="ieee_single" count="4"/>
+ <vector id="v4u" type="uint32" count="4"/>
+ <vector id="v4i" type="int32" count="4"/>
+ <vector id="v8f" type="ieee_half" count="8"/>
+ <vector id="v8u" type="uint16" count="8"/>
+ <vector id="v8i" type="int16" count="8"/>
+ <vector id="v16u" type="uint8" count="16"/>
+ <vector id="v16i" type="int8" count="16"/>
+ <vector id="v1u" type="uint128" count="1"/>
+ <vector id="v1i" type="int128" count="1"/>
+ <union id="vnd">
+ <field name="f" type="v2d"/>
+ <field name="u" type="v2u"/>
+ <field name="s" type="v2i"/>
+ </union>
+ <union id="vns">
+ <field name="f" type="v4f"/>
+ <field name="u" type="v4u"/>
+ <field name="s" type="v4i"/>
+ </union>
+ <union id="vnh">
+ <field name="f" type="v8f"/>
+ <field name="u" type="v8u"/>
+ <field name="s" type="v8i"/>
+ </union>
+ <union id="vnb">
+ <field name="u" type="v16u"/>
+ <field name="s" type="v16i"/>
+ </union>
+ <union id="vnq">
+ <field name="u" type="v1u"/>
+ <field name="s" type="v1i"/>
+ </union>
+ <union id="aarch64v">
+ <field name="d" type="vnd"/>
+ <field name="s" type="vns"/>
+ <field name="h" type="vnh"/>
+ <field name="b" type="vnb"/>
+ <field name="q" type="vnq"/>
+ </union>
+ <reg name="v0" bitsize="128" type="aarch64v" regnum="34"/>
+ <reg name="v1" bitsize="128" type="aarch64v" />
+ <reg name="v2" bitsize="128" type="aarch64v" />
+ <reg name="v3" bitsize="128" type="aarch64v" />
+ <reg name="v4" bitsize="128" type="aarch64v" />
+ <reg name="v5" bitsize="128" type="aarch64v" />
+ <reg name="v6" bitsize="128" type="aarch64v" />
+ <reg name="v7" bitsize="128" type="aarch64v" />
+ <reg name="v8" bitsize="128" type="aarch64v" />
+ <reg name="v9" bitsize="128" type="aarch64v" />
+ <reg name="v10" bitsize="128" type="aarch64v"/>
+ <reg name="v11" bitsize="128" type="aarch64v"/>
+ <reg name="v12" bitsize="128" type="aarch64v"/>
+ <reg name="v13" bitsize="128" type="aarch64v"/>
+ <reg name="v14" bitsize="128" type="aarch64v"/>
+ <reg name="v15" bitsize="128" type="aarch64v"/>
+ <reg name="v16" bitsize="128" type="aarch64v"/>
+ <reg name="v17" bitsize="128" type="aarch64v"/>
+ <reg name="v18" bitsize="128" type="aarch64v"/>
+ <reg name="v19" bitsize="128" type="aarch64v"/>
+ <reg name="v20" bitsize="128" type="aarch64v"/>
+ <reg name="v21" bitsize="128" type="aarch64v"/>
+ <reg name="v22" bitsize="128" type="aarch64v"/>
+ <reg name="v23" bitsize="128" type="aarch64v"/>
+ <reg name="v24" bitsize="128" type="aarch64v"/>
+ <reg name="v25" bitsize="128" type="aarch64v"/>
+ <reg name="v26" bitsize="128" type="aarch64v"/>
+ <reg name="v27" bitsize="128" type="aarch64v"/>
+ <reg name="v28" bitsize="128" type="aarch64v"/>
+ <reg name="v29" bitsize="128" type="aarch64v"/>
+ <reg name="v30" bitsize="128" type="aarch64v"/>
+ <reg name="v31" bitsize="128" type="aarch64v"/>
+ <reg name="fpsr" bitsize="32"/>
+ <reg name="fpcr" bitsize="32"/>
+</feature>
diff --git a/rr/android/x86_64/share/rr/aarch64-pauth.xml b/rr/android/x86_64/share/rr/aarch64-pauth.xml
new file mode 100644
index 0000000..2ce14b4
--- /dev/null
+++ b/rr/android/x86_64/share/rr/aarch64-pauth.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2018-2020 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.aarch64.pauth">
+ <reg name="pauth_dmask" bitsize="64"/>
+ <reg name="pauth_cmask" bitsize="64"/>
+</feature>
+
diff --git a/rr/android/x86_64/share/rr/amd64-avx-linux.xml b/rr/android/x86_64/share/rr/amd64-avx-linux.xml
new file mode 100644
index 0000000..d2dc3bc
--- /dev/null
+++ b/rr/android/x86_64/share/rr/amd64-avx-linux.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- AMD64 with AVX - Includes Linux-only special "register". -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>i386:x86-64</architecture>
+ <osabi>GNU/Linux</osabi>
+ <xi:include href="64bit-core.xml"/>
+ <xi:include href="64bit-sse.xml"/>
+ <xi:include href="64bit-linux.xml"/>
+ <xi:include href="64bit-seg.xml"/>
+ <xi:include href="64bit-avx.xml"/>
+</target>
diff --git a/rr/android/x86_64/share/rr/amd64-linux.xml b/rr/android/x86_64/share/rr/amd64-linux.xml
new file mode 100644
index 0000000..aad02a3
--- /dev/null
+++ b/rr/android/x86_64/share/rr/amd64-linux.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- AMD64 - Includes Linux-only special "register". -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>i386:x86-64</architecture>
+ <osabi>GNU/Linux</osabi>
+ <xi:include href="64bit-core.xml"/>
+ <xi:include href="64bit-sse.xml"/>
+ <xi:include href="64bit-linux.xml"/>
+ <xi:include href="64bit-seg.xml"/>
+ <xi:include href="64bit-pkeys.xml"/>
+</target>
diff --git a/rr/android/x86_64/share/rr/amd64-pkeys-linux.xml b/rr/android/x86_64/share/rr/amd64-pkeys-linux.xml
new file mode 100644
index 0000000..1fa5bde
--- /dev/null
+++ b/rr/android/x86_64/share/rr/amd64-pkeys-linux.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- AMD64 with AVX and PKEYS - Includes Linux-only special "register". -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>i386:x86-64</architecture>
+ <osabi>GNU/Linux</osabi>
+ <xi:include href="64bit-core.xml"/>
+ <xi:include href="64bit-sse.xml"/>
+ <xi:include href="64bit-linux.xml"/>
+ <xi:include href="64bit-seg.xml"/>
+ <xi:include href="64bit-avx.xml"/>
+ <xi:include href="64bit-pkeys.xml"/>
+</target>
diff --git a/rr/android/x86_64/share/rr/i386-avx-linux.xml b/rr/android/x86_64/share/rr/i386-avx-linux.xml
new file mode 100644
index 0000000..c957fab
--- /dev/null
+++ b/rr/android/x86_64/share/rr/i386-avx-linux.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- I386 with AVX - Includes Linux-only special "register". -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>i386</architecture>
+ <osabi>GNU/Linux</osabi>
+ <xi:include href="32bit-core.xml"/>
+ <xi:include href="32bit-sse.xml"/>
+ <xi:include href="32bit-linux.xml"/>
+ <xi:include href="32bit-avx.xml"/>
+</target>
diff --git a/rr/android/x86_64/share/rr/i386-linux.xml b/rr/android/x86_64/share/rr/i386-linux.xml
new file mode 100644
index 0000000..625984e
--- /dev/null
+++ b/rr/android/x86_64/share/rr/i386-linux.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- I386 with SSE - Includes Linux-only special "register". -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>i386</architecture>
+ <osabi>GNU/Linux</osabi>
+ <xi:include href="32bit-core.xml"/>
+ <xi:include href="32bit-linux.xml"/>
+ <xi:include href="32bit-sse.xml"/>
+ <xi:include href="32bit-pkeys.xml"/>
+</target>
diff --git a/rr/android/x86_64/share/rr/i386-pkeys-linux.xml b/rr/android/x86_64/share/rr/i386-pkeys-linux.xml
new file mode 100644
index 0000000..47f7b2f
--- /dev/null
+++ b/rr/android/x86_64/share/rr/i386-pkeys-linux.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- I386 with AVX and PKEYS - Includes Linux-only special "register". -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>i386</architecture>
+ <osabi>GNU/Linux</osabi>
+ <xi:include href="32bit-core.xml"/>
+ <xi:include href="32bit-sse.xml"/>
+ <xi:include href="32bit-linux.xml"/>
+ <xi:include href="32bit-avx.xml"/>
+ <xi:include href="32bit-pkeys.xml"/>
+</target>
diff --git a/rr/android/x86_64/share/rr/src/preload/overrides.c b/rr/android/x86_64/share/rr/src/preload/overrides.c
new file mode 100644
index 0000000..2f572b3
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/overrides.c
@@ -0,0 +1,334 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#define RR_IMPLEMENT_PRELOAD
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "preload_interface.h"
+#include "syscallbuf.h"
+
+#define PTHREAD_MUTEX_PRIO_INHERIT_NP 32
+
+#define DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE 1
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 34)
+#undef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+#endif
+#endif
+
+#ifndef __BIONIC__
+
+// Use an old version of dlsym so this code still works when built against glibc > 2.34
+// but loaded into a process linking a pre-2.34 glibc.
+#ifdef __x86_64__
+__asm__(".symver dlsym,dlsym@GLIBC_2.2.5");
+#elif defined(__i386__)
+__asm__(".symver dlsym,dlsym@GLIBC_2.0");
+#endif
+
+static int (*real_pthread_mutex_init)(void* mutex, const void* attr);
+static int (*real_pthread_mutex_lock)(void* mutex);
+static int (*real_pthread_mutex_trylock)(void* mutex);
+static int (*real_pthread_mutex_timedlock)(void* mutex,
+ const struct timespec* abstime);
+static int (*real_pthread_mutexattr_setprotocol)(void* attr, int protocol);
+
+static void __attribute__((constructor)) init_override(void) {
+ real_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
+ real_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
+ real_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
+ real_pthread_mutex_timedlock = dlsym(RTLD_NEXT, "pthread_mutex_timedlock");
+ real_pthread_mutexattr_setprotocol = dlsym(RTLD_NEXT, "pthread_mutexattr_setprotocol");
+}
+
+static void fix_mutex_kind(pthread_mutex_t* mutex) {
+ /* Disable priority inheritance. */
+ mutex->__data.__kind &= ~PTHREAD_MUTEX_PRIO_INHERIT_NP;
+}
+
+#ifdef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+/*
+ * We need to be able to call directly into __pthread_mutex_lock and
+ * __pthread_mutex_trylock because setting up our indirect function pointers
+ * calls dlsym which itself can call pthread_mutex_lock (e.g. via application
+ * code overriding malloc/calloc to use a pthreads-based implementation).
+ * So before our pointers are set up, call these.
+ *
+ * If we're building against glibc 2.34 *but* we get run against a binary
+ * linking with glibc < 2.34 *and* the application overrides malloc to use
+ * pthreads-based synchronization then this won't work and we lose. Let's
+ * hope this doesn't happen.
+ */
+extern int __pthread_mutex_init(pthread_mutex_t* mutex,
+ const pthread_mutexattr_t* attr);
+extern int __pthread_mutex_lock(pthread_mutex_t* mutex);
+extern int __pthread_mutex_trylock(pthread_mutex_t* mutex);
+#endif
+
+int pthread_mutex_init(pthread_mutex_t* mutex,
+ const pthread_mutexattr_t* attr) {
+ int ret;
+ pthread_mutexattr_t realattr;
+
+ if (attr) {
+ /* We wish to enforce the use of plain (no PI) mutex to avoid
+ * needing to handle PI futex() operations.
+ * We also wish to ensure that pthread_mutexattr_getprotocol()
+ * still returns the requested protocol.
+ * So we copy the attribute and force PTHREAD_PRIO_NONE.
+ */
+ memcpy(&realattr, attr, sizeof(realattr));
+ // We assume dlsym doesn't call pthread_mutex_init with attributes.
+ // We avoid calling pthread_mutexattr_setprotocol (and any other pthread functions)
+ // directly because that won't work when we're built against glibc 2.34 but loaded
+ // into a process using glibc < 2.34. (pthread functions got a symbol version bump
+ // in 2.34.)
+ //
+ // But note that we can't use dlsym in cases where we would want to use the double
+ // underscore methods (i.e. glibc < 2.34). There is no double underscore version of
+ // pthread_mutexattr_setprotocol, so we call it directly.
+ if (!real_pthread_mutexattr_setprotocol) {
+#ifdef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+ ret = pthread_mutexattr_setprotocol(&realattr, PTHREAD_PRIO_NONE);
+ goto setprotocol;
+#else
+ real_pthread_mutexattr_setprotocol = dlsym(RTLD_NEXT, "pthread_mutexattr_setprotocol");
+#endif
+ }
+ ret = real_pthread_mutexattr_setprotocol(&realattr, PTHREAD_PRIO_NONE);
+#ifdef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+setprotocol:
+#endif
+ if (ret) {
+ return ret;
+ }
+ attr = &realattr;
+ }
+ if (!real_pthread_mutex_init) {
+#ifdef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+ return __pthread_mutex_init(mutex, attr);
+#else
+ real_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
+#endif
+ }
+ return real_pthread_mutex_init(mutex, attr);
+}
+
+/* Prevent use of lock elision; Haswell's TSX/RTM features used by
+ lock elision increment the rbc perf counter for instructions which
+ are later rolled back if the transaction fails. */
+int pthread_mutex_lock(pthread_mutex_t* mutex) {
+ fix_mutex_kind(mutex);
+ if (!real_pthread_mutex_lock) {
+#ifdef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+ return __pthread_mutex_lock(mutex);
+#else
+ real_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
+#endif
+ }
+ return real_pthread_mutex_lock(mutex);
+}
+
+int pthread_mutex_timedlock(pthread_mutex_t* mutex,
+ const struct timespec* abstime) {
+ fix_mutex_kind(mutex);
+ /* No __pthread_mutex_timedlock stub exists, so we have to use the
+ * indirect call no matter what.
+ */
+ if (!real_pthread_mutex_timedlock) {
+ real_pthread_mutex_timedlock = dlsym(RTLD_NEXT, "pthread_mutex_timedlock");
+ }
+ return real_pthread_mutex_timedlock(mutex, abstime);
+}
+
+int pthread_mutex_trylock(pthread_mutex_t* mutex) {
+ fix_mutex_kind(mutex);
+ if (!real_pthread_mutex_trylock) {
+#ifdef DOUBLE_UNDERSCORE_PTHREAD_LOCK_AVAILABLE
+ return __pthread_mutex_trylock(mutex);
+#else
+ real_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
+#endif
+ }
+ return real_pthread_mutex_trylock(mutex);
+}
+
+#endif
+
+typedef void* Dlopen(const char* filename, int flags);
+
+void* dlopen(const char* filename, int flags) {
+ // Give up our timeslice now. This gives us a full timeslice to
+ // execute the dlopen(), reducing the chance we'll hit
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=19329.
+ Dlopen* f_ptr = (Dlopen*)dlsym(RTLD_NEXT, "dlopen");
+ sched_yield();
+ return f_ptr(filename, flags);
+}
+
+/** Disable XShm since rr doesn't work with it */
+int XShmQueryExtension(__attribute__((unused)) void* dpy) { return 0; }
+
+/** Make sure XShmCreateImage returns null in case an application doesn't do
+ extension checks first. */
+void* XShmCreateImage(__attribute__((unused)) register void* dpy,
+ __attribute__((unused)) register void* visual,
+ __attribute__((unused)) unsigned int depth,
+ __attribute__((unused)) int format,
+ __attribute__((unused)) char* data,
+ __attribute__((unused)) void* shminfo,
+ __attribute__((unused)) unsigned int width,
+ __attribute__((unused)) unsigned int height) {
+ return 0;
+}
+
+RR_HIDDEN char impose_syscall_delay;
+RR_HIDDEN char impose_spurious_desched;
+
+/**
+ * This is for testing purposes only.
+ */
+void delayed_syscall(struct syscall_info* info) {
+ impose_syscall_delay = 1;
+ /* Make sure 'result' is used so it's not optimized out! */
+ syscall(info->no, info->args[0], info->args[1], info->args[2], info->args[3],
+ info->args[4], info->args[5]);
+ impose_syscall_delay = 0;
+}
+
+/**
+ * This is for testing purposes only.
+ * Note that this must be defined outside of the syscallbuf code.
+ * Otherwise, the signal recording code may expect exit from this function
+ * to trigger the syscallbuf exit breakpoint.
+ */
+void* syscallbuf_ptr(void) {
+ return ((struct preload_thread_locals*)PRELOAD_THREAD_LOCALS_ADDR)->buffer;
+}
+
+/**
+ * This is for testing purposes only.
+ */
+void spurious_desched_syscall(struct syscall_info* info) {
+ impose_spurious_desched = 1;
+ /* Make sure 'result' is used so it's not optimized out! */
+ syscall(info->no, info->args[0], info->args[1], info->args[2], info->args[3],
+ info->args[4], info->args[5]);
+ impose_spurious_desched = 0;
+}
+
+/**
+ * clang's LeakSanitizer has regular threads call sched_yield() in a loop while
+ * a helper thread ptrace-attaches to them. If we let sched_yield() enter the
+ * syscallbuf, the helper thread sees that the regular thread SP register
+ * is pointing to the syscallbuf alt-stack, outside the stack region it
+ * expects, which causes it to freak out.
+ * So, override sched_yield() to perform the syscall in a way that can't
+ * be syscall-buffered.
+ */
+int sched_yield(void) {
+#ifdef __i386__
+  // We have no syscall hook for `int $0x80` followed by `inc %ecx`
+ int trash;
+ asm volatile ("int $0x80; inc %0" : "=c"(trash) : "a"(SYS_sched_yield));
+#elif defined(__x86_64__)
+ // We have no syscall hook for `syscall` followed by `inc %ecx`
+ int trash;
+ asm volatile ("syscall; inc %0" : "=c"(trash) : "a"(SYS_sched_yield));
+#elif defined(__aarch64__)
+ register long x8 __asm__("x8") = SYS_sched_yield;
+  // We explicitly blacklisted the syscall that follows `mov x8, 0xdc`
+ // to avoid patching clone. Abuse that to prevent this from being patched.
+ __asm__ __volatile__("b 1f\n\t"
+ "mov x8, 0xdc\n"
+ "1:\n\t"
+ "svc 0\n"
+ :: "r"(x8) : "x0", "x30"); // x30 = lr
+#else
+#error "Unknown architecture"
+#endif
+ return 0;
+}
+
+#ifndef __aarch64__
+
+/**
+ * glibc geteuid() can be compiled to instructions ending in "syscall; ret"
+ * which sometimes can't be hooked. So override it here with something that
+ * can be hooked.
+ * This is not an issue on aarch64 since we only need to patch a single instruction.
+ */
+uid_t geteuid(void) {
+#ifdef __i386__
+ return syscall(SYS_geteuid32);
+#else
+ return syscall(SYS_geteuid);
+#endif
+}
+
+static void libstdcpp_not_found(void) {
+ const char msg[] = "[rr] Interposition for libstdc++ called but symbol lookups into libstdc++ failed.\n"
+ "Was libstdc++ loaded with RTLD_LOCAL? Try recording with `-v LD_PRELOAD=libstdc++.so.6`.\n"
+ "About to crash! ";
+ syscall(SYS_write, STDERR_FILENO, msg, sizeof(msg));
+}
+
+/**
+ * libstdc++3 uses RDRAND. Bypass that with this incredible hack.
+ */
+void _ZNSt13random_device7_M_initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE(
+ void* this, __attribute__((unused)) void* token) {
+ static void (*assign_string)(void *, char*) = NULL;
+ static void (*random_init)(void *, void*) = NULL;
+ if (!assign_string) {
+ assign_string = (void (*)(void *, char*))dlsym(RTLD_NEXT,
+ "_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6assignEPKc");
+ if (!assign_string) {
+ libstdcpp_not_found();
+ }
+ }
+ assign_string(token, "/dev/urandom");
+ if (!random_init) {
+ random_init = (void (*)(void *, void*))dlsym(RTLD_NEXT, __func__);
+ if (!random_init) {
+ libstdcpp_not_found();
+ }
+ }
+ random_init(this, token);
+}
+
+/**
+ * gcc 4.8.4 in Ubuntu 14.04-32
+ */
+void _ZNSt13random_device7_M_initERKSs(void* this,
+ __attribute__((unused)) void* token) {
+ static void (*assign_string)(void *, char*) = NULL;
+ static void (*random_init)(void *, void*) = NULL;
+ if (!assign_string) {
+ assign_string = (void (*)(void *, char*))dlsym(RTLD_NEXT,
+ "_ZNSs6assignEPKc");
+ if (!assign_string) {
+ libstdcpp_not_found();
+ }
+ }
+ assign_string(token, "/dev/urandom");
+ if (!random_init) {
+ random_init = (void (*)(void *, void*))dlsym(RTLD_NEXT, __func__);
+ if (!random_init) {
+ libstdcpp_not_found();
+ }
+ }
+ random_init(this, token);
+}
+
+#endif
diff --git a/rr/android/x86_64/share/rr/src/preload/preload_interface.h b/rr/android/x86_64/share/rr/src/preload/preload_interface.h
new file mode 100644
index 0000000..5266498
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/preload_interface.h
@@ -0,0 +1,750 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#ifndef RR_PRELOAD_INTERFACE_H_
+#define RR_PRELOAD_INTERFACE_H_
+
+/* Bump this whenever the interface between syscallbuf and rr changes in a way
+ * that would require changes to replay. So be very careful making changes to
+ * this file! Many changes would require a bump in this value, and support
+ * code in rr to handle old protocol versions. And when we bump it we'll need
+ * to figure out a way to test the old protocol versions.
+ * To be clear, changes that only affect recording and not replay, such as
+ * changes to the layout of syscall_patch_hook, do not need to bump this.
+ * Note also that SYSCALLBUF_PROTOCOL_VERSION is stored in the trace header, so
+ * replay always has access to the SYSCALLBUF_PROTOCOL_VERSION used during
+ * recording, even before the preload library is ever loaded.
+ *
+ * Version 0: initial rr 5.0.0 release
+ */
+#define SYSCALLBUF_PROTOCOL_VERSION 0
+
+#if defined(RR_IMPLEMENT_PRELOAD) || defined(RR_IMPLEMENT_AUDIT)
+/* Avoid using <string.h> library functions */
+static inline int streq(const char* s1, const char* s2) {
+ while (1) {
+ if (*s1 != *s2) {
+ return 0;
+ }
+ if (!*s1) {
+ return 1;
+ }
+ ++s1;
+ ++s2;
+ }
+ return 1;
+}
+static inline size_t rrstrlen(const char* s) {
+ size_t ret = 0;
+ while (*s) {
+ ++s;
+ ++ret;
+ }
+ return ret;
+}
+#else
+#include <string.h>
+static inline int streq(const char* s1, const char* s2) {
+ return !strcmp(s1, s2);
+}
+static inline size_t rrstrlen(const char* s) { return strlen(s); }
+#include "../remote_ptr.h"
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+static inline int strprefix(const char* s1, const char* s2) {
+ while (1) {
+ if (!*s1) {
+ return 1;
+ }
+ if (*s1 != *s2) {
+ return 0;
+ }
+ ++s1;
+ ++s2;
+ }
+ return 1;
+}
+
+static inline const char* extract_file_name(const char* s) {
+ const char* ret = s;
+ while (*s) {
+ if (*s == '/') {
+ ret = s + 1;
+ }
+ ++s;
+ }
+ return ret;
+}
+
+/* This header file is included by preload.c and various rr .cc files. It
+ * defines the interface between the preload library and rr. preload.c
+ * #defines RR_IMPLEMENT_PRELOAD to let us handle situations where rr and
+ * preload.c need to see slightly different definitions of the same constructs.
+ *
+ * preload.c compiles this as C code. All rr modules compile this as C++ code.
+ * We do not use 'extern "C"' because we don't actually link between C and C++
+ * and 'extern "C"' is not compatible with our use of templates below.
+ */
+
+#define SYSCALLBUF_LIB_FILENAME_BASE "librrpreload"
+#define SYSCALLBUF_LIB_FILENAME SYSCALLBUF_LIB_FILENAME_BASE ".so"
+#define SYSCALLBUF_LIB_FILENAME_PADDED SYSCALLBUF_LIB_FILENAME_BASE ".so:::"
+#define SYSCALLBUF_LIB_FILENAME_32 SYSCALLBUF_LIB_FILENAME_BASE "_32.so"
+
+#define RTLDAUDIT_LIB_FILENAME_BASE "librraudit"
+#define RTLDAUDIT_LIB_FILENAME RTLDAUDIT_LIB_FILENAME_BASE ".so"
+#define RTLDAUDIT_LIB_FILENAME_PADDED RTLDAUDIT_LIB_FILENAME_BASE ".so:::"
+#define RTLDAUDIT_LIB_FILENAME_32 RTLDAUDIT_LIB_FILENAME_BASE "_32.so"
+
+#define RRPAGE_LIB_FILENAME_BASE "librrpage"
+#define RRPAGE_LIB_FILENAME RRPAGE_LIB_FILENAME_BASE ".so"
+#define RRPAGE_LIB_FILENAME_32 RRPAGE_LIB_FILENAME_BASE "_32.so"
+
+/* Set this env var to enable syscall buffering. */
+#define SYSCALLBUF_ENABLED_ENV_VAR "_RR_USE_SYSCALLBUF"
+
+/* Size of table mapping fd numbers to syscallbuf-disabled flag. */
+#define SYSCALLBUF_FDS_DISABLED_SIZE 16384
+
+#define MPROTECT_RECORD_COUNT 1000
+
+#if defined(__x86_64__) || defined(__i386__)
+#define RR_PAGE_SYSCALL_STUB_SIZE 3
+#define RR_PAGE_SYSCALL_INSTRUCTION_END 2
+#elif defined(__aarch64__)
+#define RR_PAGE_SYSCALL_STUB_SIZE 8
+#define RR_PAGE_SYSCALL_INSTRUCTION_END 4
+#else
+#error "Must be defined for this architecture"
+#endif
+
+/* Must match generate_rr_page.py */
+#define RR_PAGE_ADDR 0x70000000
+#ifdef __aarch64__
+#define PRELOAD_LIBRARY_PAGE_SIZE 65536
+#else
+#define PRELOAD_LIBRARY_PAGE_SIZE 4096
+#endif
+#define RR_PAGE_SYSCALL_ADDR(index) \
+ ((void*)(RR_PAGE_ADDR + RR_PAGE_SYSCALL_STUB_SIZE * (index)))
+#define RR_PAGE_SYSCALL_TRACED RR_PAGE_SYSCALL_ADDR(0)
+#define RR_PAGE_SYSCALL_PRIVILEGED_TRACED RR_PAGE_SYSCALL_ADDR(1)
+#define RR_PAGE_SYSCALL_UNTRACED RR_PAGE_SYSCALL_ADDR(2)
+#define RR_PAGE_SYSCALL_UNTRACED_REPLAY_ONLY RR_PAGE_SYSCALL_ADDR(3)
+#define RR_PAGE_SYSCALL_UNTRACED_RECORDING_ONLY RR_PAGE_SYSCALL_ADDR(4)
+#define RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED RR_PAGE_SYSCALL_ADDR(5)
+#define RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_REPLAY_ONLY RR_PAGE_SYSCALL_ADDR(6)
+#define RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY \
+ RR_PAGE_SYSCALL_ADDR(7)
+#define RR_PAGE_SYSCALL_UNTRACED_REPLAY_ASSIST RR_PAGE_SYSCALL_ADDR(8)
+#define RR_PAGE_IN_REPLAY_FLAG (RR_PAGE_ADDR + RR_PAGE_SYSCALL_STUB_SIZE * 9)
+#define RR_PAGE_BREAKPOINT_VALUE (RR_PAGE_IN_REPLAY_FLAG + 4)
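+
+/* For example, on x86/x86-64 (RR_PAGE_SYSCALL_STUB_SIZE == 3) the entries
+ * above land at 0x70000000, 0x70000003, ..., with RR_PAGE_SYSCALL_ADDR(8) at
+ * 0x70000018, so RR_PAGE_IN_REPLAY_FLAG is 0x7000001b and
+ * RR_PAGE_BREAKPOINT_VALUE is 0x7000001f. (Worked arithmetic from the macros
+ * above; the layout itself must match generate_rr_page.py.) */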
+
+/* Not ABI stable - in record page only */
+#define RR_PAGE_FF_BYTES RR_PAGE_BREAKPOINT_VALUE
+
+/* PRELOAD_THREAD_LOCALS_ADDR should not change.
+ * Tools depend on this address. */
+#define PRELOAD_THREAD_LOCALS_ADDR (RR_PAGE_ADDR + PRELOAD_LIBRARY_PAGE_SIZE)
+#ifdef __aarch64__
+#define PRELOAD_THREAD_LOCAL_SCRATCH2_SIZE (1024 + 8 * 2)
+#else
+#define PRELOAD_THREAD_LOCAL_SCRATCH2_SIZE 0
+#endif
+#define PRELOAD_THREAD_LOCALS_SIZE (144 + PRELOAD_THREAD_LOCAL_SCRATCH2_SIZE)
+
+#include "rrcalls.h"
+
+/* Define macros that let us compile a struct definition either "natively"
+ * (when included by preload.c) or as a template over Arch for use by rr.
+ */
+#if defined(RR_IMPLEMENT_PRELOAD) || defined(RR_IMPLEMENT_AUDIT)
+#define TEMPLATE_ARCH
+#define PTR(T) T*
+#define PTR_ARCH(T) T*
+#define EMBED_STRUCT(T) struct T
+#define VOLATILE volatile
+#define SIGNED_LONG long
+#define UNSIGNED_LONG unsigned long
+#else
+#define TEMPLATE_ARCH template <typename Arch>
+#define PTR(T) typename Arch::template ptr<T>
+#define PTR_ARCH(T) typename Arch::template ptr<T<Arch>>
+#define EMBED_STRUCT(T) T<Arch>
+#define VOLATILE
+#define SIGNED_LONG typename Arch::signed_long
+#define UNSIGNED_LONG typename Arch::unsigned_long
+#endif
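+/* For example, a field declared as `PTR(uint8_t) buffer;` compiles to
+ * `uint8_t* buffer;` when preload.c includes this header, but to
+ * `typename Arch::template ptr<uint8_t> buffer;` when rr instantiates the
+ * containing struct as a template over Arch. */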
+
+#define PATCH_IS_MULTIPLE_INSTRUCTIONS (1 << 0)
+/* The syscall instruction is the last instruction in the patched area
+ * (rather than the first), which requires special handling.
+ */
+#define PATCH_SYSCALL_INSTRUCTION_IS_LAST (1 << 1)
+/* All instructions in the patch are nops and their execution is thus not
+ * observable. This may allow more aggressive handling of interfering branches.
+ */
+#define PATCH_IS_NOP_INSTRUCTIONS (1 << 2)
+
+
+/**
+ * To support syscall buffering, we replace syscall instructions with a "call"
+ * instruction that calls a hook in the preload library to handle the syscall.
+ * Since the call instruction takes more space than the syscall instruction,
+ * the patch replaces one or more instructions after the syscall instruction as
+ * well; those instructions are folded into the tail of the hook function
+ * and we have multiple hook functions, each one corresponding to an
+ * instruction that follows a syscall instruction.
+ * Each instance of this struct describes an instruction that can follow a
+ * syscall and a hook function to patch with.
+ *
+ * This is not (and must not ever be) used during replay so we can change it
+ * without bumping SYSCALLBUF_PROTOCOL_VERSION.
+ */
+struct syscall_patch_hook {
+ uint8_t flags;
+ uint8_t patch_region_length;
+ /* Avoid any padding or anything that would make the layout arch-specific. */
+ uint8_t patch_region_bytes[14];
+ uint64_t hook_address;
+};
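+
+/* Note: with the layout above the struct is 24 bytes on all supported
+ * architectures: flags at offset 0, patch_region_length at 1,
+ * patch_region_bytes at 2..15 and hook_address at 16, with no implicit
+ * padding. */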
+
+/**
+ * We buffer mprotect syscalls. Their effects need to be noted so we can
+ * update AddressSpace's cache of memory layout, which stores prot bits. So,
+ * the preload code builds a list of mprotect_records corresponding to the
+ * mprotect syscalls that have been buffered. This list is read by rr whenever
+ * we flush the syscallbuf, and its effects performed. The actual mprotect
+ * syscalls are performed during recording and replay.
+ *
+ * We simplify things by making this arch-independent.
+ */
+struct mprotect_record {
+ uint64_t start;
+ uint64_t size;
+ int32_t prot;
+ int32_t padding;
+};
+
+/**
+ * Must be arch-independent.
+ * Variables used to communicate between preload and rr.
+ * We package these up into a single struct to simplify the preload/rr
+ * interface.
+ * You can add to the end of this struct without breaking trace compatibility,
+ * but don't move existing fields. Do not write to it during replay except for
+ * the 'in_replay' field. Be careful reading fields during replay as noted
+ * below, since they don't all exist in all trace versions.
+ */
+struct preload_globals {
+ /* RESERVED in current versions of rr.
+ *
+ * QUIRK: With UsesGlobalsInReplayQuirk:
+ * 0 during recording, 1 during replay. Set by rr.
+ * This MUST NOT be used in conditional branches. It should only be used
+ * as the condition for conditional moves so that control flow during replay
+ * does not diverge from control flow during recording.
+ * We also have to be careful that values different between record and replay
+ * don't accidentally leak into other memory locations or registers.
+ * USE WITH CAUTION.
+ */
+ unsigned char reserved_legacy_in_replay;
+ /* 0 during recording and replay, 1 during diversion. Set by rr.
+ */
+ unsigned char in_diversion;
+ /* 1 if chaos mode is enabled. DO NOT READ from rr during replay, because
+ this field is not initialized in old traces. */
+ unsigned char in_chaos;
+ /* The signal to use for desched events */
+ unsigned char desched_sig;
+ /* RESERVED */
+ int reserved;
+ /**
+ * Set by rr.
+ * For each fd, indicate a class that is valid for all fds with the given
+ * number in all tasks that share this address space. For fds >=
+   * SYSCALLBUF_FDS_DISABLED_SIZE - 1, the class is given by
+   * syscallbuf_fd_class[SYSCALLBUF_FDS_DISABLED_SIZE - 1]. See
+   * enum syscallbuf_fd_classes below.
+   */
+ VOLATILE char syscallbuf_fd_class[SYSCALLBUF_FDS_DISABLED_SIZE];
+
+ /* WARNING! SYSCALLBUF_FDS_DISABLED_SIZE can change, so
+ access to the following fields during replay is dangerous. Use
+ PRELOAD_GLOBALS_FIELD_AFTER_SYSCALLBUF_FDS_DISABLED or something
+ like it! */
+  /* mprotect records. Set by preload. Use
+ PRELOAD_GLOBALS_FIELD_AFTER_SYSCALLBUF_FDS_DISABLED to access. */
+ struct mprotect_record mprotect_records[MPROTECT_RECORD_COUNT];
+ /* Random seed that can be used for various purposes. DO NOT READ from rr
+ during replay, because this field does not exist in old traces. */
+ uint64_t random_seed;
+ /* RESERVED in current versions of rr.
+ *
+ * QUIRK: With UsesGlobalsInReplayQuirk:
+ * Indicates the value (in 8-byte increments) at which to raise a SIGSEGV
+ * trap once reached. NOTE: This remains constant during record, and is
+ * used only during replay. The same restrictions as in_replay above apply.
+ *
+ * Use PRELOAD_GLOBALS_FIELD_AFTER_SYSCALLBUF_FDS_DISABLED to access during
+ * replay. */
+ uint64_t reserved_legacy_breakpoint_value;
+ /* Indicates whether or not all tasks in this address space have the same
+ fd table. Set by rr during record (modifications are recorded).
+ Read by the syscallbuf. Not read during replay. */
+ unsigned char fdt_uniform;
+ /* The CPU we're bound to, if any; -1 if not bound. Not read during replay. */
+ int32_t cpu_binding;
+};
+
+/**
+ * Represents syscall params. Makes it simpler to pass them around,
+ * and avoids pushing/popping all the data for calls.
+ */
+TEMPLATE_ARCH
+struct syscall_info {
+ SIGNED_LONG no;
+ SIGNED_LONG args[6];
+};
+
+TEMPLATE_ARCH
+struct robust_list_info {
+ PTR(void) head;
+ uint32_t len;
+};
+
+TEMPLATE_ARCH
+struct rseq_info {
+ PTR(void) rseq;
+ uint32_t len;
+ uint32_t sig;
+};
+
+/**
+ * Can be architecture dependent. The rr process does not manipulate
+ * these except to save and restore the values on task switches so that
+ * the values are always effectively local to the current task. rr also
+ * sets the |syscallbuf_stub_alt_stack| field.
+ * We use this instead of regular libc TLS because sometimes buggy application
+ * code breaks libc TLS for some tasks. With this approach we can be sure
+ * thread-locals are usable for any task in any state.
+ */
+TEMPLATE_ARCH
+struct preload_thread_locals {
+ /* The offset of this field MUST NOT CHANGE, it is part of the preload ABI
+ * rr depends on.
+ * Offset of this field is hardcoded in syscall_hook.S and
+ * assembly_templates.py.
+ * Pointer to alt-stack used by syscallbuf stubs (allocated at the end of
+   * the scratch buffer).
+ */
+ PTR(void) syscallbuf_stub_alt_stack;
+ /* The offset of this field MUST NOT CHANGE, it is part of the preload ABI
+ * tools can depend on.
+ * Where syscall result will be (or during replay, has been) saved.
+ */
+ PTR(int64_t) pending_untraced_syscall_result;
+ /* The offset of this field MUST NOT CHANGE, it is part of the preload ABI
+ * rr depends on.
+ * Scratch space used by stub code.
+ */
+ PTR(void) stub_scratch_1;
+ /* The offset of this field MUST NOT CHANGE, it is part of the preload ABI
+ * rr depends on.
+ */
+ int32_t alt_stack_nesting_level;
+ /* Syscall hook saved flags (bottom 16 bits only) */
+ int32_t saved_flags;
+ /* The offset of this field MUST NOT CHANGE, it is part of the preload ABI
+ * rr depends on. It contains the parameters to the patched syscall, or
+ * zero if we're not processing a buffered syscall. Do not depend on this
+ * existing during replay, some traces with SYSCALLBUF_PROTOCOL_VERSION 0
+ * don't have it.
+ */
+ PTR_ARCH(const struct syscall_info) original_syscall_parameters;
+
+ /* Nonzero when thread-local state like the syscallbuf has been
+ * initialized. */
+ int32_t thread_inited;
+ /* The offset of this field MUST NOT CHANGE, it is part of the ABI tools
+ * depend on. When buffering is enabled, points at the thread's mapped buffer
+ * segment. At the start of the segment is an object of type |struct
+ * syscallbuf_hdr|, so |buffer| is also a pointer to the buffer
+ * header. */
+ PTR(uint8_t) buffer;
+ UNSIGNED_LONG buffer_size;
+ /* This is used to support the buffering of "may-block" system calls.
+ * The problem that needs to be addressed can be introduced with a
+ * simple example; assume that we're buffering the "read" and "write"
+ * syscalls.
+ *
+ * o (Tasks W and R set up a synchronous-IO pipe open between them; W
+ * "owns" the write end of the pipe; R owns the read end; the pipe
+ * buffer is full)
+ * o Task W invokes the write syscall on the pipe
+ * o Since write is a buffered syscall, the seccomp filter traps W
+ * directly to the kernel; there's no trace event for W delivered
+ * to rr.
+ * o The pipe is full, so W is descheduled by the kernel because W
+ * can't make progress.
+ * o rr thinks W is still running and doesn't schedule R.
+ *
+ * At this point, progress in the recorded application can only be
+ * made by scheduling R, but no one tells rr to do that. Oops!
+ *
+   * Thus enter the "desched counter". It's a perf_event for the "sw context
+   * switches" event (which, more precisely, is "sw deschedule"; it
+ * counts schedule-out, not schedule-in). We program the counter to
+ * deliver a signal to this task when there's new counter data
+ * available. And we set up the "sample period", how many descheds
+ * are triggered before the signal is delivered, to be "1". This
+ * means that when the counter is armed, the next desched (i.e., the
+ * next time the desched counter is bumped up) of this task will
+ * deliver the signal to it. And signal delivery always generates a
+ * ptrace trap, so rr can deduce that this task was descheduled and
+ * schedule another.
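+   *
+   * (Roughly, and only as an illustration - the real setup lives in rr's
+   * recorder and preload code: the counter is a perf_event with
+   * type = PERF_TYPE_SOFTWARE, config = PERF_COUNT_SW_CONTEXT_SWITCHES and
+   * sample_period = 1, with its fd configured to deliver desched_sig to this
+   * task when the counter overflows.)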
+ *
+ * The description above is sort of an idealized view; there are
+ * numerous implementation details that are documented in
+ * handle_signal.c, where they're dealt with. */
+ int32_t desched_counter_fd;
+ int32_t cloned_file_data_fd;
+ SIGNED_LONG cloned_file_data_offset;
+ PTR(void) scratch_buf;
+ UNSIGNED_LONG usable_scratch_size;
+
+ PTR(struct msghdr) notify_control_msg;
+
+ /* The offset of this field MUST NOT CHANGE, it is part of the preload ABI
+ * rr depends on, on ARM.
+ */
+ uint8_t stub_scratch_2[PRELOAD_THREAD_LOCAL_SCRATCH2_SIZE];
+
+  /** When the len is non-zero, there has been a buffered set_robust_list
+ * that must be accounted for. Set by preload code only, read by rr
+ * only during recording.
+ */
+ EMBED_STRUCT(robust_list_info) robust_list;
+
+ /** True when either a buffered rseq or unbuffered rseq has been called
+ * for this thread. Set by rr for buffered rseq and preload for unbuffered
+ * rseq. */
+ int32_t rseq_called;
+
+ /** When the len is non-zero, there has been a buffered rseq
+ * that must be accounted for. Set by preload code only, read by rr
+ * only during recording.
+ */
+ EMBED_STRUCT(rseq_info) rseq;
+};
+#if defined(__aarch64__) && (defined(RR_IMPLEMENT_PRELOAD) || \
+ defined(RR_IMPLEMENT_AUDIT))
+// On aarch64, the stub_scratch_2 offset is hardcoded in the syscallbuf code
+_Static_assert(offsetof(struct preload_thread_locals, stub_scratch_2) == 8 * 13,
+ "stub_scratch_2 offset mismatch");
+#endif
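+
+/* For reference: on 64-bit the fixed-offset fields above sit at
+ * syscallbuf_stub_alt_stack=0, pending_untraced_syscall_result=8,
+ * stub_scratch_1=16, alt_stack_nesting_level=24 and saved_flags=28, matching
+ * the `.set` constants in syscall_hook.S (and 0/4/8/12/16 on 32-bit). */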
+
+// The set of classes that can be assigned to each fd in syscallbuf_fd_class.
+enum syscallbuf_fd_classes {
+ // fd is invalid, all syscalls will error (syscallbuf internal use only)
+ FD_CLASS_INVALID = -1,
+ // The fd is allowed to be completely untraced. No notification to the
+ // syscall buf is required.
+ FD_CLASS_UNTRACED = 0x0,
+ // This is the most conservative option. All operations on this fd are
+ // always traced. If there is a conflict between other options, this one
+ // should be chosen.
+ FD_CLASS_TRACED = 0x1,
+  // This fd either refers to a /proc/<pid>/mem or is untraced (if this
+  // address space is shared with another fd table)
+ FD_CLASS_PROC_MEM = 0x2,
+};
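+
+/* Illustrative only (a hypothetical helper, not part of the interface): look
+ * up the class for an fd per the syscallbuf_fd_class comment above. */
+inline static char syscallbuf_fd_class_for(struct preload_globals* g, int fd) {
+  if (fd < 0 || fd >= SYSCALLBUF_FDS_DISABLED_SIZE - 1) {
+    /* All fds at or beyond the last slot share that slot's class. */
+    fd = SYSCALLBUF_FDS_DISABLED_SIZE - 1;
+  }
+  return g->syscallbuf_fd_class[fd];
+}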
+
+#define CURRENT_INIT_PRELOAD_PARAMS_VERSION 2
+
+/**
+ * Packs up the parameters passed to |SYS_rrcall_init_preload|.
+ * We use this struct because it's a little cleaner.
+ * When evolving this struct, add new fields at the end and don't
+ * depend on them during replay.
+ */
+TEMPLATE_ARCH
+struct rrcall_init_preload_params {
+ /* All "In" params. */
+ /* The syscallbuf lib's idea of whether buffering is enabled.
+ * We let the syscallbuf code decide in order to more simply
+ * replay the same decision that was recorded. */
+ int syscallbuf_enabled;
+ int syscall_patch_hook_count;
+ PTR(struct syscall_patch_hook) syscall_patch_hooks;
+ PTR(void) unused;
+ PTR(void) syscallbuf_code_start;
+ PTR(void) syscallbuf_code_end;
+ PTR(void) get_pc_thunks_start;
+ PTR(void) get_pc_thunks_end;
+ PTR(void) syscallbuf_final_exit_instruction;
+ PTR(struct preload_globals) globals;
+ union {
+ struct {
+ /* Address of the first entry of the breakpoint table.
+       * After processing a syscallbuf record (and unlocking the syscallbuf),
+ * we call a function in this table corresponding to the record processed.
+ * rr can set a breakpoint in this table to break on the completion of a
+ * particular syscallbuf record.
+ * This method of setting the breakpoint is deprecated. Instead, use the
+ * interface below. It is retained for compatibility */
+ PTR(void) breakpoint_table;
+ int breakpoint_table_entry_size;
+ };
+ struct {
+ PTR(void) breakpoint_instr_addr;
+      // Set to -1 to indicate non-legacy mode
+ int breakpoint_mode_sentinel;
+ };
+ };
+ PTR(void) syscallbuf_syscall_hook;
+};
+
+/**
+ * Packs up the inout parameters passed to |SYS_rrcall_init_buffers|.
+ * We use this struct because there are too many params to pass
+ * through registers on at least x86. (It's also a little cleaner.)
+ */
+TEMPLATE_ARCH
+struct rrcall_init_buffers_params {
+ /* The fd we're using to track desched events. */
+ int desched_counter_fd;
+ /* "Out" params. */
+ int cloned_file_data_fd;
+ /* Returned pointer to and size of the shared syscallbuf
+ * segment. */
+ PTR(void) syscallbuf_ptr;
+ /* Returned pointer to rr's syscall scratch buffer */
+ PTR(void) scratch_buf;
+ uint32_t syscallbuf_size;
+ uint32_t usable_scratch_size;
+};
+
+/**
+ * The syscall buffer comprises an array of these variable-length
+ * records, along with the header below.
+ */
+struct syscallbuf_record {
+ /* Return value from the syscall. This can be a memory
+ * address, so must be as big as a memory address can be.
+ * We use 64 bits rather than make syscallbuf_record Arch-specific as that
+ * gets cumbersome.
+ */
+ int64_t ret;
+ /* Syscall number.
+ *
+ * NB: the x86 linux ABI has 350 syscalls as of 3.9.6 and
+ * x86-64 defines 313, so this is a pretty safe storage
+ * allocation. It would be an earth-shattering event if the
+ * syscall surface were doubled in a short period of time, and
+ * even then we would have a comfortable cushion. Still,
+ *
+ * TODO: static_assert this can hold largest syscall num */
+ uint16_t syscallno;
+ /* Did the tracee arm/disarm the desched notification for this
+ * syscall? */
+ uint8_t desched : 1;
+  /* Does this record require an assist during replay? */
+ uint8_t replay_assist : 1;
+ uint8_t _flags_padding : 6;
+ uint8_t _padding;
+ /* Size of entire record in bytes: this struct plus extra
+ * recorded data stored inline after the last field, not
+ * including padding.
+ *
+ * TODO: static_assert this can repr >= buffer size */
+ uint32_t size;
+ /* Extra recorded outparam data starts here. */
+ uint8_t extra_data[0];
+};
+
+/**
+ * This struct summarizes the state of the syscall buffer. It happens
+ * to be located at the start of the buffer.
+ */
+struct syscallbuf_hdr {
+ /* The number of valid syscallbuf_record bytes in the buffer,
+ * not counting this header.
+ * Make this volatile so that memory writes aren't reordered around
+ * updates to this field. */
+ volatile uint32_t num_rec_bytes;
+ /* Number of mprotect calls since last syscallbuf flush. The last record in
+ * the list may not have been applied yet.
+ */
+ volatile uint32_t mprotect_record_count;
+ /* Number of records whose syscalls have definitely completed.
+ * May be one less than mprotect_record_count.
+ */
+ volatile uint32_t mprotect_record_count_completed;
+ /* True if the current syscall should not be committed to the
+ * buffer, for whatever reason; likely interrupted by
+ * desched. Set by rr. */
+ volatile uint8_t abort_commit;
+ /* True if, next time we exit the syscall buffer hook, libpreload should
+ * execute SYS_rrcall_notify_syscall_hook_exit to give rr the opportunity to
+ * deliver a signal and/or reset the syscallbuf. */
+ volatile uint8_t notify_on_syscall_hook_exit;
+ /* This tracks whether the buffer is currently in use for a
+ * system call or otherwise unavailable. This is helpful when
+ * a signal handler runs during a wrapped system call; we don't want
+ * it to use the buffer for its system calls. The different reasons why the
+ * buffer could be locked, use different bits of this field and the buffer
+ * may be used only if all are clear. See enum syscallbuf_locked_why for
+ * used bits.
+ */
+ volatile uint8_t locked;
+ /* Nonzero when rr needs to worry about the desched signal.
+ * When it's zero, the desched signal can safely be
+ * discarded. */
+ volatile uint8_t desched_signal_may_be_relevant;
+ /* A copy of the tasks's signal mask. Updated by preload when a buffered
+ * rt_sigprocmask executes.
+ */
+ volatile uint64_t blocked_sigs;
+ /* Incremented by preload every time a buffered rt_sigprocmask executes.
+ * Cleared during syscallbuf reset.
+ */
+ volatile uint32_t blocked_sigs_generation;
+ /* Nonzero when preload is in the process of calling an untraced
+ * sigprocmask; the real sigprocmask may or may not match blocked_sigs.
+ */
+ volatile uint8_t in_sigprocmask_critical_section;
+ /* Nonzero when the syscall was aborted during preparation without doing
+ * anything. This is set when a user seccomp filter forces a SIGSYS. */
+ volatile uint8_t failed_during_preparation;
+
+ struct syscallbuf_record recs[0];
+} __attribute__((__packed__));
+/* TODO: static_assert(sizeof(uint32_t) ==
+ * sizeof(struct syscallbuf_hdr)) */
+
+/**
+ * Each bit of syscallbuf_hdr->locked indicates a reason why the syscallbuf
+ * is locked. These are all the bits that are currently defined.
+ */
+enum syscallbuf_locked_why {
+ /* Used by the tracee, during interruptible syscalls to avoid recursion */
+ SYSCALLBUF_LOCKED_TRACEE = 0x1,
+ /* Used by the tracer to prevent syscall buffering when necessary to preserve
+ semantics (e.g. for ptracees whose syscalls are being observed) */
+ SYSCALLBUF_LOCKED_TRACER = 0x2
+};
+
+/**
+ * Return a pointer to what may be the next syscall record.
+ *
+ * THIS POINTER IS NOT GUARANTEED TO BE VALID!!! Caveat emptor.
+ */
+inline static struct syscallbuf_record* next_record(
+ struct syscallbuf_hdr* hdr) {
+ uintptr_t next = (uintptr_t)hdr->recs + hdr->num_rec_bytes;
+ return (struct syscallbuf_record*)next;
+}
+
+/**
+ * Return the amount of space that a record of |length| will occupy in
+ * the buffer if committed, including padding.
+ */
+inline static long stored_record_size(size_t length) {
+ /* Round up to a whole number of 64-bit words. */
+ return (length + 7) & ~7;
+}
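+
+/* Illustrative only (a hypothetical helper; rr's real consumers live
+ * elsewhere): walk the committed records using the two helpers above. */
+inline static void for_each_syscallbuf_record(
+    struct syscallbuf_hdr* hdr, void (*visit)(struct syscallbuf_record*)) {
+  /* num_rec_bytes does not count the header itself. */
+  uint8_t* p = (uint8_t*)hdr->recs;
+  uint8_t* end = p + hdr->num_rec_bytes;
+  while (p < end) {
+    struct syscallbuf_record* rec = (struct syscallbuf_record*)p;
+    /* rec->syscallno, rec->ret and rec->extra_data are valid here. */
+    visit(rec);
+    p += stored_record_size(rec->size);
+  }
+}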
+
+/**
+ * Return nonzero if an attempted open() of |filename| should be
+ * blocked.
+ *
+ * The background of this hack is that rr doesn't support DRI/DRM
+ * currently, so we use the blunt stick of refusing to open this
+ * interface file as a way of disabling it entirely. (In addition to
+ * tickling xorg.conf, which doesn't entirely do the trick.) It's
+ * known how to fix this particular problem, so let's not let this hack grow
+ * too much by piling on.
+ */
+inline static int is_blacklisted_filename(const char* filename) {
+ const char* f;
+ if (strprefix("/dev/dri/", filename) || streq("/dev/nvidiactl", filename) ||
+ streq("/usr/share/alsa/alsa.conf", filename) ||
+ streq("/dev/nvidia-uvm", filename)) {
+ return 1;
+ }
+ f = extract_file_name(filename);
+ return strprefix("rr-test-blacklist-file_name", f) ||
+ strprefix("pulse-shm-", f);
+}
+
+inline static int is_blacklisted_memfd(const char* name) {
+ return streq("pulseaudio", name);
+}
+
+inline static int is_blacklisted_socket(const char* filename) {
+ /* Blacklist the nscd socket because glibc communicates with the daemon over
+ * shared memory rr can't handle.
+ */
+ return streq("/var/run/nscd/socket", filename);
+}
+
+inline static int is_gcrypt_deny_file(const char* filename) {
+ return streq("/etc/gcrypt/hwf.deny", filename);
+}
+
+inline static int is_terminal(const char* filename) {
+ return strprefix("/dev/tty", filename) || strprefix("/dev/pts", filename);
+}
+
+inline static int is_proc_mem_file(const char* filename) {
+ if (!strprefix("/proc/", filename)) {
+ return 0;
+ }
+ return streq(filename + rrstrlen(filename) - 4, "/mem");
+}
+
+inline static int is_proc_fd_dir(const char* filename) {
+ if (!strprefix("/proc/", filename)) {
+ return 0;
+ }
+
+ int len = rrstrlen(filename);
+ const char* fd_bit = filename + len;
+ if (*fd_bit == '/') {
+ fd_bit--;
+ }
+
+ return strprefix("/fd", fd_bit - 3);
+}
+
+inline static int is_sys_cpu_online_file(const char* filename) {
+ return streq("/sys/devices/system/cpu/online", filename);
+}
+
+inline static int is_proc_stat_file(const char* filename) {
+ return streq("/proc/stat", filename);
+}
+
+inline static int is_rr_page_lib(const char* filename) {
+ return streq(extract_file_name(filename), RRPAGE_LIB_FILENAME) ||
+ streq(extract_file_name(filename), RRPAGE_LIB_FILENAME_32);
+}
+
+/**
+ * Returns nonzero if an attempted open() of |filename| can be syscall-buffered.
+ * When this returns zero, the open must be forwarded to the rr process.
+ * |filename| must be absolute.
+ * This is imperfect because it doesn't handle hard links and files (re)mounted
+ * in different places.
+ */
+inline static int allow_buffered_open(const char* filename) {
+ return filename &&
+ !is_blacklisted_filename(filename) && !is_gcrypt_deny_file(filename) &&
+ !is_terminal(filename) && !is_proc_mem_file(filename) &&
+ !is_proc_fd_dir(filename) && !is_sys_cpu_online_file(filename) &&
+ !is_proc_stat_file(filename) && !is_rr_page_lib(filename);
+}
+
+#endif /* RR_PRELOAD_INTERFACE_H_ */
diff --git a/rr/android/x86_64/share/rr/src/preload/raw_syscall.S b/rr/android/x86_64/share/rr/src/preload/raw_syscall.S
new file mode 100644
index 0000000..4c7b6a3
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/raw_syscall.S
@@ -0,0 +1,176 @@
+#if defined(__i386__)
+ .text
+ .globl _raw_syscall
+ .hidden _raw_syscall
+ .type _raw_syscall, @function
+_raw_syscall: /* syscallno = 4(%esp) */
+ .cfi_startproc
+ pushl %ebx /* syscallno = 8(%esp) */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %ebx, 0
+ pushl %esi /* syscallno = 12(%esp) */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %esi, 0
+ pushl %edi /* syscallno = 16(%esp) */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %edi, 0
+ pushl %ebp /* syscallno = 20(%esp) */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %ebp, 0
+
+ movl 20(%esp), %eax /* %eax = syscallno */
+ movl 24(%esp), %ebx /* %ebx = a0 */
+ movl 28(%esp), %ecx /* %ecx = a1 */
+ movl 32(%esp), %edx /* %edx = a2 */
+ movl 36(%esp), %esi /* %esi = a3 */
+ movl 40(%esp), %edi /* %edi = a4 */
+ movl 44(%esp), %ebp /* %ebp = a5 */
+
+ pushl 56(%esp)
+ .cfi_adjust_cfa_offset 4
+ pushl 56(%esp)
+ .cfi_adjust_cfa_offset 4
+
+ call *56(%esp)
+
+ addl $8,%esp
+ .cfi_adjust_cfa_offset -8
+ popl %ebp
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %ebp
+ popl %edi
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %edi
+ popl %esi
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %esi
+ popl %ebx
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %ebx
+ ret
+ .cfi_endproc
+ .size _raw_syscall, . - _raw_syscall
+
+#elif defined(__x86_64__)
+ .text
+ .globl _raw_syscall
+ .hidden _raw_syscall
+ .type _raw_syscall, @function
+_raw_syscall:
+ .cfi_startproc
+ /* Incoming args are in %rdi, %rsi, %rdx, %rcx, %r8, %r9, and 8(%rsp).
+ Syscall arguments are %rdi, %rsi, %rdx, %r10, %r8, %r9. */
+ movq %rdi, %rax /* syscall number */
+ movq %rsi, %rdi /* first syscall arg */
+ movq %rdx, %rsi /* second syscall arg */
+ movq %rcx, %rdx /* third syscall arg */
+ movq %r8, %r10 /* fourth syscall arg */
+ movq %r9, %r8 /* fifth syscall arg */
+ movq 8(%rsp), %r9 /* sixth syscall arg */
+
+ pushq 32(%rsp)
+ .cfi_adjust_cfa_offset 8
+ pushq 32(%rsp)
+ .cfi_adjust_cfa_offset 8
+
+ /* During a system call the kernel makes some user-space-visible
+ register changes:
+ a) on entry, %r11 is set to %rflags
+ b) %rcx is sometimes set to -1 (perhaps because of something rr does)
+ c) on entry or exit, some flags are sometimes changed
+ Also, during replay we may perform single-stepping which can set
+ TF (trace flag). We need to hide this.
+
+ fixup_syscall_registers is responsible for fixing up registers
+ to hide these effects when we get a ptrace trap from system calls
+ in the kernel: it clears TF from %r11, forces %rcx to -1, and sets
+ flags to fixed values (ZF+PF+IF+reserved, same as for "xor reg,reg").
+ Task::canonicalize_and_set_regs is responsible for fixing up registers
+ when we emulate a system call that was traced during recording (by
+ running to a breakpoint at that system call). It does the above
+ effects after setting %r11 to %rflags.
+
+ For untraced system calls there is no trap to rr during recording or
+ replay, so we must handle these issues here. We do not need
+ untraced system calls to behave exactly the same as traced
+ system calls, since whether a given system call was traced or not is
+ the same whether recording or replaying, but it's a good idea to
+ make them as similar as possible. We do need register values
+ to be perfectly consistent at every instruction in every replay
+ whether or not singlestepping is used (because a ReplayTimeline::mark
+ might be created at any point). During replay, untraced syscall
+ instructions are replaced with "xor %eax,%eax".
+
+ The following code is harmless for traced syscalls (and needs to be,
+ because traced syscalls go through here too).
+ */
+
+ /* Set %r11 and %rcx to the values we expect them to have after the
+ system call.
+ Set flags to ZF+PF+IF+reserved (0x246) first. This simplifies
+ everything.
+ This all has to be independent of TF being set at any point during
+ replay! But the way we're doing it here, it's trivial.
+ */
+ xor %ecx,%ecx
+ /* At this point, flags are 0x246 + possibly TF. */
+ movq $0x246,%r11
+ movq $-1,%rcx
+
+ callq *32(%rsp)
+
+ /* At this point, during recording we don't trust the kernel to have
+ restored flags correctly. It probably doesn't matter, but fix it
+ anyway. */
+ xor %ecx,%ecx
+ /* At this point, the high 32 bits of %rcx are unknown. Fix that by
+ setting to -1 to match traced syscalls. */
+ movq $-1,%rcx
+ /* At this point, %r11 is always 0x246 during replay and during
+ recording (because TF is never set during recording). Nothing to
+ fix in %r11. */
+
+ addq $16,%rsp
+ .cfi_adjust_cfa_offset -16
+ ret
+ .cfi_endproc
+ .size _raw_syscall, . - _raw_syscall
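+
+/* For reference, the C declaration this implements is approximately
+     long _raw_syscall(int syscallno, long a0, long a1, long a2, long a3,
+                       long a4, long a5, void* syscall_instruction,
+                       long stack_param_1, long stack_param_2);
+   (a sketch inferred from the register shuffling above; the authoritative
+   declaration lives in the preload C sources). */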
+
+#elif defined(__aarch64__)
+ .text
+ .globl _raw_syscall
+ .hidden _raw_syscall
+ .type _raw_syscall, @function
+_raw_syscall:
+ .cfi_startproc
+ // The two stack arguments need to be at sp + 8 and sp + 16
+ // but they are currently at sp and sp + 8.
+ // Since sp needs to be 16-byte aligned, we need to load and push them again.
+ str x30, [sp, -32]!
+ .cfi_def_cfa_offset 32
+ .cfi_offset x30, -32
+ ldp x8, x30, [sp, 32]
+ stp x8, x30, [sp, 8]
+ mov x8,x0
+ mov x0,x1
+ mov x1,x2
+ mov x2,x3
+ mov x3,x4
+ mov x4,x5
+ mov x5,x6
+ blr x7
+ ldr x30, [sp], 32
+ .cfi_def_cfa_offset 0
+ .cfi_restore x30
+ ret
+ .cfi_endproc
+ .size _raw_syscall, . - _raw_syscall
+#else
+#error unknown CPU architecture
+#endif /* __i386__/__x86_64__ */
+ .global _syscallbuf_code_end
+ .hidden _syscallbuf_code_end
+_syscallbuf_code_end:
+
+ .section .note.GNU-stack,"",@progbits
+ .previous
diff --git a/rr/android/x86_64/share/rr/src/preload/rr_page.S b/rr/android/x86_64/share/rr/src/preload/rr_page.S
new file mode 100644
index 0000000..e0d253e
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/rr_page.S
@@ -0,0 +1,100 @@
+// # Layout of the librrpage.so file
+//
+// The `rr page` is a special page mapped in low memory (at RR_PAGE_ADDR) that
+// contains syscall instructions at known ip values. These values must be fixed
+// for all processes in a given rr session, since rr cannot adjust the seccomp
+// filter that makes use of these values once it has been set. `librrpage.so`
+// contains this page, and rr will map it in place at process start and inform
+// the process about it by passing it as the address of the vdso. This way
+// the tracee's unwinders, as well as GDB, will load the librrpage.so symbols and
+// unwind info and function correctly if execution is stopped in these locations.
+//
+// The `librrpage.so` file is made up of five pages:
+// 1: The ELF header, dynamic symbol/string table, and eh_frame sections
+// 2: The ELF section, symbol string tables (moved here in a post-processing step)
+// 3: A fake vdso that rr will ask the kernel to treat as the real vdso
+// 4: The rr page to be used during recording
+// 5: The rr page to be used during replay
+//
+// During record, rr will map the first four pages of librrpage.so only.
+// During replay, rr will replace the record page by the replay page.
+// Note, however, that we only have one copy of the eh_frame and symbol
+// information - we expect all offsets and unwind instructions to match between
+// the record and replay versions (anything else would likely result in
+// divergences anyway).
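+//
+// Concretely, for x86-64 (4 KiB pages) the rr_page.ld linker script in this
+// change places .sh_placeholder at 0x6fffe000, the fake vdso at 0x6ffff000,
+// the record page at 0x70000000 and the replay page at 0x70001000.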
+
+#ifdef __i386__
+#define CALL \
+ int $0x80; \
+ ret
+#define NOCALL \
+ xor %eax, %eax; \
+ ret
+#define TRAP \
+ nop; int $3; \
+ ret
+#define PAGE_ALIGN \
+ .align 0x1000
+#define PRELOAD_LIBRARY_PAGE_SIZE 0x1000
+#elif defined(__x86_64__)
+#define CALL \
+ syscall; \
+ ret
+#define NOCALL \
+ xor %eax, %eax; \
+ ret
+#define TRAP \
+ nop; int $3; \
+ ret
+#define PAGE_ALIGN \
+ .align 0x1000
+#define PRELOAD_LIBRARY_PAGE_SIZE 0x1000
+#elif defined(__aarch64__)
+#define CALL \
+ svc #0; \
+ ret
+#define NOCALL \
+ movz x0, #0; \
+ ret
+#define TRAP \
+ brk #0; \
+ ret
+#define PAGE_ALIGN \
+ .align 16
+#define PRELOAD_LIBRARY_PAGE_SIZE 0x10000
+#endif
+
+.section .sh_placeholder, "a"
+PAGE_ALIGN
+.fill PRELOAD_LIBRARY_PAGE_SIZE, 1, 0xff
+
+.section .vdso.text, "a", @progbits
+PAGE_ALIGN
+
+#include "rr_vdso.S"
+
+.section .record.text, "a", @progbits
+PAGE_ALIGN
+
+.global rr_page_start
+rr_page_start:
+
+#define LABEL(name) #name:;
+#define STARTPROC(name) #name:; .cfi_startproc
+#define STARTPROC_GLOBAL(name) .global #name; #name:; .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#include "rr_page_instructions.S"
+
+.section .replay.text, "", @progbits
+PAGE_ALIGN
+replay_page:
+// No CFI instructions or symbols for the replay page - we'll implicitly share
+// those of the record copy
+#undef LABEL
+#undef STARTPROC
+#undef CFI_ENDPROC
+#define LABEL(name)
+#define STARTPROC(name)
+#define CFI_ENDPROC
+#define IS_REPLAY 1
+#include "rr_page_instructions.S"
diff --git a/rr/android/x86_64/share/rr/src/preload/rr_page.ld b/rr/android/x86_64/share/rr/src/preload/rr_page.ld
new file mode 100644
index 0000000..df30100
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/rr_page.ld
@@ -0,0 +1,58 @@
+PHDRS
+{
+ header PT_LOAD FILEHDR PHDRS;
+ text PT_LOAD ;
+ dynamic PT_DYNAMIC ;
+ note PT_NOTE ;
+ eh_frame 0x6474e550 ;
+ replay PT_NULL;
+}
+SECTIONS
+{
+ . = 0x70000000 - 3 * 4096 + SIZEOF_HEADERS;
+ .eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } :header :eh_frame
+ .eh_frame : { KEEP (*(.eh_frame)) *(.eh_frame.*) } :header :eh_frame
+ .note.gnu.build-id : { *(.note.gnu.build-id) } :header :note
+ .note.gnu.property : { *(.note.gnu.property) } :header :note
+ .hash : { *(.hash) } :header
+ .gnu.hash : { *(.gnu.hash) } :header
+ .dynsym : { *(.dynsym) } :header
+ .dynstr : { *(.dynstr) } :header
+ .dynamic : { *(.dynamic) } :header :dynamic
+ .gnu.version : { *(.gnu.version) } :header
+ .gnu.version_d : { *(.gnu.version_d) } :header
+ .gnu.version_r : { *(.gnu.version_r) } :header
+ .got : { *(.got) } :header
+ .got.plt : { *(.got.plt) } :header
+ . = 0x70000000 - 2 * 4096;
+ /* This space in .sh_placeholder is reserved for the section table
+ symtab/strtab, which ordinarily go after the text sections,
+    but we need to have them before the rr page.
+ We move it there in a post-processing step, since linker
+ scripts can't specify these locations for legacy reasons */
+ .sh_placeholder : { *(.sh_placeholder) } :header
+ . = 0x70000000 - 4096;
+ .vdso.text : { *(.vdso.text) } :text
+ . = 0x70000000;
+ .record.text : { *(.record.text) } :text
+ . = 0x70000000 + 4096;
+ .replay.text : { *(.replay.text) } :replay
+ /DISCARD/ : { *(.debug_* ) }
+}
+
+VERSION {
+ LINUX_2.6 {
+ global:
+ gettimeofday;
+ clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_clock_getres;
+ __vdso_time;
+ __vdso_clock_gettime;
+ __vdso_getcpu;
+ __kernel_clock_getres;
+ __kernel_rt_sigreturn;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ };
+}
diff --git a/rr/android/x86_64/share/rr/src/preload/rr_page_instructions.S b/rr/android/x86_64/share/rr/src/preload/rr_page_instructions.S
new file mode 100644
index 0000000..a679187
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/rr_page_instructions.S
@@ -0,0 +1,61 @@
+// See rr_page.S
+
+#ifdef IS_REPLAY
+#define REPLAY_ONLY_CALL CALL
+#define RECORD_ONLY_CALL NOCALL
+#else
+#define REPLAY_ONLY_CALL NOCALL
+#define RECORD_ONLY_CALL CALL
+#endif
+
+STARTPROC(syscall_traced)
+ CALL
+ CFI_ENDPROC
+STARTPROC(syscall_priv_traced)
+ CALL
+ CFI_ENDPROC
+STARTPROC(syscall_untraced)
+ CALL
+ CFI_ENDPROC
+STARTPROC(syscall_untraced_replay_only)
+ REPLAY_ONLY_CALL
+ CFI_ENDPROC
+STARTPROC(syscall_untraced_record_only)
+ RECORD_ONLY_CALL
+ CFI_ENDPROC
+STARTPROC(syscall_priv_untraced)
+ CALL
+ CFI_ENDPROC
+STARTPROC(syscall_priv_untraced_replay_only)
+ REPLAY_ONLY_CALL
+ CFI_ENDPROC
+STARTPROC(syscall_priv_untraced_record_only)
+ RECORD_ONLY_CALL
+ CFI_ENDPROC
+STARTPROC(syscall_untraced_replay_assist)
+#ifdef IS_REPLAY
+ TRAP
+#else
+ CALL
+#endif
+ CFI_ENDPROC
+
+LABEL(in_replay_flag)
+#ifdef IS_REPLAY
+ .byte 0x01
+#else
+ .byte 0x00
+#endif
+.byte 0x00, 0x00, 0x00
+
+// During replay, we put the breakpoint_value here. During record this remains
+// as -1, giving us 8 ff bytes at a well known address during record. These are used
+// during exit.
+LABEL(breakpoint_value)
+LABEL(ff_bytes)
+.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+// ABI stability ends here.
+
+#undef REPLAY_ONLY_CALL
+#undef RECORD_ONLY_CALL
diff --git a/rr/android/x86_64/share/rr/src/preload/rr_vdso.S b/rr/android/x86_64/share/rr/src/preload/rr_vdso.S
new file mode 100644
index 0000000..faa1799
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/rr_vdso.S
@@ -0,0 +1,138 @@
+#ifdef __aarch64__
+#define STARTPROC_GLOBAL(name) .globl #name; .type #name, @function; \
+ #name:; .cfi_startproc
+#else
+#define STARTPROC_GLOBAL(name) .global #name; .type #name, @function; \
+ #name:; .cfi_startproc
+#endif
+#define CFI_ENDPROC .cfi_endproc
+
+// Older libs don't use the __vdso symbols, but try to look for the syscall
+// names directly. Follow the kernel vdso and make them weak aliases
+#define WEAK_ALIAS(sym, target) .weak sym; .set sym, target
+
+#if defined(__x86_64__)
+
+#define SYSCALL(which) \
+ movq $which, %rax; \
+ syscall; \
+ nop; \
+ nop; \
+ nop; \
+ retq
+
+STARTPROC_GLOBAL(__vdso_clock_getres)
+SYSCALL(229)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_getcpu)
+SYSCALL(309)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_time)
+SYSCALL(201)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_clock_gettime)
+SYSCALL(228)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_gettimeofday)
+SYSCALL(96)
+CFI_ENDPROC
+
+WEAK_ALIAS(clock_getres, __vdso_clock_getres)
+WEAK_ALIAS(getcpu, __vdso_getcpu)
+WEAK_ALIAS(time, __vdso_time)
+WEAK_ALIAS(clock_gettime, __vdso_clock_gettime)
+WEAK_ALIAS(gettimeofday,__vdso_gettimeofday)
+
+.symver gettimeofday,gettimeofday@LINUX_2.6
+.symver clock_gettime,clock_gettime@LINUX_2.6
+.symver __vdso_gettimeofday,__vdso_gettimeofday@LINUX_2.6
+.symver __vdso_clock_getres,__vdso_clock_getres@LINUX_2.6
+.symver __vdso_time,__vdso_time@LINUX_2.6
+.symver __vdso_clock_gettime,__vdso_clock_gettime@LINUX_2.6
+.symver __vdso_getcpu,__vdso_getcpu@LINUX_2.6
+
+#elif defined(__i386__)
+
+// __vdso functions use the C calling convention, so
+// we have to set up the syscall parameters here.
+// No x86-32 __vdso functions take more than two parameters.
+#define SYSCALL(which) \
+ push %ebx; \
+ .cfi_adjust_cfa_offset 4; \
+ .cfi_rel_offset %ebx, 0; \
+ mov 8(%esp),%ebx; \
+ mov 12(%esp),%ecx; \
+ mov $which, %eax; \
+ int $0x80; \
+ nop; \
+ nop; \
+ nop; \
+ pop %ebx; \
+ .cfi_adjust_cfa_offset -4; \
+ .cfi_restore %ebx; \
+ ret
+
+// N.B.: We depend on this being the first symbol in the vdso page.
+STARTPROC_GLOBAL(__kernel_vsyscall)
+int $0x80
+nop
+nop
+nop
+ret
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_clock_getres)
+SYSCALL(266)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_time)
+SYSCALL(13)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_clock_gettime)
+SYSCALL(265)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_clock_gettime64)
+SYSCALL(403)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__vdso_gettimeofday)
+SYSCALL(78)
+CFI_ENDPROC
+
+WEAK_ALIAS(clock_getres, __vdso_clock_getres)
+WEAK_ALIAS(time, __vdso_time)
+WEAK_ALIAS(clock_gettime, __vdso_clock_gettime)
+WEAK_ALIAS(clock_gettime64, __vdso_clock_gettime64)
+WEAK_ALIAS(gettimeofday,__vdso_gettimeofday)
+
+.symver __vdso_gettimeofday,__vdso_gettimeofday@LINUX_2.6
+.symver __vdso_clock_getres,__vdso_clock_getres@LINUX_2.6
+.symver __vdso_time,__vdso_time@LINUX_2.6
+.symver __vdso_clock_gettime,__vdso_clock_gettime@LINUX_2.6
+.symver __vdso_getcpu,__vdso_getcpu@LINUX_2.6
+
+#elif defined(__aarch64__)
+
+#define SYSCALL(which) \
+ mov x8, which; \
+ svc 0; \
+ ret
+
+STARTPROC_GLOBAL(__kernel_clock_getres)
+SYSCALL(114)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__kernel_rt_sigreturn)
+SYSCALL(139)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__kernel_gettimeofday)
+SYSCALL(169)
+CFI_ENDPROC
+STARTPROC_GLOBAL(__kernel_clock_gettime)
+SYSCALL(113)
+CFI_ENDPROC
+
+#else
+
+#error "VDSO Hooks not defined for this platform"
+
+#endif
+
+#undef STARTPROC_GLOBAL
+#undef CFI_ENDPROC
diff --git a/rr/android/x86_64/share/rr/src/preload/rrcalls.h b/rr/android/x86_64/share/rr/src/preload/rrcalls.h
new file mode 100644
index 0000000..b448495
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/rrcalls.h
@@ -0,0 +1,103 @@
+/* "Magic" (rr-implemented) syscalls that we use to initialize the
+ * syscallbuf.
+ *
+ * NB: magic syscalls must be positive, because with at least linux
+ * 3.8.0 / eglibc 2.17, rr only gets a trap for the *entry* of invalid
+ * syscalls, not the exit. rr can't handle that yet. */
+/* TODO: static_assert(LAST_SYSCALL < SYS_rrcall_init_buffers) */
+
+#define RR_CALL_BASE 1000
+
+/**
+ * The preload library calls SYS_rrcall_init_preload during its
+ * initialization.
+ */
+#define SYS_rrcall_init_preload RR_CALL_BASE
+/**
+ * The preload library calls SYS_rrcall_init_buffers in each thread that
+ * gets created (including the initial main thread).
+ */
+#define SYS_rrcall_init_buffers (RR_CALL_BASE + 1)
+/**
+ * The preload library calls SYS_rrcall_notify_syscall_hook_exit when
+ * unlocking the syscallbuf and notify_after_syscall_hook_exit has been set.
+ * The word at 4/8(sp) is returned in the syscall result and the word at
+ * 8/16(sp) is stored in original_syscallno.
+ */
+#define SYS_rrcall_notify_syscall_hook_exit (RR_CALL_BASE + 2)
+/**
+ * When the preload library detects that control data has been received in a
+ * syscallbuf'ed recvmsg, it calls this syscall with a pointer to the
+ * 'struct msg' returned.
+ */
+#define SYS_rrcall_notify_control_msg (RR_CALL_BASE + 3)
+/**
+ * When rr replay has restored the auxv vectors for a new process (completing
+ * emulation of exec), it calls this syscall. It takes one parameter, the tid
+ * of the task that it has restored auxv vectors for.
+ */
+#define SYS_rrcall_reload_auxv (RR_CALL_BASE + 4)
+/**
+ * When rr replay has flushed a syscallbuf 'mprotect' record, notify any outer
+ * rr of that flush. The first parameter is the tid of the task, the second
+ * parameter is the address, the third parameter is the length, and the
+ * fourth parameter is the prot.
+ */
+#define SYS_rrcall_mprotect_record (RR_CALL_BASE + 5)
+/**
+ * The audit library calls SYS_rrcall_notify_stap_semaphore_added once a batch
+ * of SystemTap semaphores have been incremented. The first parameter is the
+ * beginning of an address interval containing semaphores (inclusive) and the
+ * second parameter is the end of the address interval (exclusive).
+ *
+ * In practice a particular probe may be listed in an object's notes more than
+ * once, so be prepared to handle overlapping or redundant intervals.
+ */
+#define SYS_rrcall_notify_stap_semaphore_added (RR_CALL_BASE + 6)
+/**
+ * The audit library calls SYS_rrcall_notify_stap_semaphore_removed once a
+ * batch of previously-incremented SystemTap semaphores have been decremented.
+ * The first parameter is the beginning of an address interval containing
+ * semaphores (inclusive) and the second parameter is the end of the address
+ * interval (exclusive).
+ *
+ * In practice a particular probe may be listed in an object's notes more than
+ * once, so be prepared to handle overlapping or redundant intervals.
+ */
+#define SYS_rrcall_notify_stap_semaphore_removed (RR_CALL_BASE + 7)
+/**
+ * This syscall can be used by the application being recorded to check for the
+ * presence of the rr recorder. It is used e.g. to enable nested recording of
+ * rr itself. Use of this syscall should be limited to situations where it is
+ * absolutely necessary to avoid deviation of behavior depending on the
+ * presence or absence of rr.
+ */
+#define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
+/**
+ * Requests that rr detach from this process and re-create outside of its
+ * process tree, such that it may run without seccomp.
+ */
+#define SYS_rrcall_detach_teleport (RR_CALL_BASE + 9)
+/**
+ * Requests that rr reset the time slice signal to the
+ * requested period. Used for testing interaction corner
+ * cases between the time slice signal and other rr behavior.
+ */
+#define SYS_rrcall_arm_time_slice (RR_CALL_BASE + 10)
+/**
+ * Use as
+ *
+ * int rr_freeze_tid(pid_t tid, int freeze) {
+ * return syscall(SYS_rrcall_freeze_tid, tid, freeze, 0, 0, 0, 0); }
+ *
+ * With `freeze=1`, requests that rr's Scheduler not schedule task `tid` again
+ * until unfrozen using `rr_freeze_tid(tid, 0)`. Note that kernel scheduling
+ * behavior is unaffected. Used for testing Scheduler-sensitive scenarios.
+ */
+#define SYS_rrcall_freeze_tid (RR_CALL_BASE + 11)
+/**
+ * Requests a simulated (buffered) RDTSC.
+ * The RDTSC value is returned as a 64-bit value stored in the
+ * memory location given by the first argument. RAX returns 0.
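+ *
+ * Illustrative use only (assumes the libc syscall() wrapper):
+ *   uint64_t tsc = 0;
+ *   syscall(SYS_rrcall_rdtsc, &tsc);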
+ */
+#define SYS_rrcall_rdtsc (RR_CALL_BASE + 12)
\ No newline at end of file
diff --git a/rr/android/x86_64/share/rr/src/preload/syscall_hook.S b/rr/android/x86_64/share/rr/src/preload/syscall_hook.S
new file mode 100644
index 0000000..45b4d98
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/syscall_hook.S
@@ -0,0 +1,1047 @@
+#if defined(__aarch64__)
+ .set preload_thread_locals,0x70010000
+#else
+ .set preload_thread_locals,0x70001000
+#endif
+
+ .global _syscallbuf_code_start
+ .hidden _syscallbuf_code_start
+
+ .global _syscallbuf_final_exit_instruction
+ .hidden _syscallbuf_final_exit_instruction
+ .type _syscallbuf_final_exit_instruction, @function
+
+#define DW_OP_CONST4U(val) \
+ 0x0c, /* DW_OP_const4u */ \
+ /* Individually place bytes */ \
+ (val) & 0xFF, \
+ ((val) & (0xFF << 0x8)) >> 0x8, \
+ ((val) & (0xFF << 0x10)) >> 0x10, \
+ ((val) & (0xFF << 0x18)) >> 0x18
+
+#define DW_OP_CONST8U(val) \
+ 0x0e, /* DW_OP_const8u */ \
+ /* Individually place bytes */ \
+ (val) & 0xFF, \
+ ((val) & (0xFF << 0x8)) >> 0x8, \
+ ((val) & (0xFF << 0x10)) >> 0x10, \
+ ((val) & (0xFF << 0x18)) >> 0x18, \
+ ((val) & (0xFF << 0x20)) >> 0x20, \
+ ((val) & (0xFF << 0x28)) >> 0x28, \
+ ((val) & (0xFF << 0x30)) >> 0x30, \
+ ((val) & (0xFF << 0x38)) >> 0x38
+
+#define REG_AT_ADDR32(reg, addr) \
+ .cfi_escape 0x10, /* DW_CFA_expression */ \
+ reg, \
+ 0x05, /* 5 byte expression follows */ \
+ DW_OP_CONST4U(addr)
+#define REG_AT_ADDR64(reg, addr) \
+ .cfi_escape 0x10, /* DW_CFA_expression */ \
+ reg, \
+ 0x09, /* 9 byte expression follows */ \
+ DW_OP_CONST8U(addr)
+
+// 10 bytes of LEB128 are enough to encode a 64-bit integer, and we shouldn't
+// really need anything longer than that.
+#define COUNT_LEB128(lebs...) \
+ _COUNT_LEB128(lebs, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
+#define _COUNT_LEB128(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
+
+#define REG_AT_REG_OFFSET(reg, base, lebs...) \
+ .cfi_escape 0x10, /* DW_CFA_expression */ \
+ reg, \
+ (COUNT_LEB128(lebs) + 1), /* 1 byte + LEB128 bytes */ \
+ (0x70 + base), /* DW_OP_breg0 + base */ \
+ lebs
+
+#if defined(__i386__)
+.text
+.set syscallbuf_stub_alt_stack, preload_thread_locals
+.set stub_scratch_1, preload_thread_locals + 8
+.set alt_stack_nesting_level, preload_thread_locals + 12
+.set saved_flags, preload_thread_locals + 16
+
+.p2align 4
+
+_syscallbuf_code_start:
+/* Insert a NOP here so we have no symbol clashes. Otherwise
+ in some configurations (gdb 7.7.1, Ubuntu 14.04) gdb sometimes gets confused.
+ */
+ nop
+
+
+_syscallbuf_final_exit_instruction:
+ jmp *(stub_scratch_1)
+
+_syscall_hook_trampoline:
+ .cfi_startproc
+ /* Build a |struct syscall_info| by pushing all the syscall
+ * args and the number onto the stack. */
+ /* struct syscall_info info; */
+ pushl %ebp /* info.args[5] = $ebp; */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %ebp, 0
+ pushl %edi /* info.args[4] = $edi; */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %edi, 0
+ pushl %esi /* info.args[3] = $esi; */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %esi, 0
+ pushl %edx /* info.args[2] = $edx; */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %edx, 0
+ pushl %ecx /* info.args[1] = $ecx; */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %ecx, 0
+ pushl %ebx /* info.args[0] = $ebx; */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset %ebx, 0
+ pushl %eax /* info.no = $eax; */
+ .cfi_adjust_cfa_offset 4
+
+ /* $esp points at &info. Push that pointer on the stack as
+ * our arg for vsyscall_hook().
+ * Use %ebp as our temporary CFA register here. Don't use %ebx or
+ * any other GP register, since x86-64 gdb 7.7 (at least) treats all GP
+ * regs other than %esp/%ebp as *signed* and sign-extends their values.
+ * Having some CFA values sign-extended and others not breaks gdb
+ * stack walking.
+ */
+ movl %esp, %ebp
+ .cfi_def_cfa_register %ebp
+
+ /* Align stack to 16 bytes */
+ and $0xfffffff0,%esp
+
+ /* Save XMM registers */
+ sub $0x80,%esp
+ movdqa %xmm0,(%esp)
+ movdqa %xmm1,0x10(%esp)
+ movdqa %xmm2,0x20(%esp)
+ movdqa %xmm3,0x30(%esp)
+ movdqa %xmm4,0x40(%esp)
+ movdqa %xmm5,0x50(%esp)
+ movdqa %xmm6,0x60(%esp)
+ movdqa %xmm7,0x70(%esp)
+
+ sub $12,%esp
+ pushl %ebp
+
+ call syscall_hook
+ /* $eax = vsyscall_hook(&info); */
+
+ movdqa 0x10(%esp),%xmm0
+ movdqa 0x20(%esp),%xmm1
+ movdqa 0x30(%esp),%xmm2
+ movdqa 0x40(%esp),%xmm3
+ movdqa 0x50(%esp),%xmm4
+ movdqa 0x60(%esp),%xmm5
+ movdqa 0x70(%esp),%xmm6
+ movdqa 0x80(%esp),%xmm7
+
+ mov $saved_flags, %esp
+ popfw
+ /* From here on, non-application flag changes are not allowed */
+
+ /* Restore ESP */
+ mov %ebp, %esp
+ .cfi_def_cfa_register %esp
+
+ /* $eax is now the syscall return value. Erase |info.no| from the
+ * stack so that we can restore the other registers we saved. */
+ lea 4(%esp),%esp
+ .cfi_adjust_cfa_offset -4
+
+ /* Contract of __kernel_vsyscall() and real syscalls is that even
+ * callee-save registers aren't touched, so we restore everything
+ * here. */
+ popl %ebx
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %ebx
+ popl %ecx
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %ecx
+ popl %edx
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %edx
+ popl %esi
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %esi
+ popl %edi
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %edi
+ mov (alt_stack_nesting_level),%ebp
+ lea -1(%ebp),%ebp
+ mov %ebp,(alt_stack_nesting_level)
+ popl %ebp
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore %ebp
+
+ ret
+ .cfi_endproc
+ .size _syscall_hook_trampoline, .-_syscall_hook_trampoline
+
+#define SYSCALLHOOK_START(name) \
+ .global name; \
+ .hidden name; \
+ .type name, @function; \
+name: \
+ .cfi_startproc; \
+ .cfi_def_cfa_offset 0; \
+ .cfi_offset %eip, 0; \
+ .cfi_offset %esp, 4
+
+#define SYSCALLHOOK_END(name) \
+ pop (stub_scratch_1); \
+ .cfi_adjust_cfa_offset -4; \
+ pop %esp; \
+ .cfi_same_value %esp; \
+ REG_AT_ADDR32(0x08 /* %eip */, stub_scratch_1); \
+ jmp _syscallbuf_final_exit_instruction; \
+ .cfi_endproc; \
+ .size name, .-name
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_3d_01_f0_ff_ff)
+ call _syscall_hook_trampoline
+ cmpl $0xfffff001,%eax
+SYSCALLHOOK_END(_syscall_hook_trampoline_3d_01_f0_ff_ff)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_90_90_90)
+ call _syscall_hook_trampoline
+SYSCALLHOOK_END(_syscall_hook_trampoline_90_90_90)
+
+/* Declare gcc get_pc thunks here so they're in a known region of code */
+
+ .global _get_pc_thunks_start
+ .hidden _get_pc_thunks_start
+_get_pc_thunks_start:
+
+#define THUNK(name, reg) \
+ .section .text.__x86.get_pc_thunk.name,"axG",@progbits,__x86.get_pc_thunk.name,comdat; \
+ .global __x86.get_pc_thunk.name; \
+ .hidden __x86.get_pc_thunk.name; \
+ .type __x86.get_pc_thunk.name, @function; \
+__x86.get_pc_thunk.name: \
+ .cfi_startproc; \
+ movl (%esp), %reg; \
+ ret; \
+ .cfi_endproc
+
+THUNK(ax, eax)
+THUNK(bx, ebx)
+THUNK(cx, ecx)
+THUNK(dx, edx)
+THUNK(si, esi)
+THUNK(di, edi)
+THUNK(bp, ebp)
+
+ .global _get_pc_thunks_end
+ .hidden _get_pc_thunks_end
+_get_pc_thunks_end:
+
+#elif defined(__x86_64__)
+ .text
+
+ .set stub_scratch_1, preload_thread_locals + 16
+ .set alt_stack_nesting_level, preload_thread_locals + 24
+ .set saved_flags, preload_thread_locals + 28
+
+ .p2align 4
+_syscallbuf_code_start:
+
+_syscall_hook_trampoline:
+ .cfi_startproc
+ /* Save RBX because we need a callee-saves register */
+ pushq %rbx
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rbx, 0
+
+ /* Build a |struct syscall_info| on the stack by pushing the arguments
+ and syscall number. */
+ pushq %r9
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r9, 0
+ pushq %r8
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r8, 0
+ pushq %r10
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r10, 0
+ pushq %rdx
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rdx, 0
+ pushq %rsi
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rsi, 0
+ pushq %rdi
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rdi, 0
+ pushq %rax
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rax, 0
+
+ /* Align stack */
+ mov %rsp,%rbx
+ .cfi_def_cfa_register %rbx
+ and $0xfffffffffffffff0,%rsp
+
+ /* Save XMM registers */
+ sub $0x80,%rsp
+ movdqa %xmm0,(%rsp)
+ movdqa %xmm1,0x10(%rsp)
+ movdqa %xmm2,0x20(%rsp)
+ movdqa %xmm3,0x30(%rsp)
+ movdqa %xmm4,0x40(%rsp)
+ movdqa %xmm5,0x50(%rsp)
+ movdqa %xmm6,0x60(%rsp)
+ movdqa %xmm7,0x70(%rsp)
+
+ /* Save registers that aren't callee-saves preserved by syscall_hook,
+ and that we aren't already restoring from the syscall args */
+ push %rcx
+ push %r11
+ /* stack is 16-byte aligned again for entry to C */
+
+ /* Call our hook. */
+ mov %rbx,%rdi
+ callq syscall_hook
+
+ pop %r11
+ pop %rcx
+
+ /* Restore XMM registers */
+ movdqa (%rsp),%xmm0
+ movdqa 0x10(%rsp),%xmm1
+ movdqa 0x20(%rsp),%xmm2
+ movdqa 0x30(%rsp),%xmm3
+ movdqa 0x40(%rsp),%xmm4
+ movdqa 0x50(%rsp),%xmm5
+ movdqa 0x60(%rsp),%xmm6
+ movdqa 0x70(%rsp),%xmm7
+
+ mov $saved_flags, %rsp
+ popfw
+ /* From here on, non-application flag changes are not allowed */
+
+ mov %rbx,%rsp
+ .cfi_def_cfa_register %rsp
+
+  /* On entry, we pushed %rax, the syscall number. But we don't
+     want to |pop %rax|, as that would overwrite our return value. Skip over it. */
+ pop %rdi
+ .cfi_adjust_cfa_offset -8
+
+ /* We don't really *need* to restore these, since the kernel could have
+ trashed them all anyway. But it seems reasonable to do so. */
+ pop %rdi
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %rdi
+ pop %rsi
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %rsi
+ pop %rdx
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %rdx
+ pop %r10
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %r10
+ pop %r8
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %r8
+ mov (alt_stack_nesting_level),%r9d
+ lea -1(%r9),%r9
+ mov %r9d,(alt_stack_nesting_level)
+ pop %r9
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %r9
+
+ pop %rbx
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore %rbx
+
+ /* ...and we're done. */
+ ret
+ .cfi_endproc
+ .size _syscall_hook_trampoline, . - _syscall_hook_trampoline
+
+_syscallbuf_final_exit_instruction:
+ jmp *(stub_scratch_1)
+
+/**
+ * Ok, bear with me here. When gdb sees our stack switch, it gets suspicious and if
+ * we're unlucky may decide that our unwind info is broken and abort the unwind. However,
+ * it decides to allow the unwind to proceed anyway if we happen to be in a function called
+ * __morestack (because that's what gcc calls its stack switching mechanism). Now,
+ * GDB does the stack switching comparison based on the CFA. What we thus need to do is keep the
+ * CFA pointing to the old stack until we get to a function named __morestack. We set the CFA for every
+ * syscallhook to what it will be at the end of the function (which, well, is an ok definition
+ * of the CFA). Then, we insert a __morestack function (still with the old CFA) that just jumps
+ * through to the trampoline. This way, we can force gdb's stack switch detection to think the
+ * stack switch happens between the hook and the common trampoline code (and add a __morestack
+ * local symbol to the trampoline code to avoid GDB messing with our stack trace).
+ */
+#define CFA_AT_RSP_OFFSET(offset) \
+.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */\
+ 0x03, /* 3 bytes follow */\
+ 0x77, offset, /* DW_OP_breg7, offset */\
+ 0x06; /* DW_OP_deref */
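+/* i.e. CFA = *(%rsp + offset): recover the pre-switch stack pointer that the
+   stub saved on the scratch stack (see the SYSCALLHOOK_START comment below). */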
+
+#define RSP_IS_CFA \
+.cfi_escape 0x16, /* DW_CFA_val_expression */\
+ 0x7, /* %rsp */\
+ 0; /* 0 bytes follow */
+
+#define RSP_IS_CFA_PLUS_OFFSET(offset) \
+.cfi_escape 0x16, /* DW_CFA_val_expression */\
+ 0x7, /* %rsp */\
+ 2, /* 2 bytes follow */\
+ 0x23, /* DW_OP_plus_uconst */\
+ offset;
+
+#define RSP_IS_RSP_PLUS_OFFSET(offset) \
+.cfi_escape 0x16, /* DW_CFA_val_expression */\
+ 0x07, /* %rsp */\
+ 0x02, /* 2 bytes follow */\
+ 0x77, offset; /* DW_OP_breg7, offset */
+
+#define RIP_IS_DEREF_RSP(offset) REG_AT_REG_OFFSET(0x10 /* %rip */, 7, offset)
+
+/**
+ * On syscallhook entry, the stack has been switched to the end of per-task
+ * scratch space, then the old RSP and the return address have been pushed.
+ */
+#define SYSCALLHOOK_START(name) \
+ .global name; \
+ .hidden name; \
+ .type name, @function; \
+name: \
+ .cfi_startproc; \
+ CFA_AT_RSP_OFFSET(8) \
+ RSP_IS_CFA \
+ RIP_IS_DEREF_RSP(0)
+
+#define SYSCALLHOOK_END(name) \
+ pop (stub_scratch_1); \
+ CFA_AT_RSP_OFFSET(0) \
+ REG_AT_ADDR32(0x10 /* %rip */, stub_scratch_1); \
+ pop %rsp; \
+ .cfi_def_cfa %rsp, 0; \
+ jmp _syscallbuf_final_exit_instruction; \
+ .cfi_endproc; \
+ .size name, .-name
+
+/* See note above on what __morestack is for */
+.global __morestack
+.hidden __morestack
+.type __morestack, @function
+__morestack:
+.cfi_startproc
+CFA_AT_RSP_OFFSET(16)
+RSP_IS_RSP_PLUS_OFFSET(8)
+RIP_IS_DEREF_RSP(0)
+callq _syscall_hook_trampoline
+/* GDB likes to override valid CFI with its own heuristics if the current
+ instruction is a retq. This becomes a problem here, because GDB will set
+ a breakpoint at the next instruction after the callq when continuing out of
+ `_syscall_hook_trampoline`. This `nop` makes said instruction not a retq,
+ thus preventing that GDB heuristic from kicking in and letting GDB realize
+ that it did in fact manage to step out of the `_syscall_hook_trampoline`
+ frame. */
+nop
+retq
+.cfi_endproc
+.size __morestack, .-__morestack
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_3d_01_f0_ff_ff)
+ callq __morestack
+ cmpq $0xfffffffffffff001,%rax
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_3d_01_f0_ff_ff)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_3d_00_f0_ff_ff)
+ callq __morestack
+ cmpq $0xfffffffffffff000,%rax
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_3d_00_f0_ff_ff)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_3d_00_f0_ff_ff)
+ callq __morestack
+ cmpl $0xfffff000,%eax
+SYSCALLHOOK_END(_syscall_hook_trampoline_3d_00_f0_ff_ff)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_89_45_f8)
+ callq __morestack
+ mov %rax,-8(%rbp)
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_89_45_f8)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_89_c3)
+ callq __morestack
+ mov %rax,%rbx
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_89_c3)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_8b_3c_24)
+ callq __morestack
+ /* The original instruction after the syscall is movq (%rsp),%rdi. */
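+ /* Here 8(%rsp) holds the original (pre-switch) %rsp saved by the stub, so
+    we load it and then dereference it to emulate that instruction against
+    the original stack. */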
+ movq 8(%rsp),%rdi
+ movq (%rdi),%rdi
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_8b_3c_24)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_5a_5e_c3)
+ .cfi_offset %rip, 16
+ RSP_IS_CFA_PLUS_OFFSET(24)
+ callq __morestack
+ /* The original instructions after the syscall are
+ pop %rdx; pop %rsi; retq. */
+ /* We're not returning to the dynamically generated stub, so
+ we need to fix the stack pointer ourselves. */
+ pop %rdx
+ CFA_AT_RSP_OFFSET(0)
+ pop %rsp
+ .cfi_def_cfa %rsp, 0;
+ pop %rdx
+ .cfi_adjust_cfa_offset -8
+ pop %rsi
+ .cfi_adjust_cfa_offset -8
+ pop (stub_scratch_1)
+ .cfi_adjust_cfa_offset -8
+ jmp _syscallbuf_final_exit_instruction
+
+ .cfi_endproc
+ .size _syscall_hook_trampoline_5a_5e_c3, .-_syscall_hook_trampoline_5a_5e_c3
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_89_c2_f7_da)
+ call __morestack
+ mov %eax,%edx
+ neg %edx
+SYSCALLHOOK_END(_syscall_hook_trampoline_89_c2_f7_da)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_90_90_90)
+ call __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_90_90_90)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_ba_01_00_00_00)
+ call __morestack
+ mov $1,%edx
+SYSCALLHOOK_END(_syscall_hook_trampoline_ba_01_00_00_00)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_89_c1_31_d2)
+ call __morestack
+ mov %eax,%ecx
+ xor %edx,%edx
+SYSCALLHOOK_END(_syscall_hook_trampoline_89_c1_31_d2)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_c3_nop)
+ .cfi_offset %rip, 16
+ RSP_IS_CFA_PLUS_OFFSET(24)
+ callq __morestack
+ /* The original instructions after the syscall are
+ retq; nopl 0x0(%rax,%rax,1) */
+ /* We're not returning to the dynamically generated stub, so
+ we need to fix the stack pointer ourselves. */
+ pop %rdx
+ CFA_AT_RSP_OFFSET(0)
+ pop %rsp
+ .cfi_def_cfa %rsp, 0;
+ pop (stub_scratch_1)
+ .cfi_adjust_cfa_offset -8
+ jmp _syscallbuf_final_exit_instruction
+
+ .cfi_endproc
+ .size _syscall_hook_trampoline_c3_nop, .-_syscall_hook_trampoline_c3_nop
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_40_80_f6_81)
+ xor $0x81, %sil
+ call __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_40_80_f6_81)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_8d_b3_f0_08_00_00)
+ lea 0x8f0(%rbx),%rsi
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_8d_b3_f0_08_00_00)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_49_89_ca)
+ mov %rcx, %r10
+ call __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_49_89_ca)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_89_c1)
+ callq __morestack
+ mov %rax, %rcx
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_89_c1)
+
+#define MOV_RDX_VARIANTS \
+ MOV_RDX_TO_REG(48, d0) \
+ MOV_RDX_TO_REG(48, d1) \
+ MOV_RDX_TO_REG(48, d2) \
+ MOV_RDX_TO_REG(48, d3) \
+ MOV_RDX_TO_REG(48, d4) \
+ MOV_RDX_TO_REG(48, d5) \
+ MOV_RDX_TO_REG(48, d6) \
+ MOV_RDX_TO_REG(48, d7) \
+ MOV_RDX_TO_REG(49, d0) \
+ MOV_RDX_TO_REG(49, d1) \
+ MOV_RDX_TO_REG(49, d2) \
+ MOV_RDX_TO_REG(49, d3) \
+ MOV_RDX_TO_REG(49, d4) \
+ MOV_RDX_TO_REG(49, d5) \
+ MOV_RDX_TO_REG(49, d6) \
+ MOV_RDX_TO_REG(49, d7)
+
+#define MOV_RDX_TO_REG(rex, op) \
+SYSCALLHOOK_START(_syscall_hook_trampoline_##rex##_89_##op); \
+ callq __morestack; \
+ .byte 0x##rex, 0x89, 0x##op; \
+SYSCALLHOOK_END(_syscall_hook_trampoline_##rex##_89_##op);
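+/* The .byte sequence above re-emits the displaced 'mov %rdx,<reg>'
+   instruction: REX prefix 0x48 or 0x49, opcode 0x89, and a ModRM byte in the
+   0xd0-0xd7 range selecting the destination register. */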
+
+ MOV_RDX_VARIANTS
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_c1_e2_20)
+ callq __morestack
+ shl $32, %rdx
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_c1_e2_20)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_49_8b_44_24_28)
+ callq __morestack
+ mov 0x28(%r12),%rax
+SYSCALLHOOK_END(_syscall_hook_trampoline_49_8b_44_24_28)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_4c_89_f7)
+ mov %r14, %rdi
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_4c_89_f7)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_4c_89_ff)
+ mov %r15, %rdi
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_4c_89_ff)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_49_c7_c1_ff_ff_ff_ff)
+ mov $0xffffffffffffffff,%r9
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_49_c7_c1_ff_ff_ff_ff)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_b8_0e_00_00_00)
+ mov $0x0e,%eax
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_b8_0e_00_00_00)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_b8_11_01_00_00)
+ mov $0x111,%eax
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_b8_11_01_00_00)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_b8_ca_00_00_00)
+ mov $0xca,%eax
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_b8_ca_00_00_00)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_be_18_00_00_00)
+ mov $0x18,%esi
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_be_18_00_00_00)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_89_e5)
+ /* Previous RSP is stored on the stack above our return address */
+ mov 8(%rsp),%rbp
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_89_e5)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_48_89_fb)
+ mov %rdi,%rbx
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_48_89_fb)
+
+SYSCALLHOOK_START(_syscall_hook_trampoline_nops)
+ callq __morestack
+SYSCALLHOOK_END(_syscall_hook_trampoline_nops)
+
+#elif defined(__aarch64__)
+ .text
+
+ .set syscallbuf_stub_alt_stack, preload_thread_locals
+ .set stub_scratch_1, preload_thread_locals + 16
+ .set alt_stack_nesting_level, preload_thread_locals + 24
+ .set stub_scratch_2, preload_thread_locals + 8 * 13
+
+// Store a pair of x registers to the stack at the given offset from sp.
+// Assumes that the CFA register is sp.
+#define STPX_STACK(r1, r2, offset) \
+ stp x##r1, x##r2, [sp, offset]; \
+ .cfi_rel_offset x##r1, offset; \
+ .cfi_rel_offset x##r2, offset + 8
+
+#define LDPX_STACK(r1, r2, offset) \
+ ldp x##r1, x##r2, [sp, offset]; \
+ .cfi_same_value x##r1; \
+ .cfi_same_value x##r2
+
+// Store a pair of q registers to the stack at the given offset from sp.
+// Assumes that the CFA register is sp.
+#define STPQ_STACK(r1, r2, offset) \
+ stp q##r1, q##r2, [sp, offset]; \
+ .cfi_rel_offset q##r1, offset; \
+ .cfi_rel_offset q##r2, offset + 16
+
+#define LDPQ_STACK(r1, r2, offset) \
+ ldp q##r1, q##r2, [sp, offset]; \
+ .cfi_same_value q##r1; \
+ .cfi_same_value q##r2
+
+// Mark all temporary registers as same_value except x8 and x15
+#define SAME_VALUE_X_NO8_NO15 \
+ .cfi_same_value x0; \
+ .cfi_same_value x1; \
+ .cfi_same_value x2; \
+ .cfi_same_value x3; \
+ .cfi_same_value x4; \
+ .cfi_same_value x5; \
+ .cfi_same_value x6; \
+ .cfi_same_value x7; \
+ .cfi_same_value x9; \
+ .cfi_same_value x10; \
+ .cfi_same_value x11; \
+ .cfi_same_value x12; \
+ .cfi_same_value x13; \
+ .cfi_same_value x14; \
+ .cfi_same_value x16; \
+ .cfi_same_value x17; \
+ .cfi_same_value x18
+
+#define SAME_VALUE_X \
+ SAME_VALUE_X_NO8_NO15; \
+ .cfi_same_value x8; \
+ .cfi_same_value x15
+
+#define SAME_VALUE_ALL_Q \
+ .cfi_same_value q0; \
+ .cfi_same_value q1; \
+ .cfi_same_value q2; \
+ .cfi_same_value q3; \
+ .cfi_same_value q4; \
+ .cfi_same_value q5; \
+ .cfi_same_value q6; \
+ .cfi_same_value q7; \
+ .cfi_same_value q8; \
+ .cfi_same_value q9; \
+ .cfi_same_value q10; \
+ .cfi_same_value q11; \
+ .cfi_same_value q12; \
+ .cfi_same_value q13; \
+ .cfi_same_value q14; \
+ .cfi_same_value q15; \
+ .cfi_same_value q16; \
+ .cfi_same_value q17; \
+ .cfi_same_value q18; \
+ .cfi_same_value q19; \
+ .cfi_same_value q20; \
+ .cfi_same_value q21; \
+ .cfi_same_value q22; \
+ .cfi_same_value q23; \
+ .cfi_same_value q24; \
+ .cfi_same_value q25; \
+ .cfi_same_value q26; \
+ .cfi_same_value q27; \
+ .cfi_same_value q28; \
+ .cfi_same_value q29; \
+ .cfi_same_value q30; \
+ .cfi_same_value q31
+
+ .p2align 4
+_syscallbuf_code_start:
+
+_syscall_hook_trampoline:
+ // stack frame:
+ // 208-688: q2 - q31
+ // 128-200: x10 - x18
+ // 112-128: x7, x9
+ // 104-112: x6
+ // 48-104: syscall_info
+ // 32-48: x29, x30
+ // 0-32: q0, q1
+ .cfi_startproc
+ // GAS correctly puts these in the CIE as long as they
+ // appear right after .cfi_startproc
+ SAME_VALUE_X
+ SAME_VALUE_ALL_Q
+ // Store the vector registers at the bottom so that we can take advantage of
+ // the larger pre-offset that can be encoded in the instruction
+ // to adjust the stack pointer.
+ stp q0, q1, [sp, -688]!
+ .cfi_def_cfa_offset 688
+ .cfi_rel_offset q0, 0
+ .cfi_rel_offset q1, 0 + 16
+ STPX_STACK(29, 30, 32)
+ /* Build a |struct syscall_info| on the stack by pushing the arguments
+ and syscall number. */
+ STPX_STACK(8, 0, 48)
+ add x0, sp, 48 // x0 saved, store new argument for syscall_hook in x0.
+ STPX_STACK(1, 2, 64)
+ STPX_STACK(3, 4, 80)
+ STPX_STACK(5, 6, 96)
+ STPX_STACK(7, 9, 112)
+ STPX_STACK(10, 11, 128)
+ STPX_STACK(12, 13, 144)
+ STPX_STACK(14, 15, 160)
+ STPX_STACK(16, 17, 176)
+ str x18, [sp, 192]
+ .cfi_rel_offset x18, 192
+ STPQ_STACK(2, 3, 208)
+ STPQ_STACK(4, 5, 240)
+ STPQ_STACK(6, 7, 272)
+ // Function calls only maintain the bottom half of v8-v15,
+ // whereas syscalls maintain all the v registers,
+ // so we actually need to save and restore v8-v15 as well.
+ // (In principle we could save only the upper half, but
+ // that's too much effort, especially for the unwind info.)
+ STPQ_STACK(8, 9, 304)
+ STPQ_STACK(10, 11, 336)
+ STPQ_STACK(12, 13, 368)
+ STPQ_STACK(14, 15, 400)
+ STPQ_STACK(16, 17, 432)
+ STPQ_STACK(18, 19, 464)
+ STPQ_STACK(20, 21, 496)
+ STPQ_STACK(22, 23, 528)
+ STPQ_STACK(24, 25, 560)
+ STPQ_STACK(26, 27, 592)
+ STPQ_STACK(28, 29, 624)
+ STPQ_STACK(30, 31, 656)
+
+ bl syscall_hook
+
+ movz x29, #:abs_g1:alt_stack_nesting_level // assume 32bit address
+ movk x29, #:abs_g0_nc:alt_stack_nesting_level
+ ldr w30, [x29]
+ sub w30, w30, 1
+ str w30, [x29]
+
+ ldp x29, x30, [sp, 32]
+ .cfi_same_value x29
+ // x30 should not use same_value since its value is changed
+ // by the function call instruction
+ .cfi_restore x30
+ ldr x8, [sp, 48]
+ .cfi_same_value x8
+ LDPX_STACK(1, 2, 64)
+ LDPX_STACK(3, 4, 80)
+ LDPX_STACK(5, 6, 96)
+ LDPX_STACK(7, 9, 112)
+ LDPX_STACK(10, 11, 128)
+ LDPX_STACK(14, 15, 160)
+ LDPX_STACK(16, 17, 176)
+ ldr x18, [sp, 192]
+ .cfi_same_value x18
+
+ LDPQ_STACK(2, 3, 208)
+ LDPQ_STACK(4, 5, 240)
+ LDPQ_STACK(6, 7, 272)
+ LDPQ_STACK(8, 9, 304)
+ LDPQ_STACK(10, 11, 336)
+ LDPQ_STACK(12, 13, 368)
+ LDPQ_STACK(14, 15, 400)
+ LDPQ_STACK(16, 17, 432)
+ LDPQ_STACK(18, 19, 464)
+ LDPQ_STACK(20, 21, 496)
+ LDPQ_STACK(22, 23, 528)
+ LDPQ_STACK(24, 25, 560)
+ LDPQ_STACK(26, 27, 592)
+ LDPQ_STACK(28, 29, 624)
+ LDPQ_STACK(30, 31, 656)
+
+ ldp q0, q1, [sp], 688
+ .cfi_same_value q0
+ .cfi_same_value q1
+ .cfi_def_cfa_offset 0
+ ret
+ .cfi_endproc
+ .size _syscall_hook_trampoline, .-_syscall_hook_trampoline
+
+/**
+ * On syscallhook entry, we are still on the old stack,
+ * with x30 (lr) pointing right after the blr instruction that got us here.
+ * The old values of x15 and x30 are saved at an offset from x8, which holds
+ * the syscall number biased so that the stores land in the stub_scratch_2 area.
+ */
+ .globl _syscall_hook_trampoline_raw
+ .hidden _syscall_hook_trampoline_raw
+ .type _syscall_hook_trampoline_raw, @function
+_syscall_hook_trampoline_raw:
+ .cfi_startproc
+ // GAS correctly puts these in the CIE as long as they
+ // appear right after .cfi_startproc
+ .cfi_return_column 32 // pc
+ SAME_VALUE_X_NO8_NO15
+ SAME_VALUE_ALL_Q
+ // We define CFA as the value of the stack pointer when we enter this function
+ // as specified in aadwarf64.
+ // Since we aren't using the caller's stack, none of the registers
+ // we save will be stored relative to the CFA...
+ .cfi_def_cfa sp, 0
+ REG_AT_REG_OFFSET(0x20 /* pc */, 30, 16)
+ REG_AT_REG_OFFSET(0x0f /* x15 */, 8,
+ (stub_scratch_2 - preload_thread_locals) | 0x80, 0)
+ REG_AT_REG_OFFSET(0x1e /* x30 */, 8,
+ (stub_scratch_2 - preload_thread_locals + 8) | 0x80, 0)
+ // x8 = x8 - preload_thread_locals
+ // The last byte of the signed LEB128 number contains the top 4 bits
+ // from the 32bit negative number (obtained using the shifted 0xF mask)
+ // and 3 bits of leading ones above it (the `or`ing of the `0x70`).
+ // The top bit of the byte is 0 signaling the end of the LEB128 encoding.
+ .cfi_escape 0x16, /* DW_CFA_val_expression */ \
+ 0x08, /* x8 */ \
+ 0x06, /* length 6 */ \
+ 0x78, /* DW_OP_breg8 */ \
+ ((-preload_thread_locals) & 0x7F) | 0x80, \
+ ((-preload_thread_locals) & (0x7F << 7)) >> 7 | 0x80, \
+ ((-preload_thread_locals) & (0x7F << 14)) >> 14 | 0x80, \
+ ((-preload_thread_locals) & (0x7F << 21)) >> 21 | 0x80, \
+ ((-preload_thread_locals) & ( 0xF << 28)) >> 28 | 0x70
+ // Old gcc versions don't want to encode bti
+ // unless we specify armv8.5-a, even though it is in the nop space.
+ .inst 0xd503245f // bti c
+ mov x15, preload_thread_locals
+ // Stash away x30 so that we have two registers to use again.
+ // We can't use stub_scratch_2 since we might overwrite the data there.
+ str x30, [x15, stub_scratch_1 - preload_thread_locals]
+ .cfi_escape 0x10, /* DW_CFA_expression */ \
+ 0x20, /* pc */ \
+ 0x08, /* length 8 */ \
+ DW_OP_CONST4U(stub_scratch_1), \
+ 0x06, /* DW_OP_deref */ \
+ 0x23, /* DW_OP_plus_uconst */ \
+ 16
+ // Move the register stash region from
+ // `x8 + stub_scratch_2 - preload_thread_locals`
+ // (i.e. `stub_scratch_2 + original_x8`) to the start of `stub_scratch_2`
+ // Do it in the forward order since we know x8 >= stub_scratch_2
+ ldr x30, [x8, stub_scratch_2 - preload_thread_locals]
+ str x30, [x15, stub_scratch_2 - preload_thread_locals]
+ ldr x30, [x8, stub_scratch_2 - preload_thread_locals + 8]
+ str x30, [x15, stub_scratch_2 - preload_thread_locals + 8]
+ // Restore x8
+ movk x8, 0, lsl 16
+ .cfi_same_value x8
+ REG_AT_ADDR32(0x0f /* x15 */, stub_scratch_2)
+ REG_AT_ADDR32(0x1e /* x30 */, stub_scratch_2 + 8)
+
+ cmp x8, 0xdc // SYS_clone
+ .cfi_remember_state
+ b.eq .Lfallback_rawsyscall
+
+ ldr w30, [x15, alt_stack_nesting_level - preload_thread_locals]
+ cmp w30, 0
+ add w30, w30, 1
+ str w30, [x15, alt_stack_nesting_level - preload_thread_locals]
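+ // The cmp above is done before the increment so that the b.ne below tests
+ // the pre-increment nesting level: zero means switch to the alt stack,
+ // nonzero means we are already on it and just nest below the current sp.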
+
+ b.ne .Lnest_syscall_hook_trampoline_raw
+ ldr x30, [x15, syscallbuf_stub_alt_stack - preload_thread_locals]
+ sub x30, x30, 48
+ b .Lstackset_syscall_hook_trampoline_raw
+.Lnest_syscall_hook_trampoline_raw:
+ sub x30, sp, 48
+.Lstackset_syscall_hook_trampoline_raw:
+ // Now x30 points to the new stack with 48 bytes of space allocated
+
+ // Move sp into a normal register. Otherwise we can't store it
+ mov x15, sp
+ // Save sp to new stack.
+ str x15, [x30, 16]
+ mov sp, x30
+ REG_AT_REG_OFFSET(0x1f /* sp */, 31, 16)
+ .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
+ 0x03, /* 3 bytes follow */ \
+ 0x8f, /* DW_OP_breg31 */ \
+ 16, \
+ 0x06 /* DW_OP_deref */
+ // sp is switched, x15 and x30 are free to use
+ // [stub_scratch_1] holds the stub address
+
+ // Now we need to construct the stack frame, with everything
+ // in the scratch area copied over so that we can nest again.
+ mov x15, preload_thread_locals
+ // load runtime stub address
+ ldr x30, [x15, stub_scratch_1 - preload_thread_locals]
+ // save stub return address
+ str x30, [sp]
+ // load syscall return address
+ ldr x30, [x30, 16]
+ str x30, [sp, 8]
+ ldr x30, [x15, stub_scratch_2 - preload_thread_locals]
+ str x30, [sp, 24]
+ ldr x30, [x15, stub_scratch_2 - preload_thread_locals + 8]
+ str x30, [sp, 32]
+
+ // stackframe layout
+ // 32: original x30
+ // 24: original x15
+ // 16: original sp
+ // 8: return address to syscall
+ // 0: return address to stub
+ REG_AT_REG_OFFSET(0x20 /* pc */, 31, 8)
+ REG_AT_REG_OFFSET(0x0f /* x15 */, 31, 24)
+ REG_AT_REG_OFFSET(0x1e /* x30 */, 31, 32)
+
+ bl _syscall_hook_trampoline
+
+/**
+ * _syscall_hook_trampoline restores all the registers to their previous values
+ * (except the register holding the syscall return value), so we just need to
+ * restore the registers we've overwritten during the stack switch,
+ * i.e. x15, x30 and sp.
+ * x15 and x30 will be restored when we get back to the stub,
+ * so we don't need to restore them here, but we do need to copy their values
+ * to stub_scratch_2 again so that the stub can restore them
+ * (since without a valid stack that is still the only memory
+ * we can use to restore things).
+ * We also need to store the return address to stub_scratch_1,
+ * since that helps rr set breakpoints.
+ */
+
+ movz x15, #:abs_g1:stub_scratch_2 // assume 32bit address
+ movk x15, #:abs_g0_nc:stub_scratch_2
+ ldr x30, [sp, 24] // x15
+ str x30, [x15]
+ ldr x30, [sp, 32] // x30
+ str x30, [x15, 8]
+ REG_AT_ADDR32(0x0f /* x15 */, stub_scratch_2)
+ REG_AT_ADDR32(0x1e /* x30 */, stub_scratch_2 + 8)
+ ldr x30, [sp, 8] // syscall return address
+ // tell rr breakpoint handling where we are going
+ str x30, [x15, stub_scratch_1 - stub_scratch_2]
+ REG_AT_ADDR32(0x20 /* pc */, stub_scratch_1)
+ ldr x30, [sp] // stub return address
+ ldr x15, [sp, 16] // sp
+ mov sp, x15
+ .cfi_restore sp
+ .cfi_def_cfa sp, 0
+ movz x15, #:abs_g1:stub_scratch_2 // assume 32bit address
+ movk x15, #:abs_g0_nc:stub_scratch_2
+_syscallbuf_final_exit_instruction:
+ ret
+
+.Lfallback_rawsyscall:
+ .cfi_restore_state
+ // Must not touch sp in this branch.
+ // Use x15 to remember the return address since we are only copying
+ // the first two elements of stub_scratch_2 for the child.
+ ldr x15, [x15, stub_scratch_1 - preload_thread_locals]
+ REG_AT_REG_OFFSET(0x20 /* pc */, 15, 16)
+ mov x30, 0x70000000 // RR_PAGE_SYSCALL_TRACED
+ blr x30
+ // stub_scratch_2 content is maintained by rr
+ // we need to put the syscall return address in stub_scratch_1
+ movz x30, #:abs_g1:stub_scratch_2 // assume 32bit address
+ movk x30, #:abs_g0_nc:stub_scratch_2
+ str x15, [x30, 16] // stash away stub address
+ ldr x15, [x15, 16] // syscall return address
+ .cfi_register 32, x15
+ str x15, [x30, stub_scratch_1 - stub_scratch_2]
+ REG_AT_ADDR32(0x20 /* pc */, stub_scratch_1)
+ mov x15, x30
+ ldr x30, [x15, 16]
+ b _syscallbuf_final_exit_instruction
+
+ .cfi_endproc
+ .size _syscall_hook_trampoline_raw, .-_syscall_hook_trampoline_raw
+
+#endif /* __aarch64__ */
+
+ .section .note.GNU-stack,"",@progbits
diff --git a/rr/android/x86_64/share/rr/src/preload/syscallbuf.c b/rr/android/x86_64/share/rr/src/preload/syscallbuf.c
new file mode 100644
index 0000000..c201ba7
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/syscallbuf.c
@@ -0,0 +1,4327 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#define RR_IMPLEMENT_PRELOAD
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include "syscallbuf.h"
+
+/**
+ * Buffer syscalls, so that rr can process the entire buffer with one
+ * trap instead of a trap per call.
+ *
+ * This file is compiled into a dso that's PRELOADed in recorded
+ * applications. The dso replaces libc syscall wrappers with our own
+ * implementation that saves nondeterministic outparams in a fixed-size
+ * buffer. When the buffer is full or the recorded application
+ * invokes an un-buffered syscall or receives a signal, we trap to rr
+ * and it records the state of the buffer.
+ *
+ * During replay, rr simply refills the buffer with the recorded data
+ * when it reaches the "flush-buffer" events that were recorded. Then
+ * rr emulates each buffered syscall, and the code here restores the
+ * client data from the refilled buffer.
+ *
+ * The crux of the implementation here is to selectively ptrace-trap
+ * syscalls. The normal (un-buffered) syscalls generate a ptrace
+ * trap, and the buffered syscalls trap directly to the kernel. This
+ * is implemented with a seccomp-bpf which examines the syscall and
+ * is implemented with a seccomp-bpf filter which examines the syscall and
+ *
+ * Because this code runs in the tracee's address space and overrides
+ * system calls, the code is rather delicate. The following rules
+ * must be followed:
+ *
+ * o No rr headers (other than seccomp-bpf.h and rr.h) may be included
+ * o All syscalls invoked by this code must be called directly, not
+ * through libc wrappers (which this file may itself indirectly override)
+ *
+ * The wrapper functions are named sys_xxxx. Each wrapper normally makes one
+ * untraced syscall or one traced syscall of the same type, but there are
+ * exceptions. For example sys_read can make a number of untraced syscalls
+ * instead of a single untraced syscall. A critical rule is that any traced
+ * or MAY_BLOCK untraced syscall *must* be the last syscall performed by the
+ * wrapper.
+ */
+
+#include <dlfcn.h>
+#include <limits.h>
+#include <unistd.h>
+#include <asm/errno.h>
+#include <asm/ioctls.h>
+#include <asm/poll.h>
+#include <asm/signal.h>
+#include <asm/siginfo.h>
+#include <asm/stat.h>
+#include <asm/statfs.h>
+#include <linux/eventpoll.h>
+#include <linux/futex.h>
+#include <linux/fcntl.h>
+#include <linux/if_packet.h>
+#include <linux/ioctl.h>
+#include <linux/mman.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/quota.h>
+#include <linux/resource.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/stat.h>
+#include <linux/time.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+#include <linux/un.h>
+#include <linux/utsname.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <syscall.h>
+#include <sysexits.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "preload_interface.h"
+#include "rr/rr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+#ifndef BTRFS_IOCTL_MAGIC
+#define BTRFS_IOCTL_MAGIC 0x94
+#endif
+#ifndef BTRFS_IOC_CLONE_RANGE
+struct btrfs_ioctl_clone_range_args {
+ int64_t src_fd;
+ uint64_t src_offset;
+ uint64_t src_length;
+ uint64_t dest_offset;
+};
+#define BTRFS_IOC_CLONE_RANGE \
+ _IOW(BTRFS_IOCTL_MAGIC, 13, struct btrfs_ioctl_clone_range_args)
+#endif
+#ifndef MADV_FREE
+#define MADV_FREE 8
+#endif
+
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK 1
+#endif
+
+struct rr_rseq {
+ uint32_t cpu_id_start;
+ uint32_t cpu_id;
+ uint64_t rseq_cs;
+ uint32_t flags;
+} __attribute__((aligned(32)));
+
+/* NB: don't include any other local headers here. */
+
+#ifdef memcpy
+#undef memcpy
+#endif
+#define memcpy you_must_use_local_memcpy
+
+static long _traced_init_syscall(int syscallno, long a0, long a1, long a2,
+ long a3, long a4, long a5)
+{
+ return syscall(syscallno, a0, a1, a2, a3, a4, a5);
+}
+
+#ifdef syscall
+#undef syscall
+#endif
+#define syscall you_must_use_traced_syscall
+
+static inline unsigned char *rr_page_replay_flag_addr(void) {
+ return (unsigned char *)RR_PAGE_IN_REPLAY_FLAG;
+}
+
+/**
+ * Declaring this to avoid issues with the declaration of f_owner_ex
+ * across distros. See https://github.com/rr-debugger/rr/issues/2693 */
+struct rr_f_owner_ex {
+ int type;
+ int pid;
+};
+
+#ifndef __ARCH_FLOCK64_PAD
+#define __ARCH_FLOCK64_PAD
+#endif
+struct rr_flock64 {
+ short l_type;
+ short l_whence;
+ __kernel_loff_t l_start;
+ __kernel_loff_t l_len;
+ __kernel_pid_t l_pid;
+ __ARCH_FLOCK64_PAD
+};
+
+// The alignment of this struct is incorrect, but as long as it's not
+// used inside other structures, defining it this way makes the code below
+// easier.
+typedef uint64_t kernel_sigset_t;
+
+/* Nonzero when syscall buffering is enabled. */
+static int buffer_enabled;
+/* Nonzero after process-global state has been initialized. */
+static int process_inited;
+
+RR_HIDDEN struct preload_globals globals;
+
+static struct preload_thread_locals* const thread_locals =
+ (struct preload_thread_locals*)PRELOAD_THREAD_LOCALS_ADDR;
+
+/**
+ * Return a pointer to the buffer header, which happens to occupy the
+ * initial bytes in the mapped region.
+ */
+static struct syscallbuf_hdr* buffer_hdr(void) {
+ return (struct syscallbuf_hdr*)thread_locals->buffer;
+}
+
+/**
+ * Return a pointer to the byte just after the last valid syscall record in
+ * the buffer.
+ */
+static uint8_t* buffer_last(void) {
+ return (uint8_t*)next_record(buffer_hdr());
+}
+
+/**
+ * Return a pointer to the byte just after the very end of the mapped
+ * region.
+ */
+static uint8_t* buffer_end(void) {
+ return thread_locals->buffer + thread_locals->buffer_size;
+}
+
+/**
+ * Same as libc memcpy(), but usable within syscallbuf transaction
+ * critical sections.
+ */
+static void local_memcpy(void* dest, const void* source, int n) {
+#if defined(__i386__) || defined(__x86_64__)
+ /* On modern x86-ish CPUs rep movsb is fast, usually able to move
+ * 64 bytes at a time.
+ */
+ __asm__ __volatile__("rep movsb\n\t"
+ : "+S"(source), "+D"(dest), "+c"(n)
+ :
+ : "cc", "memory");
+#elif defined(__aarch64__)
+ long c1;
+ long c2;
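+ /* Copy 16 bytes per iteration with ldp/stp, then use tbz on bits 3, 2, 1
+    and 0 of the remaining count to copy an 8-, 4-, 2- and 1-byte tail. */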
+ __asm__ __volatile__("subs %4, %2, 16\n\t"
+ "b.lt 2f\n\t"
+ "1:\n\t"
+ "mov %2, %4\n\t"
+ "ldp %3, %4, [%1], #16\n\t"
+ "stp %3, %4, [%0], #16\n\t"
+ "subs %4, %2, #16\n\t"
+ "b.ge 1b\n"
+ "2:\n\t"
+ "tbz %2, 3, 3f\n\t"
+ "ldr %3, [%1], #8\n\t"
+ "str %3, [%0], #8\n\t"
+ "3:\n\t"
+ "tbz %2, 2, 3f\n\t"
+ "ldr %w3, [%1], #4\n\t"
+ "str %w3, [%0], #4\n\t"
+ "3:\n\t"
+ "tbz %2, 1, 3f\n\t"
+ "ldrh %w3, [%1], #2\n\t"
+ "strh %w3, [%0], #2\n\t"
+ "3:\n\t"
+ "tbz %2, 0, 3f\n\t"
+ "ldrb %w3, [%1]\n\t"
+ "strb %w3, [%0]\n\t"
+ "3:\n\t"
+ : "+r"(dest), "+r"(source), "+r"(n), "=&r"(c1), "=&r"(c2)
+ :
+ : "cc", "memory");
+#else
+#error Unknown architecture
+#endif
+}
+
+/**
+ * Same as libc memset(), but usable within syscallbuf transaction
+ * critical sections.
+ */
+static void local_memset(void* dest, uint8_t c, int n) {
+#if defined(__i386__) || defined(__x86_64__)
+ /* On modern x86-ish CPUs rep stosb is fast, usually able to move
+ * 64 bytes at a time.
+ */
+ __asm__ __volatile__("rep stosb\n\t"
+ : "+a"(c), "+D"(dest), "+c"(n)
+ :
+ : "cc", "memory");
+#elif defined(__aarch64__)
+ double v1;
+ long n2;
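+ /* Broadcast the byte into a vector register and store 32 bytes per
+    iteration, then finish with a byte-at-a-time loop for the tail. */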
+ __asm__ __volatile__("subs %4, %2, 32\n\t"
+ "b.lt 2f\n\t"
+ "dup %3.16b, %w0\n"
+ "1:\n\t"
+ "mov %2, %4\n\t"
+ "stp %q3, %q3, [%1], #32\n\t"
+ "subs %4, %2, #32\n\t"
+ "b.ge 1b\n"
+ "2:\n\t"
+ "cbz %2, 4f\n"
+ "3:\n\t"
+ "strb %w0, [%1], #1\n\t"
+ "subs %2, %2, #1\n\t"
+ "b.ne 3b\n"
+ "4:\n\t"
+ : "+r"(c), "+r"(dest), "+r"(n), "=x"(v1), "=r"(n2)
+ :
+ : "cc", "memory");
+#else
+#error Unknown architecture
+#endif
+}
+
+/**
+ * Xorshift* RNG
+ */
+static int64_t local_random(void) {
+ uint64_t x = globals.random_seed;
+ x ^= x >> 12;
+ x ^= x << 25;
+ x ^= x >> 27;
+ globals.random_seed = x;
+ return x * 0x2545F4914F6CDD1D;
+}
+
+/* The following are wrappers for the syscalls invoked by this library
+ * itself. These syscalls will generate ptrace traps.
+ * stack_param_1 and stack_param_2 are pushed onto the stack just before
+ * the syscall, for SYS_rrcall_notify_syscall_hook_exit which takes stack
+ * parameters as well as register parameters.
+ * syscall_instruction is the actual syscall invocation instruction
+ * (a function which we call with the registers set up appropriately).
+ */
+
+extern RR_HIDDEN long _raw_syscall(int syscallno, long a0, long a1, long a2,
+ long a3, long a4, long a5,
+ void* syscall_instruction,
+ long stack_param_1, long stack_param_2);
+
+static int privileged_traced_syscall(int syscallno, long a0, long a1, long a2,
+ long a3, long a4, long a5) {
+ return _raw_syscall(syscallno, a0, a1, a2, a3, a4, a5,
+ RR_PAGE_SYSCALL_PRIVILEGED_TRACED, 0, 0);
+}
+#define privileged_traced_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ privileged_traced_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \
+ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5)
+#define privileged_traced_syscall5(no, a0, a1, a2, a3, a4) \
+ privileged_traced_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define privileged_traced_syscall4(no, a0, a1, a2, a3) \
+ privileged_traced_syscall5(no, a0, a1, a2, a3, 0)
+#define privileged_traced_syscall3(no, a0, a1, a2) \
+ privileged_traced_syscall4(no, a0, a1, a2, 0)
+#define privileged_traced_syscall2(no, a0, a1) \
+ privileged_traced_syscall3(no, a0, a1, 0)
+#define privileged_traced_syscall1(no, a0) privileged_traced_syscall2(no, a0, 0)
+#define privileged_traced_syscall0(no) privileged_traced_syscall1(no, 0)
+
+/**
+ * Make a raw traced syscall using the params in |call|.
+ */
+static long traced_raw_syscall(struct syscall_info* call) {
+ if (call->no == SYS_rrcall_rdtsc) {
+ // Handle this specially because the rrcall writes to a memory out-param
+ // and we need to actually modify the outgoing AX/DX registers instead.
+ uint32_t tsc[2];
+ privileged_traced_syscall1(SYS_rrcall_rdtsc, tsc);
+ // Overwrite RDX (syscall arg 3) with our TSC value.
+ call->args[2] = tsc[1];
+ return tsc[0];
+ }
+ /* FIXME: pass |call| to avoid pushing these on the stack
+ * again. */
+ return _raw_syscall(call->no, call->args[0], call->args[1], call->args[2],
+ call->args[3], call->args[4], call->args[5],
+ RR_PAGE_SYSCALL_TRACED, 0, 0);
+}
+
+/**
+ * Make a raw traced syscall using the params in |call|, privileged.
+ */
+static long privileged_traced_raw_syscall(const struct syscall_info* call) {
+ /* FIXME: pass |call| to avoid pushing these on the stack
+ * again. */
+ return _raw_syscall(call->no, call->args[0], call->args[1], call->args[2],
+ call->args[3], call->args[4], call->args[5],
+ RR_PAGE_SYSCALL_PRIVILEGED_TRACED, 0, 0);
+}
+
+#if defined(SYS_fcntl64)
+#define RR_FCNTL_SYSCALL SYS_fcntl64
+#else
+#define RR_FCNTL_SYSCALL SYS_fcntl
+#endif
+
+static int privileged_traced_fcntl(int fd, int cmd, ...) {
+ va_list ap;
+ void* arg;
+
+ va_start(ap, cmd);
+ arg = va_arg(ap, void*);
+ va_end(ap);
+
+ return privileged_traced_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg);
+}
+
+static pid_t privileged_traced_getpid(void) {
+ return privileged_traced_syscall0(SYS_getpid);
+}
+
+static pid_t privileged_traced_gettid(void) {
+ return privileged_traced_syscall0(SYS_gettid);
+}
+
+static int privileged_traced_perf_event_open(struct perf_event_attr* attr,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags) {
+ return privileged_traced_syscall5(SYS_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
+
+static __attribute__((noreturn)) void privileged_traced_raise(int sig) {
+ privileged_traced_syscall2(SYS_kill, privileged_traced_getpid(), sig);
+ __builtin_unreachable();
+}
+
+static ssize_t privileged_traced_write(int fd, const void* buf, size_t count) {
+ return privileged_traced_syscall3(SYS_write, fd, buf, count);
+}
+
+static void logmsg(const char* msg) {
+ privileged_traced_write(STDERR_FILENO, msg, rrstrlen(msg));
+}
+
+#define STR_HELPER(x) #x
+#define STR(x) STR_HELPER(x)
+
+#ifndef NDEBUG
+#define assert(cond) \
+ do { \
+ if (!(cond)) { \
+ logmsg(__FILE__ ":" STR(__LINE__) ": Assertion `" #cond "' failed.\n"); \
+ privileged_traced_raise(SIGABRT); \
+ } \
+ } while (0)
+#else
+#define assert(cond) \
+ do { \
+ __attribute__((unused)) size_t s = sizeof(cond); \
+ } while (0)
+#endif
+
+#define fatal(msg) \
+ do { \
+ logmsg(__FILE__ ":" STR(__LINE__) ": Fatal error: " msg "\n"); \
+ privileged_traced_raise(SIGABRT); \
+ } while (0)
+
+/**
+ * Unlike |traced_syscall()|, this helper is implicitly "raw" (returns
+ * the direct kernel return value), because the syscall hooks have to
+ * save that raw return value.
+ * This is only called from syscall wrappers that are doing a proper
+ * buffered syscall.
+ */
+static long untraced_syscall_full(int syscallno, long a0, long a1, long a2,
+ long a3, long a4, long a5,
+ void* syscall_instruction,
+ long stack_param_1, long stack_param_2) {
+ struct syscallbuf_record* rec = (struct syscallbuf_record*)buffer_last();
+ /* Ensure tools analyzing the replay can find the pending syscall result */
+ thread_locals->pending_untraced_syscall_result = &rec->ret;
+ long ret = _raw_syscall(syscallno, a0, a1, a2, a3, a4, a5,
+ syscall_instruction, stack_param_1, stack_param_2);
+/* During replay, return the result that's already in the buffer, instead
+ of what our "syscall" returned. */
+#if defined(__i386__) || defined(__x86_64__)
+ /* On entry, during recording %eax/%rax are whatever the kernel returned
+ * but during replay they may be invalid (e.g. 0). During replay, reload
+ * %eax/%rax from |rec->ret|. At the end of this sequence all registers
+ * will match between recording and replay. We clobber the temporary
+ * in_replay register, and the condition codes, to ensure this.
+ * This all assumes the compiler doesn't create unnecessary temporaries
+ * holding values like |ret|. Inspection of generated code shows it doesn't.
+ */
+ unsigned char tmp_in_replay = *rr_page_replay_flag_addr();
+ __asm__("test %1,%1\n\t"
+ "cmovne %2,%0\n\t"
+ "xor %1,%1\n\t"
+ : "+a"(ret), "+c"(tmp_in_replay)
+ : "m"(rec->ret)
+ : "cc");
+#elif defined(__aarch64__)
+ unsigned char *globals_in_replay = rr_page_replay_flag_addr();
+ long *rec_ret = &rec->ret;
+ __asm__("ldrb %w1, [%1]\n\t" // tmp_in_replay = *rr_page_replay_flag_addr()
+ "ldr %2, [%2]\n\t" // tmp = rec->ret
+ "cmp %w1, #0\n\t"
+ "csel %0, %0, %2, eq\n\t" // ret = tmp_in_replay ? tmp : ret
+ "subs %1, xzr, xzr\n\t" // clear tmp_in_replay and flag
+ "mov %2, xzr\n\t" // clear tmp
+ : "+r"(ret), "+r"(globals_in_replay), "+r"(rec_ret)
+ :
+ : "cc");
+#else
+#error Unknown architecture
+#endif
+ return ret;
+}
+#define untraced_syscall_base(no, a0, a1, a2, a3, a4, a5, inst) \
+ untraced_syscall_full(no, a0, a1, a2, a3, a4, a5, inst, 0, 0)
+#define untraced_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \
+ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \
+ RR_PAGE_SYSCALL_UNTRACED_RECORDING_ONLY)
+#define untraced_syscall5(no, a0, a1, a2, a3, a4) \
+ untraced_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define untraced_syscall4(no, a0, a1, a2, a3) \
+ untraced_syscall5(no, a0, a1, a2, a3, 0)
+#define untraced_syscall3(no, a0, a1, a2) untraced_syscall4(no, a0, a1, a2, 0)
+#define untraced_syscall2(no, a0, a1) untraced_syscall3(no, a0, a1, 0)
+#define untraced_syscall1(no, a0) untraced_syscall2(no, a0, 0)
+#define untraced_syscall0(no) untraced_syscall1(no, 0)
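+/* For example, untraced_syscall3(no, a0, a1, a2) expands (with uintptr_t
+ * casts on the arguments) to untraced_syscall_full(no, a0, a1, a2, 0, 0, 0,
+ * RR_PAGE_SYSCALL_UNTRACED_RECORDING_ONLY, 0, 0): an untraced, buffered
+ * syscall issued through the recording-only untraced entry point in the
+ * rr page. (Illustrative expansion only.) */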
+
+#define untraced_replayed_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \
+ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \
+ RR_PAGE_SYSCALL_UNTRACED)
+#define untraced_replayed_syscall5(no, a0, a1, a2, a3, a4) \
+ untraced_replayed_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define untraced_replayed_syscall4(no, a0, a1, a2, a3) \
+ untraced_replayed_syscall5(no, a0, a1, a2, a3, 0)
+#define untraced_replayed_syscall3(no, a0, a1, a2) \
+ untraced_replayed_syscall4(no, a0, a1, a2, 0)
+#define untraced_replayed_syscall2(no, a0, a1) \
+ untraced_replayed_syscall3(no, a0, a1, 0)
+#define untraced_replayed_syscall1(no, a0) untraced_replayed_syscall2(no, a0, 0)
+#define untraced_replayed_syscall0(no) untraced_replayed_syscall1(no, 0)
+
+static long __attribute__((unused))
+untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2,
+ long a3, long a4, long a5,
+ void* syscall_instruction) {
+ struct syscallbuf_record* rec = (struct syscallbuf_record*)buffer_last();
+ rec->replay_assist = 1;
+ return untraced_syscall_base(syscallno, a0, a1, a2, a3, a4, a5, syscall_instruction);
+}
+
+#define untraced_replay_assist_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ untraced_replay_assist_syscall_base( \
+ no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \
+ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \
+ RR_PAGE_SYSCALL_UNTRACED_REPLAY_ASSIST)
+#define untraced_replay_assist_syscall5(no, a0, a1, a2, a3, a4) \
+ untraced_replay_assist_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define untraced_replay_assist_syscall4(no, a0, a1, a2, a3) \
+ untraced_replay_assist_syscall5(no, a0, a1, a2, a3, 0)
+#define untraced_replay_assist_syscall3(no, a0, a1, a2) \
+ untraced_replay_assist_syscall4(no, a0, a1, a2, 0)
+#define untraced_replay_assist_syscall2(no, a0, a1) \
+ untraced_replay_assist_syscall3(no, a0, a1, 0)
+#define untraced_replay_assist_syscall1(no, a0) \
+ untraced_replay_assist_syscall2(no, a0, 0)
+#define untraced_replay_assist_syscall0(no) \
+ untraced_replay_assist_syscall1(no, 0)
+
+// "Privileged" syscalls are not affected by the application's own seccomp
+// filters.
+#define privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \
+ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \
+ RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY)
+#define privileged_untraced_syscall5(no, a0, a1, a2, a3, a4) \
+ privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define privileged_untraced_syscall4(no, a0, a1, a2, a3) \
+ privileged_untraced_syscall5(no, a0, a1, a2, a3, 0)
+#define privileged_untraced_syscall3(no, a0, a1, a2) \
+ privileged_untraced_syscall4(no, a0, a1, a2, 0)
+#define privileged_untraced_syscall2(no, a0, a1) \
+ privileged_untraced_syscall3(no, a0, a1, 0)
+#define privileged_untraced_syscall1(no, a0) \
+ privileged_untraced_syscall2(no, a0, 0)
+#define privileged_untraced_syscall0(no) privileged_untraced_syscall1(no, 0)
+
+// "Unrecorded" syscalls are performed during recording only and are "raw";
+// they are not associated with syscallbuf records.
+#define privileged_unrecorded_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \
+ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \
+ RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY, 0, 0)
+#define privileged_unrecorded_syscall5(no, a0, a1, a2, a3, a4) \
+ privileged_unrecorded_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define privileged_unrecorded_syscall4(no, a0, a1, a2, a3) \
+ privileged_unrecorded_syscall5(no, a0, a1, a2, a3, 0)
+#define privileged_unrecorded_syscall3(no, a0, a1, a2) \
+ privileged_unrecorded_syscall4(no, a0, a1, a2, 0)
+#define privileged_unrecorded_syscall2(no, a0, a1) \
+ privileged_unrecorded_syscall3(no, a0, a1, 0)
+#define privileged_unrecorded_syscall1(no, a0) \
+ privileged_unrecorded_syscall2(no, a0, 0)
+#define privileged_unrecorded_syscall0(no) privileged_unrecorded_syscall1(no, 0)
+
+#define replay_only_syscall6(no, a0, a1, a2, a3, a4, a5) \
+ _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, (uintptr_t)a3, \
+ (uintptr_t)a4, (uintptr_t)a5, \
+ RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_REPLAY_ONLY, 0, 0)
+#define replay_only_syscall5(no, a0, a1, a2, a3, a4) \
+ replay_only_syscall6(no, a0, a1, a2, a3, a4, 0)
+#define replay_only_syscall4(no, a0, a1, a2, a3) \
+ replay_only_syscall5(no, a0, a1, a2, a3, 0)
+#define replay_only_syscall3(no, a0, a1, a2) \
+ replay_only_syscall4(no, a0, a1, a2, 0)
+#define replay_only_syscall2(no, a0, a1) replay_only_syscall3(no, a0, a1, 0)
+#define replay_only_syscall1(no, a0) replay_only_syscall2(no, a0, 0)
+#define replay_only_syscall0(no) replay_only_syscall1(no, 0)
+
+static int privileged_untraced_close(int fd) {
+ return privileged_unrecorded_syscall1(SYS_close, fd);
+}
+
+static int privileged_untraced_fcntl(int fd, int cmd, ...) {
+ va_list ap;
+ void* arg;
+
+ va_start(ap, cmd);
+ arg = va_arg(ap, void*);
+ va_end(ap);
+
+ return privileged_unrecorded_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg);
+}
+
+/**
+ * Do what's necessary to set up buffers for the caller.
+ * |untraced_syscall_ip| lets rr know where our untraced syscalls will
+ * originate from. |addr| is the address of the control socket the
+ * child expects to connect to. |msg| is a pre-prepared IPC that can
+ * be used to share fds; |fdptr| is a pointer to the control-message
+ * data buffer where the fd number being shared will be stored.
+ * |args_vec| provides the tracer with preallocated space to make
+ * socketcall syscalls.
+ *
+ * Return a pointer to the syscallbuf (with an initialized header
+ * including the available size), if syscallbuf is enabled.
+ *
+ * This is a "magic" syscall implemented by rr.
+ */
+static void rrcall_init_buffers(struct rrcall_init_buffers_params* args) {
+ privileged_traced_syscall1(SYS_rrcall_init_buffers, args);
+}
+
+/**
+ * Return a counter that generates a signal targeted at this task
+ * every time the task is descheduled |nr_descheds| times.
+ */
+static int open_desched_event_counter(size_t nr_descheds, pid_t tid) {
+ struct perf_event_attr attr;
+ int tmp_fd, fd;
+ struct rr_f_owner_ex own;
+
+ local_memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
+ attr.disabled = 1;
+ attr.sample_period = nr_descheds;
+
+ tmp_fd = privileged_traced_perf_event_open(&attr, 0 /*self*/, -1 /*any cpu*/,
+ -1, 0);
+ if (0 > tmp_fd) {
+ fatal("Failed to perf_event_open");
+ }
+ fd = privileged_traced_fcntl(tmp_fd, F_DUPFD_CLOEXEC,
+ RR_DESCHED_EVENT_FLOOR_FD);
+ if (fd > 0) {
+ if (privileged_untraced_close(tmp_fd)) {
+ fatal("Failed to close tmp_fd");
+ }
+ } else {
+ // We may be unable to find an fd above the RR_DESCHED_EVENT_FLOOR_FD (e.g.
+ // because of a low ulimit). In that case, just use the tmp_fd we already
+ // have.
+ fd = tmp_fd;
+ }
+ if (privileged_untraced_fcntl(fd, F_SETFL, FASYNC)) {
+ fatal("Failed to fcntl(FASYNC) the desched counter");
+ }
+ own.type = F_OWNER_TID;
+ own.pid = tid;
+ if (privileged_untraced_fcntl(fd, F_SETOWN_EX, &own)) {
+ fatal("Failed to fcntl(SETOWN_EX) the desched counter to this");
+ }
+ if (privileged_untraced_fcntl(fd, F_SETSIG, globals.desched_sig)) {
+ fatal("Failed to fcntl(SETSIG) the desched counter");
+ }
+
+ return fd;
+}
+
+/**
+ * Initialize thread-local buffering state, if enabled and not already
+ * initialized.
+ */
+static void init_thread(void) {
+ struct rrcall_init_buffers_params args;
+
+ assert(process_inited);
+ if (thread_locals->thread_inited) {
+ return;
+ }
+ thread_locals->thread_inited = 1;
+
+ /* Do not do any syscall buffering in a DiversionSession! */
+ if (!buffer_enabled || globals.in_diversion) {
+ return;
+ }
+
+ /* NB: we want this setup emulated during replay. */
+ thread_locals->desched_counter_fd =
+ open_desched_event_counter(1, privileged_traced_gettid());
+
+ args.desched_counter_fd = thread_locals->desched_counter_fd;
+
+ /* Trap to rr: let the magic begin!
+ *
+ * If the desched signal is currently blocked, then the tracer
+ * will clear our TCB guard and we won't be able to buffer
+ * syscalls. But the tracee will set the guard when (or if)
+ * the signal is unblocked. */
+ rrcall_init_buffers(&args);
+
+ thread_locals->cloned_file_data_fd = args.cloned_file_data_fd;
+ /* rr initializes the buffer header. */
+ thread_locals->buffer = args.syscallbuf_ptr;
+ thread_locals->buffer_size = args.syscallbuf_size;
+ thread_locals->scratch_buf = args.scratch_buf;
+ thread_locals->usable_scratch_size = args.usable_scratch_size;
+}
+
+// We don't include libc headers, since they conflict with the Linux headers,
+// so we declare this prototype manually.
+extern const char* getenv(const char*);
+
+// getauxval is from glibc 2.16 (2012) - don't assume it exists.
+unsigned long getauxval(unsigned long type) __attribute__((weak));
+#ifndef AT_SYSINFO_EHDR
+#define AT_SYSINFO_EHDR 33
+#endif
+
+extern RR_HIDDEN long syscall_hook(struct syscall_info* call);
+
+/**
+ * Initialize process-global buffering state, if enabled.
+ * NOTE: constructors go into a special section by default so this won't
+ * be counted as syscall-buffering code!
+ */
+static void __attribute__((constructor)) init_process(void) {
+ struct rrcall_init_preload_params params;
+
+ extern char _syscallbuf_final_exit_instruction;
+ extern char _syscallbuf_code_start;
+ extern char _syscallbuf_code_end;
+ extern char do_breakpoint_fault_addr;
+
+#if defined(__i386__)
+ extern RR_HIDDEN void __morestack(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_3d_01_f0_ff_ff(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_90_90_90(void);
+ struct syscall_patch_hook syscall_patch_hooks[] = {
+ /* pthread_cond_broadcast has 'int 80' followed by
+ * cmp $-4095,%eax (in glibc-2.18-16.fc20.i686) */
+ { 0,
+ 5,
+ { 0x3d, 0x01, 0xf0, 0xff, 0xff },
+ (uintptr_t)_syscall_hook_trampoline_3d_01_f0_ff_ff },
+ /* Our vdso syscall patch has 'int 80' followed by nop; nop; nop */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 3,
+ { 0x90, 0x90, 0x90 },
+ (uintptr_t)_syscall_hook_trampoline_90_90_90 }
+ };
+ extern char _get_pc_thunks_start;
+ extern char _get_pc_thunks_end;
+#elif defined(__x86_64__)
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_3d_01_f0_ff_ff(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_3d_00_f0_ff_ff(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_3d_00_f0_ff_ff(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_8b_3c_24(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_89_45_f8(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_89_c3(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_5a_5e_c3(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_89_c2_f7_da(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_90_90_90(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_ba_01_00_00_00(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_89_c1_31_d2(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_c3_nop(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_40_80_f6_81(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_49_89_ca(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_89_c1(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_c1_e2_20(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_49_8b_44_24_28(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_4c_89_f7(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_4c_89_ff(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_49_c7_c1_ff_ff_ff_ff(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_b8_0e_00_00_00(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_b8_11_01_00_00(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_b8_ca_00_00_00(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_be_18_00_00_00(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_89_e5(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_89_fb(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_48_8d_b3_f0_08_00_00(void);
+ extern RR_HIDDEN void _syscall_hook_trampoline_nops(void);
+
+#define MOV_RDX_VARIANTS \
+ MOV_RDX_TO_REG(48, d0) \
+ MOV_RDX_TO_REG(48, d1) \
+ MOV_RDX_TO_REG(48, d2) \
+ MOV_RDX_TO_REG(48, d3) \
+ MOV_RDX_TO_REG(48, d4) \
+ MOV_RDX_TO_REG(48, d5) \
+ MOV_RDX_TO_REG(48, d6) \
+ MOV_RDX_TO_REG(48, d7) \
+ MOV_RDX_TO_REG(49, d0) \
+ MOV_RDX_TO_REG(49, d1) \
+ MOV_RDX_TO_REG(49, d2) \
+ MOV_RDX_TO_REG(49, d3) \
+ MOV_RDX_TO_REG(49, d4) \
+ MOV_RDX_TO_REG(49, d5) \
+ MOV_RDX_TO_REG(49, d6) \
+ MOV_RDX_TO_REG(49, d7)
+
+#define MOV_RDX_TO_REG(rex, op) \
+ extern RR_HIDDEN void _syscall_hook_trampoline_##rex##_89_##op(void);
+ MOV_RDX_VARIANTS
+
+ struct syscall_patch_hook syscall_patch_hooks[] = {
+ /* Many glibc syscall wrappers (e.g. read) have 'syscall' followed
+ * by
+ * cmp $-4095,%rax (in glibc-2.18-16.fc20.x86_64) */
+ { 0,
+ 6,
+ { 0x48, 0x3d, 0x01, 0xf0, 0xff, 0xff },
+ (uintptr_t)_syscall_hook_trampoline_48_3d_01_f0_ff_ff },
+ /* Many glibc syscall wrappers (e.g. __libc_recv) have 'syscall'
+ * followed by
+ * cmp $-4096,%rax (in glibc-2.18-16.fc20.x86_64) */
+ { 0,
+ 6,
+ { 0x48, 0x3d, 0x00, 0xf0, 0xff, 0xff },
+ (uintptr_t)_syscall_hook_trampoline_48_3d_00_f0_ff_ff },
+ /* glibc-2.35-20.fc36.x86_64 start_thread has 'syscall'
+ * followed by 'cmp $-4096,%eax' */
+ { 0,
+ 5,
+ { 0x3d, 0x00, 0xf0, 0xff, 0xff },
+ (uintptr_t)_syscall_hook_trampoline_3d_00_f0_ff_ff },
+ /* Many glibc syscall wrappers (e.g. read) have 'syscall' followed
+ * by
+ * mov (%rsp),%rdi (in glibc-2.18-16.fc20.x86_64) */
+ { 0,
+ 4,
+ { 0x48, 0x8b, 0x3c, 0x24 },
+ (uintptr_t)_syscall_hook_trampoline_48_8b_3c_24 },
+ /* Some syscall wrappers have 'syscall' followed
+ * by
+ * mov %rax,-8(%rbp) */
+ { 0,
+ 4,
+ { 0x48, 0x89, 0x45, 0xf8 },
+ (uintptr_t)_syscall_hook_trampoline_48_89_45_f8 },
+ /* Some syscall wrappers (e.g. read) have 'syscall' followed
+ * by
+ * mov %rax,%rbx */
+ { 0,
+ 3,
+ { 0x48, 0x89, 0xc3 },
+ (uintptr_t)_syscall_hook_trampoline_48_89_c3 },
+ /* Some RDTSC instructions are followed by 'mov %rax,%rcx'. */
+ { 0,
+ 3,
+ { 0x48, 0x89, 0xc1 },
+ (uintptr_t)_syscall_hook_trampoline_48_89_c1 },
+ /* __lll_unlock_wake has 'syscall' followed by
+ * pop %rdx; pop %rsi; ret */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 3,
+ { 0x5a, 0x5e, 0xc3 },
+ (uintptr_t)_syscall_hook_trampoline_5a_5e_c3 },
+ /* posix_fadvise64 has 'syscall' followed by
+ * mov %eax,%edx; neg %edx (in glibc-2.22-11.fc23.x86_64) */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 4,
+ { 0x89, 0xc2, 0xf7, 0xda },
+ (uintptr_t)_syscall_hook_trampoline_89_c2_f7_da },
+ /* Our VDSO vsyscall patches have 'syscall' followed by "nop; nop;
+ nop" */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 3,
+ { 0x90, 0x90, 0x90 },
+ (uintptr_t)_syscall_hook_trampoline_90_90_90 },
+ /* glibc-2.22-17.fc23.x86_64 has 'syscall' followed by 'mov $1,%rdx'
+ * in
+ * pthread_barrier_wait.
+ */
+ { 0,
+ 5,
+ { 0xba, 0x01, 0x00, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_ba_01_00_00_00 },
+ /* pthread_sigmask has 'syscall' followed by 'mov %eax,%ecx; xor
+ %edx,%edx' */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 4,
+ { 0x89, 0xc1, 0x31, 0xd2 },
+ (uintptr_t)_syscall_hook_trampoline_89_c1_31_d2 },
+ /* getpid has 'syscall' followed by 'retq; nopl 0x0(%rax,%rax,1)' */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 9,
+ { 0xc3, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_c3_nop },
+ /* liblsan internal_close has 'syscall' followed by 'retq; nopl 0x0(%rax,%rax,1)' */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 6,
+ { 0xc3, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_c3_nop },
+ /* glibc-2.29-15.fc30.x86_64 getpid has 'syscall' followed by 'retq; nopl 0x0(%rax)' */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 5,
+ { 0xc3, 0x0f, 0x1f, 0x40, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_c3_nop },
+ /* liblsan internal_open has 'syscall' followed by 'retq; nopl (%rax)' */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 4,
+ { 0xc3, 0x0f, 0x1f, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_c3_nop },
+ /* liblsan internal_dup2 has 'syscall' followed by 'retq; xchg %ax,%ax' */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 3,
+ { 0xc3, 0x66, 0x90 },
+ (uintptr_t)_syscall_hook_trampoline_c3_nop },
+ /* Go runtime has 'syscall' followed by 'retq; int3; int3 */
+ { PATCH_IS_MULTIPLE_INSTRUCTIONS,
+ 3,
+ { 0xc3, 0xcc, 0xcc },
+ (uintptr_t)_syscall_hook_trampoline_c3_nop },
+ /* glibc-2.31 on Ubuntu 20.04 has 'xor $0x81, %sil' followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 4,
+ { 0x40, 0x80, 0xf6, 0x81 },
+ (uintptr_t)_syscall_hook_trampoline_40_80_f6_81 },
+ /* DynamoRIO has 'mov r10, rcx' followed by 'syscall' */
+ {
+ PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 3,
+ { 0x49, 0x89, 0xca },
+ (uintptr_t)_syscall_hook_trampoline_49_89_ca },
+ /* Some applications have RDTSC followed by 'mov %rdx,any-reg' */
+#undef MOV_RDX_TO_REG
+#define MOV_RDX_TO_REG(rex, op) \
+ { \
+ 0, \
+ 3, \
+ { 0x##rex, 0x89, 0x##op }, \
+ (uintptr_t)_syscall_hook_trampoline_##rex##_89_##op },
+ MOV_RDX_VARIANTS
+ /* Some application has RDTSC followed by 'shl $32,%rdx' */
+ {
+ 0,
+ 4,
+ { 0x48, 0xc1, 0xe2, 0x20 },
+ (uintptr_t)_syscall_hook_trampoline_48_c1_e2_20 },
+ /* glibc-2.35-20.fc36.x86_64 __pthread_create_2_1 has
+ 'syscall' followed by 'mov 0x28(%r12),%rax' */
+ {
+ 0,
+ 5,
+ { 0x49, 0x8b, 0x44, 0x24, 0x28 },
+ (uintptr_t)_syscall_hook_trampoline_49_8b_44_24_28 },
+ /* glibc-2.35-20.fc36.x86_64 thread_start has
+ 'lea 0x8f0(%rbx),%rsi' followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 7,
+ { 0x48, 0x8d, 0xb3, 0xf0, 0x08, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_48_8d_b3_f0_08_00_00 },
+ /* Some application has 'mov %r14,%rdi' followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 3,
+ { 0x4c, 0x89, 0xf7 },
+ (uintptr_t)_syscall_hook_trampoline_4c_89_f7 },
+ /* Some application has 'mov %r15,%rdi' followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 3,
+ { 0x4c, 0x89, 0xff },
+ (uintptr_t)_syscall_hook_trampoline_4c_89_ff },
+ /* Some application has 'mov $0xffffffffffffffff,%r9' followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 7,
+ { 0x49, 0xc7, 0xc1, 0xff, 0xff, 0xff, 0xff },
+ (uintptr_t)_syscall_hook_trampoline_49_c7_c1_ff_ff_ff_ff },
+ /* glibc-2.35-20.fc36.x86_64 __pthread_create_2_1 has
+ 'mov $0xe,%eax' (sigprocmask) followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 5,
+ { 0xb8, 0x0e, 0x00, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_b8_0e_00_00_00 },
+ /* glibc-2.35-20.fc36.x86_64 thread_start has
+ 'mov $0x111,%eax' (set_robust_list) followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 5,
+ { 0xb8, 0x11, 0x01, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_b8_11_01_00_00 },
+ /* Some application has 'mov $0xca,%eax' (futex) followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 5,
+ { 0xb8, 0xca, 0x00, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_b8_ca_00_00_00 },
+ /* Some application has 'mov $0x18,%esi' (sizeof(robust_list)) followed by 'syscall' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 5,
+ { 0xbe, 0x18, 0x00, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_be_18_00_00_00 },
+ /* Some application has 'mov %rsp,%rbp' followed by 'rdtsc' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 3,
+ { 0x48, 0x89, 0xe5 },
+ (uintptr_t)_syscall_hook_trampoline_48_89_e5 },
+ /* Some application has 'mov %rdi,%rbx' followed by 'rdtsc' */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST,
+ 3,
+ { 0x48, 0x89, 0xfb },
+ (uintptr_t)_syscall_hook_trampoline_48_89_fb },
+ /* Support explicit 5 byte nop (`nopl 0(%ax, %ax, 1)`) before 'rdtsc' or syscall (may ignore interfering branches) */
+ { PATCH_SYSCALL_INSTRUCTION_IS_LAST |
+ PATCH_IS_NOP_INSTRUCTIONS,
+ 5,
+ { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+ (uintptr_t)_syscall_hook_trampoline_nops }
+ };
+#elif defined(__aarch64__)
+ extern RR_HIDDEN void _syscall_hook_trampoline_raw(void);
+ struct syscall_patch_hook syscall_patch_hooks[] = {
+ { 0, 4, { 0x01, 0, 0, 0xd4 }, (uintptr_t)_syscall_hook_trampoline_raw }
+ };
+#endif
+
+ assert(sizeof(struct preload_thread_locals) <= PRELOAD_THREAD_LOCALS_SIZE);
+
+ if (process_inited) {
+ return;
+ }
+
+ // Check if the rr page is mapped. We avoid a syscall if it looks like
+ // rr has placed librrpage as the vdso.
+ // Use 1 as the size since the Linux implementation of msync rounds it up to
+ // the page size.
+ if ((!getauxval || (getauxval(AT_SYSINFO_EHDR) != RR_PAGE_ADDR - 3*PRELOAD_LIBRARY_PAGE_SIZE)) &&
+ msync((void*)RR_PAGE_ADDR, 1, MS_ASYNC) != 0) {
+ // The RR page is not mapped - this process is not rr traced.
+ buffer_enabled = 0;
+ return;
+ }
+
+ buffer_enabled = !!getenv(SYSCALLBUF_ENABLED_ENV_VAR);
+
+ if (!buffer_enabled) {
+ // Don't risk executing the rrcall syscall below. If there is an external
+ // seccomp filter that doesn't like unknown syscalls, we risk breaking the
+ // recording.
+ return;
+ }
+
+ params.syscallbuf_enabled = buffer_enabled;
+
+#ifdef __i386__
+ params.get_pc_thunks_start = &_get_pc_thunks_start;
+ params.get_pc_thunks_end = &_get_pc_thunks_end;
+#else
+ params.get_pc_thunks_start = NULL;
+ params.get_pc_thunks_end = NULL;
+#endif
+ params.syscallbuf_code_start = &_syscallbuf_code_start;
+ params.syscallbuf_code_end = &_syscallbuf_code_end;
+ params.syscallbuf_final_exit_instruction =
+ &_syscallbuf_final_exit_instruction;
+ params.syscall_patch_hook_count =
+ sizeof(syscall_patch_hooks) / sizeof(syscall_patch_hooks[0]);
+ params.syscall_patch_hooks = syscall_patch_hooks;
+ params.globals = &globals;
+
+ globals.fdt_uniform = 1;
+ params.breakpoint_instr_addr = &do_breakpoint_fault_addr;
+ params.breakpoint_mode_sentinel = -1;
+ params.syscallbuf_syscall_hook = (void*)syscall_hook;
+
+ // We must not make any call into the syscall buffer in the init function
+ // in case a signal is delivered to us during initialization.
+ // This means that we must not call `_raw_syscall`.
+ int err = _traced_init_syscall(SYS_rrcall_init_preload, (long)¶ms,
+ 0, 0, 0, 0, 0);
+ if (err != 0) {
+ // Check if the rr tracer is present by looking for the thread local page
+ // (mapped just after the rr page). If it is not present, we were
+ // preloaded without rr listening, which is allowed (e.g. after detach).
+ // Otherwise give an intelligent error message indicating that our connection
+ // to rr is broken.
+ // Use 1 as the size since the Linux implementation of msync rounds it up to
+ // the page size.
+ if (msync((void*)RR_PAGE_ADDR + PRELOAD_LIBRARY_PAGE_SIZE, 1, MS_ASYNC) == 0) {
+ fatal("Failed to communicate with rr tracer.\n"
+ "Perhaps a restrictive seccomp filter is in effect (e.g. docker?)?\n"
+ "Adjust the seccomp filter to allow syscalls above 1000, disable it,\n"
+ "or try using `rr record -n` (slow).");
+ } else {
+ buffer_enabled = 0;
+ return;
+ }
+ }
+
+ process_inited = 1;
+}
+
+/**
+ * syscall hooks start here.
+ *
+ * !!! NBB !!!: from here on, all code that executes within the
+ * critical sections of transactions *MUST KEEP $ip IN THE SYSCALLBUF
+ * CODE*. That means no calls into libc, even for innocent-looking
+ * functions like |memcpy()|.
+ *
+ * How syscall hooks operate:
+ *
+ * 1. The rr tracer monkey-patches __kernel_vsyscall() to jump to
+ * _syscall_hook_trampoline() above.
+ * 2. When a call is made to __kernel_vsyscall(), it jumps to
+ * _syscall_hook_trampoline(), where the syscall params are
+ * packaged up into a call to syscall_hook() below.
+ * 3. syscall_hook() dispatches to a syscall processor function.
+ * 4. The syscall processor prepares a new record in the buffer. See
+ * struct syscallbuf_record for record fields. If the buffer runs
+ * out of space, the processor function aborts and makes a traced
+ * syscall, trapping to rr. rr then flushes the buffer. Records
+ * are directly saved to trace, and a buffer-flush event is
+ * recorded without execution info because it's a synthetic event.
+ * 5. Then, the syscall processor redirects all potential output
+ * for the syscall to the record (and corrects the overall size of
+ * the record while it does so).
+ * 6. The syscall is invoked through an asm helper that does *not*
+ *    ptrace-trap to rr.
+ * 7. The syscall output, written to the buffer, is copied to the
+ *    original pointers provided by the user. Note that this step
+ *    saves us from injecting the data on replay: we only need to
+ *    push the data into the buffer, and the wrapper code copies it
+ *    to the user address for us.
+ * 8. The return value and overall size are saved to the record.
+ */
+
+/**
+ * Call this and save the result at the start of every system call we
+ * want to buffer. The result is a pointer into the record space. You
+ * can add to this pointer to allocate space in the trace record.
+ * However, do not read or write through this pointer until
+ * start_commit_syscall() has been called. And you *must* call
+ * start_commit_syscall() after this is called, otherwise buffering
+ * state will be inconsistent between syscalls.
+ *
+ * See |sys_clock_gettime()| for a simple example of how this helper
+ * should be used to buffer outparam data.
+ */
+static void* prep_syscall(void) {
+ /* We don't need to worry about a race between testing
+ * |locked| and setting it here. rr recording is responsible
+ * for ensuring signals are not delivered during
+ * syscall_buffer prologue and epilogue code.
+ *
+ * XXX except for synchronous signals generated in the syscall
+ * buffer code, while reading/writing user pointers */
+ buffer_hdr()->locked |= SYSCALLBUF_LOCKED_TRACEE;
+ /* "Allocate" space for a new syscall record, not including
+ * syscall outparam data. */
+ return buffer_last() + sizeof(struct syscallbuf_record);
+}
+
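+/**
+ * Map an fd to its syscall-buffering class. All fds at or beyond
+ * SYSCALLBUF_FDS_DISABLED_SIZE - 1 share the last slot of the class table.
+ */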
+static enum syscallbuf_fd_classes fd_class(int fd) {
+ if (fd < 0) {
+ return FD_CLASS_INVALID;
+ }
+ if (fd >= SYSCALLBUF_FDS_DISABLED_SIZE - 1) {
+ fd = SYSCALLBUF_FDS_DISABLED_SIZE - 1;
+ }
+ return globals.syscallbuf_fd_class[fd];
+}
+
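+/**
+ * An fd may be handled by the syscallbuf only if it is untraced or invalid.
+ * Negative fds pass through so the kernel can report an error or give them
+ * special treatment (e.g. AT_FDCWD).
+ */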
+static int is_bufferable_fd(int fd) {
+ switch (fd_class(fd)) {
+ case FD_CLASS_INVALID:
+ case FD_CLASS_UNTRACED:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Like prep_syscall, but preps a syscall to operate on a particular fd. If
+ * syscallbuf is disabled for this fd, returns NULL (in which case
+ * start_commit_syscall will abort cleanly and a traced syscall will be used).
+ * Allow negative fds to pass through; they'll either trigger an error or
+ * receive special treatment by the kernel (e.g. AT_FDCWD).
+ */
+static void* prep_syscall_for_fd(int fd) {
+ if (!is_bufferable_fd(fd)) {
+ return NULL;
+ }
+ return prep_syscall();
+}
+
+static void arm_desched_event(void) {
+ /* Don't trace the ioctl; doing so would trigger a flushing
+ * ptrace trap, which is exactly what this code is trying to
+ * avoid! :) Although we don't allocate extra space for these
+ * ioctl's, we do record that we called them; the replayer
+ * knows how to skip over them. */
+ if ((int)privileged_unrecorded_syscall3(SYS_ioctl,
+ thread_locals->desched_counter_fd,
+ PERF_EVENT_IOC_ENABLE, 0)) {
+ fatal("Failed to ENABLE counter");
+ }
+}
+
+static void disarm_desched_event(void) {
+ /* See above. */
+ if ((int)privileged_unrecorded_syscall3(SYS_ioctl,
+ thread_locals->desched_counter_fd,
+ PERF_EVENT_IOC_DISABLE, 0)) {
+ fatal("Failed to DISABLE counter");
+ }
+}
+
+/**
+ * Return 1 if it's ok to proceed with buffering this system call.
+ * Return 0 if we should trace the system call.
+ * This must be checked before proceeding with the buffered system call.
+ */
+/* (Negative numbers so as to not be valid syscall numbers, in case
+ * the |int| arguments below are passed in the wrong order.) */
+enum { MAY_BLOCK = -1, WONT_BLOCK = -2 };
+
+static int fd_write_blocks(int fd) {
+ if (!globals.fdt_uniform) {
+ // If we're not uniform, it is possible for this fd to be untraced in one
+ // of the other tasks that share this fd table. Always assume it could block.
+ return MAY_BLOCK;
+ }
+ switch (fd_class(fd)) {
+ case FD_CLASS_UNTRACED:
+ case FD_CLASS_TRACED:
+ return MAY_BLOCK;
+ case FD_CLASS_INVALID:
+ case FD_CLASS_PROC_MEM:
+ return WONT_BLOCK;
+ }
+ fatal("Unknown or corrupted fd class");
+}
+
+static int start_commit_buffered_syscall(int syscallno, void* record_end,
+ int blockness) {
+ void* record_start;
+ void* stored_end;
+ struct syscallbuf_record* rec;
+
+ if (!thread_locals->buffer) {
+ return 0;
+ }
+ record_start = buffer_last();
+ stored_end = record_start + stored_record_size(record_end - record_start);
+ rec = record_start;
+
+ if (stored_end < record_start + sizeof(struct syscallbuf_record)) {
+ /* Either a catastrophic buffer overflow or
+ * we failed to lock the buffer. Just bail out. */
+ return 0;
+ }
+ if (stored_end > (void*)buffer_end() - sizeof(struct syscallbuf_record)) {
+ /* Buffer overflow.
+ * Unlock the buffer and then execute the system call
+ * with a trap to rr. Note that we reserve enough
+ * space in the buffer for the next prep_syscall(). */
+ buffer_hdr()->locked &= ~SYSCALLBUF_LOCKED_TRACEE;
+ return 0;
+ }
+ /* Store this breadcrumb so that the tracer can find out what
+ * syscall we're executing if our registers are in a weird
+ * state. If we end up aborting this syscall, no worry, this
+ * will just be overwritten later.
+ *
+ * NBB: this *MUST* be set before the desched event is
+ * armed. */
+ rec->syscallno = syscallno;
+ rec->desched = MAY_BLOCK == blockness;
+ rec->size = record_end - record_start;
+
+ if (rec->desched) {
+ pid_t pid = 0;
+ pid_t tid = 0;
+ uid_t uid = 0;
+ if (impose_spurious_desched) {
+ pid = privileged_unrecorded_syscall0(SYS_getpid);
+ tid = privileged_unrecorded_syscall0(SYS_gettid);
+ uid = privileged_unrecorded_syscall0(SYS_getuid);
+ }
+
+ /* NB: the ordering of the next two statements is
+ * important.
+ *
+ * We set this flag to notify rr that it should pay
+ * attention to desched signals pending for this task.
+ * We have to set it *before* we arm the notification
+ * because we can't set the flag atomically with
+ * arming the event (too bad there's no ioctl() for
+ * querying the event enabled-ness state). That's
+ * important because if the notification is armed,
+ * then rr must be confident that when it disarms the
+ * event, the tracee is at an execution point that
+ * *must not* need the desched event.
+ *
+ * If we were to set the flag non-atomically after the
+ * event was armed, then if a desched signal was
+ * delivered right at the instruction that set the
+ * flag, rr wouldn't know that it needed to advance
+ * the tracee to the untraced syscall entry point.
+ * (And if rr didn't do /that/, then the syscall might
+ * block without rr knowing it, and the recording
+ * session would deadlock.) */
+ buffer_hdr()->desched_signal_may_be_relevant = 1;
+ arm_desched_event();
+ if (impose_spurious_desched) {
+ siginfo_t si;
+ si.si_code = POLL_IN;
+ si.si_fd = thread_locals->desched_counter_fd;
+ si.si_pid = pid;
+ si.si_uid = uid;
+ privileged_unrecorded_syscall4(SYS_rt_tgsigqueueinfo, pid, tid,
+ globals.desched_sig,
+ &si);
+ }
+ }
+ return 1;
+}
+
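+/**
+ * Execute a conditional branch to the very next instruction. rr counts
+ * retired conditional branches as "ticks", so this guarantees the tick
+ * counter advances by at least one at this point.
+ */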
+static void force_tick(void) {
+#if defined(__i386__) || defined(__x86_64__)
+ __asm__ __volatile__("je 1f\n\t"
+ "1:");
+#elif defined(__aarch64__)
+ __asm__ __volatile__("cbz xzr, 1f\n"
+ "1:");
+#else
+#error Unknown architecture
+#endif
+}
+
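+/**
+ * If |value| matches the breakpoint value rr stored at RR_PAGE_BREAKPOINT_VALUE,
+ * the conditional move/select below picks the bad pointer and the load at
+ * do_breakpoint_fault_addr faults; rr recognizes that fault specially.
+ * Otherwise the load goes through the safe pointer and is harmless.
+ */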
+static void __attribute__((noinline)) do_breakpoint(size_t value)
+{
+ char *unsafe_value = ((char*)-1)-0xf;
+ char **safe_value = &unsafe_value;
+ uint64_t *breakpoint_value_addr = (uint64_t*)RR_PAGE_BREAKPOINT_VALUE;
+#if defined(__i386__) || defined(__x86_64__)
+ __asm__ __volatile__(
+ "mov (%1),%1\n\t"
+ "cmp %0,%1\n\t"
+ "cmove %3,%2\n\t"
+ // This will segfault if `value` matches
+ // the `breakpoint_value` set by rr. We
+ // detect this segfault and treat it
+ // specially.
+ "do_breakpoint_fault_addr:\n\t"
+ ".global do_breakpoint_fault_addr\n\t"
+ "mov (%2),%2\n\t"
+ "xor %1,%1\n\t"
+ "xor %2,%2\n\t"
+ "xor %3,%3\n\t"
+ : "+a"(value), "+D"(breakpoint_value_addr),
+ "+S"(safe_value), "+c"(unsafe_value)
+ :
+ : "cc", "memory");
+#elif defined(__aarch64__)
+ __asm__ __volatile__("ldr %1, [%1]\n\t"
+ "cmp %0, %1\n\t"
+ "csel %0, %3, %2, eq\n\t"
+ "do_breakpoint_fault_addr:\n\t"
+ ".global do_breakpoint_fault_addr\n\t"
+ "ldr %0, [%0]\n\t"
+ "subs %0, xzr, xzr\n\t"
+ "mov %1, xzr\n\t"
+ : "+r"(value), "+r"(breakpoint_value_addr),
+ "+r"(safe_value), "+r"(unsafe_value)
+ :
+ : "cc", "memory");
+#else
+#error Unknown architecture
+#endif
+}
+
+/**
+ * Commit the record for a buffered system call. record_end can be
+ * adjusted downward from what was passed to
+ * start_commit_buffered_syscall, if not all of the initially
+ * requested space is needed. The result of this function should be
+ * returned directly by the kernel syscall hook.
+ */
+static long commit_raw_syscall(int syscallno, void* record_end, long ret) {
+ void* record_start = buffer_last();
+ struct syscallbuf_record* rec = record_start;
+ struct syscallbuf_hdr* hdr = buffer_hdr();
+ int call_breakpoint = 0;
+
+ assert(record_end >= record_start);
+ rec->size = record_end - record_start;
+
+ assert(hdr->locked);
+
+ /* NB: the ordering of this statement with the
+ * |disarm_desched_event()| call below is important.
+ *
+ * We clear this flag to notify rr that the may-block syscall
+ * has finished, so there's no danger of blocking anymore.
+ * (And thus the desched signal is no longer relevant.) We
+ * have to clear this *before* disarming the event, because if
+ * rr sees the flag set, it has to PTRACE_SYSCALL this task to
+ * ensure it reaches an execution point where the desched
+ * signal is no longer relevant. We have to use the ioctl()
+ * that disarms the event as a safe "backstop" that can be hit
+ * by the PTRACE_SYSCALL.
+ *
+ * If we were to clear the flag *after* disarming the event,
+ * and the signal arrived at the instruction that cleared the
+ * flag, and rr issued the PTRACE_SYSCALL, then this tracee
+ * could fly off to any unknown execution point, including an
+ * iloop. So the recording session could livelock. */
+ hdr->desched_signal_may_be_relevant = 0;
+
+ if (rec->syscallno != syscallno) {
+ fatal("Record syscall number mismatch");
+ }
+
+ if (hdr->abort_commit) {
+ /* We were descheduled in the middle of a may-block
+ * syscall, and it was recorded as a normal entry/exit
+ * pair. So don't record the syscall in the buffer or
+ * replay will go haywire. */
+ hdr->abort_commit = 0;
+ hdr->failed_during_preparation = 0;
+ /* Clear the return value that rr puts there during replay */
+ rec->ret = 0;
+ } else {
+ rec->ret = ret;
+ // Finish 'rec' first before updating num_rec_bytes, since
+ // rr might read the record anytime after this update.
+ hdr->num_rec_bytes += stored_record_size(rec->size);
+ call_breakpoint = 1;
+ }
+
+ if (rec->desched) {
+ disarm_desched_event();
+ }
+ /* NBB: for may-block syscalls that are descheduled, the
+ * tracer uses the previous ioctl() as a stable point to reset
+ * the record counter. Therefore nothing from here on in the
+ * current txn must touch the record counter (at least, must
+ * not assume it's unchanged). */
+
+ buffer_hdr()->locked &= ~SYSCALLBUF_LOCKED_TRACEE;
+
+ if (call_breakpoint) {
+ /* Call the breakpoint function corresponding to the record we just
+ * committed. This function just returns, but during replay it gives rr
+ * a chance to set a breakpoint for when a specific syscallbuf record
+ * has been processed.
+ */
+ do_breakpoint(hdr->num_rec_bytes/8);
+    /* Force a tick now.
+     * During replay, if an async event (SIGKILL) happens between committing the syscall
+     * above and this forced tick, we can detect that because the number of ticks
+     * recorded for the SIGKILL will be less than or equal to the number of ticks reported
+     * when the replay hits do_breakpoint.
+     */
+ force_tick();
+ }
+
+ return ret;
+}
+
+/**
+ * |ret_size| is the result of a syscall indicating how much data was returned
+ * in scratch buffer |buf2|; this function copies that data to |buf| and returns
+ * a pointer to the end of it. If there is no scratch buffer (|buf2| is NULL)
+ * just returns |ptr|.
+ */
+static void* copy_output_buffer(long ret_size, void* ptr, void* buf,
+ void* buf2) {
+ if (!buf2) {
+ return ptr;
+ }
+ if (ret_size <= 0 || buffer_hdr()->failed_during_preparation) {
+ return buf2;
+ }
+ local_memcpy(buf, buf2, ret_size);
+ return buf2 + ret_size;
+}
+
+/**
+ * Copy an input parameter to the syscallbuf where the kernel needs to
+ * read and write it. During replay, we do a no-op self-copy in the buffer
+ * so that the buffered data is not lost.
+ * This code is written in assembler to ensure that the registers that receive
+ * values differing between record and replay (%0, rsi/esi, and flags)
+ * are reset to values that are the same between record and replay immediately
+ * afterward. This guards against diverging register values leaking into
+ * later code.
+ * Use local_memcpy or plain assignment instead if the kernel is not going to
+ * overwrite the values.
+ */
+static void memcpy_input_parameter(void* buf, void* src, int size) {
+#if defined(__i386__) || defined(__x86_64__)
+ unsigned char tmp_in_replay = *rr_page_replay_flag_addr();
+ __asm__ __volatile__("test %0,%0\n\t"
+ "cmovne %1,%2\n\t"
+ "rep movsb\n\t"
+ "xor %0,%0\n\t"
+ "xor %2,%2\n\t"
+ : "+a"(tmp_in_replay), "+D"(buf), "+S"(src), "+c"(size)
+ :
+ : "cc", "memory");
+#elif defined(__aarch64__)
+ long c1;
+ long c2;
+ unsigned char *globals_in_replay = rr_page_replay_flag_addr();
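+  /* Copy 16 bytes per iteration with ldp/stp, then handle any remaining
+   * 8/4/2/1-byte tail; finally scrub the temporaries and flags so they are
+   * identical between record and replay. */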
+ __asm__ __volatile__("ldrb %w3, [%5]\n\t"
+ "cmp %3, #0\n\t" // eq -> record
+ "csel %1, %1, %0, eq\n\t"
+ "subs %4, %2, 16\n\t"
+ "b.lt 2f\n\t"
+ "1:\n\t"
+ "mov %2, %4\n\t"
+ "ldp %3, %4, [%1], #16\n\t"
+ "stp %3, %4, [%0], #16\n\t"
+ "subs %4, %2, #16\n\t"
+ "b.ge 1b\n"
+ "2:\n\t"
+ "tbz %2, 3, 3f\n\t"
+ "ldr %3, [%1], #8\n\t"
+ "str %3, [%0], #8\n\t"
+ "3:\n\t"
+ "tbz %2, 2, 3f\n\t"
+ "ldr %w3, [%1], #4\n\t"
+ "str %w3, [%0], #4\n\t"
+ "3:\n\t"
+ "tbz %2, 1, 3f\n\t"
+ "ldrh %w3, [%1], #2\n\t"
+ "strh %w3, [%0], #2\n\t"
+ "3:\n\t"
+ "tbz %2, 0, 3f\n\t"
+ "ldrb %w3, [%1]\n\t"
+ "strb %w3, [%0]\n\t"
+ "3:\n\t"
+ "subs %3, xzr, xzr\n\t"
+ "mov %4, xzr\n\t"
+ "mov %1, xzr\n\t"
+ : "+r"(buf), "+r"(src),
+ "+r"(size), "=&r"(c1), "=&r"(c2), "+r"(globals_in_replay)
+ :
+ : "cc", "memory");
+#else
+#error Unknown architecture
+#endif
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+/**
+ * Perform an RDTSC, writing the output to 'buf', but only if we're in recording mode.
+ * Otherwise 'buf' is unchanged.
+ */
+static void rdtsc_recording_only(uint32_t buf[2]) {
+ unsigned char tmp_in_replay = *rr_page_replay_flag_addr();
+ __asm__ __volatile__("test %%eax,%%eax\n\t"
+ "jne 1f\n\t"
+ "rdtsc\n\t"
+ "mov %%eax,(%1)\n\t"
+ "mov %%edx,4(%1)\n\t"
+ "1:\n\t"
+ "xor %%eax,%%eax\n\t"
+ "xor %%edx,%%edx\n\t"
+ : "+a"(tmp_in_replay)
+ : "S"(buf)
+ : "cc", "memory", "rdx");
+}
+#endif
+
+/**
+ * During recording, we copy *real to *buf.
+ * During replay, we copy *buf to *real.
+ * Behaves like memcpy_input_parameter in terms of hiding differences between
+ * recording and replay.
+ */
+static void copy_futex_int(uint32_t* buf, uint32_t* real) {
+#if defined(__i386__) || defined(__x86_64__)
+ uint32_t tmp_in_replay = *rr_page_replay_flag_addr();
+ __asm__ __volatile__("test %0,%0\n\t"
+ "mov %2,%0\n\t"
+ "cmovne %1,%0\n\t"
+ "mov %0,%1\n\t"
+ "mov %0,%2\n\t"
+ /* This instruction is just to clear flags */
+ "xor %0,%0\n\t"
+ : "+a"(tmp_in_replay)
+ : "m"(*buf), "m"(*real)
+ : "cc", "memory");
+#elif defined(__aarch64__)
+ unsigned char *globals_in_replay = rr_page_replay_flag_addr();
+ __asm__ __volatile__("ldrb %w2, [%2]\n\t"
+ "cmp %w2, #0\n\t" // eq -> record
+ "csel %2, %1, %0, eq\n\t"
+ "ldr %w2, [%2]\n\t"
+ "csel %0, %0, %1, eq\n\t"
+ "str %w2, [%0]\n\t"
+ "subs %0, xzr, xzr\n\t"
+ "mov %2, xzr\n\t"
+ : "+r"(buf), "+r"(real), "+r"(globals_in_replay)
+ :
+ : "cc", "memory");
+#else
+#error Unknown architecture
+#endif
+}
+
+static int trace_chaos_mode_syscalls = 0;
+static int buffer_chaos_mode_syscalls = 0;
+
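+/**
+ * In chaos mode, randomly interleave runs of traced and buffered syscalls.
+ * Forcing some syscalls to be traced presumably gives rr more opportunities
+ * to make randomized scheduling decisions. Returns 1 if the current syscall
+ * should be traced.
+ */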
+static int force_traced_syscall_for_chaos_mode(void) {
+ if (!globals.in_chaos) {
+ return 0;
+ }
+ while (1) {
+ if (buffer_chaos_mode_syscalls) {
+ --buffer_chaos_mode_syscalls;
+ return 0;
+ }
+ if (trace_chaos_mode_syscalls) {
+ --trace_chaos_mode_syscalls;
+ return 1;
+ }
+ /* force a run of up to 50 syscalls to be traced */
+ trace_chaos_mode_syscalls = (local_random() % 50) + 1;
+ buffer_chaos_mode_syscalls = (trace_chaos_mode_syscalls - 5) * 10;
+ if (buffer_chaos_mode_syscalls < 0) {
+ buffer_chaos_mode_syscalls = 0;
+ }
+ }
+}
+
+/* Keep syscalls in alphabetical order, please. */
+
+/**
+ * Call this for syscalls that have no memory effects, don't block, and
+ * aren't fd-related.
+ */
+static long sys_generic_nonblocking(struct syscall_info* call) {
+ void* ptr = prep_syscall();
+ long ret;
+
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall6(call->no, call->args[0], call->args[1], call->args[2],
+ call->args[3], call->args[4], call->args[5]);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+/**
+ * Call this for syscalls that have no memory effects, don't block, and
+ * have an fd as their first parameter.
+ */
+static long sys_generic_nonblocking_fd(struct syscall_info* call) {
+ int fd = call->args[0];
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall6(call->no, fd, call->args[1], call->args[2],
+ call->args[3], call->args[4], call->args[5]);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+/**
+ * Call this for syscalls that have no memory effects, don't block, and
+ * have an fd as their first parameter, and should run privileged.
+ */
+static long privileged_sys_generic_nonblocking_fd(const struct syscall_info* call) {
+ int fd = call->args[0];
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return privileged_traced_raw_syscall(call);
+ }
+ ret = privileged_untraced_syscall6(call->no, fd, call->args[1], call->args[2],
+ call->args[3], call->args[4], call->args[5]);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+static long sys_clock_gettime(struct syscall_info* call) {
+ const int syscallno = SYS_clock_gettime;
+ __kernel_clockid_t clk_id = (__kernel_clockid_t)call->args[0];
+ struct timespec* tp = (struct timespec*)call->args[1];
+
+ void* ptr = prep_syscall();
+ struct timespec* tp2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (tp) {
+ tp2 = ptr;
+ ptr += sizeof(*tp2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall2(syscallno, clk_id, tp2);
+ if (tp && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ /* This is small and won't get optimized to a memcpy call outside
+ our library. */
+ *tp = *tp2;
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+#ifdef SYS_clock_gettime64
+
+static long sys_clock_gettime64(struct syscall_info* call) {
+ const int syscallno = SYS_clock_gettime64;
+ __kernel_clockid_t clk_id = (__kernel_clockid_t)call->args[0];
+ struct __kernel_timespec* tp = (struct __kernel_timespec*)call->args[1];
+
+ void* ptr = prep_syscall();
+ struct __kernel_timespec* tp2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (tp) {
+ tp2 = ptr;
+ ptr += sizeof(*tp2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall2(syscallno, clk_id, tp2);
+ if (tp && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ /* This is small and won't get optimized to a memcpy call outside
+ our library. */
+ *tp = *tp2;
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#if defined(SYS_creat)
+static long sys_open(struct syscall_info* call);
+static long sys_creat(struct syscall_info* call) {
+ const char* pathname = (const char*)call->args[0];
+ __kernel_mode_t mode = call->args[1];
+ /* Thus sayeth the man page:
+ *
+ * creat() is equivalent to open() with flags equal to
+ * O_CREAT|O_WRONLY|O_TRUNC. */
+ struct syscall_info open_call =
+ { SYS_open, { (long)pathname, O_CREAT | O_TRUNC | O_WRONLY, mode } };
+ return sys_open(&open_call);
+}
+#endif
+
+static int sys_fcntl64_no_outparams(struct syscall_info* call) {
+ const int syscallno = RR_FCNTL_SYSCALL;
+ int fd = call->args[0];
+ int cmd = call->args[1];
+ long arg = call->args[2];
+
+ /* None of the no-outparam fcntl's are known to be
+ * may-block. */
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall3(syscallno, fd, cmd, arg);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static int sys_fcntl64_own_ex(struct syscall_info* call) {
+ const int syscallno = RR_FCNTL_SYSCALL;
+ int fd = call->args[0];
+ int cmd = call->args[1];
+ struct rr_f_owner_ex* owner = (struct rr_f_owner_ex*)call->args[2];
+
+ /* The OWN_EX fcntl's aren't may-block. */
+ void* ptr = prep_syscall_for_fd(fd);
+ struct rr_f_owner_ex* owner2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (owner) {
+ owner2 = ptr;
+ ptr += sizeof(*owner2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ if (owner2) {
+ memcpy_input_parameter(owner2, owner, sizeof(*owner2));
+ }
+ ret = untraced_syscall3(syscallno, fd, cmd, owner2);
+ if (owner2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(owner, owner2, sizeof(*owner));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static int sys_fcntl64_setlk64(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Releasing a lock could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = RR_FCNTL_SYSCALL;
+ int fd = call->args[0];
+ int cmd = call->args[1];
+ struct rr_flock64* lock = (struct rr_flock64*)call->args[2];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ struct rr_flock64* lock2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (lock) {
+ lock2 = ptr;
+ ptr += sizeof(*lock2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ if (lock2) {
+ memcpy_input_parameter(lock2, lock, sizeof(*lock2));
+ }
+ ret = untraced_syscall3(syscallno, fd, cmd, lock2);
+ if (lock2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(lock, lock2, sizeof(*lock));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
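+/* F_SETLKW may block until the lock becomes available, so arm the desched
+ * event (MAY_BLOCK). The kernel only reads the flock structure for SETLKW,
+ * so the caller's pointer is passed through rather than copied into the
+ * buffer. */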
+static int sys_fcntl64_setlkw64(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Releasing a lock could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = RR_FCNTL_SYSCALL;
+ int fd = call->args[0];
+ int cmd = call->args[1];
+ struct rr_flock64* lock = (struct rr_flock64*)call->args[2];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall3(syscallno, fd, cmd, lock);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+#if defined(SYS_fcntl64)
+/* 32-bit system */
+static long sys_fcntl64(struct syscall_info* call)
+#else
+/* 64-bit system */
+static long sys_fcntl(struct syscall_info* call)
+#endif
+{
+ switch (call->args[1]) {
+ case F_SETFL:
+ if (call->args[2] == O_DIRECT) {
+ /* This needs to go to rr so we can disable syscall buffering
+ on this fd. */
+ return traced_raw_syscall(call);
+ }
+ /* Falls through. */
+ case F_DUPFD:
+ case F_GETFD:
+ case F_GETFL:
+ case F_GETOWN:
+ case F_SETFD:
+ case F_SETOWN:
+ case F_SETSIG:
+ return sys_fcntl64_no_outparams(call);
+
+ case F_GETOWN_EX:
+ case F_SETOWN_EX:
+ return sys_fcntl64_own_ex(call);
+
+#ifndef F_SETLK64
+#define F_SETLK64 13
+#endif
+ case F_SETLK64:
+#if !defined(SYS_fcntl64)
+ /* Also uses 64-bit flock format */
+ case F_SETLK:
+#endif
+ return sys_fcntl64_setlk64(call);
+
+#ifndef F_SETLKW64
+#define F_SETLKW64 14
+#endif
+ case F_SETLKW64:
+#if !defined(SYS_fcntl64)
+ /* Also uses 64-bit flock format */
+ case F_SETLKW:
+#endif
+ return sys_fcntl64_setlkw64(call);
+
+ default:
+ return traced_raw_syscall(call);
+ }
+}
+
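+/**
+ * Clamp a successful syscall result to the scratch-buffer length |len| so
+ * copy_output_buffer never copies more than was reserved; errors map to 0.
+ */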
+static long ret_buf_len(long ret, size_t len) {
+ if (ret < 0) {
+ return 0;
+ }
+ if (len > LONG_MAX) {
+ return ret;
+ }
+ return ret < (long)len ? ret : (long)len;
+}
+
+static long sys_flistxattr(struct syscall_info* call) {
+ const int syscallno = SYS_flistxattr;
+ int fd = (int)call->args[0];
+ char* buf = (char*)call->args[1];
+ size_t size = call->args[2];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ void* buf2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (buf && size > 0) {
+ buf2 = ptr;
+ ptr += size;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, fd, buf2, size);
+ ptr = copy_output_buffer(ret_buf_len(ret, size), ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_safe_nonblocking_ioctl(struct syscall_info* call) {
+ const int syscallno = SYS_ioctl;
+ int fd = call->args[0];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall3(syscallno, fd, call->args[1], call->args[2]);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_ioctl_fionread(struct syscall_info* call) {
+ const int syscallno = SYS_ioctl;
+ int fd = call->args[0];
+ int* value = (int*)call->args[2];
+ void* buf = NULL;
+
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ if (value) {
+ buf = ptr;
+ ptr += sizeof(*value);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall3(syscallno, fd, FIONREAD, buf);
+ if (buf && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(value, buf, sizeof(*value));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_ioctl(struct syscall_info* call) {
+ switch (call->args[1]) {
+ case BTRFS_IOC_CLONE_RANGE:
+ case FIOCLEX:
+ case FIONCLEX:
+ return sys_safe_nonblocking_ioctl(call);
+ case FIONREAD:
+ return sys_ioctl_fionread(call);
+ default:
+ return traced_raw_syscall(call);
+ }
+}
+
+static long sys_futex(struct syscall_info* call) {
+ enum {
+ FUTEX_USES_UADDR2 = 1 << 0,
+ };
+
+ /* This can make wakeups a lot more expensive. We assume
+ that wakeups are only used when some thread is actually waiting,
+ in which case we're at most doubling the overhead of the combined
+ wait + wakeup. */
+ if (globals.in_chaos) {
+ return traced_raw_syscall(call);
+ }
+
+ int op = call->args[1];
+ int flags = 0;
+ switch (FUTEX_CMD_MASK & op) {
+ case FUTEX_WAKE_BITSET:
+ case FUTEX_WAKE:
+ break;
+ case FUTEX_REQUEUE:
+ case FUTEX_CMP_REQUEUE:
+ case FUTEX_WAKE_OP:
+ flags |= FUTEX_USES_UADDR2;
+ break;
+
+ /* It turns out not to be worth buffering the FUTEX_WAIT*
+ * calls. When a WAIT call is made, we know almost for sure
+ * that the tracee is going to be desched'd (otherwise the
+ * userspace CAS would have succeeded). This is unlike
+   * read/write, e.g., where the vast majority of calls aren't
+ * desched'd and the overhead is worth it. So all that
+ * buffering WAIT does is add the overhead of arming/disarming
+ * desched (which is a measurable perf loss).
+ *
+ * NB: don't ever try to buffer FUTEX_LOCK_PI; it requires
+ * special processing in the tracer process (in addition to
+ * not being worth doing for perf reasons). */
+ default:
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_futex;
+ uint32_t* uaddr = (uint32_t*)call->args[0];
+ uint32_t val = call->args[2];
+ const struct timespec* timeout = (const struct timespec*)call->args[3];
+ uint32_t* uaddr2 = (uint32_t*)call->args[4];
+ uint32_t val3 = call->args[5];
+
+ void* ptr = prep_syscall();
+ uint32_t* saved_uaddr;
+ uint32_t* saved_uaddr2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ /* We have to record the value of the futex at kernel exit,
+ * but we can't substitute a scratch pointer for the uaddrs:
+ * the futex identity is the memory cell. There are schemes
+ * that would allow us to use scratch futexes, but they get
+ * complicated quickly. */
+ saved_uaddr = ptr;
+ ptr += sizeof(*saved_uaddr);
+ if (FUTEX_USES_UADDR2 & flags) {
+ saved_uaddr2 = ptr;
+ ptr += sizeof(*saved_uaddr2);
+ }
+ /* See above; it's not worth buffering may-block futex
+ * calls. */
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall6(syscallno, uaddr, op, val, timeout, uaddr2, val3);
+ /* During recording, save the real outparams to the buffer.
+ * During replay, save the values from the buffer to the real outparams.
+ *
+ * The *ONLY* reason it's correct for us to read the outparams
+ * carelessly is that rr protects this syscallbuf
+   * transaction as a critical section. */
+ copy_futex_int(saved_uaddr, uaddr);
+ if (saved_uaddr2) {
+ copy_futex_int(saved_uaddr2, uaddr2);
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
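+/* getrandom may block while the entropy pool initializes unless GRND_NONBLOCK
+ * is set, hence the choice of blockness below. */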
+static long sys_getrandom(struct syscall_info* call) {
+ void* buf = (void*)call->args[0];
+ size_t buf_len = (size_t)call->args[1];
+ unsigned int flags = (unsigned int)call->args[2];
+ const int syscallno = SYS_getrandom;
+
+ void* ptr = prep_syscall();
+ void* buf2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (buf && buf_len > 0) {
+ buf2 = ptr;
+ ptr += buf_len;
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, (flags & GRND_NONBLOCK) ? WONT_BLOCK : MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(call->no, buf2, buf_len, flags);
+ ptr = copy_output_buffer(ret, ptr, buf, buf2);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+static long sys_generic_getdents(struct syscall_info* call) {
+ int fd = (int)call->args[0];
+ void* buf = (void*)call->args[1];
+ unsigned int count = (unsigned int)call->args[2];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ void* buf2 = NULL;
+ long ret;
+
+ if (buf && count > 0) {
+ buf2 = ptr;
+ ptr += count;
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(call->no, fd, buf2, count);
+ ptr = copy_output_buffer(ret, ptr, buf, buf2);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+#if defined(SYS_getdents)
+static long sys_getdents(struct syscall_info* call) {
+ return sys_generic_getdents(call);
+}
+#endif
+
+static long sys_getdents64(struct syscall_info* call) {
+ return sys_generic_getdents(call);
+}
+
+static long sys_gettimeofday(struct syscall_info* call) {
+ const int syscallno = SYS_gettimeofday;
+ struct timeval* tp = (struct timeval*)call->args[0];
+ struct timezone* tzp = (struct timezone*)call->args[1];
+
+ /* XXX it seems odd that clock_gettime() is spec'd to be
+ * async-signal-safe while gettimeofday() isn't, but that's
+ * what the docs say! */
+ void* ptr = prep_syscall();
+ struct timeval* tp2 = NULL;
+ struct timezone* tzp2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (tp) {
+ tp2 = ptr;
+ ptr += sizeof(*tp2);
+ }
+ if (tzp) {
+ tzp2 = ptr;
+ ptr += sizeof(*tzp2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall2(syscallno, tp2, tzp2);
+ if (ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ if (tp) {
+ /* This is small and won't get optimized to a memcpy call outside
+ our library. */
+ *tp = *tp2;
+ }
+ if (tzp) {
+ /* This is small and won't get optimized to a memcpy call outside
+ our library. */
+ *tzp = *tzp2;
+ }
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_generic_getxattr(struct syscall_info* call) {
+ const char* path = (const char*)call->args[0];
+ const char* name = (const char*)call->args[1];
+ void* value = (void*)call->args[2];
+ size_t size = call->args[3];
+
+ void* ptr = prep_syscall();
+ void* value2 = NULL;
+ long ret;
+
+ if (value && size > 0) {
+ value2 = ptr;
+ ptr += size;
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall4(call->no, path, name, value2, size);
+ ptr = copy_output_buffer(ret_buf_len(ret, size), ptr, value, value2);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+static long sys_getxattr(struct syscall_info* call) {
+ return sys_generic_getxattr(call);
+}
+
+static long sys_lgetxattr(struct syscall_info* call) {
+ return sys_generic_getxattr(call);
+}
+
+static long sys_fgetxattr(struct syscall_info* call) {
+ int fd = (int)call->args[0];
+ const char* name = (const char*)call->args[1];
+ void* value = (void*)call->args[2];
+ size_t size = call->args[3];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ void* value2 = NULL;
+ long ret;
+
+ if (value && size > 0) {
+ value2 = ptr;
+ ptr += size;
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall4(call->no, fd, name, value2, size);
+ ptr = copy_output_buffer(ret_buf_len(ret, size), ptr, value, value2);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+static long sys_generic_listxattr(struct syscall_info* call) {
+ char* path = (char*)call->args[0];
+ char* buf = (char*)call->args[1];
+ size_t size = call->args[2];
+
+ void* ptr = prep_syscall();
+ void* buf2 = NULL;
+ long ret;
+
+ if (buf && size > 0) {
+ buf2 = ptr;
+ ptr += size;
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(call->no, path, buf2, size);
+ ptr = copy_output_buffer(ret_buf_len(ret, size), ptr, buf, buf2);
+ return commit_raw_syscall(call->no, ptr, ret);
+}
+
+static long sys_listxattr(struct syscall_info* call) {
+ return sys_generic_listxattr(call);
+}
+
+static long sys_llistxattr(struct syscall_info* call) {
+ return sys_generic_listxattr(call);
+}
+
+#if defined(SYS__llseek)
+static long sys__llseek(struct syscall_info* call) {
+ const int syscallno = SYS__llseek;
+ int fd = call->args[0];
+ unsigned long offset_high = call->args[1];
+ unsigned long offset_low = call->args[2];
+ __kernel_loff_t* result = (__kernel_loff_t*)call->args[3];
+ unsigned int whence = call->args[4];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ __kernel_loff_t* result2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (result) {
+ result2 = ptr;
+ ptr += sizeof(*result2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ if (result2) {
+ memcpy_input_parameter(result2, result, sizeof(*result2));
+ }
+ ret = untraced_syscall5(syscallno, fd, offset_high, offset_low, result2,
+ whence);
+ if (result2) {
+ *result = *result2;
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+static long sys_madvise(struct syscall_info* call) {
+ const int syscallno = SYS_madvise;
+ void* addr = (void*)call->args[0];
+ size_t length = call->args[1];
+ int advice = call->args[2];
+
+ void* ptr;
+ long ret;
+
+ switch (advice) {
+ // Whitelist advice values that we know are OK to pass through to the
+ // kernel directly.
+ case MADV_NORMAL:
+ case MADV_RANDOM:
+ case MADV_SEQUENTIAL:
+ case MADV_WILLNEED:
+ case MADV_DONTNEED:
+ case MADV_MERGEABLE:
+ case MADV_UNMERGEABLE:
+ case MADV_HUGEPAGE:
+ case MADV_NOHUGEPAGE:
+ case MADV_DONTDUMP:
+ case MADV_DODUMP:
+ break;
+ case MADV_FREE:
+ // See record_syscall. We disallow MADV_FREE because it creates
+ // nondeterminism.
+ advice = -1;
+ break;
+ default:
+ return traced_raw_syscall(call);
+ }
+
+ ptr = prep_syscall();
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ /* Ensure this syscall happens during replay. In particular MADV_DONTNEED
+ * must be executed.
+ */
+ ret = untraced_replayed_syscall3(syscallno, addr, length, advice);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
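+/**
+ * Record the protection change in globals.mprotect_records so rr can keep
+ * track of memory permissions even though this mprotect does not trap to the
+ * tracer. Fall back to a traced syscall if the record table is full or an
+ * unusual protection bit is requested.
+ */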
+static long sys_mprotect(struct syscall_info* call) {
+ const int syscallno = SYS_mprotect;
+ void* addr = (void*)call->args[0];
+ size_t length = call->args[1];
+ int prot = call->args[2];
+ struct mprotect_record* mrec;
+
+ void* ptr;
+ long ret;
+
+ if ((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) || !buffer_hdr() ||
+ buffer_hdr()->mprotect_record_count >= MPROTECT_RECORD_COUNT) {
+ return traced_raw_syscall(call);
+ }
+
+ ptr = prep_syscall();
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ mrec = &globals.mprotect_records[buffer_hdr()->mprotect_record_count++];
+ mrec->start = (uint64_t)(uintptr_t)addr;
+ mrec->size = length;
+ mrec->prot = prot;
+ ret = untraced_replayed_syscall3(syscallno, addr, length, prot);
+ if (ret < 0 && ret != -ENOMEM) {
+ /* indicate that nothing was mprotected */
+ mrec->size = 0;
+ }
+ buffer_hdr()->mprotect_record_count_completed++;
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static int supported_open(const char* file_name, int flags) {
+ if (is_gcrypt_deny_file(file_name)) {
+ /* This needs to be a traced syscall. We want to return an
+ open file even if the file doesn't exist and the untraced syscall
+ returns ENOENT. */
+ return 0;
+ }
+ if (flags & O_DIRECT) {
+ /* O_DIRECT needs to go to rr so we can blacklist the file for
+ syscall buffering. */
+ return 0;
+ }
+ /* Writeable opens need to go to rr to be checked in case
+ they could write to a mapped file.
+ But if they're O_EXCL | O_CREAT, a new file must be created
+ so that will be fine. */
+ return !(flags & (O_RDWR | O_WRONLY)) ||
+ (flags & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT);
+}
+
+static long sys_readlinkat(struct syscall_info* call, int privileged);
+
+struct check_open_state {
+ uint8_t did_abort;
+ uint8_t did_fail_during_preparation;
+};
+
+static int check_file_open_ok(struct syscall_info* call, int ret, struct check_open_state state) {
+ /* If we failed during preparation then a SIGSYS or similar prevented the syscall
+ from doing anything, so there is nothing for us to do here and we shouldn't
+ try to interpret the "syscall result". */
+ if (state.did_fail_during_preparation || ret < 0) {
+ return ret;
+ }
+ char buf[100];
+ sprintf(buf, "/proc/self/fd/%d", ret);
+ char link[PATH_MAX];
+ long link_ret;
+ if (state.did_abort) {
+ /* Don't add any new syscallbuf records, that won't work. */
+ link_ret = privileged_traced_syscall4(SYS_readlinkat, -1, (long)buf, (long)link, sizeof(link));
+ } else {
+ struct syscall_info readlink_call =
+ { SYS_readlinkat, { -1, (long)buf, (long)link, sizeof(link), 0, 0 } };
+ link_ret = sys_readlinkat(&readlink_call, 1);
+ }
+ if (link_ret >= 0 && link_ret < (ssize_t)sizeof(link)) {
+ link[link_ret] = 0;
+ if (allow_buffered_open(link)) {
+ return ret;
+ }
+ }
+ /* Clean up by closing the file descriptor we should not have opened and
+ opening it again, traced this time.
+ Use a privileged traced syscall for the close to ensure it
+ can't fail due to lack of privilege.
+ We expect this to return an error.
+ We could try an untraced close syscall here, falling back to traced
+ syscall, but that's a bit more complicated and we're already on
+ the slow (and hopefully rare) path. */
+ privileged_traced_syscall1(SYS_close, ret);
+ return traced_raw_syscall(call);
+}
+
+static struct check_open_state capture_check_open_state(void) {
+ struct check_open_state ret;
+ ret.did_abort = buffer_hdr()->abort_commit;
+ ret.did_fail_during_preparation = buffer_hdr()->failed_during_preparation;
+ return ret;
+}
+
+#if defined(SYS_open)
+static long sys_open(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Opening a FIFO could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_open;
+ const char* pathname = (const char*)call->args[0];
+ int flags = call->args[1];
+ __kernel_mode_t mode = call->args[2];
+ void* ptr;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!supported_open(pathname, flags)) {
+ return traced_raw_syscall(call);
+ }
+
+ ptr = prep_syscall();
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, pathname, flags, mode);
+ struct check_open_state state = capture_check_open_state();
+ ret = commit_raw_syscall(syscallno, ptr, ret);
+ return check_file_open_ok(call, ret, state);
+}
+#endif
+
+static long sys_openat(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Opening a FIFO could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_openat;
+ int dirfd = call->args[0];
+ const char* pathname = (const char*)call->args[1];
+ int flags = call->args[2];
+ __kernel_mode_t mode = call->args[3];
+ void* ptr;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!supported_open(pathname, flags)) {
+ return traced_raw_syscall(call);
+ }
+
+ ptr = prep_syscall();
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall4(syscallno, dirfd, pathname, flags, mode);
+ struct check_open_state state = capture_check_open_state();
+ ret = commit_raw_syscall(syscallno, ptr, ret);
+ return check_file_open_ok(call, ret, state);
+}
+
+#if defined(SYS_poll) || defined(SYS_ppoll)
+/**
+ * Make this function external so desched_ticks.py can set a breakpoint on it.
+ * Make it visibility-"protected" so that our local definition binds to it
+ * directly and doesn't go through a PLT thunk (which would mean temporarily
+ * leaving syscallbuf code).
+ */
+__attribute__((visibility("protected"))) void __before_poll_syscall_breakpoint(
+ void) {}
+#endif
+
+#if defined(SYS_poll)
+static long sys_poll(struct syscall_info* call) {
+ const int syscallno = SYS_poll;
+ struct pollfd* fds = (struct pollfd*)call->args[0];
+ unsigned int nfds = call->args[1];
+ int timeout = call->args[2];
+
+ void* ptr = prep_syscall();
+ struct pollfd* fds2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (fds && nfds > 0) {
+ fds2 = ptr;
+ ptr += nfds * sizeof(*fds2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ if (fds2) {
+ memcpy_input_parameter(fds2, fds, nfds * sizeof(*fds2));
+ }
+
+ __before_poll_syscall_breakpoint();
+
+ /* Try a no-timeout version of the syscall first. If this doesn't return
+ anything, and we should have blocked, we'll try again with a traced syscall
+ which will be the one that blocks. This usually avoids the
+ need to trigger desched logic, which adds overhead, especially the
+ rrcall_notify_syscall_hook_exit that gets triggered. */
+ ret = untraced_syscall3(syscallno, fds2, nfds, 0);
+
+ if (fds2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ /* NB: even when poll returns 0 indicating no pending
+     * fds, it still sets each .revents outparam to 0.
+ * (Reasonably.) So we always need to copy on return
+ * value >= 0.
+ * It's important that we not copy when there's an error.
+ * The syscallbuf commit might have been aborted, which means
+ * during replay fds2 might be non-recorded data, so we'd be
+ * incorrectly trashing 'fds'. */
+ local_memcpy(fds, fds2, nfds * sizeof(*fds));
+ }
+ commit_raw_syscall(syscallno, ptr, ret);
+
+ if (ret != 0 || timeout == 0) {
+ return ret;
+ }
+ /* The syscall didn't return anything, and we should have blocked.
+ Just perform a raw syscall now since we're almost certain to block. */
+ return traced_raw_syscall(call);
+}
+#endif
+
+#if defined(SYS_ppoll)
+static long sys_ppoll(struct syscall_info* call) {
+ const int syscallno = SYS_ppoll;
+ struct pollfd* fds = (struct pollfd*)call->args[0];
+ unsigned int nfds = call->args[1];
+ const struct timespec *tmo_p = (struct timespec*)call->args[2];
+ const kernel_sigset_t *sigmask = (const kernel_sigset_t*)call->args[3];
+ size_t sigmask_size = call->args[4];
+
+ if (sigmask) {
+ // See ppoll_deliver. ppoll calls that temporarily change the
+ // sigmask are hard to handle; we may get a signal that we can't
+ // deliver later because it's blocked by the application.
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall();
+ struct pollfd* fds2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (fds && nfds > 0) {
+ fds2 = ptr;
+ ptr += nfds * sizeof(*fds2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ if (fds2) {
+ memcpy_input_parameter(fds2, fds, nfds * sizeof(*fds2));
+ }
+
+ __before_poll_syscall_breakpoint();
+
+ /* Try a no-timeout version of the syscall first. If this doesn't return
+ anything, and we should have blocked, we'll try again with a traced syscall
+ which will be the one that blocks. This usually avoids the
+ need to trigger desched logic, which adds overhead, especially the
+ rrcall_notify_syscall_hook_exit that gets triggered. */
+ const struct timespec tmo0 = {0, 0};
+ ret = untraced_syscall5(syscallno, fds2, nfds, &tmo0, sigmask, sigmask_size);
+
+ if (fds2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ /* NB: even when poll returns 0 indicating no pending
+     * fds, it still sets each .revents outparam to 0.
+ * (Reasonably.) So we always need to copy on return
+ * value >= 0.
+ * It's important that we not copy when there's an error.
+ * The syscallbuf commit might have been aborted, which means
+ * during replay fds2 might be non-recorded data, so we'd be
+ * incorrectly trashing 'fds'. */
+ local_memcpy(fds, fds2, nfds * sizeof(*fds));
+ }
+ commit_raw_syscall(syscallno, ptr, ret);
+
+ if (ret != 0 || (tmo_p && tmo_p->tv_sec == 0 && tmo_p->tv_nsec == 0)) {
+ return ret;
+ }
+ /* The syscall didn't return anything, and we should have blocked.
+ Just perform a raw syscall now since we're almost certain to block. */
+ return traced_raw_syscall(call);
+}
+#endif
+
+static long sys_epoll_wait(struct syscall_info* call) {
+ int epfd = call->args[0];
+ struct epoll_event* events = (struct epoll_event*)call->args[1];
+ int max_events = call->args[2];
+ int timeout = call->args[3];
+
+ void* ptr;
+ struct epoll_event* events2 = NULL;
+ long ret;
+
+ ptr = prep_syscall();
+
+ assert(SYS_epoll_pwait == call->no
+#if defined(SYS_epoll_wait)
+ || SYS_epoll_wait == call->no
+#endif
+ );
+
+ if (events && max_events > 0) {
+ events2 = ptr;
+ ptr += max_events * sizeof(*events2);
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ /* Try a no-timeout version of the syscall first. If this doesn't return
+ anything, and we should have blocked, we'll try again with a traced syscall
+ which will be the one that blocks. This usually avoids the
+ need to trigger desched logic, which adds overhead, especially the
+ rrcall_notify_syscall_hook_exit that gets triggered.
+ N.B.: SYS_epoll_wait only has four arguments, but we don't care
+ if the last two arguments are garbage */
+ ret = untraced_syscall6(call->no, epfd, events2, max_events, 0,
+ call->args[4] /*sigmask*/, call->args[5] /*sizeof(*sigmask)*/);
+
+ ptr = copy_output_buffer(ret * sizeof(*events2), ptr, events, events2);
+ ret = commit_raw_syscall(call->no, ptr, ret);
+ if (timeout == 0 || (ret != EINTR && ret != 0)) {
+ /* If we got some real results, or a non-EINTR error, we can just
+ return it directly.
+ If we got no results and the timeout was 0, we can just return 0.
+ If we got EINTR and the timeout was 0, a signal must have
+ interrupted the syscall (not sure if this can happen...). If the signal
+ needs to be handled, we'll handle it as we exit the syscallbuf.
+ Returning EINTR is fine because that's what the syscall would have
+ returned had it run traced. (We didn't enable the desched signal
+ so no extra signals could have affected our untraced syscall that
+ could not have been delivered to a traced syscall.) */
+ return ret;
+ }
+ /* Some timeout was requested and either we got no results or we got
+ EINTR.
+ In the former case we just have to wait, so we do a traced syscall.
+ In the latter case, the syscall must have been interrupted by a
+ signal (which rr will have handled or stashed, and won't deliver until
+ we exit syscallbuf code or do a traced syscall). The kernel doesn't
+ automatically restart the syscall because of a longstanding bug (as of
+ 4.17 anyway). Doing a traced syscall will allow a stashed signal to be
+ processed (if necessary) and allow things to proceed normally after that.
+ Note that if rr decides to deliver a signal to the tracee, that will
+ itself interrupt the syscall and cause it to return EINTR just as
+ would happen without rr.
+ */
+ return traced_raw_syscall(call);
+}
+
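+/* Assumed to be layout-compatible with the 64-bit timespec that epoll_pwait2
+ * takes; only used to pass a zero timeout below. */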
+struct timespec64 {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+};
+
+#ifdef SYS_epoll_pwait2
+static long sys_epoll_pwait2(struct syscall_info* call) {
+ int epfd = call->args[0];
+ struct epoll_event* events = (struct epoll_event*)call->args[1];
+ int max_events = call->args[2];
+ struct timespec64* timeout = (struct timespec64*)call->args[3];
+
+ void* ptr;
+ struct epoll_event* events2 = NULL;
+ long ret;
+
+ ptr = prep_syscall();
+
+ assert(SYS_epoll_pwait2 == call->no);
+
+ if (events && max_events > 0) {
+ events2 = ptr;
+ ptr += max_events * sizeof(*events2);
+ }
+ if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ /* Try a no-timeout version of the syscall first. If this doesn't return
+ anything, and we should have blocked, we'll try again with a traced syscall
+ which will be the one that blocks. This usually avoids the
+ need to trigger desched logic, which adds overhead, especially the
+ rrcall_notify_syscall_hook_exit that gets triggered. */
+ struct timespec64 no_timeout = { 0, 0 };
+ ret = untraced_syscall6(call->no, epfd, events2, max_events, &no_timeout,
+ call->args[4] /*sigmask*/, call->args[5] /*sizeof(*sigmask)*/);
+
+ ptr = copy_output_buffer(ret * sizeof(*events2), ptr, events, events2);
+ ret = commit_raw_syscall(call->no, ptr, ret);
+ if ((timeout && timeout->tv_sec == 0 && timeout->tv_nsec == 0) ||
+ (ret != EINTR && ret != 0)) {
+ /* If we got some real results, or a non-EINTR error, we can just
+ return it directly.
+ If we got no results and the timeout was 0, we can just return 0.
+ If we got EINTR and the timeout was 0, a signal must have
+ interrupted the syscall (not sure if this can happen...). If the signal
+ needs to be handled, we'll handle it as we exit the syscallbuf.
+ Returning EINTR is fine because that's what the syscall would have
+ returned had it run traced. (We didn't enable the desched signal
+ so no extra signals could have affected our untraced syscall that
+ could not have been delivered to a traced syscall.) */
+ return ret;
+ }
+ /* Some timeout was requested and either we got no results or we got
+ EINTR.
+ In the former case we just have to wait, so we do a traced syscall.
+ In the latter case, the syscall must have been interrupted by a
+ signal (which rr will have handled or stashed, and won't deliver until
+ we exit syscallbuf code or do a traced syscall). The kernel doesn't
+ automatically restart the syscall because of a longstanding bug (as of
+ 4.17 anyway). Doing a traced syscall will allow a stashed signal to be
+ processed (if necessary) and allow things to proceed normally after that.
+ Note that if rr decides to deliver a signal to the tracee, that will
+ itself interrupt the syscall and cause it to return EINTR just as
+ would happen without rr.
+ */
+ return traced_raw_syscall(call);
+}
+#endif
+
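+/* Reads of at least this many bytes (64KB) may use the clone-range fast path
+ * in sys_read below. */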
+#define CLONE_SIZE_THRESHOLD 0x10000
+
+static long sys_read(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Reading from a pipe could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_read;
+ int fd = call->args[0];
+ void* buf = (void*)call->args[1];
+ size_t count = call->args[2];
+
+ void* ptr;
+ void* buf2 = NULL;
+ long ret;
+
+ /* Try cloning data using CLONE_RANGE ioctl.
+ * XXX switch to FIOCLONERANGE when that's more widely available. It's the
+ * same ioctl number so it won't affect rr per se but it'd be cleaner code.
+ * 64-bit only for now, since lseek and pread64 need special handling for
+ * 32-bit.
+ * Basically we break down the read into three syscalls lseek, clone and
+ * read-from-clone, each of which is individually syscall-buffered.
+ * Crucially, the read-from-clone syscall does NOT store data in the syscall
+ * buffer; instead, we perform the syscall during replay, assuming that
+ * cloned_file_data_fd is open to the same file during replay.
+ * Reads that hit EOF are rejected by the CLONE_RANGE ioctl so we take the
+ * slow path. That's OK.
+ * There is a possible race here: between cloning the data and reading from
+ * |fd|, |fd|'s data may be overwritten, in which case the data read during
+ * replay will not match the data read during recording, causing divergence.
+ * I don't see any performant way to avoid this race; I tried reading from
+ * the cloned data instead of |fd|, but that is very slow because readahead
+ * doesn't work. (The cloned data file always ends at the current offset so
+ * there is nothing to readahead.) However, if an application triggers this
+ * race, it's almost certainly a bad bug because Linux can return any
+ * interleaving of old+new data for the read even without rr.
+ */
+ if (buf && count >= CLONE_SIZE_THRESHOLD &&
+ thread_locals->cloned_file_data_fd >= 0 && is_bufferable_fd(fd) &&
+ sizeof(void*) == 8 && !(count & 4095)) {
+ struct syscall_info lseek_call = { SYS_lseek,
+ { fd, 0, SEEK_CUR, 0, 0, 0 } };
+ off_t lseek_ret = privileged_sys_generic_nonblocking_fd(&lseek_call);
+ if (lseek_ret >= 0 && !(lseek_ret & 4095)) {
+ struct btrfs_ioctl_clone_range_args ioctl_args;
+ int ioctl_ret;
+ void* ioctl_ptr = prep_syscall();
+ ioctl_args.src_fd = fd;
+ ioctl_args.src_offset = lseek_ret;
+ ioctl_args.src_length = count;
+ ioctl_args.dest_offset = thread_locals->cloned_file_data_offset;
+
+ /* Don't call sys_ioctl here; cloned_file_data_fd has syscall buffering
+ * disabled for it so rr can reject attempts to close/dup to it. But
+ * we want to allow syscall buffering of this ioctl on it.
+ */
+ if (!start_commit_buffered_syscall(SYS_ioctl, ioctl_ptr, WONT_BLOCK)) {
+ struct syscall_info ioctl_call = { SYS_ioctl,
+ { thread_locals->cloned_file_data_fd,
+ BTRFS_IOC_CLONE_RANGE,
+ (long)&ioctl_args, 0, 0, 0 } };
+ ioctl_ret = privileged_traced_raw_syscall(&ioctl_call);
+ } else {
+ ioctl_ret =
+ privileged_untraced_syscall3(SYS_ioctl, thread_locals->cloned_file_data_fd,
+ BTRFS_IOC_CLONE_RANGE, &ioctl_args);
+ ioctl_ret = commit_raw_syscall(SYS_ioctl, ioctl_ptr, ioctl_ret);
+ }
+
+ if (ioctl_ret >= 0) {
+ struct syscall_info read_call = { SYS_read,
+ { fd, (long)buf, count, 0, 0, 0 } };
+ thread_locals->cloned_file_data_offset += count;
+
+ replay_only_syscall3(SYS_dup3, thread_locals->cloned_file_data_fd, fd, 0);
+
+ ptr = prep_syscall();
+ if (count > thread_locals->usable_scratch_size) {
+ if (!start_commit_buffered_syscall(SYS_read, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(&read_call);
+ }
+ ret = untraced_replayed_syscall3(SYS_read, fd, buf, count);
+ } else {
+ if (!start_commit_buffered_syscall(SYS_read, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(&read_call);
+ }
+ ret = untraced_replayed_syscall3(SYS_read, fd,
+ thread_locals->scratch_buf, count);
+ copy_output_buffer(ret, NULL, buf, thread_locals->scratch_buf);
+ }
+ // Do this now before we finish processing the syscallbuf record.
+ // This means the syscall will be executed in
+ // ReplaySession::flush_syscallbuf instead of
+ // ReplaySession::enter_syscall or something similar.
+ replay_only_syscall1(SYS_close, fd);
+ ret = commit_raw_syscall(SYS_read, ptr, ret);
+ return ret;
+ }
+ }
+ }
+
+ ptr = prep_syscall_for_fd(fd);
+
+ assert(syscallno == call->no);
+
+ if (buf && count > 0) {
+ buf2 = ptr;
+ ptr += count;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, fd, buf2, count);
+ ptr = copy_output_buffer(ret, ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+/* On x86-32, pread/pwrite take the offset in two registers. We don't bother
+ * handling that.
+ */
+#if !defined(__i386__)
+static long sys_pread64(struct syscall_info* call) {
+ const int syscallno = SYS_pread64;
+ int fd = call->args[0];
+ void* buf = (void*)call->args[1];
+ size_t count = call->args[2];
+ off_t offset = call->args[3];
+
+ void* ptr;
+ void* buf2 = NULL;
+ long ret;
+
+ ptr = prep_syscall_for_fd(fd);
+
+ assert(syscallno == call->no);
+
+ if (buf && count > 0) {
+ buf2 = ptr;
+ ptr += count;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall4(syscallno, fd, buf2, count, offset);
+ ptr = copy_output_buffer(ret, ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#if defined(SYS_readlink)
+static long sys_readlink(struct syscall_info* call) {
+ const int syscallno = SYS_readlink;
+ const char* path = (const char*)call->args[0];
+ char* buf = (char*)call->args[1];
+ int bufsiz = call->args[2];
+
+ void* ptr = prep_syscall();
+ char* buf2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (buf && bufsiz > 0) {
+ buf2 = ptr;
+ ptr += bufsiz;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, path, buf2, bufsiz);
+ ptr = copy_output_buffer(ret, ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+static long sys_readlinkat(struct syscall_info* call, int privileged) {
+ const int syscallno = SYS_readlinkat;
+ int dirfd = call->args[0];
+ const char* path = (const char*)call->args[1];
+ char* buf = (char*)call->args[2];
+ int bufsiz = call->args[3];
+
+ void* ptr = prep_syscall();
+ char* buf2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (buf && bufsiz > 0) {
+ buf2 = ptr;
+ ptr += bufsiz;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ if (privileged) {
+ return privileged_traced_raw_syscall(call);
+ }
+ return traced_raw_syscall(call);
+ }
+
+ if (privileged) {
+ ret = privileged_untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz);
+ } else {
+ ret = untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz);
+ }
+ ptr = copy_output_buffer(ret, ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+#if defined(SYS_socketcall)
+static long sys_socketcall_recv(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Reading from a socket could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_socketcall;
+ long* args = (long*)call->args[1];
+ int sockfd = args[0];
+ void* buf = (void*)args[1];
+ size_t len = args[2];
+ unsigned int flags = args[3];
+ unsigned long new_args[4];
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ void* buf2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (buf && len > 0) {
+ buf2 = ptr;
+ ptr += len;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ new_args[0] = sockfd;
+ new_args[1] = (unsigned long)buf2;
+ new_args[2] = len;
+ new_args[3] = flags;
+ ret = untraced_syscall2(SYS_socketcall, SYS_RECV, new_args);
+ /* Account for MSG_TRUNC */
+ ptr = copy_output_buffer(ret_buf_len(ret, len), ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_socketcall(struct syscall_info* call) {
+ switch (call->args[0]) {
+ case SYS_RECV:
+ return sys_socketcall_recv(call);
+ default:
+ return traced_raw_syscall(call);
+ }
+}
+#endif
+
+#ifdef SYS_recvfrom
+static long sys_recvfrom(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Reading from a socket could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_recvfrom;
+ int sockfd = call->args[0];
+ void* buf = (void*)call->args[1];
+ size_t len = call->args[2];
+ int flags = call->args[3];
+ /* struct sockaddr isn't useful here since some sockaddrs are bigger than
+ * it. To avoid making false assumptions, treat the sockaddr parameter
+ * as an untyped buffer.
+ */
+ void* src_addr = (void*)call->args[4];
+ socklen_t* addrlen = (socklen_t*)call->args[5];
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ void* buf2 = NULL;
+ struct sockaddr* src_addr2 = NULL;
+ socklen_t* addrlen2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+ /* If addrlen is NULL then src_addr must also be null */
+ assert(addrlen || !src_addr);
+
+ if (src_addr) {
+ src_addr2 = ptr;
+ ptr += *addrlen;
+ }
+ if (addrlen) {
+ addrlen2 = ptr;
+ ptr += sizeof(*addrlen);
+ }
+ if (buf && len > 0) {
+ buf2 = ptr;
+ ptr += len;
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ if (addrlen) {
+ memcpy_input_parameter(addrlen2, addrlen, sizeof(*addrlen2));
+ }
+ ret = untraced_syscall6(syscallno, sockfd, buf2, len, flags, src_addr2,
+ addrlen2);
+
+ if (ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ if (src_addr2) {
+ socklen_t actual_size = *addrlen2;
+ if (actual_size > *addrlen) {
+ actual_size = *addrlen;
+ }
+ local_memcpy(src_addr, src_addr2, actual_size);
+ }
+ if (addrlen2) {
+ *addrlen = *addrlen2;
+ }
+ }
+ ptr = copy_output_buffer(ret_buf_len(ret, len), ptr, buf, buf2);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#ifdef SYS_recvmsg
+
+/* These macros are from musl Copyright © 2005-2020 Rich Felker, et al. (MIT LICENSE) */
+#define __CMSG_LEN(cmsg) (((cmsg)->cmsg_len + sizeof(long) - 1) & ~(long)(sizeof(long) - 1))
+#define __CMSG_NEXT(cmsg) ((unsigned char *)(cmsg) + __CMSG_LEN(cmsg))
+#define __MHDR_END(mhdr) ((unsigned char *)(mhdr)->msg_control + (mhdr)->msg_controllen)
+
+#define CMSG_DATA(cmsg) ((unsigned char *) (((struct cmsghdr *)(cmsg)) + 1))
+#define CMSG_NXTHDR(mhdr, cmsg) ((cmsg)->cmsg_len < sizeof (struct cmsghdr) || \
+ (__CMSG_LEN(cmsg) + sizeof(struct cmsghdr) >= (unsigned long)(__MHDR_END(mhdr) - (unsigned char *)(cmsg))) \
+ ? 0 : (struct cmsghdr *)__CMSG_NEXT(cmsg))
+#define CMSG_FIRSTHDR(mhdr) ((size_t) (mhdr)->msg_controllen >= sizeof (struct cmsghdr) ? (struct cmsghdr *) (mhdr)->msg_control : (struct cmsghdr *) 0)
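+/* For example (illustrative values, LP64 where sizeof(long) == 8): an
+ * SCM_RIGHTS message carrying a single int has cmsg_len == 20, which
+ * __CMSG_LEN rounds up to 24, so CMSG_NXTHDR advances 24 bytes to the next
+ * aligned header, or yields 0 when too little space remains before
+ * __MHDR_END(mhdr) for another complete header. */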
+
+struct cmsghdr {
+ __kernel_size_t cmsg_len;
+ int cmsg_level;
+ int cmsg_type;
+};
+
+struct msghdr /* struct user_msghdr in the kernel */ {
+ void* msg_name;
+ int msg_namelen;
+ struct iovec* msg_iov;
+ __kernel_size_t msg_iovlen;
+ void* msg_control;
+ __kernel_size_t msg_controllen;
+ unsigned int msg_flags;
+};
+
+#define SCM_RIGHTS 0x01
+#define SOL_PACKET 263
+
+static int msg_received_file_descriptors(struct msghdr* msg) {
+ struct cmsghdr* cmh;
+ for (cmh = CMSG_FIRSTHDR(msg); cmh; cmh = CMSG_NXTHDR(msg, cmh)) {
+ if (cmh->cmsg_level == SOL_SOCKET && cmh->cmsg_type == SCM_RIGHTS) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static long sys_recvmsg(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Reading from a socket could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_recvmsg;
+ int sockfd = call->args[0];
+ struct msghdr* msg = (struct msghdr*)call->args[1];
+ int flags = call->args[2];
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ long ret;
+ struct msghdr* msg2;
+ void* ptr_base = ptr;
+ void* ptr_overwritten_end;
+ void* ptr_bytes_start;
+ void* ptr_end;
+ size_t i;
+
+ assert(syscallno == call->no);
+
+ /* Compute final buffer size up front, before writing syscall inputs to the
+ * buffer. Thus if we decide not to buffer this syscall, we bail out
+ * before trying to write to a buffer that won't be recorded and may be
+ * invalid (e.g. overflow).
+ */
+ ptr += sizeof(struct msghdr) + sizeof(struct iovec) * msg->msg_iovlen;
+ if (msg->msg_name) {
+ ptr += msg->msg_namelen;
+ }
+ if (msg->msg_control) {
+ ptr += msg->msg_controllen;
+ }
+ for (i = 0; i < msg->msg_iovlen; ++i) {
+ ptr += msg->msg_iov[i].iov_len;
+ }
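+ /* For example (illustrative numbers only): two 512-byte iovecs, a 16-byte
+ * msg_name and 64 bytes of msg_control reserve
+ * sizeof(struct msghdr) + 2 * sizeof(struct iovec) + 16 + 64 + 1024 bytes. */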
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ /**
+ * The kernel only writes to the struct msghdr and the iov buffers. We must
+ * not overwrite that data (except using memcpy_input_parameter) during
+ * replay. For the rest of the data, the values we write here during replay
+ * are guaranteed to match what was recorded in the buffer.
+ * We can't rely on the values we wrote here during recording also being
+ * here during replay since the syscall might have been aborted and our
+ * written data not recorded.
+ */
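+ /* Scratch layout built below (a sketch of the code that follows):
+ * [msghdr copy][iovec array][msg_name][msg_control][iov data buffers ...]
+ * where msg_name and msg_control appear only if the caller supplied them. */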
+ msg2 = ptr = ptr_base;
+ memcpy_input_parameter(msg2, msg, sizeof(*msg));
+ ptr += sizeof(struct msghdr);
+ msg2->msg_iov = ptr;
+ ptr += sizeof(struct iovec) * msg->msg_iovlen;
+ ptr_overwritten_end = ptr;
+ if (msg->msg_name) {
+ msg2->msg_name = ptr;
+ ptr += msg->msg_namelen;
+ }
+ if (msg->msg_control) {
+ msg2->msg_control = ptr;
+ ptr += msg->msg_controllen;
+ }
+ ptr_bytes_start = ptr;
+ for (i = 0; i < msg->msg_iovlen; ++i) {
+ msg2->msg_iov[i].iov_base = ptr;
+ ptr += msg->msg_iov[i].iov_len;
+ msg2->msg_iov[i].iov_len = msg->msg_iov[i].iov_len;
+ }
+
+ ret = untraced_syscall3(syscallno, sockfd, msg2, flags);
+
+ if (ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ size_t bytes = ret;
+ size_t i;
+ if (msg->msg_name) {
+ local_memcpy(msg->msg_name, msg2->msg_name, msg2->msg_namelen);
+ }
+ msg->msg_namelen = msg2->msg_namelen;
+ if (msg->msg_control) {
+ local_memcpy(msg->msg_control, msg2->msg_control, msg2->msg_controllen);
+ }
+ msg->msg_controllen = msg2->msg_controllen;
+ ptr_end = ptr_bytes_start + bytes;
+ for (i = 0; i < msg->msg_iovlen; ++i) {
+ long copy_bytes =
+ bytes < msg->msg_iov[i].iov_len ? bytes : msg->msg_iov[i].iov_len;
+ local_memcpy(msg->msg_iov[i].iov_base, msg2->msg_iov[i].iov_base,
+ copy_bytes);
+ bytes -= copy_bytes;
+ }
+ msg->msg_flags = msg2->msg_flags;
+
+ if (msg_received_file_descriptors(msg)) {
+ /* When we reach a safe point, notify rr that the control message with
+ * file descriptors was received.
+ */
+ thread_locals->notify_control_msg = msg;
+ }
+ } else {
+ /* Allocate at least enough record space to cover the data we overwrote above.
+ * We don't want to start the next record overlapping that data, since then
+ * we'll corrupt it during replay.
+ */
+ ptr_end = ptr_overwritten_end;
+ }
+ return commit_raw_syscall(syscallno, ptr_end, ret);
+}
+#endif
+
+#ifdef SYS_sendmsg
+static long sys_sendmsg(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Sending to a socket could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_sendmsg;
+ int sockfd = call->args[0];
+ struct msghdr* msg = (struct msghdr*)call->args[1];
+ int flags = call->args[2];
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, sockfd, msg, flags);
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#ifdef SYS_sendto
+static long sys_sendto(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Sending to a socket could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_sendto;
+ int sockfd = call->args[0];
+ void* buf = (void*)call->args[1];
+ size_t len = call->args[2];
+ int flags = call->args[3];
+ const struct sockaddr* dest_addr = (const struct sockaddr*)call->args[4];
+ socklen_t addrlen = call->args[5];
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret =
+ untraced_syscall6(syscallno, sockfd, buf, len, flags, dest_addr, addrlen);
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#ifdef SYS_setsockopt
+static long sys_setsockopt(struct syscall_info* call) {
+ const int syscallno = SYS_setsockopt;
+ int sockfd = call->args[0];
+ int level = call->args[1];
+ int optname = call->args[2];
+ void* optval = (void*)call->args[3];
+ socklen_t optlen = (socklen_t)call->args[4];
+
+ if (level == SOL_PACKET &&
+ (optname == PACKET_RX_RING || optname == PACKET_TX_RING)) {
+ // Let rr intercept this (and probably disable it)
+ return traced_raw_syscall(call);
+ }
+ if (level == SOL_NETLINK &&
+ (optname == NETLINK_RX_RING || optname == NETLINK_TX_RING)) {
+ // Let rr intercept this (and probably disable it)
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall5(syscallno, sockfd, level, optname, optval, optlen);
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#ifdef SYS_getsockopt
+static long sys_getsockopt(struct syscall_info* call) {
+ const int syscallno = SYS_getsockopt;
+ int sockfd = call->args[0];
+ int level = call->args[1];
+ int optname = call->args[2];
+ void* optval = (void*)call->args[3];
+ socklen_t* optlen = (socklen_t*)call->args[4];
+ socklen_t* optlen2;
+ void* optval2;
+
+ if (!optlen || !optval) {
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ long ret;
+
+ optlen2 = ptr;
+ ptr += sizeof(*optlen2);
+ optval2 = ptr;
+ ptr += *optlen;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ memcpy_input_parameter(optlen2, optlen, sizeof(*optlen2));
+ // Some variants of getsockopt do use the initial content of *optval
+ // (e.g. SOL_IP + IPT_SO_GET_INFO) so we need to copy it.
+ memcpy_input_parameter(optval2, optval, *optlen);
+
+ // We may need to manually restart this syscall due to a kernel bug that
+ // returns EFAULT when the syscall is interrupted by a signal; in that case
+ // we have no normal way to recover the actual arg1 on aarch64.
+ // Pass arg1 in the stack argument so that the tracer can use it.
+ ret = untraced_syscall_full(syscallno, sockfd, level, optname,
+ (long)optval2, (long)optlen2, 0,
+ RR_PAGE_SYSCALL_UNTRACED_RECORDING_ONLY, sockfd, 0);
+
+ if (ret >= 0) {
+ socklen_t val_len = *optlen < *optlen2 ? *optlen : *optlen2;
+ local_memcpy(optval, optval2, val_len);
+ local_memcpy(optlen, optlen2, sizeof(*optlen));
+ }
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#ifdef SYS_getsockname
+static long sys_getsockname(struct syscall_info* call) {
+ const int syscallno = SYS_getsockname;
+ int sockfd = call->args[0];
+ struct sockaddr* addr = (struct sockaddr*)call->args[1];
+ socklen_t* addrlen = (socklen_t*)call->args[2];
+ socklen_t* addrlen2;
+ struct sockaddr* addr2 = NULL;
+
+ void* ptr = prep_syscall_for_fd(sockfd);
+ long ret;
+
+ addrlen2 = ptr;
+ ptr += sizeof(*addrlen2);
+ if (addr) {
+ addr2 = ptr;
+ ptr += *addrlen;
+ }
+
+ assert(syscallno == call->no);
+
+ if (addrlen) {
+ memcpy_input_parameter(addrlen2, addrlen, sizeof(*addrlen2));
+ }
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, sockfd, addr2, addrlen2);
+
+ if (ret >= 0) {
+ if (addr) {
+ socklen_t addr_len = *addrlen < *addrlen2 ? *addrlen : *addrlen2;
+ local_memcpy(addr, addr2, addr_len);
+ }
+ local_memcpy(addrlen, addrlen2, sizeof(*addrlen));
+ }
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#ifdef SYS_socketpair
+typedef int two_ints[2];
+static long sys_socketpair(struct syscall_info* call) {
+ const int syscallno = SYS_socketpair;
+ int domain = call->args[0];
+ int type = call->args[1];
+ int protocol = call->args[2];
+ two_ints* sv = (two_ints*)call->args[3];
+
+ void* ptr = prep_syscall();
+ two_ints* sv2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ sv2 = ptr;
+ ptr += sizeof(*sv2);
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall4(syscallno, domain, type, protocol, sv2);
+ if (ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(sv, sv2, sizeof(*sv));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+static long sys_uname(struct syscall_info* call) {
+ const int syscallno = SYS_uname;
+ void* buf = (void*)call->args[0];
+
+ void* ptr = prep_syscall();
+ void* buf2;
+ long ret;
+ size_t bufsize = sizeof(struct new_utsname);
+
+ assert(syscallno == call->no);
+
+ buf2 = ptr;
+ ptr += bufsize;
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall1(syscallno, buf2);
+ if (ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(buf, buf2, bufsize);
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+
+#if defined(SYS_time)
+static long sys_time(struct syscall_info* call) {
+ const int syscallno = SYS_time;
+ __kernel_time_t* tp = (__kernel_time_t*)call->args[0];
+
+ void* ptr = prep_syscall();
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall1(syscallno, NULL);
+ if (tp) {
+ /* No error is possible here. */
+ *tp = ret;
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+#if defined(__i386__)
+typedef struct stat64 stat64_t;
+#else
+typedef struct stat stat64_t;
+#endif
+static long sys_xstat64(struct syscall_info* call) {
+ const int syscallno = call->no;
+ /* NB: this arg may be a string or an fd, but for the purposes
+ * of this generic helper we don't care. */
+ long what = call->args[0];
+ stat64_t* buf = (stat64_t*)call->args[1];
+
+ /* Like open(), not arming the desched event because it's not
+ * needed for correctness, and there are no data to suggest
+ * whether it's a good idea perf-wise. */
+ void* ptr = prep_syscall();
+ stat64_t* buf2 = NULL;
+ long ret;
+
+ if (buf) {
+ buf2 = ptr;
+ ptr += sizeof(*buf2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall2(syscallno, what, buf2);
+ if (buf2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(buf, buf2, sizeof(*buf));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+#ifdef SYS_statx
+/* Like sys_xstat64, but with different arguments */
+static long sys_statx(struct syscall_info* call) {
+ const int syscallno = call->no;
+ struct statx* buf = (struct statx*)call->args[4];
+
+ void* ptr = prep_syscall();
+ struct statx* buf2 = NULL;
+ long ret;
+
+ if (buf) {
+ buf2 = ptr;
+ ptr += sizeof(*buf2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall5(syscallno,
+ call->args[0], call->args[1], call->args[2], call->args[3],
+ buf2);
+ if (buf2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(buf, buf2, sizeof(*buf));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+static long sys_fstatat(struct syscall_info* call) {
+ const int syscallno = call->no;
+ stat64_t* buf = (stat64_t*)call->args[2];
+
+ /* Like stat(), not arming the desched event because it's not
+ * needed for correctness, and there are no data to suggest
+ * whether it's a good idea perf-wise. */
+ void* ptr = prep_syscall();
+ stat64_t* buf2 = NULL;
+ long ret;
+
+ if (buf) {
+ buf2 = ptr;
+ ptr += sizeof(*buf2);
+ }
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall4(syscallno,
+ call->args[0], call->args[1], buf2, call->args[3]);
+ if (buf2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(buf, buf2, sizeof(*buf));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_quotactl(struct syscall_info* call) {
+ const int syscallno = call->no;
+ int cmd = call->args[0];
+ const char* special = (const char*)call->args[1];
+ int id = call->args[2];
+ void* addr = (void*)call->args[3];
+
+ if ((cmd >> SUBCMDSHIFT) != Q_GETQUOTA) {
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall();
+ struct if_dqblk* buf2 = NULL;
+ long ret;
+
+ if (addr) {
+ buf2 = ptr;
+ ptr += sizeof(*buf2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall4(syscallno, cmd, special, id, buf2);
+ if (buf2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(addr, buf2, sizeof(*buf2));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_statfs(struct syscall_info* call) {
+ const int syscallno = call->no;
+ /* NB: this arg may be a string or an fd, but for the purposes
+ * of this generic helper we don't care. */
+ long what = call->args[0];
+ struct statfs* buf = (struct statfs*)call->args[1];
+
+ /* Like open(), not arming the desched event because it's not
+ * needed for correctness, and there are no data to suggest
+ * whether it's a good idea perf-wise. */
+ void* ptr = prep_syscall();
+ struct statfs* buf2 = NULL;
+ long ret;
+
+ if (buf) {
+ buf2 = ptr;
+ ptr += sizeof(*buf2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall2(syscallno, what, buf2);
+ if (buf2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(buf, buf2, sizeof(*buf));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_write(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Writing to a pipe or FIFO could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ const int syscallno = SYS_write;
+ int fd = call->args[0];
+ const void* buf = (const void*)call->args[1];
+ size_t count = call->args[2];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, fd_write_blocks(fd))) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, fd, buf, count);
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+/* On x86-32, pread/pwrite take the offset in two registers. We don't bother
+ * handling that.
+ */
+#if !defined(__i386__)
+static long sys_pwrite64(struct syscall_info* call) {
+ const int syscallno = SYS_pwrite64;
+ int fd = call->args[0];
+ const void* buf = (const void*)call->args[1];
+ size_t count = call->args[2];
+ off_t offset = call->args[3];
+
+ enum syscallbuf_fd_classes cls = fd_class(fd);
+ if (cls == FD_CLASS_TRACED) {
+ return traced_raw_syscall(call);
+ }
+ void* ptr = prep_syscall();
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, fd_write_blocks(fd))) {
+ return traced_raw_syscall(call);
+ }
+
+ long ret;
+ if (cls == FD_CLASS_PROC_MEM) {
+ ret = untraced_replay_assist_syscall4(syscallno, fd, buf, count, offset);
+ } else {
+ ret = untraced_syscall4(syscallno, fd, buf, count, offset);
+ }
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+#endif
+
+static long sys_writev(struct syscall_info* call) {
+ if (force_traced_syscall_for_chaos_mode()) {
+ /* Writing to a pipe or FIFO could unblock a higher priority task */
+ return traced_raw_syscall(call);
+ }
+
+ int syscallno = SYS_writev;
+ int fd = call->args[0];
+ const struct iovec* iov = (const struct iovec*)call->args[1];
+ unsigned long iovcnt = call->args[2];
+
+ void* ptr = prep_syscall_for_fd(fd);
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, fd_write_blocks(fd))) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall3(syscallno, fd, iov, iovcnt);
+
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_prctl(struct syscall_info* call) {
+ int syscallno = SYS_prctl;
+ long option = call->args[0];
+ unsigned long arg2 = call->args[1];
+ unsigned long arg3 = call->args[2];
+ unsigned long arg4 = call->args[3];
+ unsigned long arg5 = call->args[4];
+
+ if (option != PR_SET_NAME) {
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall();
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_replay_assist_syscall5(syscallno, option, arg2, arg3, arg4, arg5);
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_set_robust_list(struct syscall_info* call) {
+ int syscallno = SYS_set_robust_list;
+ void* head = (void*)call->args[0];
+ size_t len = call->args[1];
+ long ret;
+
+ assert(syscallno == call->no);
+
+ /* Avoid len values we don't support via our buffering mechanism */
+ if (len == 0 || len >= UINT32_MAX) {
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall();
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall2(syscallno, head, len);
+ if (!ret) {
+ thread_locals->robust_list.head = head;
+ thread_locals->robust_list.len = len;
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+#if defined(SYS_rseq)
+static long sys_rseq(struct syscall_info* call) {
+ int syscallno = SYS_rseq;
+ struct rr_rseq* rseq = (struct rr_rseq*)call->args[0];
+ size_t rseq_len = call->args[1];
+ int flags = call->args[2];
+ uint32_t sig = call->args[3];
+
+ assert(syscallno == call->no);
+
+ if (flags || ((uintptr_t)rseq & 31) || rseq_len != sizeof(*rseq) ||
+ thread_locals->rseq_called || globals.cpu_binding < 0) {
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall();
+ /* Allow buffering only for the simplest case: setting up the
+ initial rseq, all parameters OK and CPU binding in place. */
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ /* We don't actually need to make a syscall since rr is
+ going to emulate everything. */
+ rseq->cpu_id_start = rseq->cpu_id = globals.cpu_binding;
+ thread_locals->rseq_called = 1;
+ thread_locals->rseq.rseq = rseq;
+ thread_locals->rseq.len = rseq_len;
+ thread_locals->rseq.sig = sig;
+ /* We do need to commit a syscallbuf record to ensure that flushing
+ happens with associated processing. */
+ return commit_raw_syscall(syscallno, ptr, 0);
+}
+#endif
+
+static long sys_ptrace(struct syscall_info* call) {
+ int syscallno = SYS_ptrace;
+ long request = call->args[0];
+ pid_t pid = call->args[1];
+ void* addr = (void*)call->args[2];
+ void* data = (void*)call->args[3];
+
+ if (request != PTRACE_PEEKDATA || !data) {
+ return traced_raw_syscall(call);
+ }
+
+ /* We try to emulate PTRACE_PEEKDATA using process_vm_readv. That might not
+ * work for permissions reasons; if it fails for any reason, we retry with
+ * a traced syscall.
+ * This does mean that if a process issues a PTRACE_PEEKDATA while not
+ * actually ptracing the target, it might succeed under rr whereas normally
+ * it would have failed. That's hard to avoid and unlikely to be a real
+ * problem in practice (typically it would fail on some other ptrace call like
+ * PTRACE_GETREGS before or after the PEEKDATA).
+ */
+ void* ptr = prep_syscall();
+ long ret;
+ void* data2;
+
+ assert(syscallno == call->no);
+ syscallno = SYS_process_vm_readv;
+
+ data2 = ptr;
+ ptr += sizeof(long);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ struct iovec local_iov = { data2, sizeof(long) };
+ struct iovec remote_iov = { addr, sizeof(long) };
+ ret = untraced_syscall6(syscallno, pid, &local_iov, 1, &remote_iov, 1, 0);
+ if (ret > 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(data, data2, ret);
+ }
+ commit_raw_syscall(syscallno, ptr, ret);
+
+ if (ret != sizeof(long)) {
+ return traced_raw_syscall(call);
+ }
+ return ret;
+}
+
+static long sys_getrusage(struct syscall_info* call) {
+ const int syscallno = SYS_getrusage;
+ int who = (int)call->args[0];
+ struct rusage* buf = (struct rusage*)call->args[1];
+ void* ptr = prep_syscall();
+ long ret;
+ struct rusage* buf2 = NULL;
+
+ assert(syscallno == call->no);
+
+ if (buf) {
+ buf2 = ptr;
+ ptr += sizeof(struct rusage);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ ret = untraced_syscall2(syscallno, who, buf2);
+ if (buf2 && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ local_memcpy(buf, buf2, sizeof(*buf));
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_rt_sigprocmask(struct syscall_info* call) {
+ const int syscallno = SYS_rt_sigprocmask;
+ long ret;
+ kernel_sigset_t modified_set;
+ void* oldset2;
+ struct syscallbuf_hdr* hdr;
+
+ if (call->args[3] != sizeof(kernel_sigset_t)) {
+ // Unusual sigset size. Bail.
+ return traced_raw_syscall(call);
+ }
+
+ void* ptr = prep_syscall();
+
+ int how = (int)call->args[0];
+ const kernel_sigset_t* set = (const kernel_sigset_t*)call->args[1];
+ kernel_sigset_t* oldset = (kernel_sigset_t*)call->args[2];
+
+ oldset2 = ptr;
+ ptr += sizeof(kernel_sigset_t);
+
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ if (set && (how == SIG_BLOCK || how == SIG_SETMASK)) {
+ local_memcpy(&modified_set, set, sizeof(kernel_sigset_t));
+ // SIGSTKFLT (PerfCounters::TIME_SLICE_SIGNAL) and the desched signal
+ // (SIGPWR / SYSCALLBUF_DESCHED_SIGNAL by default) are used by rr.
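+ // (For example, with those defaults on x86: SIGSTKFLT == 16 and
+ // SIGPWR == 30, so bits 15 and 29 of the mask are cleared below.)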
+ modified_set &=
+ ~(((uint64_t)1) << (SIGSTKFLT - 1)) &
+ ~(((uint64_t)1) << (globals.desched_sig - 1));
+ set = &modified_set;
+ }
+
+ hdr = buffer_hdr();
+ hdr->in_sigprocmask_critical_section = 1;
+
+ ret =
+ untraced_syscall4(syscallno, how, set, oldset2, sizeof(kernel_sigset_t));
+ if (ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ if (oldset) {
+ local_memcpy(oldset, oldset2, sizeof(kernel_sigset_t));
+ }
+ if (set) {
+ kernel_sigset_t previous_set;
+ local_memcpy(&previous_set, oldset2, sizeof(kernel_sigset_t));
+ switch (how) {
+ case SIG_UNBLOCK:
+ previous_set &= ~*set;
+ break;
+ case SIG_BLOCK:
+ previous_set |= *set;
+ break;
+ case SIG_SETMASK:
+ previous_set = *set;
+ break;
+ }
+ hdr->blocked_sigs = previous_set;
+ // We must update the generation last to ensure that an update is not
+ // lost.
+ ++hdr->blocked_sigs_generation;
+ }
+ }
+ hdr->in_sigprocmask_critical_section = 0;
+
+ commit_raw_syscall(syscallno, ptr, ret);
+
+ if (ret == -EAGAIN) {
+ // The rr supervisor emulated EAGAIN because there was a pending signal.
+ // Retry using a traced syscall so the pending signal(s) can be delivered.
+ return traced_raw_syscall(call);
+ }
+ return ret;
+}
+
+static long sys_sigaltstack(struct syscall_info* call) {
+ const int syscallno = SYS_sigaltstack;
+ stack_t* ss = (void*)call->args[0];
+ stack_t* old_ss = (void*)call->args[1];
+
+ void* ptr = prep_syscall();
+ stack_t* old_ss2 = NULL;
+ long ret;
+
+ assert(syscallno == call->no);
+
+ if (old_ss) {
+ old_ss2 = ptr;
+ ptr += sizeof(*old_ss2);
+ }
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+ ret = untraced_syscall2(syscallno, ss, old_ss2);
+ if (old_ss && ret >= 0 && !buffer_hdr()->failed_during_preparation) {
+ /* This is small and won't get optimized to a memcpy call outside
+ our library. */
+ *old_ss = *old_ss2;
+ }
+ return commit_raw_syscall(syscallno, ptr, ret);
+}
+
+static long sys_rrcall_rdtsc(struct syscall_info* call) {
+#if defined(__i386__) || defined(__x86_64__)
+ const int syscallno = SYS_rrcall_rdtsc;
+ uint32_t tsc[2];
+ void* ptr = prep_syscall();
+ void* buf = ptr;
+ ptr += 8;
+ if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
+ return traced_raw_syscall(call);
+ }
+
+ // Do an RDTSC without context-switching to rr. This is still a lot slower
+ // than a plain RDTSC. Maybe we could do something better with RDPMC...
+ privileged_unrecorded_syscall5(SYS_prctl, PR_SET_TSC, PR_TSC_ENABLE, 0, 0, 0);
+ rdtsc_recording_only(buf);
+ privileged_unrecorded_syscall5(SYS_prctl, PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
+
+ local_memcpy(tsc, buf, sizeof(tsc));
+ // Overwrite RDX (syscall arg 3) with the high half of the TSC; the low
+ // half is returned as the syscall result, mirroring RDTSC's EDX:EAX output.
+ call->args[2] = tsc[1];
+ return commit_raw_syscall(syscallno, ptr, tsc[0]);
+#else
+ (void)call;
+ fatal("RDTSC not supported in this architecture");
+ return 0;
+#endif
+}
+
+static long syscall_hook_internal(struct syscall_info* call) {
+ switch (call->no) {
+#define CASE(syscallname) \
+ case SYS_##syscallname: \
+ return sys_##syscallname(call)
+#define CASE_GENERIC_NONBLOCKING(syscallname) \
+ case SYS_##syscallname: \
+ return sys_generic_nonblocking(call)
+#define CASE_GENERIC_NONBLOCKING_FD(syscallname) \
+ case SYS_##syscallname: \
+ return sys_generic_nonblocking_fd(call)
+ CASE(rrcall_rdtsc);
+#if defined(SYS_access)
+ CASE_GENERIC_NONBLOCKING(access);
+#endif
+ CASE(clock_gettime);
+#if defined(SYS_clock_gettime64)
+ CASE(clock_gettime64);
+#endif
+ CASE_GENERIC_NONBLOCKING_FD(close);
+#if defined(SYS_creat)
+ CASE(creat);
+#endif
+ CASE_GENERIC_NONBLOCKING_FD(dup);
+#if defined(SYS_epoll_wait)
+case SYS_epoll_wait:
+#endif
+case SYS_epoll_pwait:
+ return sys_epoll_wait(call);
+#if defined(SYS_epoll_pwait2)
+ CASE(epoll_pwait2);
+#endif
+ CASE_GENERIC_NONBLOCKING_FD(fadvise64);
+ CASE_GENERIC_NONBLOCKING(fchmod);
+#if defined(SYS_fcntl64)
+ CASE(fcntl64);
+#else
+ CASE(fcntl);
+#endif
+ CASE(fgetxattr);
+ CASE(flistxattr);
+ CASE_GENERIC_NONBLOCKING_FD(fsetxattr);
+ CASE_GENERIC_NONBLOCKING_FD(ftruncate);
+ CASE(futex);
+#if defined(SYS_getdents)
+ CASE(getdents);
+#endif
+ CASE(getdents64);
+ CASE_GENERIC_NONBLOCKING(getegid);
+ CASE_GENERIC_NONBLOCKING(geteuid);
+ CASE_GENERIC_NONBLOCKING(getgid);
+ CASE_GENERIC_NONBLOCKING(getpid);
+ CASE_GENERIC_NONBLOCKING(getppid);
+ CASE(getrandom);
+ CASE(getrusage);
+ CASE_GENERIC_NONBLOCKING(gettid);
+ CASE(gettimeofday);
+ CASE_GENERIC_NONBLOCKING(getuid);
+ CASE(getxattr);
+ CASE(ioctl);
+#if defined(SYS_lchown)
+ CASE_GENERIC_NONBLOCKING(lchown);
+#endif
+ CASE(lgetxattr);
+ CASE(listxattr);
+ CASE(llistxattr);
+#if defined(SYS__llseek)
+ CASE(_llseek);
+#endif
+ CASE_GENERIC_NONBLOCKING_FD(lseek);
+ CASE(madvise);
+#if defined(SYS_mkdir)
+ CASE_GENERIC_NONBLOCKING(mkdir);
+#endif
+#if defined(SYS_mknod)
+ CASE_GENERIC_NONBLOCKING(mknod);
+#endif
+ CASE(mprotect);
+#if defined(SYS_open)
+ CASE(open);
+#endif
+ CASE(openat);
+#if defined(SYS_poll)
+ CASE(poll);
+#endif
+#if defined(SYS_ppoll)
+ CASE(ppoll);
+#endif
+ CASE(prctl);
+#if !defined(__i386__)
+ CASE(pread64);
+ CASE(pwrite64);
+#endif
+ CASE(ptrace);
+ CASE(quotactl);
+ CASE(read);
+#if defined(SYS_readlink)
+ CASE(readlink);
+#endif
+ case SYS_readlinkat:
+ return sys_readlinkat(call, 0);
+#if defined(SYS_recvfrom)
+ CASE(recvfrom);
+#endif
+#if defined(SYS_recvmsg)
+ CASE(recvmsg);
+#endif
+#if defined(SYS_rseq)
+ CASE(rseq);
+#endif
+#if defined(SYS_rmdir)
+ CASE_GENERIC_NONBLOCKING(rmdir);
+#endif
+ CASE(rt_sigprocmask);
+#if defined(SYS_sendmsg)
+ CASE(sendmsg);
+#endif
+#if defined(SYS_sendto)
+ CASE(sendto);
+#endif
+ CASE(set_robust_list);
+#if defined(SYS_setsockopt)
+ CASE(setsockopt);
+#endif
+#if defined(SYS_getsockopt)
+ CASE(getsockopt);
+#endif
+#if defined(SYS_getsockname)
+ CASE(getsockname);
+#endif
+ CASE_GENERIC_NONBLOCKING(setxattr);
+ CASE(sigaltstack);
+#if defined(SYS_socketcall)
+ CASE(socketcall);
+#endif
+#if defined(SYS_socketpair)
+ CASE(socketpair);
+#endif
+#if defined(SYS_symlink)
+ CASE_GENERIC_NONBLOCKING(symlink);
+#endif
+#if defined(SYS_time)
+ CASE(time);
+#endif
+ CASE_GENERIC_NONBLOCKING(truncate);
+ CASE(uname);
+#if defined(SYS_unlink)
+ CASE_GENERIC_NONBLOCKING(unlink);
+#endif
+ CASE_GENERIC_NONBLOCKING(unlinkat);
+ CASE_GENERIC_NONBLOCKING_FD(utimensat);
+ CASE(write);
+ CASE(writev);
+#if defined(SYS_fstat64)
+ case SYS_fstat64:
+#elif defined(SYS_fstat)
+ case SYS_fstat:
+#endif
+#if defined(SYS_lstat64)
+ case SYS_lstat64:
+#elif defined(SYS_lstat)
+ case SYS_lstat:
+#endif
+#if defined(SYS_stat64)
+ case SYS_stat64:
+#elif defined(SYS_stat)
+ case SYS_stat:
+#endif
+ return sys_xstat64(call);
+#if defined(SYS_statx)
+ case SYS_statx:
+ return sys_statx(call);
+#endif
+ case SYS_statfs:
+ case SYS_fstatfs:
+ return sys_statfs(call);
+#if defined(SYS_newfstatat)
+ case SYS_newfstatat:
+#elif defined(SYS_fstatat64)
+ case SYS_fstatat64:
+#endif
+ return sys_fstatat(call);
+#undef CASE
+#undef CASE_GENERIC_NONBLOCKING
+#undef CASE_GENERIC_NONBLOCKING_FD
+ default:
+ return traced_raw_syscall(call);
+ }
+}
+
+/* Delay for testing purposes */
+static void do_delay(void) {
+ int i;
+ int result = 0;
+ for (i = 0; i < 10000000; ++i) {
+ result += i * i;
+ }
+ // Make sure result is used so this doesn't get optimized away
+ impose_syscall_delay = result | 1;
+}
+
+/* Explicitly declare this as hidden so we can call it from
+ * _syscall_hook_trampoline without doing all sorts of special PIC handling.
+ */
+RR_HIDDEN long syscall_hook(struct syscall_info* call) {
+ // Initialize thread-local state if this is the first syscall for this
+ // thread.
+ init_thread();
+
+ if (!thread_locals->buffer || buffer_hdr()->locked) {
+ /* We may be reentering via a signal handler. Bail. */
+ return traced_raw_syscall(call);
+ }
+
+ thread_locals->original_syscall_parameters = call;
+
+ if (impose_syscall_delay) {
+ do_delay();
+ }
+
+ long result = syscall_hook_internal(call);
+ if (buffer_hdr() && buffer_hdr()->notify_on_syscall_hook_exit) {
+ // Sometimes a signal is delivered to interrupt an untraced syscall in
+ // a non-restartable way (e.g. seccomp SIGSYS). Those signals must be
+ // handled outside any syscallbuf transactions. We defer them until
+ // this SYS_rrcall_notify_syscall_hook_exit, which is triggered by rr
+ // setting notify_on_syscall_hook_exit. The parameters to the
+ // SYS_rrcall_notify_syscall_hook_exit are magical and fully control
+ // the syscall parameters and result seen by the signal handler.
+ //
+ // SYS_rrcall_notify_syscall_hook_exit will clear
+ // notify_on_syscall_hook_exit. Clearing it ourselves is tricky to get
+ // right without races.
+ //
+ // During recording, this flag is set when the recorder needs to delay
+ // delivery of a signal until we've stopped using the syscallbuf.
+ // During replay, this flag is set when the next event is entering a
+ // SYS_rrcall_notify_syscall_hook_exit.
+ //
+ // The correctness argument is as follows:
+ // Correctness requires that a) replay's setting of the flag happens before
+ // we read the flag in the call to syscall_hook that triggered the
+ // SYS_rrcall_notify_syscall_hook_exit and b) replay's setting of the flag
+ // must happen after we read the flag in the previous execution of
+ // syscall_hook.
+ // Condition a) holds as long as no events are recorded between the
+ // checking of the flag above and the execution of this syscall. This
+ // should be the case; no synchronous signals or syscalls are
+ // triggerable, all async signals other than SYSCALLBUF_DESCHED_SIGNAL
+ // are delayed, and SYSCALLBUF_DESCHED_SIGNAL shouldn't fire since we've
+ // disarmed the desched fd at this point. SYSCALLBUF_FLUSH events may be
+ // emitted when we process the SYS_rrcall_notify_syscall_hook_exit event,
+ // but replay of those events ends at the last flushed syscall, before
+ // we exit syscall_hook_internal.
+ // Condition b) failing would mean no new events were generated between
+ // testing the flag in the previous syscall_hook and the execution of this
+ // SYS_rrcall_notify_syscall_hook_exit. However, every invocation of
+ // syscall_hook_internal generates either a traced syscall or a syscallbuf
+ // record that would be flushed by SYSCALLBUF_FLUSH, so that can't
+ // happen.
+ result = _raw_syscall(SYS_rrcall_notify_syscall_hook_exit, call->args[0],
+ call->args[1], call->args[2], call->args[3],
+ call->args[4], call->args[5],
+ RR_PAGE_SYSCALL_PRIVILEGED_TRACED, result, call->no);
+ }
+ // Do work that can only be safely done after syscallbuf can be flushed
+ if (thread_locals->notify_control_msg) {
+ privileged_traced_syscall1(SYS_rrcall_notify_control_msg,
+ thread_locals->notify_control_msg);
+ thread_locals->notify_control_msg = NULL;
+ }
+ thread_locals->original_syscall_parameters = NULL;
+ return result;
+}
diff --git a/rr/android/x86_64/share/rr/src/preload/syscallbuf.h b/rr/android/x86_64/share/rr/src/preload/syscallbuf.h
new file mode 100644
index 0000000..84e87d3
--- /dev/null
+++ b/rr/android/x86_64/share/rr/src/preload/syscallbuf.h
@@ -0,0 +1,15 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#ifndef RR_SYSCALLBUF_H_
+#define RR_SYSCALLBUF_H_
+
+struct timespec;
+
+#define RR_HIDDEN __attribute__((visibility("hidden")))
+
+RR_HIDDEN extern struct preload_globals globals;
+
+RR_HIDDEN extern char impose_syscall_delay;
+RR_HIDDEN extern char impose_spurious_desched;
+
+#endif /* RR_SYSCALLBUF_H_ */